multi: add tor connection healthcheck

This commit adds a new health check, tor connection, to our liveness
monitor. The server creation is also refactored so that all health
checks are created within a single function, createLivenessMonitor.
This commit is contained in:
yyforyongyu 2021-09-27 20:40:41 +08:00
parent ca13750b2c
commit f41f5c7fa6
No known key found for this signature in database
GPG Key ID: 9BCD95C4FF296868
4 changed files with 98 additions and 29 deletions

View File

@ -139,6 +139,15 @@ const (
defaultTLSBackoff = time.Minute
defaultTLSAttempts = 0
// Set defaults for a health check which ensures that the tor
// connection is alive. Although this check is off by default (not all
// setups require it), we still set the other default values so that
// the health check can be easily enabled with sane defaults.
defaultTCInterval = time.Minute
defaultTCTimeout = time.Second * 5
defaultTCBackoff = time.Minute
defaultTCAttempts = 0
// defaultRemoteMaxHtlcs specifies the default limit for maximum
// concurrent HTLCs the remote party may add to commitment transactions.
// This value can be overridden with --default-remote-max-htlcs.
@ -541,6 +550,12 @@ func DefaultConfig() Config {
Attempts: defaultTLSAttempts,
Backoff: defaultTLSBackoff,
},
TorConnection: &lncfg.CheckConfig{
Interval: defaultTCInterval,
Timeout: defaultTCTimeout,
Attempts: defaultTCAttempts,
Backoff: defaultTCBackoff,
},
},
Gossip: &lncfg.Gossip{
MaxChannelUpdateBurst: discovery.DefaultMaxChannelUpdateBurst,

View File

@ -28,6 +28,8 @@ type HealthCheckConfig struct {
DiskCheck *DiskCheckConfig `group:"diskspace" namespace:"diskspace"`
TLSCheck *CheckConfig `group:"tls" namespace:"tls"`
TorConnection *CheckConfig `group:"torconnection" namespace:"torconnection"`
}
// Validate checks the values configured for our health checks.
@ -50,6 +52,10 @@ func (h *HealthCheckConfig) Validate() error {
return errors.New("disk required ratio must be in [0:1)")
}
if err := h.TorConnection.validate("tor connection"); err != nil {
return err
}
return nil
}

View File

@ -990,6 +990,22 @@ litecoin.node=ltcd
; This value must be >= 1m.
; healthcheck.tls.interval=1m
; The number of times we should attempt to check our tor connection before
; gracefully shutting down. Set this value to 0 (the default) to disable this
; health check.
; healthcheck.torconnection.attempts=3
; The amount of time we allow a call to our tor connection to take before we
; fail the attempt. This value must be >= 1s.
; healthcheck.torconnection.timeout=10s
; The amount of time we should back off between failed attempts to check our
; tor connection. This value must be >= 1s.
; healthcheck.torconnection.backoff=30s
; The amount of time we should wait between tor connection health checks. This
; value must be >= 1m.
; healthcheck.torconnection.interval=1m
[signrpc]

View File

@ -1470,9 +1470,40 @@ func newServer(cfg *Config, listenAddrs []net.Addr,
})
}
// Create a set of health checks using our configured values. If a
// health check has been disabled by setting attempts to 0, our monitor
// will not run it.
// Create liveliness monitor.
s.createLivenessMonitor(cfg, cc)
// Create the connection manager which will be responsible for
// maintaining persistent outbound connections and also accepting new
// incoming connections
cmgr, err := connmgr.New(&connmgr.Config{
Listeners: listeners,
OnAccept: s.InboundPeerConnected,
RetryDuration: time.Second * 5,
TargetOutbound: 100,
Dial: noiseDial(
nodeKeyECDH, s.cfg.net, s.cfg.ConnectionTimeout,
),
OnConnection: s.OutboundPeerConnected,
})
if err != nil {
return nil, err
}
s.connMgr = cmgr
return s, nil
}
// createLivenessMonitor creates a set of health checks using our configured
// values and uses these checks to create a liveliness monitor. Available
// health checks,
// - chainHealthCheck
// - diskCheck
// - tlsHealthCheck
// - torConnectionCheck, only created when tor is enabled.
// If a health check has been disabled by setting attempts to 0, our monitor
// will not run it.
func (s *server) createLivenessMonitor(cfg *Config, cc *chainreg.ChainControl) {
chainHealthCheck := healthcheck.NewObservation(
"chain backend",
cc.HealthCheck,
@ -1521,11 +1552,12 @@ func newServer(cfg *Config, listenAddrs []net.Addr,
// If the current time is past the certificate's
// expiry time, then it is considered expired
if time.Now().After(parsedCert.NotAfter) {
return fmt.Errorf("TLS certificate is expired as of %v", parsedCert.NotAfter)
return fmt.Errorf("TLS certificate is "+
"expired as of %v", parsedCert.NotAfter)
}
// If the certificate is not outdated, no error needs to
// be returned
// If the certificate is not outdated, no error needs
// to be returned
return nil
},
cfg.HealthChecks.TLSCheck.Interval,
@ -1534,36 +1566,36 @@ func newServer(cfg *Config, listenAddrs []net.Addr,
cfg.HealthChecks.TLSCheck.Attempts,
)
checks := []*healthcheck.Observation{
chainHealthCheck, diskCheck, tlsHealthCheck,
}
// If Tor is enabled, add the healthcheck for tor connection.
if s.torController != nil {
torConnectionCheck := healthcheck.NewObservation(
"tor connection",
func() error {
return healthcheck.CheckTorServiceStatus(
s.torController,
s.createNewHiddenService,
)
},
cfg.HealthChecks.TorConnection.Interval,
cfg.HealthChecks.TorConnection.Timeout,
cfg.HealthChecks.TorConnection.Backoff,
cfg.HealthChecks.TorConnection.Attempts,
)
checks = append(checks, torConnectionCheck)
}
// Create a liveliness monitor with our configured checks. Checks that
// have been disabled by setting attempts to 0 are not run by the monitor.
s.livelinessMonitor = healthcheck.NewMonitor(
&healthcheck.Config{
Checks: []*healthcheck.Observation{
chainHealthCheck, diskCheck, tlsHealthCheck,
},
Checks: checks,
Shutdown: srvrLog.Criticalf,
},
)
// Create the connection manager which will be responsible for
// maintaining persistent outbound connections and also accepting new
// incoming connections
cmgr, err := connmgr.New(&connmgr.Config{
Listeners: listeners,
OnAccept: s.InboundPeerConnected,
RetryDuration: time.Second * 5,
TargetOutbound: 100,
Dial: noiseDial(
nodeKeyECDH, s.cfg.net, s.cfg.ConnectionTimeout,
),
OnConnection: s.OutboundPeerConnected,
})
if err != nil {
return nil, err
}
s.connMgr = cmgr
return s, nil
}
// Started returns true if the server has been started, and false otherwise.