From 653e2f36670477a64351cbe3a4aa223e6ab50107 Mon Sep 17 00:00:00 2001 From: ziggie Date: Wed, 8 May 2024 20:25:49 +0100 Subject: [PATCH] multi: Allow interrupt of server startup. This commit does two things. It starts up the server in a way that it can be interrupted and shutdown gracefully. Moreover it makes sure that subsystems clean themselves up when they fail to start. This makes sure that depending subsytems can shutdown gracefully as well and the shutdown process is not stuck. --- lnd.go | 28 +++++++++++++-- server.go | 86 ++++++++++++++++++++++----------------------- sweep/fee_bumper.go | 1 - 3 files changed, 68 insertions(+), 47 deletions(-) diff --git a/lnd.go b/lnd.go index 0a85e1bf3..f45b3e630 100644 --- a/lnd.go +++ b/lnd.go @@ -674,11 +674,33 @@ func Main(cfg *Config, lisCfg ListenerCfg, implCfg *ImplementationCfg, bestHeight) // With all the relevant chains initialized, we can finally start the - // server itself. - if err := server.Start(); err != nil { + // server itself. We start the server in an asynchronous goroutine so + // that we are able to interrupt and shutdown the daemon gracefully in + // case the startup of the subservers do not behave as expected. + errChan := make(chan error) + go func() { + errChan <- server.Start() + }() + + defer func() { + err := server.Stop() + if err != nil { + ltndLog.Warnf("Stopping the server including all "+ + "its subsystems failed with %v", err) + } + }() + + select { + case err := <-errChan: + if err == nil { + break + } + return mkErr("unable to start server: %v", err) + + case <-interceptor.ShutdownChannel(): + return nil } - defer server.Stop() // We transition the server state to Active, as the server is up. interceptorChain.SetServerActive() diff --git a/server.go b/server.go index ce875c255..8cd0f0b13 100644 --- a/server.go +++ b/server.go @@ -1877,6 +1877,8 @@ func (c cleaner) run() { // Start starts the main daemon server, all requested listeners, and any helper // goroutines. // NOTE: This function is safe for concurrent access. +// +//nolint:funlen func (s *server) Start() error { var startErr error @@ -1886,26 +1888,26 @@ func (s *server) Start() error { cleanup := cleaner{} s.start.Do(func() { + cleanup = cleanup.add(s.customMessageServer.Stop) if err := s.customMessageServer.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.customMessageServer.Stop) if s.hostAnn != nil { + cleanup = cleanup.add(s.hostAnn.Stop) if err := s.hostAnn.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.hostAnn.Stop) } if s.livenessMonitor != nil { + cleanup = cleanup.add(s.livenessMonitor.Stop) if err := s.livenessMonitor.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.livenessMonitor.Stop) } // Start the notification server. This is used so channel @@ -1913,167 +1915,162 @@ func (s *server) Start() error { // transaction reaches a sufficient number of confirmations, or // when the input for the funding transaction is spent in an // attempt at an uncooperative close by the counterparty. + cleanup = cleanup.add(s.sigPool.Stop) if err := s.sigPool.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.sigPool.Stop) + cleanup = cleanup.add(s.writePool.Stop) if err := s.writePool.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.writePool.Stop) + cleanup = cleanup.add(s.readPool.Stop) if err := s.readPool.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.readPool.Stop) + cleanup = cleanup.add(s.cc.ChainNotifier.Stop) if err := s.cc.ChainNotifier.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.cc.ChainNotifier.Stop) + cleanup = cleanup.add(s.cc.BestBlockTracker.Stop) if err := s.cc.BestBlockTracker.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.cc.BestBlockTracker.Stop) + cleanup = cleanup.add(s.channelNotifier.Stop) if err := s.channelNotifier.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.channelNotifier.Stop) + cleanup = cleanup.add(func() error { + return s.peerNotifier.Stop() + }) if err := s.peerNotifier.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(func() error { - return s.peerNotifier.Stop() - }) + + cleanup = cleanup.add(s.htlcNotifier.Stop) if err := s.htlcNotifier.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.htlcNotifier.Stop) if s.towerClientMgr != nil { + cleanup = cleanup.add(s.towerClientMgr.Stop) if err := s.towerClientMgr.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.towerClientMgr.Stop) } + cleanup = cleanup.add(s.txPublisher.Stop) if err := s.txPublisher.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(func() error { - s.txPublisher.Stop() - return nil - }) + cleanup = cleanup.add(s.sweeper.Stop) if err := s.sweeper.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.sweeper.Stop) + cleanup = cleanup.add(s.utxoNursery.Stop) if err := s.utxoNursery.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.utxoNursery.Stop) + cleanup = cleanup.add(s.breachArbitrator.Stop) if err := s.breachArbitrator.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.breachArbitrator.Stop) + cleanup = cleanup.add(s.fundingMgr.Stop) if err := s.fundingMgr.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.fundingMgr.Stop) // htlcSwitch must be started before chainArb since the latter // relies on htlcSwitch to deliver resolution message upon // start. + cleanup = cleanup.add(s.htlcSwitch.Stop) if err := s.htlcSwitch.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.htlcSwitch.Stop) + cleanup = cleanup.add(s.interceptableSwitch.Stop) if err := s.interceptableSwitch.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.interceptableSwitch.Stop) + cleanup = cleanup.add(s.chainArb.Stop) if err := s.chainArb.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.chainArb.Stop) + cleanup = cleanup.add(s.authGossiper.Stop) if err := s.authGossiper.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.authGossiper.Stop) + cleanup = cleanup.add(s.graphBuilder.Stop) if err := s.graphBuilder.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.graphBuilder.Stop) + cleanup = cleanup.add(s.chanRouter.Stop) if err := s.chanRouter.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.chanRouter.Stop) + cleanup = cleanup.add(s.invoices.Stop) if err := s.invoices.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.invoices.Stop) + cleanup = cleanup.add(s.sphinx.Stop) if err := s.sphinx.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.sphinx.Stop) + cleanup = cleanup.add(s.chanStatusMgr.Stop) if err := s.chanStatusMgr.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.chanStatusMgr.Stop) + cleanup = cleanup.add(s.chanEventStore.Stop) if err := s.chanEventStore.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(func() error { - s.chanEventStore.Stop() - return nil - }) - s.missionControl.RunStoreTicker() cleanup.add(func() error { s.missionControl.StopStoreTicker() return nil }) + s.missionControl.RunStoreTicker() // Before we start the connMgr, we'll check to see if we have // any backups to recover. We do this now as we want to ensure @@ -2107,18 +2104,18 @@ func (s *server) Start() error { } } + cleanup = cleanup.add(s.chanSubSwapper.Stop) if err := s.chanSubSwapper.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.chanSubSwapper.Stop) if s.torController != nil { + cleanup = cleanup.add(s.torController.Stop) if err := s.createNewHiddenService(); err != nil { startErr = err return } - cleanup = cleanup.add(s.torController.Stop) } if s.natTraversal != nil { @@ -2127,11 +2124,11 @@ func (s *server) Start() error { } // Start connmgr last to prevent connections before init. - s.connMgr.Start() cleanup = cleanup.add(func() error { s.connMgr.Stop() return nil }) + s.connMgr.Start() // If peers are specified as a config option, we'll add those // peers first. @@ -2318,9 +2315,9 @@ func (s *server) Stop() error { if err := s.sweeper.Stop(); err != nil { srvrLog.Warnf("failed to stop sweeper: %v", err) } - - s.txPublisher.Stop() - + if err := s.txPublisher.Stop(); err != nil { + srvrLog.Warnf("failed to stop txPublisher: %v", err) + } if err := s.channelNotifier.Stop(); err != nil { srvrLog.Warnf("failed to stop channelNotifier: %v", err) } @@ -2340,7 +2337,10 @@ func (s *server) Stop() error { srvrLog.Warnf("Unable to stop BestBlockTracker: %v", err) } - s.chanEventStore.Stop() + if err := s.chanEventStore.Stop(); err != nil { + srvrLog.Warnf("Unable to stop ChannelEventStore: %v", + err) + } s.missionControl.StopStoreTicker() // Disconnect from each active peers to ensure that diff --git a/sweep/fee_bumper.go b/sweep/fee_bumper.go index e59fc5b11..1d3fa5aed 100644 --- a/sweep/fee_bumper.go +++ b/sweep/fee_bumper.go @@ -701,7 +701,6 @@ func (t *TxPublisher) Stop() error { log.Debug("TxPublisher stopped") return nil - } // monitor is the main loop driven by new blocks. Whevenr a new block arrives,