From ecd44803315dbee2a06d1132bcf710077d6775ca Mon Sep 17 00:00:00 2001 From: Animesh Bilthare Date: Mon, 15 Jan 2024 01:46:49 +0530 Subject: [PATCH] watchtower: Add retry logic for fetching blocks By default, try to fetch the blocks 5 more times in case of error. --- docs/release-notes/release-notes-0.19.0.md | 7 ++- watchtower/lookout/lookout.go | 70 ++++++++++++++++++++-- watchtower/lookout/lookout_test.go | 3 + watchtower/standalone.go | 4 ++ 4 files changed, 79 insertions(+), 5 deletions(-) diff --git a/docs/release-notes/release-notes-0.19.0.md b/docs/release-notes/release-notes-0.19.0.md index 3b20a054e..5e06bae60 100644 --- a/docs/release-notes/release-notes-0.19.0.md +++ b/docs/release-notes/release-notes-0.19.0.md @@ -71,6 +71,10 @@ ## Code Health +* [Add retry logic](https://github.com/lightningnetwork/lnd/pull/8381) for + watchtower block fetching with a max number of attempts and exponential + back-off. + * [Moved](https://github.com/lightningnetwork/lnd/pull/9138) profile related config settings to its own dedicated group. The old ones still work but will be removed in a future release. @@ -124,9 +128,10 @@ # Contributors (Alphabetical Order) +* Animesh Bilthare * Boris Nagaev * CharlieZKSmith * Elle Mouton * Pins * Viktor Tigerström -* Ziggie +* Ziggie \ No newline at end of file diff --git a/watchtower/lookout/lookout.go b/watchtower/lookout/lookout.go index 28f52cccd..b40911995 100644 --- a/watchtower/lookout/lookout.go +++ b/watchtower/lookout/lookout.go @@ -1,14 +1,22 @@ package lookout import ( + "errors" + "fmt" "sync" "sync/atomic" + "time" + "github.com/btcsuite/btcd/chaincfg/chainhash" "github.com/btcsuite/btcd/wire" "github.com/lightningnetwork/lnd/chainntnfs" "github.com/lightningnetwork/lnd/watchtower/blob" ) +// ErrLookoutExiting is an error that is returned when the lookout server is +// in the process of shutting down. 
+var ErrLookoutExiting = errors.New("lookout server is shutting down") + // Config houses the Lookout's required resources to properly fulfill it's duty, // including block fetching, querying accepted state updates, and construction // and publication of justice transactions. @@ -29,6 +37,18 @@ type Config struct { // Punisher handles the responsibility of crafting and broadcasting // justice transaction for any breached transactions. Punisher Punisher + + // MinBackoff is the minimum amount of time to back-off before + // re-attempting to fetch a block. + MinBackoff time.Duration + + // MaxBackoff is the maximum amount of time to back-off before + // re-attempting to fetch a block. + MaxBackoff time.Duration + + // MaxNumRetries is the maximum number of times that we should + // re-attempt fetching a block before moving on. + MaxNumRetries int } // Lookout will check any incoming blocks against the transactions found in the @@ -102,6 +122,49 @@ func (l *Lookout) Stop() error { return nil } +// fetchBlockWithRetries attempts to fetch a block from the blockchain using +// its hash. If it fails to fetch the block, it will back-off and retry up to +// MaxNumRetries times. +func (l *Lookout) fetchBlockWithRetries(hash *chainhash.Hash) (*wire.MsgBlock, + error) { + + backoff := l.cfg.MinBackoff + + updateBackoff := func() { + backoff *= 2 + if backoff > l.cfg.MaxBackoff { + backoff = l.cfg.MaxBackoff + } + } + + var attempt int + for { + attempt++ + + block, err := l.cfg.BlockFetcher.GetBlock(hash) + if err == nil { + return block, nil + } + + if attempt > l.cfg.MaxNumRetries { + return nil, fmt.Errorf("failed to fetch block %s "+ + "after %d attempts: %v", hash, attempt, err) + } + + log.Errorf("Failed to fetch block %s (attempt %d): %v. 
"+ + "Retrying in %v seconds", hash, attempt, err, + backoff.Seconds()) + + select { + case <-time.After(backoff): + case <-l.quit: + return nil, ErrLookoutExiting + } + + updateBackoff() + } +} + // watchBlocks serially pulls incoming epochs from the epoch source and searches // our accepted state updates for any breached transactions. If any are found, // we will attempt to decrypt the state updates' encrypted blobs and exact @@ -118,11 +181,10 @@ func (l *Lookout) watchBlocks(epochs *chainntnfs.BlockEpochEvent) { log.Debugf("Fetching block for (height=%d, hash=%s)", epoch.Height, epoch.Hash) - // Fetch the full block from the backend corresponding - // to the newly arriving epoch. - block, err := l.cfg.BlockFetcher.GetBlock(epoch.Hash) + // Fetch the full block corresponding to the newly + // arriving epoch from the backend. + block, err := l.fetchBlockWithRetries(epoch.Hash) if err != nil { - // TODO(conner): add retry logic? log.Errorf("Unable to fetch block for "+ "(height=%x, hash=%s): %v", epoch.Height, epoch.Hash, err) diff --git a/watchtower/lookout/lookout_test.go b/watchtower/lookout/lookout_test.go index 3800a5682..64d182aa2 100644 --- a/watchtower/lookout/lookout_test.go +++ b/watchtower/lookout/lookout_test.go @@ -90,6 +90,9 @@ func TestLookoutBreachMatching(t *testing.T) { DB: db, EpochRegistrar: backend, Punisher: punisher, + MinBackoff: time.Second, + MaxBackoff: time.Minute, + MaxNumRetries: 1, }) if err := watcher.Start(); err != nil { t.Fatalf("unable to start watcher: %v", err) diff --git a/watchtower/standalone.go b/watchtower/standalone.go index c652d9c76..b973c9ce4 100644 --- a/watchtower/standalone.go +++ b/watchtower/standalone.go @@ -3,6 +3,7 @@ package watchtower import ( "net" "sync/atomic" + "time" "github.com/btcsuite/btcd/btcec/v2" "github.com/lightningnetwork/lnd/brontide" @@ -65,6 +66,9 @@ func New(cfg *Config) (*Standalone, error) { DB: cfg.DB, EpochRegistrar: cfg.EpochRegistrar, Punisher: punisher, + MinBackoff: time.Second, + 
MaxBackoff: time.Minute, + MaxNumRetries: 5, }) // Create a brontide listener on each of the provided listening