watchtower: Add retry logic for fetching blocks

By default, retry fetching a block up to 5 more times, with
exponential back-off, in case of error.
This commit is contained in:
Animesh Bilthare 2024-01-15 01:46:49 +05:30
parent acbb33bb7b
commit ecd4480331
No known key found for this signature in database
GPG Key ID: 09BB99849ADF6212
4 changed files with 79 additions and 5 deletions

View File

@ -71,6 +71,10 @@
## Code Health ## Code Health
* [Add retry logic](https://github.com/lightningnetwork/lnd/pull/8381) for
watchtower block fetching with a max number of attempts and exponential
back-off.
* [Moved](https://github.com/lightningnetwork/lnd/pull/9138) profile related * [Moved](https://github.com/lightningnetwork/lnd/pull/9138) profile related
config settings to its own dedicated group. The old ones still work but will config settings to its own dedicated group. The old ones still work but will
be removed in a future release. be removed in a future release.
@ -124,9 +128,10 @@
# Contributors (Alphabetical Order) # Contributors (Alphabetical Order)
* Animesh Bilthare
* Boris Nagaev * Boris Nagaev
* CharlieZKSmith * CharlieZKSmith
* Elle Mouton * Elle Mouton
* Pins * Pins
* Viktor Tigerström * Viktor Tigerström
* Ziggie * Ziggie

View File

@ -1,14 +1,22 @@
package lookout package lookout
import ( import (
"errors"
"fmt"
"sync" "sync"
"sync/atomic" "sync/atomic"
"time"
"github.com/btcsuite/btcd/chaincfg/chainhash"
"github.com/btcsuite/btcd/wire" "github.com/btcsuite/btcd/wire"
"github.com/lightningnetwork/lnd/chainntnfs" "github.com/lightningnetwork/lnd/chainntnfs"
"github.com/lightningnetwork/lnd/watchtower/blob" "github.com/lightningnetwork/lnd/watchtower/blob"
) )
// ErrLookoutExiting is an error that is returned when the lookout server is
// in the process of shutting down.
var ErrLookoutExiting = errors.New("lookout server is shutting down")
// Config houses the Lookout's required resources to properly fulfill its duty, // Config houses the Lookout's required resources to properly fulfill its duty,
// including block fetching, querying accepted state updates, and construction // including block fetching, querying accepted state updates, and construction
// and publication of justice transactions. // and publication of justice transactions.
@ -29,6 +37,18 @@ type Config struct {
// Punisher handles the responsibility of crafting and broadcasting // Punisher handles the responsibility of crafting and broadcasting
// justice transaction for any breached transactions. // justice transaction for any breached transactions.
Punisher Punisher Punisher Punisher
// MinBackoff is the minimum amount of time to back-off before
// re-attempting to fetch a block.
MinBackoff time.Duration
// MaxBackoff is the maximum amount of time to back-off before
// re-attempting to fetch a block.
MaxBackoff time.Duration
// MaxNumRetries is the maximum number of times that we should
// re-attempt fetching a block before moving on.
MaxNumRetries int
} }
// Lookout will check any incoming blocks against the transactions found in the // Lookout will check any incoming blocks against the transactions found in the
@ -102,6 +122,49 @@ func (l *Lookout) Stop() error {
return nil return nil
} }
// fetchBlockWithRetries attempts to fetch a block from the blockchain using
// its hash. If the fetch fails, it backs off and retries, making at most
// MaxNumRetries+1 attempts in total. The back-off starts at MinBackoff and is
// doubled after every failed attempt, capped at MaxBackoff.
//
// Once the attempts are exhausted, the last fetch error is wrapped in the
// returned error so that callers can inspect it with errors.Is/errors.As. If
// the quit channel is signalled while waiting between attempts,
// ErrLookoutExiting is returned instead.
func (l *Lookout) fetchBlockWithRetries(hash *chainhash.Hash) (*wire.MsgBlock,
	error) {

	backoff := l.cfg.MinBackoff

	// updateBackoff doubles the current back-off, clamping it to the
	// configured maximum.
	updateBackoff := func() {
		backoff *= 2
		if backoff > l.cfg.MaxBackoff {
			backoff = l.cfg.MaxBackoff
		}
	}

	var attempt int
	for {
		attempt++

		block, err := l.cfg.BlockFetcher.GetBlock(hash)
		if err == nil {
			return block, nil
		}

		// The first attempt is not counted as a retry, so we only give
		// up once the attempt counter exceeds MaxNumRetries.
		if attempt > l.cfg.MaxNumRetries {
			return nil, fmt.Errorf("failed to fetch block %s "+
				"after %d attempts: %w", hash, attempt, err)
		}

		log.Errorf("Failed to fetch block %s (attempt %d): %v. "+
			"Retrying in %v seconds", hash, attempt, err,
			backoff.Seconds())

		// Use an explicit timer rather than time.After so that it can
		// be released right away if we are asked to shut down while
		// waiting.
		timer := time.NewTimer(backoff)
		select {
		case <-timer.C:
		case <-l.quit:
			timer.Stop()

			return nil, ErrLookoutExiting
		}

		updateBackoff()
	}
}
// watchBlocks serially pulls incoming epochs from the epoch source and searches // watchBlocks serially pulls incoming epochs from the epoch source and searches
// our accepted state updates for any breached transactions. If any are found, // our accepted state updates for any breached transactions. If any are found,
// we will attempt to decrypt the state updates' encrypted blobs and exact // we will attempt to decrypt the state updates' encrypted blobs and exact
@ -118,11 +181,10 @@ func (l *Lookout) watchBlocks(epochs *chainntnfs.BlockEpochEvent) {
log.Debugf("Fetching block for (height=%d, hash=%s)", log.Debugf("Fetching block for (height=%d, hash=%s)",
epoch.Height, epoch.Hash) epoch.Height, epoch.Hash)
// Fetch the full block from the backend corresponding // Fetch the full block corresponding to the newly
// to the newly arriving epoch. // arriving epoch from the backend.
block, err := l.cfg.BlockFetcher.GetBlock(epoch.Hash) block, err := l.fetchBlockWithRetries(epoch.Hash)
if err != nil { if err != nil {
// TODO(conner): add retry logic?
log.Errorf("Unable to fetch block for "+ log.Errorf("Unable to fetch block for "+
"(height=%x, hash=%s): %v", "(height=%x, hash=%s): %v",
epoch.Height, epoch.Hash, err) epoch.Height, epoch.Hash, err)

View File

@ -90,6 +90,9 @@ func TestLookoutBreachMatching(t *testing.T) {
DB: db, DB: db,
EpochRegistrar: backend, EpochRegistrar: backend,
Punisher: punisher, Punisher: punisher,
MinBackoff: time.Second,
MaxBackoff: time.Minute,
MaxNumRetries: 1,
}) })
if err := watcher.Start(); err != nil { if err := watcher.Start(); err != nil {
t.Fatalf("unable to start watcher: %v", err) t.Fatalf("unable to start watcher: %v", err)

View File

@ -3,6 +3,7 @@ package watchtower
import ( import (
"net" "net"
"sync/atomic" "sync/atomic"
"time"
"github.com/btcsuite/btcd/btcec/v2" "github.com/btcsuite/btcd/btcec/v2"
"github.com/lightningnetwork/lnd/brontide" "github.com/lightningnetwork/lnd/brontide"
@ -65,6 +66,9 @@ func New(cfg *Config) (*Standalone, error) {
DB: cfg.DB, DB: cfg.DB,
EpochRegistrar: cfg.EpochRegistrar, EpochRegistrar: cfg.EpochRegistrar,
Punisher: punisher, Punisher: punisher,
MinBackoff: time.Second,
MaxBackoff: time.Minute,
MaxNumRetries: 5,
}) })
// Create a brontide listener on each of the provided listening // Create a brontide listener on each of the provided listening