lnd/watchtower/wtclient/manager.go

package wtclient
import (
"errors"
"fmt"
"net"
"sync"
"time"
"github.com/btcsuite/btcd/btcec/v2"
"github.com/btcsuite/btcd/chaincfg/chainhash"
"github.com/lightningnetwork/lnd/chainntnfs"
"github.com/lightningnetwork/lnd/channeldb"
"github.com/lightningnetwork/lnd/channelnotifier"
"github.com/lightningnetwork/lnd/fn"
"github.com/lightningnetwork/lnd/input"
"github.com/lightningnetwork/lnd/lnwire"
"github.com/lightningnetwork/lnd/subscribe"
"github.com/lightningnetwork/lnd/tor"
"github.com/lightningnetwork/lnd/watchtower/blob"
"github.com/lightningnetwork/lnd/watchtower/wtdb"
"github.com/lightningnetwork/lnd/watchtower/wtpolicy"
)
// ClientManager is the primary interface used by the daemon to control a
// client's lifecycle and back up revoked states.
type ClientManager interface {
// AddTower adds a new watchtower reachable at the given address and
// considers it for new sessions. If the watchtower already exists, then
// any new addresses included will be considered when dialing it for
// session negotiations and backups.
AddTower(*lnwire.NetAddress) error
// RemoveTower removes a watchtower from being considered for future
// session negotiations and from being used for any subsequent backups
// until it's added again. If an address is provided, then this call
// only serves as a way of removing the address from the watchtower
// instead.
RemoveTower(*btcec.PublicKey, net.Addr) error
// Stats returns the in-memory statistics of the client since startup.
Stats() ClientStats
// Policy returns the active client policy configuration.
Policy(blob.Type) (wtpolicy.Policy, error)
// RegisteredTowers retrieves the list of watchtowers registered with
// the client. It returns a set of registered towers per client policy
// type.
RegisteredTowers(opts ...wtdb.ClientSessionListOption) (
map[blob.Type][]*RegisteredTower, error)
// LookupTower retrieves a registered watchtower through its public key.
LookupTower(*btcec.PublicKey, ...wtdb.ClientSessionListOption) (
map[blob.Type]*RegisteredTower, error)
// RegisterChannel persistently initializes any channel-dependent
// parameters within the client. This should be called during link
// startup to ensure that the client is able to support the link during
// operation.
RegisterChannel(lnwire.ChannelID, channeldb.ChannelType) error
// BackupState initiates a request to back up a particular revoked
// state. If the method returns nil, the backup is guaranteed to be
// successful unless the justice transaction would create dust outputs
// when trying to abide by the negotiated policy.
BackupState(chanID *lnwire.ChannelID, stateNum uint64) error
}
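// exampleBackupFlow is an illustrative sketch of how a daemon might drive
// the ClientManager interface: a tower is added, the channel is registered
// at link startup, and each revoked state is then handed off for backup.
// The function and all of its parameters are hypothetical and only
// demonstrate the expected call order.
func exampleBackupFlow(mgr ClientManager, towerAddr *lnwire.NetAddress,
	chanID lnwire.ChannelID, chanType channeldb.ChannelType,
	stateNum uint64) error {

	// Make the tower available for session negotiation and backups.
	if err := mgr.AddTower(towerAddr); err != nil {
		return err
	}

	// Register the channel before any backups are attempted for it.
	if err := mgr.RegisterChannel(chanID, chanType); err != nil {
		return err
	}

	// Back up the revoked state at the given commit height.
	return mgr.BackupState(&chanID, stateNum)
}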
// Config provides the client with access to the resources it requires to
// perform its duty. All nillable fields must be non-nil for the client to be
// initialized properly.
type Config struct {
// Signer provides access to the wallet so that the client can sign
// justice transactions that spend from a remote party's commitment
// transaction.
Signer input.Signer
// SubscribeChannelEvents can be used to subscribe to channel event
// notifications.
SubscribeChannelEvents func() (subscribe.Subscription, error)
// FetchClosedChannel can be used to fetch the info about a closed
// channel. If the channel is not found or not yet closed then
// channeldb.ErrClosedChannelNotFound will be returned.
FetchClosedChannel func(cid lnwire.ChannelID) (
*channeldb.ChannelCloseSummary, error)
// ChainNotifier can be used to subscribe to block notifications.
ChainNotifier chainntnfs.ChainNotifier
// BuildBreachRetribution is a function closure that allows the client to
// fetch the breach retribution info for a certain channel at a certain
// revoked commitment height.
BuildBreachRetribution BreachRetributionBuilder
// NewAddress generates a new on-chain sweep pkscript.
NewAddress func() ([]byte, error)
// SecretKeyRing is used to derive the session keys used to communicate
// with the tower. The client only stores the KeyLocators internally so
// that we never store private keys on disk.
SecretKeyRing ECDHKeyRing
// Dial connects to an addr using the specified net and returns the
// connection object.
Dial tor.DialFunc
// AuthDialer establishes a brontide connection over an onion or clear
// network.
AuthDial AuthDialer
// DB provides access to the client's stable storage medium.
DB DB
// ChainHash identifies the chain that the client is on and for which
// the tower must be watching to monitor for breaches.
ChainHash chainhash.Hash
// ReadTimeout is the duration we will wait during a read before
// breaking out of a blocking read. If the value is less than or equal
// to zero, the default will be used instead.
ReadTimeout time.Duration
// WriteTimeout is the duration we will wait during a write before
// breaking out of a blocking write. If the value is less than or equal
// to zero, the default will be used instead.
WriteTimeout time.Duration
// MinBackoff defines the initial backoff applied to connections with
// watchtowers. Subsequent backoff durations will grow exponentially up
// until MaxBackoff.
MinBackoff time.Duration
// MaxBackoff defines the maximum backoff applied to connections with
// watchtowers. If the exponential backoff produces a timeout greater
// than this value, the backoff will be clamped to MaxBackoff.
MaxBackoff time.Duration
// SessionCloseRange is the range over which we will generate a random
// number of blocks to delay closing a session after its last channel
// has been closed.
SessionCloseRange uint32
// MaxTasksInMemQueue is the maximum number of backup tasks that should
// be kept in-memory. Any more tasks will overflow to disk.
MaxTasksInMemQueue uint64
}
// Manager manages the various tower clients that are active. A client is
// required for each different commitment transaction type. The Manager acts as
// a tower client multiplexer.
type Manager struct {
started sync.Once
stopped sync.Once
cfg *Config
clients map[blob.Type]*client
clientsMu sync.Mutex
backupMu sync.Mutex
chanInfos wtdb.ChannelInfos
chanBlobType map[lnwire.ChannelID]blob.Type
closableSessionQueue *sessionCloseMinHeap
wg sync.WaitGroup
quit chan struct{}
}
var _ ClientManager = (*Manager)(nil)
// NewManager constructs a new Manager.
func NewManager(config *Config, policies ...wtpolicy.Policy) (*Manager, error) {
// Copy the config to prevent side effects from modifying both the
// internal and external version of the Config.
cfg := *config
// Set the read timeout to the default if none was provided.
if cfg.ReadTimeout <= 0 {
cfg.ReadTimeout = DefaultReadTimeout
}
// Set the write timeout to the default if none was provided.
if cfg.WriteTimeout <= 0 {
cfg.WriteTimeout = DefaultWriteTimeout
}
chanInfos, err := cfg.DB.FetchChanInfos()
if err != nil {
return nil, err
}
m := &Manager{
cfg: &cfg,
clients: make(map[blob.Type]*client),
chanBlobType: make(map[lnwire.ChannelID]blob.Type),
chanInfos: chanInfos,
closableSessionQueue: newSessionCloseMinHeap(),
quit: make(chan struct{}),
}
for _, policy := range policies {
if err = policy.Validate(); err != nil {
return nil, err
}
if err = m.newClient(policy); err != nil {
return nil, err
}
}
return m, nil
}
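// exampleNewManager is an illustrative sketch showing how a caller might
// construct a Manager with one policy per commitment type, assuming cfg is
// an already populated *Config. It relies on wtpolicy.DefaultPolicy and the
// blob.TypeAltruistCommit and blob.TypeAltruistAnchorCommit identifiers;
// each policy must use a distinct blob type, otherwise newClient rejects the
// duplicate.
func exampleNewManager(cfg *Config) (*Manager, error) {
	// A policy for legacy commitment channels.
	legacyPolicy := wtpolicy.DefaultPolicy()
	legacyPolicy.BlobType = blob.TypeAltruistCommit

	// A policy for anchor commitment channels.
	anchorPolicy := wtpolicy.DefaultPolicy()
	anchorPolicy.BlobType = blob.TypeAltruistAnchorCommit

	// One client is created and validated per policy, keyed by its blob
	// type.
	return NewManager(cfg, legacyPolicy, anchorPolicy)
}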
// newClient constructs a new client and adds it to the set of clients that
// the Manager is keeping track of.
func (m *Manager) newClient(policy wtpolicy.Policy) error {
m.clientsMu.Lock()
defer m.clientsMu.Unlock()
_, ok := m.clients[policy.BlobType]
if ok {
return fmt.Errorf("a client with blob type %s has "+
"already been registered", policy.BlobType)
}
cfg := &clientCfg{
Config: m.cfg,
Policy: policy,
getSweepScript: m.getSweepScript,
}
client, err := newClient(cfg)
if err != nil {
return err
}
m.clients[policy.BlobType] = client
return nil
}
// Start starts all the clients that have been registered with the Manager.
func (m *Manager) Start() error {
var returnErr error
m.started.Do(func() {
chanSub, err := m.cfg.SubscribeChannelEvents()
if err != nil {
returnErr = err
return
}
// Iterate over the list of registered channels and check if any
// of them can be marked as closed.
for id := range m.chanInfos {
isClosed, closedHeight, err := m.isChannelClosed(id)
if err != nil {
returnErr = err
return
}
if !isClosed {
continue
}
_, err = m.cfg.DB.MarkChannelClosed(id, closedHeight)
if err != nil {
log.Errorf("could not mark channel(%s) as "+
"closed: %v", id, err)
continue
}
// Since the channel has been marked as closed, we can
// also remove it from the channel summaries map.
delete(m.chanInfos, id)
}
// Load all closable sessions.
closableSessions, err := m.cfg.DB.ListClosableSessions()
if err != nil {
returnErr = err
return
}
err = m.trackClosableSessions(closableSessions)
if err != nil {
returnErr = err
return
}
m.wg.Add(1)
go m.handleChannelCloses(chanSub)
// Subscribe to new block events.
blockEvents, err := m.cfg.ChainNotifier.RegisterBlockEpochNtfn(
nil,
)
if err != nil {
returnErr = err
return
}
m.wg.Add(1)
go m.handleClosableSessions(blockEvents)
m.clientsMu.Lock()
defer m.clientsMu.Unlock()
for _, client := range m.clients {
if err := client.start(); err != nil {
returnErr = err
return
}
}
})
return returnErr
}
// Stop stops all the clients that the Manager is managing.
func (m *Manager) Stop() error {
var returnErr error
m.stopped.Do(func() {
m.clientsMu.Lock()
defer m.clientsMu.Unlock()
close(m.quit)
m.wg.Wait()
for _, client := range m.clients {
if err := client.stop(); err != nil {
returnErr = err
}
}
})
return returnErr
}
// AddTower adds a new watchtower reachable at the given address and considers
// it for new sessions. If the watchtower already exists, then any new addresses
// included will be considered when dialing it for session negotiations and
// backups.
func (m *Manager) AddTower(address *lnwire.NetAddress) error {
// We'll start by updating our persisted state, followed by the
// in-memory state of each client, with the new tower. This might not
// actually be a new tower, but it might include a new address at which
// it can be reached.
dbTower, err := m.cfg.DB.CreateTower(address)
if err != nil {
return err
}
tower, err := NewTowerFromDBTower(dbTower)
if err != nil {
return err
}
m.clientsMu.Lock()
defer m.clientsMu.Unlock()
for blobType, client := range m.clients {
clientType, err := blobType.Identifier()
if err != nil {
return err
}
if err := client.addTower(tower); err != nil {
return fmt.Errorf("could not add tower(%x) to the %s "+
"tower client: %w",
tower.IdentityKey.SerializeCompressed(),
clientType, err)
}
}
return nil
}
// RemoveTower removes a watchtower from being considered for future session
// negotiations and from being used for any subsequent backups until it's added
// again. If an address is provided, then this call only serves as a way of
// removing the address from the watchtower instead.
func (m *Manager) RemoveTower(key *btcec.PublicKey, addr net.Addr) error {
// We'll load the tower before potentially removing it in order to
// retrieve its ID within the database.
dbTower, err := m.cfg.DB.LoadTower(key)
if err != nil {
return err
}
m.clientsMu.Lock()
defer m.clientsMu.Unlock()
for _, client := range m.clients {
err := client.removeTower(dbTower.ID, key, addr)
if err != nil {
return err
}
}
if err := m.cfg.DB.RemoveTower(key, addr); err != nil {
// If the persisted state update fails, re-add the address to
// our client's in-memory state.
tower, newTowerErr := NewTowerFromDBTower(dbTower)
if newTowerErr != nil {
log.Errorf("could not create new in-memory tower: %v",
newTowerErr)
return err
}
for _, client := range m.clients {
addTowerErr := client.addTower(tower)
if addTowerErr != nil {
log.Errorf("could not re-add tower: %v",
addTowerErr)
}
}
return err
}
return nil
}
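// exampleRemoveTower is an illustrative sketch of the two ways RemoveTower
// can be used, per its doc comment above: passing an address removes only
// that address, while passing a nil address removes the tower entirely. The
// towerKey and towerAddr values are hypothetical.
func exampleRemoveTower(m *Manager, towerKey *btcec.PublicKey,
	towerAddr net.Addr) error {

	// Remove only a single advertised address from the tower's record.
	if err := m.RemoveTower(towerKey, towerAddr); err != nil {
		return err
	}

	// Remove the tower entirely so that it is no longer considered for
	// future sessions or backups.
	return m.RemoveTower(towerKey, nil)
}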
// Stats returns the in-memory statistics of the clients managed by the Manager
// since startup.
func (m *Manager) Stats() ClientStats {
m.clientsMu.Lock()
defer m.clientsMu.Unlock()
var resp ClientStats
for _, client := range m.clients {
stats := client.getStats()
resp.NumTasksAccepted += stats.NumTasksAccepted
resp.NumTasksIneligible += stats.NumTasksIneligible
resp.NumTasksPending += stats.NumTasksPending
resp.NumSessionsAcquired += stats.NumSessionsAcquired
resp.NumSessionsExhausted += stats.NumSessionsExhausted
}
return resp
}
// RegisteredTowers retrieves the list of watchtowers being used by the various
// clients.
func (m *Manager) RegisteredTowers(opts ...wtdb.ClientSessionListOption) (
map[blob.Type][]*RegisteredTower, error) {
towers, err := m.cfg.DB.ListTowers()
if err != nil {
return nil, err
}
m.clientsMu.Lock()
defer m.clientsMu.Unlock()
resp := make(map[blob.Type][]*RegisteredTower)
for _, client := range m.clients {
towers, err := client.registeredTowers(towers, opts...)
if err != nil {
return nil, err
}
resp[client.policy().BlobType] = towers
}
return resp, nil
}
// LookupTower retrieves a registered watchtower through its public key.
func (m *Manager) LookupTower(key *btcec.PublicKey,
opts ...wtdb.ClientSessionListOption) (map[blob.Type]*RegisteredTower,
error) {
tower, err := m.cfg.DB.LoadTower(key)
if err != nil {
return nil, err
}
m.clientsMu.Lock()
defer m.clientsMu.Unlock()
resp := make(map[blob.Type]*RegisteredTower)
for _, client := range m.clients {
tower, err := client.lookupTower(tower, opts...)
if err != nil {
return nil, err
}
resp[client.policy().BlobType] = tower
}
return resp, nil
}
// Policy returns the active client policy configuration for the client using
// the given blob type.
func (m *Manager) Policy(blobType blob.Type) (wtpolicy.Policy, error) {
m.clientsMu.Lock()
defer m.clientsMu.Unlock()
var policy wtpolicy.Policy
client, ok := m.clients[blobType]
if !ok {
return policy, fmt.Errorf("no client for the given blob type")
}
return client.policy(), nil
}
// RegisterChannel persistently initializes any channel-dependent parameters
// within the client. This should be called during link startup to ensure that
// the client is able to support the link during operation.
func (m *Manager) RegisterChannel(id lnwire.ChannelID,
chanType channeldb.ChannelType) error {
blobType := blob.TypeFromChannel(chanType)
m.clientsMu.Lock()
if _, ok := m.clients[blobType]; !ok {
m.clientsMu.Unlock()
return fmt.Errorf("no client registered for blob type %s",
blobType)
}
m.clientsMu.Unlock()
m.backupMu.Lock()
defer m.backupMu.Unlock()
// If a pkscript for this channel already exists, the channel has been
// previously registered.
if _, ok := m.chanInfos[id]; ok {
// Keep track of which blob type this channel will use for
// updates.
m.chanBlobType[id] = blobType
return nil
}
// Otherwise, generate a new sweep pkscript used to sweep funds for this
// channel.
pkScript, err := m.cfg.NewAddress()
if err != nil {
return err
}
// Persist the sweep pkscript so that restarts will not introduce
// address inflation when the channel is reregistered after a restart.
err = m.cfg.DB.RegisterChannel(id, pkScript)
if err != nil {
return err
}
// Finally, cache the pkscript in our in-memory cache to avoid db
// lookups for the remainder of the daemon's execution.
m.chanInfos[id] = &wtdb.ChannelInfo{
ClientChanSummary: wtdb.ClientChanSummary{
SweepPkScript: pkScript,
},
}
// Keep track of which blob type this channel will use for updates.
m.chanBlobType[id] = blobType
return nil
}
// BackupState initiates a request to back up a particular revoked state. If the
// method returns nil, the backup is guaranteed to be successful unless the
// justice transaction would create dust outputs when trying to abide by the
// negotiated policy.
func (m *Manager) BackupState(chanID *lnwire.ChannelID, stateNum uint64) error {
select {
case <-m.quit:
return ErrClientExiting
default:
}
// Make sure that this channel is registered with the tower client.
m.backupMu.Lock()
info, ok := m.chanInfos[*chanID]
if !ok {
m.backupMu.Unlock()
return ErrUnregisteredChannel
}
// Ignore backups that have already been presented to the client.
var duplicate bool
info.MaxHeight.WhenSome(func(maxHeight uint64) {
if stateNum <= maxHeight {
duplicate = true
}
})
if duplicate {
m.backupMu.Unlock()
log.Debugf("Ignoring duplicate backup for chanid=%v at "+
"height=%d", chanID, stateNum)
return nil
}
// This backup has a higher commit height than any known backup for this
// channel. We'll update our tip so that we won't accept it again if the
// link flaps.
m.chanInfos[*chanID].MaxHeight = fn.Some(stateNum)
blobType, ok := m.chanBlobType[*chanID]
if !ok {
m.backupMu.Unlock()
return ErrUnregisteredChannel
}
m.backupMu.Unlock()
m.clientsMu.Lock()
client, ok := m.clients[blobType]
if !ok {
m.clientsMu.Unlock()
return fmt.Errorf("no client registered for blob type %s",
blobType)
}
m.clientsMu.Unlock()
return client.backupState(chanID, stateNum)
}
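// isDuplicateBackup is an illustrative helper that isolates the duplicate
// check performed by BackupState above: a backup is ignored when the channel
// already records a MaxHeight at or above the proposed state number.
func isDuplicateBackup(maxHeight fn.Option[uint64], stateNum uint64) bool {
	var duplicate bool
	maxHeight.WhenSome(func(h uint64) {
		if stateNum <= h {
			duplicate = true
		}
	})

	return duplicate
}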
// isChannelClosed can be used to check if the channel with the given ID has been
// closed. If it has been, the block height in which its closing transaction was
// mined will also be returned.
func (m *Manager) isChannelClosed(id lnwire.ChannelID) (bool, uint32,
error) {
chanSum, err := m.cfg.FetchClosedChannel(id)
if errors.Is(err, channeldb.ErrClosedChannelNotFound) {
return false, 0, nil
} else if err != nil {
return false, 0, err
}
return true, chanSum.CloseHeight, nil
}
// trackClosableSessions takes in a map of session IDs to the earliest block
// height at which the session should be deleted. For each of the sessions,
// a random delay is added to the block height and the session is added to the
// closableSessionQueue.
func (m *Manager) trackClosableSessions(
sessions map[wtdb.SessionID]uint32) error {
// For each closable session, add a random delay to its close
// height and add it to the closableSessionQueue.
for sID, blockHeight := range sessions {
delay, err := newRandomDelay(m.cfg.SessionCloseRange)
if err != nil {
return err
}
deleteHeight := blockHeight + delay
m.closableSessionQueue.Push(&sessionCloseItem{
sessionID: sID,
deleteHeight: deleteHeight,
})
}
return nil
}
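// exampleDeleteHeight is an illustrative sketch of how a single session's
// delete height is derived: the session's close height is pushed forward by
// a random delay drawn from the configured SessionCloseRange, spreading
// session deletions out over several blocks. It reuses the package-level
// newRandomDelay helper called above.
func exampleDeleteHeight(closeHeight, closeRange uint32) (uint32, error) {
	delay, err := newRandomDelay(closeRange)
	if err != nil {
		return 0, err
	}

	return closeHeight + delay, nil
}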
// handleChannelCloses listens for channel close events and marks channels as
// closed in the DB.
//
// NOTE: This method MUST be run as a goroutine.
func (m *Manager) handleChannelCloses(chanSub subscribe.Subscription) {
defer m.wg.Done()
log.Debugf("Starting channel close handler")
defer log.Debugf("Stopping channel close handler")
for {
select {
case update, ok := <-chanSub.Updates():
if !ok {
log.Debugf("Channel notifier has exited")
return
}
// We only care about channel-close events.
event, ok := update.(channelnotifier.ClosedChannelEvent)
if !ok {
continue
}
chanID := lnwire.NewChanIDFromOutPoint(
&event.CloseSummary.ChanPoint,
)
log.Debugf("Received ClosedChannelEvent for "+
"channel: %s", chanID)
err := m.handleClosedChannel(
chanID, event.CloseSummary.CloseHeight,
)
if err != nil {
log.Errorf("Could not handle channel close "+
"event for channel(%s): %v", chanID,
err)
}
case <-m.quit:
return
}
}
}
// handleClosedChannel handles the closure of a single channel. It will mark the
// channel as closed in the DB, then it will handle all the sessions that are
// now closable due to the channel closure.
func (m *Manager) handleClosedChannel(chanID lnwire.ChannelID,
closeHeight uint32) error {
m.backupMu.Lock()
defer m.backupMu.Unlock()
// We only care about channels registered with the tower client.
if _, ok := m.chanInfos[chanID]; !ok {
return nil
}
log.Debugf("Marking channel(%s) as closed", chanID)
sessions, err := m.cfg.DB.MarkChannelClosed(chanID, closeHeight)
if err != nil {
return fmt.Errorf("could not mark channel(%s) as closed: %w",
chanID, err)
}
closableSessions := make(map[wtdb.SessionID]uint32, len(sessions))
for _, sess := range sessions {
closableSessions[sess] = closeHeight
}
log.Debugf("Tracking %d new closable sessions as a result of "+
"closing channel %s", len(closableSessions), chanID)
err = m.trackClosableSessions(closableSessions)
if err != nil {
return fmt.Errorf("could not track closable sessions: %w", err)
}
delete(m.chanInfos, chanID)
return nil
}
// handleClosableSessions listens for new block notifications. For each block,
// it checks the closableSessionQueue to see if there is a closable session
// with a delete height smaller than or equal to the new block height. If
// there is, the tower is informed that it can delete the session, and we then
// also delete it from our DB.
func (m *Manager) handleClosableSessions(
blocksChan *chainntnfs.BlockEpochEvent) {
defer m.wg.Done()
log.Debug("Starting closable sessions handler")
defer log.Debug("Stopping closable sessions handler")
for {
select {
case newBlock := <-blocksChan.Epochs:
if newBlock == nil {
return
}
height := uint32(newBlock.Height)
for {
select {
case <-m.quit:
return
default:
}
// If there are no closable sessions that we
// need to handle, then we are done and can
// reevaluate when the next block comes.
item := m.closableSessionQueue.Top()
if item == nil {
break
}
// If there is a closable session but the delete
// height we have set for it is after the
// current block height, then our work is done.
if item.deleteHeight > height {
break
}
// Otherwise, we pop this item from the heap
// and handle it.
m.closableSessionQueue.Pop()
// Fetch the session from the DB so that we can
// extract the Tower info.
sess, err := m.cfg.DB.GetClientSession(
item.sessionID,
)
if err != nil {
log.Errorf("error calling "+
"GetClientSession for "+
"session %s: %v",
item.sessionID, err)
continue
}
// Get the appropriate client for this session's blob type.
m.clientsMu.Lock()
client, ok := m.clients[sess.Policy.BlobType]
if !ok {
m.clientsMu.Unlock()
log.Errorf("no client currently " +
"active for the session type")
continue
}
m.clientsMu.Unlock()
clientName, err := client.policy().BlobType.
Identifier()
if err != nil {
log.Errorf("could not get client "+
"identifier: %v", err)
continue
}
// Stop the session and remove it from the
// in-memory set.
err = client.stopAndRemoveSession(
item.sessionID,
)
if err != nil {
log.Errorf("could not remove "+
"session(%s) from in-memory "+
"set of the %s client: %v",
item.sessionID, clientName, err)
continue
}
err = client.deleteSessionFromTower(sess)
if err != nil {
log.Errorf("error deleting "+
"session %s from tower: %v",
sess.ID, err)
continue
}
err = m.cfg.DB.DeleteSession(item.sessionID)
if err != nil {
log.Errorf("could not delete "+
"session(%s) from DB: %w",
sess.ID, err)
continue
}
}
case <-m.quit:
return
}
}
}
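// getSweepScript returns the cached sweep pkscript for the given channel, if
// the channel has been registered with the manager.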
func (m *Manager) getSweepScript(id lnwire.ChannelID) ([]byte, bool) {
m.backupMu.Lock()
defer m.backupMu.Unlock()
summary, ok := m.chanInfos[id]
if !ok {
return nil, false
}
return summary.SweepPkScript, true
}