Merge pull request #2740 from wpaulino/gossip-sync-manager

discovery: introduce gossiper syncManager subsystem
Olaoluwa Osuntokun 2019-04-03 15:46:12 -07:00 committed by GitHub
commit 30f2b1ca01
14 changed files with 2834 additions and 1000 deletions

config.go

@@ -248,7 +248,7 @@ type config struct {
Color string `long:"color" description:"The color of the node in hex format (i.e. '#3399FF'). Used to customize node appearance in intelligence services"`
MinChanSize int64 `long:"minchansize" description:"The smallest channel size (in satoshis) that we should accept. Incoming channels smaller than this will be rejected"`
NoChanUpdates bool `long:"nochanupdates" description:"If specified, lnd will not request real-time channel updates from connected peers. This option should be used by routing nodes to save bandwidth."`
NumGraphSyncPeers int `long:"numgraphsyncpeers" description:"The number of peers that we should receive new graph updates from. This option can be tuned to save bandwidth for light clients or routing nodes."`
RejectPush bool `long:"rejectpush" description:"If true, lnd will not accept channel opening requests with non-zero push amounts. This should prevent accidental pushes to merchant nodes."`
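The new numgraphsyncpeers option is what feeds NumActiveSyncers below. A hedged lnd.conf sketch (the value shown is illustrative, not a recommendation):

[Application Options]
; Receive real-time graph updates from at most three peers; gossip
; syncers for all other peers remain passive, saving bandwidth.
numgraphsyncpeers=3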
@@ -335,6 +335,7 @@ func loadConfig() (*config, error) {
Alias: defaultAlias,
Color: defaultColor,
MinChanSize: int64(minChanFundingSize),
NumGraphSyncPeers: defaultMinPeers,
Tor: &torConfig{
SOCKS: defaultTorSOCKS,
DNS: defaultTorDNS,

discovery/gossiper.go

@@ -20,6 +20,7 @@ import (
"github.com/lightningnetwork/lnd/lnwire"
"github.com/lightningnetwork/lnd/multimutex"
"github.com/lightningnetwork/lnd/routing"
"github.com/lightningnetwork/lnd/ticker"
)
var (
@@ -75,7 +76,7 @@ type Config struct {
Router routing.ChannelGraphSource
// ChanSeries is an interface that provides access to a time series
// view of the current known channel graph. Each gossipSyncer enabled
// view of the current known channel graph. Each GossipSyncer enabled
// peer will utilize this in order to create and respond to channel
// graph time series queries.
ChanSeries ChannelGraphTimeSeries
@@ -143,6 +144,28 @@ type Config struct {
// TODO(roasbeef): extract ann crafting + sign from fundingMgr into
// here?
AnnSigner lnwallet.MessageSigner
// NumActiveSyncers is the number of peers we should have active
// syncers with. After reaching NumActiveSyncers, any future
// gossip syncers will be passive.
NumActiveSyncers int
// RotateTicker is a ticker responsible for notifying the SyncManager
// when it should rotate its active syncers. A single active syncer with
// a chansSynced state will be exchanged for a passive syncer in order
// to ensure we don't keep syncing with the same peers.
RotateTicker ticker.Ticker
// HistoricalSyncTicker is a ticker responsible for notifying the
// SyncManager when it should attempt a historical sync with a gossip
// sync peer.
HistoricalSyncTicker ticker.Ticker
// ActiveSyncerTimeoutTicker is a ticker responsible for notifying the
// SyncManager when it should attempt to start the next pending
// activeSyncer due to the current one not completing its state machine
// within the timeout.
ActiveSyncerTimeoutTicker ticker.Ticker
}
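For callers wiring up the gossiper, a minimal sketch of populating the new fields, assuming the exported defaults from discovery/sync_manager.go and lnd's ticker package (ticker.New for production use; the tests below use ticker.NewForce). The cfg.NumGraphSyncPeers and selfKey values are stand-ins for whatever the caller already has in scope:

gossiper := discovery.New(discovery.Config{
	// ... existing fields elided ...
	NumActiveSyncers:          cfg.NumGraphSyncPeers,
	RotateTicker:              ticker.New(discovery.DefaultSyncerRotationInterval),
	HistoricalSyncTicker:      ticker.New(discovery.DefaultHistoricalSyncInterval),
	ActiveSyncerTimeoutTicker: ticker.New(discovery.DefaultActiveSyncerTimeout),
}, selfKey)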
// AuthenticatedGossiper is a subsystem which is responsible for receiving
@@ -212,13 +235,14 @@ type AuthenticatedGossiper struct {
rejectMtx sync.RWMutex
recentRejects map[uint64]struct{}
// peerSyncers keeps track of all the gossip syncers we're maintain for
// peers that understand this mode of operation. When we go to send out
// new updates, for all peers in the map, we'll send the messages
// directly to their gossiper, rather than broadcasting them. With this
// change, we ensure we filter out all updates properly.
syncerMtx sync.RWMutex
peerSyncers map[routing.Vertex]*gossipSyncer
// syncMgr is a subsystem responsible for managing the gossip syncers
// for peers currently connected. When a new peer is connected, the
// manager will create its accompanying gossip syncer and determine
// whether it should have an activeSync or passiveSync sync type based
// on how many other gossip syncers are currently active. Any activeSync
// gossip syncers are started in a round-robin manner to ensure we're
// not syncing with multiple peers at the same time.
syncMgr *SyncManager
// reliableSender is a subsystem responsible for handling reliable
// message send requests to peers. This should only be used for channels
@@ -243,7 +267,14 @@ func New(cfg Config, selfKey *btcec.PublicKey) *AuthenticatedGossiper {
prematureChannelUpdates: make(map[uint64][]*networkMsg),
channelMtx: multimutex.NewMutex(),
recentRejects: make(map[uint64]struct{}),
peerSyncers: make(map[routing.Vertex]*gossipSyncer),
syncMgr: newSyncManager(&SyncManagerCfg{
ChainHash: cfg.ChainHash,
ChanSeries: cfg.ChanSeries,
RotateTicker: cfg.RotateTicker,
HistoricalSyncTicker: cfg.HistoricalSyncTicker,
ActiveSyncerTimeoutTicker: cfg.ActiveSyncerTimeoutTicker,
NumActiveSyncers: cfg.NumActiveSyncers,
}),
}
gossiper.reliableSender = newReliableSender(&reliableSenderCfg{
@@ -419,6 +450,8 @@ func (d *AuthenticatedGossiper) Start() error {
return err
}
d.syncMgr.Start()
d.wg.Add(1)
go d.networkHandler()
@@ -435,11 +468,7 @@ func (d *AuthenticatedGossiper) Stop() {
d.blockEpochs.Cancel()
d.syncerMtx.RLock()
for _, syncer := range d.peerSyncers {
syncer.Stop()
}
d.syncerMtx.RUnlock()
d.syncMgr.Stop()
close(d.quit)
d.wg.Wait()
@@ -463,7 +492,7 @@ func (d *AuthenticatedGossiper) ProcessRemoteAnnouncement(msg lnwire.Message,
errChan := make(chan error, 1)
// For messages in the known set of channel series queries, we'll
// dispatch the message directly to the gossipSyncer, and skip the main
// dispatch the message directly to the GossipSyncer, and skip the main
// processing loop.
switch m := msg.(type) {
case *lnwire.QueryShortChanIDs,
@@ -471,12 +500,12 @@ func (d *AuthenticatedGossiper) ProcessRemoteAnnouncement(msg lnwire.Message,
*lnwire.ReplyChannelRange,
*lnwire.ReplyShortChanIDsEnd:
syncer, err := d.findGossipSyncer(peer.IdentityKey())
if err != nil {
log.Warnf("Unable to find gossip syncer for "+
"peer=%x: %v", peer.PubKey(), err)
syncer, ok := d.syncMgr.GossipSyncer(peer.PubKey())
if !ok {
log.Warnf("Gossip syncer for peer=%x not found",
peer.PubKey())
errChan <- err
errChan <- ErrGossipSyncerNotFound
return errChan
}
@@ -488,24 +517,22 @@ func (d *AuthenticatedGossiper) ProcessRemoteAnnouncement(msg lnwire.Message,
return errChan
// If a peer is updating its current update horizon, then we'll dispatch
// that directly to the proper gossipSyncer.
// that directly to the proper GossipSyncer.
case *lnwire.GossipTimestampRange:
syncer, err := d.findGossipSyncer(peer.IdentityKey())
if err != nil {
log.Warnf("Unable to find gossip syncer for "+
"peer=%x: %v", peer.PubKey(), err)
syncer, ok := d.syncMgr.GossipSyncer(peer.PubKey())
if !ok {
log.Warnf("Gossip syncer for peer=%x not found",
peer.PubKey())
errChan <- err
errChan <- ErrGossipSyncerNotFound
return errChan
}
// If we've found the message target, then we'll dispatch the
// message directly to it.
err = syncer.ApplyGossipFilter(m)
if err != nil {
log.Warnf("unable to apply gossip "+
"filter for peer=%x: %v",
peer.PubKey(), err)
if err := syncer.ApplyGossipFilter(m); err != nil {
log.Warnf("Unable to apply gossip filter for peer=%x: "+
"%v", peer.PubKey(), err)
errChan <- err
return errChan
@@ -590,10 +617,10 @@ type msgWithSenders struct {
}
// mergeSyncerMap is used to merge the set of senders of a particular message
// with peers that we have an active gossipSyncer with. We do this to ensure
// with peers that we have an active GossipSyncer with. We do this to ensure
// that we don't broadcast messages to any peers that we have active gossip
// syncers for.
func (m *msgWithSenders) mergeSyncerMap(syncers map[routing.Vertex]*gossipSyncer) {
func (m *msgWithSenders) mergeSyncerMap(syncers map[routing.Vertex]*GossipSyncer) {
for peerPub := range syncers {
m.senders[peerPub] = struct{}{}
}
@@ -812,28 +839,6 @@ func (d *deDupedAnnouncements) Emit() []msgWithSenders {
return msgs
}
// findGossipSyncer is a utility method used by the gossiper to locate the
// gossip syncer for an inbound message so we can properly dispatch the
// incoming message. If a gossip syncer isn't found, then one will be created
// for the target peer.
func (d *AuthenticatedGossiper) findGossipSyncer(pub *btcec.PublicKey) (
*gossipSyncer, error) {
target := routing.NewVertex(pub)
// First, we'll try to find an existing gossiper for this peer.
d.syncerMtx.RLock()
syncer, ok := d.peerSyncers[target]
d.syncerMtx.RUnlock()
// If one exists, then we'll return it directly.
if ok {
return syncer, nil
}
return nil, ErrGossipSyncerNotFound
}
// networkHandler is the primary goroutine that drives this service. The roles
// of this goroutine includes answering queries related to the state of the
// network, syncing up newly connected peers, and also periodically
@@ -1028,12 +1033,7 @@ func (d *AuthenticatedGossiper) networkHandler() {
// For the set of peers that have active gossip
// syncers, we'll collect their pubkeys so we can avoid
// sending them the full message blast below.
d.syncerMtx.RLock()
syncerPeers := make(map[routing.Vertex]*gossipSyncer)
for peerPub, syncer := range d.peerSyncers {
syncerPeers[peerPub] = syncer
}
d.syncerMtx.RUnlock()
syncerPeers := d.syncMgr.GossipSyncers()
log.Infof("Broadcasting batch of %v new announcements",
len(announcementBatch))
@@ -1088,62 +1088,16 @@ func (d *AuthenticatedGossiper) networkHandler() {
// InitSyncState is called by outside sub-systems when a connection is
// established to a new peer that understands how to perform channel range
// queries. We'll allocate a new gossip syncer for it, and start any goroutines
// needed to handle new queries. The recvUpdates bool indicates if we should
// continue to receive real-time updates from the remote peer once we've synced
// channel state.
func (d *AuthenticatedGossiper) InitSyncState(syncPeer lnpeer.Peer,
recvUpdates bool) {
d.syncerMtx.Lock()
defer d.syncerMtx.Unlock()
// If we already have a syncer, then we'll exit early as we don't want
// to override it.
nodeID := routing.Vertex(syncPeer.PubKey())
if _, ok := d.peerSyncers[nodeID]; ok {
return
}
log.Infof("Creating new gossipSyncer for peer=%x", nodeID[:])
encoding := lnwire.EncodingSortedPlain
syncer := newGossiperSyncer(gossipSyncerCfg{
chainHash: d.cfg.ChainHash,
syncChanUpdates: recvUpdates,
channelSeries: d.cfg.ChanSeries,
encodingType: encoding,
chunkSize: encodingTypeToChunkSize[encoding],
sendToPeer: func(msgs ...lnwire.Message) error {
return syncPeer.SendMessageLazy(false, msgs...)
},
})
copy(syncer.peerPub[:], nodeID[:])
d.peerSyncers[nodeID] = syncer
syncer.Start()
// needed to handle new queries.
func (d *AuthenticatedGossiper) InitSyncState(syncPeer lnpeer.Peer) {
d.syncMgr.InitSyncState(syncPeer)
}
// PruneSyncState is called by outside sub-systems once a peer that we were
// previously connected to has been disconnected. In this case we can stop the
// existing gossipSyncer assigned to the peer and free up resources.
func (d *AuthenticatedGossiper) PruneSyncState(peer *btcec.PublicKey) {
d.syncerMtx.Lock()
defer d.syncerMtx.Unlock()
log.Infof("Removing gossipSyncer for peer=%x",
peer.SerializeCompressed())
vertex := routing.NewVertex(peer)
syncer, ok := d.peerSyncers[vertex]
if !ok {
return
}
syncer.Stop()
delete(d.peerSyncers, vertex)
return
// existing GossipSyncer assigned to the peer and free up resources.
func (d *AuthenticatedGossiper) PruneSyncState(peer routing.Vertex) {
d.syncMgr.PruneSyncState(peer)
}
// isRecentlyRejectedMsg returns true if we recently rejected a message, and
@@ -2514,3 +2468,8 @@ func (d *AuthenticatedGossiper) updateChannel(info *channeldb.ChannelEdgeInfo,
return chanAnn, chanUpdate, err
}
// SyncManager returns the gossiper's SyncManager instance.
func (d *AuthenticatedGossiper) SyncManager() *SyncManager {
return d.syncMgr
}
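A hedged sketch of how an outside subsystem is expected to drive these two hooks over a peer's lifetime; the server methods named here are hypothetical:

// On connect: allocate a GossipSyncer for the new peer.
func (s *server) peerOnline(p lnpeer.Peer) {
	s.gossiper.InitSyncState(p)
}

// On disconnect: stop the peer's GossipSyncer and free its resources.
func (s *server) peerOffline(pub routing.Vertex) {
	s.gossiper.PruneSyncState(pub)
}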

discovery/gossiper_test.go

@@ -27,6 +27,7 @@ import (
"github.com/lightningnetwork/lnd/lntest"
"github.com/lightningnetwork/lnd/lnwire"
"github.com/lightningnetwork/lnd/routing"
"github.com/lightningnetwork/lnd/ticker"
)
var (
@@ -713,12 +714,16 @@ func createTestCtx(startHeight uint32) (*testCtx, func(), error) {
c := make(chan struct{})
return c
},
Router: router,
TrickleDelay: trickleDelay,
RetransmitDelay: retransmitDelay,
ProofMatureDelta: proofMatureDelta,
WaitingProofStore: waitingProofStore,
MessageStore: newMockMessageStore(),
Router: router,
TrickleDelay: trickleDelay,
RetransmitDelay: retransmitDelay,
ProofMatureDelta: proofMatureDelta,
WaitingProofStore: waitingProofStore,
MessageStore: newMockMessageStore(),
RotateTicker: ticker.NewForce(DefaultSyncerRotationInterval),
HistoricalSyncTicker: ticker.NewForce(DefaultHistoricalSyncInterval),
ActiveSyncerTimeoutTicker: ticker.NewForce(DefaultActiveSyncerTimeout),
NumActiveSyncers: 3,
}, nodeKeyPub1)
if err := gossiper.Start(); err != nil {
@@ -1447,16 +1452,20 @@ func TestSignatureAnnouncementRetryAtStartup(t *testing.T) {
// the message to the peer.
ctx.gossiper.Stop()
gossiper := New(Config{
Notifier: ctx.gossiper.cfg.Notifier,
Broadcast: ctx.gossiper.cfg.Broadcast,
NotifyWhenOnline: ctx.gossiper.reliableSender.cfg.NotifyWhenOnline,
NotifyWhenOffline: ctx.gossiper.reliableSender.cfg.NotifyWhenOffline,
Router: ctx.gossiper.cfg.Router,
TrickleDelay: trickleDelay,
RetransmitDelay: retransmitDelay,
ProofMatureDelta: proofMatureDelta,
WaitingProofStore: ctx.gossiper.cfg.WaitingProofStore,
MessageStore: ctx.gossiper.cfg.MessageStore,
Notifier: ctx.gossiper.cfg.Notifier,
Broadcast: ctx.gossiper.cfg.Broadcast,
NotifyWhenOnline: ctx.gossiper.reliableSender.cfg.NotifyWhenOnline,
NotifyWhenOffline: ctx.gossiper.reliableSender.cfg.NotifyWhenOffline,
Router: ctx.gossiper.cfg.Router,
TrickleDelay: trickleDelay,
RetransmitDelay: retransmitDelay,
ProofMatureDelta: proofMatureDelta,
WaitingProofStore: ctx.gossiper.cfg.WaitingProofStore,
MessageStore: ctx.gossiper.cfg.MessageStore,
RotateTicker: ticker.NewForce(DefaultSyncerRotationInterval),
HistoricalSyncTicker: ticker.NewForce(DefaultHistoricalSyncInterval),
ActiveSyncerTimeoutTicker: ticker.NewForce(DefaultActiveSyncerTimeout),
NumActiveSyncers: 3,
}, ctx.gossiper.selfKey)
if err != nil {
t.Fatalf("unable to recreate gossiper: %v", err)

discovery/reliable_sender_test.go

@@ -8,6 +8,7 @@ import (
"github.com/btcsuite/btcd/btcec"
"github.com/davecgh/go-spew/spew"
"github.com/lightningnetwork/lnd/lnpeer"
"github.com/lightningnetwork/lnd/lntest"
"github.com/lightningnetwork/lnd/lnwire"
)
@@ -59,29 +60,6 @@ func assertMsgsSent(t *testing.T, msgChan chan lnwire.Message,
}
}
// waitPredicate is a helper test function that will wait for a timeout period
// of time until the passed predicate returns true.
func waitPredicate(t *testing.T, timeout time.Duration, pred func() bool) {
t.Helper()
const pollInterval = 20 * time.Millisecond
exitTimer := time.After(timeout)
for {
<-time.After(pollInterval)
select {
case <-exitTimer:
t.Fatalf("predicate not satisfied after timeout")
default:
}
if pred() {
return
}
}
}
// TestReliableSenderFlow ensures that the flow for sending messages reliably to
// a peer while taking into account its connection lifecycle works as expected.
func TestReliableSenderFlow(t *testing.T) {
@@ -262,27 +240,23 @@ func TestReliableSenderStaleMessages(t *testing.T) {
// message store since it is seen as stale and has been sent at least
// once. Once the message is removed, the peerHandler should be torn
// down as there are no longer any pending messages within the store.
var predErr error
waitPredicate(t, time.Second, func() bool {
err := lntest.WaitNoError(func() error {
msgs, err := reliableSender.cfg.MessageStore.MessagesForPeer(
peerPubKey,
)
if err != nil {
predErr = fmt.Errorf("unable to retrieve messages for "+
return fmt.Errorf("unable to retrieve messages for "+
"peer: %v", err)
return false
}
if len(msgs) != 0 {
predErr = fmt.Errorf("expected to not find any "+
return fmt.Errorf("expected to not find any "+
"messages for peer, found %d", len(msgs))
return false
}
predErr = nil
return true
})
if predErr != nil {
t.Fatal(predErr)
return nil
}, time.Second)
if err != nil {
t.Fatal(err)
}
// Override IsMsgStale to no longer mark messages as stale.

discovery/sync_manager.go (new file, 735 lines)

@@ -0,0 +1,735 @@
package discovery
import (
"container/list"
"errors"
"sync"
"time"
"github.com/btcsuite/btcd/chaincfg/chainhash"
"github.com/lightningnetwork/lnd/lnpeer"
"github.com/lightningnetwork/lnd/lnwire"
"github.com/lightningnetwork/lnd/routing"
"github.com/lightningnetwork/lnd/ticker"
)
const (
// DefaultSyncerRotationInterval is the default interval in which we'll
// rotate a single active syncer.
DefaultSyncerRotationInterval = 20 * time.Minute
// DefaultHistoricalSyncInterval is the default interval in which we'll
// force a historical sync to ensure we have as much of the public
// network as possible.
DefaultHistoricalSyncInterval = 20 * time.Minute
// DefaultActiveSyncerTimeout is the default timeout we'll wait for an
// active syncer to complete its state machine and reach its final
// chansSynced state.
DefaultActiveSyncerTimeout = 5 * time.Minute
)
var (
// ErrSyncManagerExiting is an error returned when we attempt to
// start/stop a gossip syncer for a connected/disconnected peer, but the
// SyncManager has already been stopped.
ErrSyncManagerExiting = errors.New("sync manager exiting")
)
// staleActiveSyncer is an internal message the SyncManager will use in order to
// handle a peer corresponding to an active syncer being disconnected.
type staleActiveSyncer struct {
// syncer is the active syncer to be removed.
syncer *GossipSyncer
// transitioned, if true, signals that the active GossipSyncer is stale
// due to being transitioned to a PassiveSync state.
transitioned bool
// done serves as a signal to the caller that the SyncManager's internal
// state correctly reflects the stale active syncer. This is needed to
// ensure we always create a new syncer for a flappy peer after they
// disconnect if they happened to be an active syncer.
done chan struct{}
}
// SyncManagerCfg contains all of the dependencies required for the SyncManager
// to carry out its duties.
type SyncManagerCfg struct {
// ChainHash is a hash that indicates the specific network of the active
// chain.
ChainHash chainhash.Hash
// ChanSeries is an interface that provides access to a time series view
// of the current known channel graph. Each GossipSyncer enabled peer
// will utilize this in order to create and respond to channel graph
// time series queries.
ChanSeries ChannelGraphTimeSeries
// NumActiveSyncers is the number of peers we should have active
// syncers with. After reaching NumActiveSyncers, any future
// gossip syncers will be passive.
NumActiveSyncers int
// RotateTicker is a ticker responsible for notifying the SyncManager
// when it should rotate its active syncers. A single active syncer with
// a chansSynced state will be exchanged for a passive syncer in order
// to ensure we don't keep syncing with the same peers.
RotateTicker ticker.Ticker
// HistoricalSyncTicker is a ticker responsible for notifying the
// SyncManager when it should attempt a historical sync with a gossip
// sync peer.
HistoricalSyncTicker ticker.Ticker
// ActiveSyncerTimeoutTicker is a ticker responsible for notifying the
// SyncManager when it should attempt to start the next pending
// activeSyncer due to the current one not completing its state machine
// within the timeout.
ActiveSyncerTimeoutTicker ticker.Ticker
}
// SyncManager is a subsystem of the gossiper that manages the gossip syncers
// for peers currently connected. When a new peer is connected, the manager will
// create its accompanying gossip syncer and determine whether it should have an
// ActiveSync or PassiveSync sync type based on how many other gossip syncers
// are currently active. Any ActiveSync gossip syncers are started in a
// round-robin manner to ensure we're not syncing with multiple peers at the
// same time. The first GossipSyncer registered with the SyncManager will
// attempt a historical sync to ensure we have as much of the public channel
// graph as possible.
type SyncManager struct {
start sync.Once
stop sync.Once
cfg SyncManagerCfg
// historicalSync allows us to perform an initial historical sync only
// _once_ with a peer during the SyncManager's startup.
historicalSync sync.Once
// activeSyncers is the set of all syncers from which we are currently
// receiving graph updates. The number of possible active syncers
// is bounded by NumActiveSyncers.
activeSyncers map[routing.Vertex]*GossipSyncer
// inactiveSyncers is the set of all syncers from which we are not
// currently receiving new graph updates.
inactiveSyncers map[routing.Vertex]*GossipSyncer
// pendingActiveSyncers is a map that tracks our set of pending active
// syncers. This map will be queried when choosing the next pending
// active syncer in the queue to ensure it is not stale.
pendingActiveSyncers map[routing.Vertex]*GossipSyncer
// pendingActiveSyncerQueue is the list of active syncers which are
// pending to be started. Syncers will be added to this list through the
// newActiveSyncers and staleActiveSyncers channels.
pendingActiveSyncerQueue *list.List
// newActiveSyncers is a channel that will serve as a signal to the
// roundRobinHandler to allow it to transition the next pending active
// syncer in the queue.
newActiveSyncers chan struct{}
// staleActiveSyncers is a channel through which we'll send any stale
// active syncers that should be removed from the round-robin.
staleActiveSyncers chan *staleActiveSyncer
sync.Mutex
wg sync.WaitGroup
quit chan struct{}
}
// newSyncManager constructs a new SyncManager backed by the given config.
func newSyncManager(cfg *SyncManagerCfg) *SyncManager {
return &SyncManager{
cfg: *cfg,
activeSyncers: make(
map[routing.Vertex]*GossipSyncer, cfg.NumActiveSyncers,
),
inactiveSyncers: make(map[routing.Vertex]*GossipSyncer),
pendingActiveSyncers: make(map[routing.Vertex]*GossipSyncer),
pendingActiveSyncerQueue: list.New(),
newActiveSyncers: make(chan struct{}),
staleActiveSyncers: make(chan *staleActiveSyncer),
quit: make(chan struct{}),
}
}
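Putting the API together, a sketch of the subsystem's lifecycle as the gossiper exercises it (peer is assumed to satisfy lnpeer.Peer):

mgr := newSyncManager(cfg)
mgr.Start()
defer mgr.Stop()

// Register a freshly connected peer; the first registered peer also
// triggers the one-time historical sync.
mgr.InitSyncState(peer)

// Later, look up the peer's syncer, e.g. to dispatch gossip queries.
if s, ok := mgr.GossipSyncer(routing.Vertex(peer.PubKey())); ok {
	_ = s // dispatch messages to s...
}

// Tear the syncer down once the peer disconnects.
mgr.PruneSyncState(routing.Vertex(peer.PubKey()))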
// Start starts the SyncManager in order to properly carry out its duties.
func (m *SyncManager) Start() {
m.start.Do(func() {
m.wg.Add(2)
go m.syncerHandler()
go m.roundRobinHandler()
})
}
// Stop stops the SyncManager from performing its duties.
func (m *SyncManager) Stop() {
m.stop.Do(func() {
close(m.quit)
m.wg.Wait()
m.Lock()
defer m.Unlock()
for _, syncer := range m.inactiveSyncers {
syncer.Stop()
}
for _, syncer := range m.pendingActiveSyncers {
syncer.Stop()
}
for _, syncer := range m.activeSyncers {
syncer.Stop()
}
})
}
// syncerHandler is the SyncManager's main event loop responsible for:
//
// 1. Finding new peers to receive graph updates from to ensure we don't only
// receive them from the same set of peers.
//
// 2. Finding new peers to force a historical sync with to ensure we have as
// much of the public network as possible.
//
// NOTE: This must be run as a goroutine.
func (m *SyncManager) syncerHandler() {
defer m.wg.Done()
m.cfg.RotateTicker.Resume()
defer m.cfg.RotateTicker.Stop()
m.cfg.HistoricalSyncTicker.Resume()
defer m.cfg.HistoricalSyncTicker.Stop()
for {
select {
// Our RotateTicker has ticked, so we'll attempt to rotate a
// single active syncer with a passive one.
case <-m.cfg.RotateTicker.Ticks():
m.rotateActiveSyncerCandidate()
// Our HistoricalSyncTicker has ticked, so we'll randomly select
// a peer and force a historical sync with them.
case <-m.cfg.HistoricalSyncTicker.Ticks():
m.forceHistoricalSync()
case <-m.quit:
return
}
}
}
// signalNewActiveSyncer sends a signal to the roundRobinHandler to ensure it
// transitions any pending active syncers.
func (m *SyncManager) signalNewActiveSyncer() {
select {
case m.newActiveSyncers <- struct{}{}:
case <-m.quit:
}
}
// signalStaleActiveSyncer removes the syncer for the given peer from the
// round-robin queue.
func (m *SyncManager) signalStaleActiveSyncer(s *GossipSyncer, transitioned bool) {
done := make(chan struct{})
select {
case m.staleActiveSyncers <- &staleActiveSyncer{
syncer: s,
transitioned: transitioned,
done: done,
}:
case <-m.quit:
}
// Before returning to the caller, we'll wait for the roundRobinHandler
// to signal us that the SyncManager has correctly updated its internal
// state after handling the stale active syncer.
select {
case <-done:
case <-m.quit:
}
}
// roundRobinHandler is the SyncManager's event loop responsible for managing
// the round-robin queue of our active syncers to ensure they don't overlap and
// request the same set of channels, which significantly reduces bandwidth
// usage.
//
// NOTE: This must be run as a goroutine.
func (m *SyncManager) roundRobinHandler() {
defer m.wg.Done()
defer m.cfg.ActiveSyncerTimeoutTicker.Stop()
var (
// current will hold the current active syncer we're waiting on to
// complete its state machine.
current *GossipSyncer
// transitionNext will be responsible for containing the signal
// of when the current active syncer has completed its state
// machine. This signal allows us to transition the next pending
// active syncer, if any.
transitionNext chan struct{}
)
// transitionNextSyncer is a helper closure that we'll use to transition
// the next syncer queued up. If there aren't any, this will act as a
// NOP.
transitionNextSyncer := func() {
m.Lock()
current = m.nextPendingActiveSyncer()
m.Unlock()
for current != nil {
// We'll avoid performing the transition with the lock
// as it can potentially stall the SyncManager due to
// the syncTransitionTimeout.
err := m.transitionPassiveSyncer(current)
// If we timed out attempting to transition the syncer,
// we'll re-queue it to retry at a later time and move
// on to the next.
if err == ErrSyncTransitionTimeout {
log.Debugf("Timed out attempting to "+
"transition pending active "+
"GossipSyncer(%x)", current.cfg.peerPub)
m.Lock()
m.queueActiveSyncer(current)
current = m.nextPendingActiveSyncer()
m.Unlock()
continue
}
if err != nil {
log.Errorf("Unable to transition pending "+
"active GossipSyncer(%x): %v",
current.cfg.peerPub, err)
m.Lock()
current = m.nextPendingActiveSyncer()
m.Unlock()
continue
}
// The transition succeeded, so we'll set our signal to
// know when we should attempt to transition the next
// pending active syncer in our queue.
transitionNext = current.ResetSyncedSignal()
m.cfg.ActiveSyncerTimeoutTicker.Resume()
return
}
transitionNext = nil
m.cfg.ActiveSyncerTimeoutTicker.Pause()
}
for {
select {
// A new active syncer signal has been received, which indicates
// a new pending active syncer has been added to our queue.
// We'll only attempt to transition it now if we're not already
// in the middle of transitioning another one. We do this to
// ensure we don't overlap when requesting channels from
// different peers.
case <-m.newActiveSyncers:
if current == nil {
transitionNextSyncer()
}
// A stale active syncer has been received, so we'll need to
// remove them from our queue. If we are currently waiting for
// its state machine to complete, we'll move on to the next
// active syncer in the queue.
case staleActiveSyncer := <-m.staleActiveSyncers:
s := staleActiveSyncer.syncer
m.Lock()
// If the syncer has transitioned from an ActiveSync
// type, rather than disconnecting, we'll include it in
// the set of inactive syncers.
if staleActiveSyncer.transitioned {
m.inactiveSyncers[s.cfg.peerPub] = s
}
// Remove the internal active syncer references for this
// peer.
delete(m.pendingActiveSyncers, s.cfg.peerPub)
delete(m.activeSyncers, s.cfg.peerPub)
// We'll then attempt to find a passive syncer that can
// replace the stale active syncer.
newActiveSyncer := m.chooseRandomSyncer(nil, false)
if newActiveSyncer != nil {
m.queueActiveSyncer(newActiveSyncer)
}
m.Unlock()
// Signal to the caller that they can now proceed since
// the SyncManager's state correctly reflects the
// stale active syncer.
close(staleActiveSyncer.done)
// If we're not currently waiting for an active syncer
// to reach its terminal state, or if the syncer we're
// waiting on belongs to the peer being
// disconnected/transitioned, then we'll move on to the
// next active syncer in our queue.
if current == nil || (current != nil &&
current.cfg.peerPub == s.cfg.peerPub) {
transitionNextSyncer()
}
// Our current active syncer has reached its terminal
// chansSynced state, so we'll proceed to transitioning the next
// pending active syncer if there is one.
case <-transitionNext:
transitionNextSyncer()
// We've timed out waiting for the current active syncer to
// reach its terminal chansSynced state, so we'll just
// move on to the next and avoid retrying as it's already been
// transitioned.
case <-m.cfg.ActiveSyncerTimeoutTicker.Ticks():
log.Warnf("Timed out waiting for GossipSyncer(%x) to "+
"be fully synced", current.cfg.peerPub)
transitionNextSyncer()
case <-m.quit:
return
}
}
}
// queueActiveSyncer queues the given pending active gossip syncer to the end of
// the round-robin queue.
func (m *SyncManager) queueActiveSyncer(s *GossipSyncer) {
log.Debugf("Queueing next pending active GossipSyncer(%x)",
s.cfg.peerPub)
delete(m.inactiveSyncers, s.cfg.peerPub)
m.pendingActiveSyncers[s.cfg.peerPub] = s
m.pendingActiveSyncerQueue.PushBack(s)
}
// nextPendingActiveSyncer returns the next active syncer pending to be
// transitioned. If there aren't any, then `nil` is returned.
func (m *SyncManager) nextPendingActiveSyncer() *GossipSyncer {
next := m.pendingActiveSyncerQueue.Front()
for next != nil {
s := m.pendingActiveSyncerQueue.Remove(next).(*GossipSyncer)
// If the next pending active syncer is no longer in our lookup
// map, then the corresponding peer has disconnected, so we'll
// skip them.
if _, ok := m.pendingActiveSyncers[s.cfg.peerPub]; !ok {
next = m.pendingActiveSyncerQueue.Front()
continue
}
return s
}
return nil
}
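The queue pairs a container/list with the pendingActiveSyncers lookup map so that a disconnecting peer can be invalidated in O(1) without scanning the list; the actual removal is deferred until dequeue. The same pattern reduced to a standalone sketch, with hypothetical names:

queue := list.New()
alive := make(map[string]struct{})

// Enqueue: record the item in both structures.
queue.PushBack("peerA")
alive["peerA"] = struct{}{}

// Invalidate cheaply, without touching the list.
delete(alive, "peerA")

// Dequeue: lazily skip anything no longer in the map.
for e := queue.Front(); e != nil; e = queue.Front() {
	id := queue.Remove(e).(string)
	if _, ok := alive[id]; !ok {
		continue // stale entry left behind by an invalidation
	}
	// id is the next live entry...
	break
}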
// rotateActiveSyncerCandidate rotates a single active syncer. In order to
// achieve this, the active syncer must be in a chansSynced state in order to
// process the sync transition.
func (m *SyncManager) rotateActiveSyncerCandidate() {
// If we don't have a candidate to rotate with, we can return early.
m.Lock()
candidate := m.chooseRandomSyncer(nil, false)
if candidate == nil {
m.Unlock()
log.Debug("No eligible candidate to rotate active syncer")
return
}
// We'll choose an active syncer at random that's within a chansSynced
// state to rotate.
var activeSyncer *GossipSyncer
for _, s := range m.activeSyncers {
// The active syncer must be in a chansSynced state in order to
// process sync transitions.
if s.syncState() != chansSynced {
continue
}
activeSyncer = s
break
}
m.Unlock()
// If we couldn't find an eligible one, we can return early.
if activeSyncer == nil {
log.Debug("No eligible active syncer to rotate")
return
}
// Otherwise, we'll attempt to transition each syncer to their
// respective new sync type. We'll avoid performing the transition with
// the lock as it can potentially stall the SyncManager due to the
// syncTransitionTimeout.
if err := m.transitionActiveSyncer(activeSyncer); err != nil {
log.Errorf("Unable to transition active "+
"GossipSyncer(%x): %v", activeSyncer.cfg.peerPub, err)
return
}
m.Lock()
m.queueActiveSyncer(candidate)
m.Unlock()
m.signalNewActiveSyncer()
}
// transitionActiveSyncer transitions an active syncer to a passive one.
func (m *SyncManager) transitionActiveSyncer(s *GossipSyncer) error {
log.Debugf("Transitioning active GossipSyncer(%x) to passive",
s.cfg.peerPub)
if err := s.ProcessSyncTransition(PassiveSync); err != nil {
return err
}
m.signalStaleActiveSyncer(s, true)
return nil
}
// transitionPassiveSyncer transitions a passive syncer to an active one.
func (m *SyncManager) transitionPassiveSyncer(s *GossipSyncer) error {
log.Debugf("Transitioning passive GossipSyncer(%x) to active",
s.cfg.peerPub)
if err := s.ProcessSyncTransition(ActiveSync); err != nil {
return err
}
m.Lock()
m.activeSyncers[s.cfg.peerPub] = s
delete(m.pendingActiveSyncers, s.cfg.peerPub)
m.Unlock()
return nil
}
// forceHistoricalSync chooses a syncer with a remote peer at random and forces
// a historical sync with it.
func (m *SyncManager) forceHistoricalSync() {
m.Lock()
defer m.Unlock()
// We'll choose a random peer with whom we can perform a historical
// sync. We'll set useActive to true to make sure we can still do one if
// we don't happen to have any non-active syncers.
candidatesChosen := make(map[routing.Vertex]struct{})
s := m.chooseRandomSyncer(candidatesChosen, true)
for s != nil {
// Blacklist the candidate to ensure it's not chosen again.
candidatesChosen[s.cfg.peerPub] = struct{}{}
err := s.historicalSync()
if err == nil {
return
}
log.Errorf("Unable to perform historical sync with "+
"GossipSyncer(%x): %v", s.cfg.peerPub, err)
s = m.chooseRandomSyncer(candidatesChosen, true)
}
}
// chooseRandomSyncer returns a random non-active syncer that's eligible for a
// sync transition. A blacklist can be used to skip any previously chosen
// candidates, and setting useActive to true widens the selection to include
// active syncers as well. The randomness comes from Go's unspecified map
// iteration order.
//
// NOTE: It's possible for a nil value to be returned if there are no eligible
// candidate syncers.
//
// NOTE: This method must be called with the SyncManager's lock held.
func (m *SyncManager) chooseRandomSyncer(blacklist map[routing.Vertex]struct{},
useActive bool) *GossipSyncer {
eligible := func(s *GossipSyncer) bool {
// Skip any syncers that exist within the blacklist.
if blacklist != nil {
if _, ok := blacklist[s.cfg.peerPub]; ok {
return false
}
}
// Only syncers in a chansSynced state are viable for sync
// transitions, so skip any that aren't.
return s.syncState() == chansSynced
}
for _, s := range m.inactiveSyncers {
if !eligible(s) {
continue
}
return s
}
if useActive {
for _, s := range m.activeSyncers {
if !eligible(s) {
continue
}
return s
}
}
return nil
}
// InitSyncState is called by outside sub-systems when a connection is
// established to a new peer that understands how to perform channel range
// queries. We'll allocate a new GossipSyncer for it, and start any goroutines
// needed to handle new queries. The first GossipSyncer registered with the
// SyncManager will attempt a historical sync to ensure we have as much of the
// public channel graph as possible.
//
// TODO(wilmer): Only mark as ActiveSync if this isn't a channel peer.
func (m *SyncManager) InitSyncState(peer lnpeer.Peer) {
// If we already have a syncer, then we'll exit early as we don't want
// to override it.
nodeID := routing.Vertex(peer.PubKey())
if _, ok := m.GossipSyncer(nodeID); ok {
return
}
log.Infof("Creating new GossipSyncer for peer=%x", nodeID[:])
encoding := lnwire.EncodingSortedPlain
s := newGossipSyncer(gossipSyncerCfg{
chainHash: m.cfg.ChainHash,
peerPub: nodeID,
channelSeries: m.cfg.ChanSeries,
encodingType: encoding,
chunkSize: encodingTypeToChunkSize[encoding],
sendToPeer: func(msgs ...lnwire.Message) error {
return peer.SendMessage(false, msgs...)
},
})
// Gossip syncers are initialized by default as passive and in a
// chansSynced state so that they can reply to any peer queries or
// handle any sync transitions.
s.setSyncType(PassiveSync)
s.setSyncState(chansSynced)
s.Start()
m.Lock()
m.inactiveSyncers[nodeID] = s
// We'll force a historical sync with the first peer we connect to in
// order to ensure we get as much of the graph as possible.
var err error
m.historicalSync.Do(func() {
log.Infof("Attempting historical sync with GossipSyncer(%x)",
s.cfg.peerPub)
err = s.historicalSync()
})
if err != nil {
log.Errorf("Unable to perform historical sync with "+
"GossipSyncer(%x): %v", s.cfg.peerPub, err)
// Reset historicalSync to ensure it is tried again with a
// different peer.
m.historicalSync = sync.Once{}
}
// If we've yet to reach our desired number of active syncers, then
// we'll use this one.
numActiveSyncers := len(m.activeSyncers) + len(m.pendingActiveSyncers)
if numActiveSyncers < m.cfg.NumActiveSyncers {
m.queueActiveSyncer(s)
m.Unlock()
m.signalNewActiveSyncer()
return
}
m.Unlock()
}
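Note the retry mechanic at the end: a sync.Once never fires twice, but it can be re-armed by overwriting it with a fresh zero value, which is safe here because the assignment happens while holding the manager's lock. A standalone illustration of the same trick:

var once sync.Once
once.Do(func() { fmt.Println("first attempt") }) // runs

// Re-arm by replacing the Once; the next Do call runs again.
once = sync.Once{}
once.Do(func() { fmt.Println("retry") }) // also runs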
// PruneSyncState is called by outside sub-systems once a peer that we were
// previously connected to has been disconnected. In this case we can stop the
// existing GossipSyncer assigned to the peer and free up resources.
func (m *SyncManager) PruneSyncState(peer routing.Vertex) {
s, ok := m.GossipSyncer(peer)
if !ok {
return
}
log.Infof("Removing GossipSyncer for peer=%v", peer)
// We'll start by stopping the GossipSyncer for the disconnected peer.
s.Stop()
// If it's a non-active syncer, then we can just exit now.
m.Lock()
if _, ok := m.inactiveSyncers[s.cfg.peerPub]; ok {
delete(m.inactiveSyncers, s.cfg.peerPub)
m.Unlock()
return
}
m.Unlock()
// Otherwise, we'll need to dequeue it from our pending active syncers
// queue and find a new one to replace it, if any.
m.signalStaleActiveSyncer(s, false)
}
// GossipSyncer returns the associated gossip syncer of a peer. The boolean
// returned signals whether there exists a gossip syncer for the peer.
func (m *SyncManager) GossipSyncer(peer routing.Vertex) (*GossipSyncer, bool) {
m.Lock()
defer m.Unlock()
return m.gossipSyncer(peer)
}
// gossipSyncer returns the associated gossip syncer of a peer. The boolean
// returned signals whether there exists a gossip syncer for the peer.
func (m *SyncManager) gossipSyncer(peer routing.Vertex) (*GossipSyncer, bool) {
syncer, ok := m.inactiveSyncers[peer]
if ok {
return syncer, true
}
syncer, ok = m.pendingActiveSyncers[peer]
if ok {
return syncer, true
}
syncer, ok = m.activeSyncers[peer]
if ok {
return syncer, true
}
return nil, false
}
// GossipSyncers returns all of the currently initialized gossip syncers.
func (m *SyncManager) GossipSyncers() map[routing.Vertex]*GossipSyncer {
m.Lock()
defer m.Unlock()
numSyncers := len(m.inactiveSyncers) + len(m.pendingActiveSyncers) +
len(m.activeSyncers)
syncers := make(map[routing.Vertex]*GossipSyncer, numSyncers)
for _, syncer := range m.inactiveSyncers {
syncers[syncer.cfg.peerPub] = syncer
}
for _, syncer := range m.pendingActiveSyncers {
syncers[syncer.cfg.peerPub] = syncer
}
for _, syncer := range m.activeSyncers {
syncers[syncer.cfg.peerPub] = syncer
}
return syncers
}

discovery/sync_manager_test.go (new file, 549 lines)

@@ -0,0 +1,549 @@
package discovery
import (
"fmt"
"math"
"reflect"
"sync/atomic"
"testing"
"time"
"github.com/davecgh/go-spew/spew"
"github.com/lightningnetwork/lnd/lntest"
"github.com/lightningnetwork/lnd/lnwire"
"github.com/lightningnetwork/lnd/ticker"
)
// randPeer creates a random peer.
func randPeer(t *testing.T, quit chan struct{}) *mockPeer {
t.Helper()
return &mockPeer{
pk: randPubKey(t),
sentMsgs: make(chan lnwire.Message),
quit: quit,
}
}
// newTestSyncManager creates a new test SyncManager using mock implementations
// of its dependencies.
func newTestSyncManager(numActiveSyncers int) *SyncManager {
hID := lnwire.ShortChannelID{BlockHeight: latestKnownHeight}
return newSyncManager(&SyncManagerCfg{
ChanSeries: newMockChannelGraphTimeSeries(hID),
RotateTicker: ticker.NewForce(DefaultSyncerRotationInterval),
HistoricalSyncTicker: ticker.NewForce(DefaultHistoricalSyncInterval),
ActiveSyncerTimeoutTicker: ticker.NewForce(DefaultActiveSyncerTimeout),
NumActiveSyncers: numActiveSyncers,
})
}
// TestSyncManagerNumActiveSyncers ensures that we are unable to have more than
// NumActiveSyncers active syncers.
func TestSyncManagerNumActiveSyncers(t *testing.T) {
t.Parallel()
// We'll start by creating our test sync manager which will hold up to
// 3 active syncers.
const numActiveSyncers = 3
const numSyncers = numActiveSyncers + 1
syncMgr := newTestSyncManager(numActiveSyncers)
syncMgr.Start()
defer syncMgr.Stop()
// We'll go ahead and create our syncers. We'll gather the ones which
// should be active and passive to check them later on.
for i := 0; i < numActiveSyncers; i++ {
peer := randPeer(t, syncMgr.quit)
syncMgr.InitSyncState(peer)
// The first syncer registered always attempts a historical
// sync.
if i == 0 {
assertTransitionToChansSynced(t, syncMgr, peer, true)
}
assertPassiveSyncerTransition(t, syncMgr, peer)
assertSyncerStatus(t, syncMgr, peer, chansSynced, ActiveSync)
}
for i := 0; i < numSyncers-numActiveSyncers; i++ {
peer := randPeer(t, syncMgr.quit)
syncMgr.InitSyncState(peer)
assertSyncerStatus(t, syncMgr, peer, chansSynced, PassiveSync)
}
}
// TestSyncManagerNewActiveSyncerAfterDisconnect ensures that we can regain an
// active syncer after losing one due to the peer disconnecting.
func TestSyncManagerNewActiveSyncerAfterDisconnect(t *testing.T) {
t.Parallel()
// We'll create our test sync manager to only have one active syncer.
syncMgr := newTestSyncManager(1)
syncMgr.Start()
defer syncMgr.Stop()
// peer1 will represent an active syncer that performs a historical
// sync since it is the first registered peer with the SyncManager.
peer1 := randPeer(t, syncMgr.quit)
syncMgr.InitSyncState(peer1)
assertTransitionToChansSynced(t, syncMgr, peer1, true)
assertPassiveSyncerTransition(t, syncMgr, peer1)
// It will then be torn down to simulate a disconnection. Since there
// are no other candidate syncers available, the active syncer won't be
// replaced.
syncMgr.PruneSyncState(peer1.PubKey())
// Then, we'll start our active syncer again, but this time we'll also
// have a passive syncer available to replace the active syncer after
// the peer disconnects.
syncMgr.InitSyncState(peer1)
assertPassiveSyncerTransition(t, syncMgr, peer1)
// Create our second peer, which should be initialized as a passive
// syncer.
peer2 := randPeer(t, syncMgr.quit)
syncMgr.InitSyncState(peer2)
assertSyncerStatus(t, syncMgr, peer2, chansSynced, PassiveSync)
// Disconnect our active syncer, which should trigger the SyncManager to
// replace it with our passive syncer.
syncMgr.PruneSyncState(peer1.PubKey())
assertPassiveSyncerTransition(t, syncMgr, peer2)
}
// TestSyncManagerRotateActiveSyncerCandidate tests that we can successfully
// rotate our active syncers after a certain interval.
func TestSyncManagerRotateActiveSyncerCandidate(t *testing.T) {
t.Parallel()
// We'll create our sync manager with one active syncer.
syncMgr := newTestSyncManager(1)
syncMgr.Start()
defer syncMgr.Stop()
// The first syncer registered always performs a historical sync.
activeSyncPeer := randPeer(t, syncMgr.quit)
syncMgr.InitSyncState(activeSyncPeer)
assertTransitionToChansSynced(t, syncMgr, activeSyncPeer, true)
assertPassiveSyncerTransition(t, syncMgr, activeSyncPeer)
// We'll send a tick to force a rotation. Since there aren't any
// candidates, none of the active syncers will be rotated.
syncMgr.cfg.RotateTicker.(*ticker.Force).Force <- time.Time{}
assertNoMsgSent(t, activeSyncPeer)
assertSyncerStatus(t, syncMgr, activeSyncPeer, chansSynced, ActiveSync)
// We'll then go ahead and add a passive syncer.
passiveSyncPeer := randPeer(t, syncMgr.quit)
syncMgr.InitSyncState(passiveSyncPeer)
assertSyncerStatus(t, syncMgr, passiveSyncPeer, chansSynced, PassiveSync)
// We'll force another rotation - this time, since we have a passive
// syncer available, they should be rotated.
syncMgr.cfg.RotateTicker.(*ticker.Force).Force <- time.Time{}
// The transition from an active syncer to a passive syncer causes the
// peer to send out a new GossipTimestampRange in the past so that they
// don't receive new graph updates.
assertActiveSyncerTransition(t, syncMgr, activeSyncPeer)
// The transition from a passive syncer to an active syncer causes the
// peer to send a new GossipTimestampRange with the current timestamp to
// signal that they would like to receive new graph updates from their
// peers. This will also cause the gossip syncer to redo its state
// machine, starting from its initial syncingChans state. We'll then
// need to transition it to its final chansSynced state to ensure the
// next syncer is properly started in the round-robin.
assertPassiveSyncerTransition(t, syncMgr, passiveSyncPeer)
}
// TestSyncManagerHistoricalSync ensures that we only attempt a single
// historical sync during the SyncManager's startup, and that we can routinely
// force historical syncs whenever the HistoricalSyncTicker fires.
func TestSyncManagerHistoricalSync(t *testing.T) {
t.Parallel()
syncMgr := newTestSyncManager(0)
syncMgr.Start()
defer syncMgr.Stop()
// We should expect to see a QueryChannelRange message with a
// FirstBlockHeight of the genesis block, signaling that a historical
// sync is being attempted.
peer := randPeer(t, syncMgr.quit)
syncMgr.InitSyncState(peer)
assertMsgSent(t, peer, &lnwire.QueryChannelRange{
FirstBlockHeight: 0,
NumBlocks: math.MaxUint32,
})
// If an additional peer connects, then a historical sync should not be
// attempted again.
extraPeer := randPeer(t, syncMgr.quit)
syncMgr.InitSyncState(extraPeer)
assertNoMsgSent(t, extraPeer)
// Then, we'll send a tick to force a historical sync. This should
// trigger the extra peer to also perform a historical sync since the
// first peer is not eligible due to not being in a chansSynced state.
syncMgr.cfg.HistoricalSyncTicker.(*ticker.Force).Force <- time.Time{}
assertMsgSent(t, extraPeer, &lnwire.QueryChannelRange{
FirstBlockHeight: 0,
NumBlocks: math.MaxUint32,
})
}
// TestSyncManagerRoundRobinQueue ensures that any subsequent active syncers can
// only be started after the previous one has completed its state machine.
func TestSyncManagerRoundRobinQueue(t *testing.T) {
t.Parallel()
const numActiveSyncers = 3
// We'll start by creating our sync manager with support for three
// active syncers.
syncMgr := newTestSyncManager(numActiveSyncers)
syncMgr.Start()
defer syncMgr.Stop()
peers := make([]*mockPeer, 0, numActiveSyncers)
// The first syncer registered always attempts a historical sync.
firstPeer := randPeer(t, syncMgr.quit)
syncMgr.InitSyncState(firstPeer)
peers = append(peers, firstPeer)
assertTransitionToChansSynced(t, syncMgr, firstPeer, true)
// After completing the historical sync, a sync transition to ActiveSync
// should happen. It should transition immediately since it has no
// dependents.
assertActiveGossipTimestampRange(t, firstPeer)
// We'll create the remaining numActiveSyncers. These will be queued in
// the round robin since the first syncer has yet to reach chansSynced.
queuedPeers := make([]*mockPeer, 0, numActiveSyncers-1)
for i := 0; i < numActiveSyncers-1; i++ {
peer := randPeer(t, syncMgr.quit)
syncMgr.InitSyncState(peer)
peers = append(peers, peer)
queuedPeers = append(queuedPeers, peer)
}
// Ensure they cannot transition without sending a GossipTimestampRange
// message first.
for _, peer := range queuedPeers {
assertNoMsgSent(t, peer)
}
// Transition the first syncer to chansSynced, which should allow the
// second to transition next.
assertTransitionToChansSynced(t, syncMgr, firstPeer, false)
// assertSyncerTransitioned ensures the target peer's syncer is the only
// one that has transitioned.
assertSyncerTransitioned := func(target *mockPeer) {
t.Helper()
for _, peer := range peers {
if peer.PubKey() != target.PubKey() {
assertNoMsgSent(t, peer)
continue
}
assertActiveGossipTimestampRange(t, target)
}
}
// For each queued syncer, we'll ensure they have transitioned to an
// ActiveSync type and reached their final chansSynced state to allow
// the next one to transition.
for _, peer := range queuedPeers {
assertSyncerTransitioned(peer)
assertTransitionToChansSynced(t, syncMgr, peer, false)
}
}
// TestSyncManagerRoundRobinTimeout ensures that if we timeout while waiting for
// an active syncer to reach its final chansSynced state, then we will go on to
// start the next.
func TestSyncManagerRoundRobinTimeout(t *testing.T) {
t.Parallel()
// Create our sync manager with support for two active syncers.
syncMgr := newTestSyncManager(2)
syncMgr.Start()
defer syncMgr.Stop()
// peer1 will be the first peer we start, which will time out and cause
// peer2 to start.
peer1 := randPeer(t, syncMgr.quit)
peer2 := randPeer(t, syncMgr.quit)
// The first syncer registered always attempts a historical sync.
syncMgr.InitSyncState(peer1)
assertTransitionToChansSynced(t, syncMgr, peer1, true)
// We assume the syncer for peer1 has transitioned once we see it send a
// lnwire.GossipTimestampRange message.
assertActiveGossipTimestampRange(t, peer1)
// We'll then create the syncer for peer2. This should cause it to be
// queued so that it starts once the syncer for peer1 is done.
syncMgr.InitSyncState(peer2)
assertNoMsgSent(t, peer2)
// Send a force tick to pretend the sync manager has timed out waiting
// for peer1's syncer to reach chansSynced.
syncMgr.cfg.ActiveSyncerTimeoutTicker.(*ticker.Force).Force <- time.Time{}
// Finally, ensure that the syncer for peer2 has transitioned.
assertActiveGossipTimestampRange(t, peer2)
}
// TestSyncManagerRoundRobinStaleSyncer ensures that any stale active syncers we
// are currently waiting for or are queued up to start are properly removed and
// stopped.
func TestSyncManagerRoundRobinStaleSyncer(t *testing.T) {
t.Parallel()
const numActiveSyncers = 4
// We'll create and start our sync manager with some active syncers.
syncMgr := newTestSyncManager(numActiveSyncers)
syncMgr.Start()
defer syncMgr.Stop()
peers := make([]*mockPeer, 0, numActiveSyncers)
// The first syncer registered always attempts a historical sync.
firstPeer := randPeer(t, syncMgr.quit)
syncMgr.InitSyncState(firstPeer)
peers = append(peers, firstPeer)
assertTransitionToChansSynced(t, syncMgr, firstPeer, true)
// After completing the historical sync, a sync transition to ActiveSync
// should happen. It should transition immediately since it has no
// dependents.
assertActiveGossipTimestampRange(t, firstPeer)
assertMsgSent(t, firstPeer, &lnwire.QueryChannelRange{
FirstBlockHeight: startHeight,
NumBlocks: math.MaxUint32 - startHeight,
})
// We'll create the remaining numActiveSyncers. These will be queued in
// the round robin since the first syncer has yet to reach chansSynced.
queuedPeers := make([]*mockPeer, 0, numActiveSyncers-1)
for i := 0; i < numActiveSyncers-1; i++ {
peer := randPeer(t, syncMgr.quit)
syncMgr.InitSyncState(peer)
peers = append(peers, peer)
queuedPeers = append(queuedPeers, peer)
}
// Ensure they cannot transition without sending a GossipTimestampRange
// message first.
for _, peer := range queuedPeers {
assertNoMsgSent(t, peer)
}
// assertSyncerTransitioned ensures the target peer's syncer is the only
// one that has transitioned.
assertSyncerTransitioned := func(target *mockPeer) {
t.Helper()
for _, peer := range peers {
if peer.PubKey() != target.PubKey() {
assertNoMsgSent(t, peer)
continue
}
assertPassiveSyncerTransition(t, syncMgr, target)
}
}
// We'll then remove the syncers in the middle to cover the case where
// they are queued up in the sync manager's pending list.
for i, peer := range peers {
if i == 0 || i == len(peers)-1 {
continue
}
syncMgr.PruneSyncState(peer.PubKey())
}
// We'll then remove the syncer we are currently waiting for. This
// should prompt the last syncer to start since it is the only one left
// pending. We'll do this in a goroutine since the peer behind the new
// active syncer will need to send out its new GossipTimestampRange.
go syncMgr.PruneSyncState(peers[0].PubKey())
assertSyncerTransitioned(peers[len(peers)-1])
}
// assertNoMsgSent is a helper function that ensures a peer hasn't sent any
// messages.
func assertNoMsgSent(t *testing.T, peer *mockPeer) {
t.Helper()
select {
case msg := <-peer.sentMsgs:
t.Fatalf("peer %x sent unexpected message %v", peer.PubKey(),
spew.Sdump(msg))
case <-time.After(time.Second):
}
}
// assertMsgSent asserts that the peer has sent the given message.
func assertMsgSent(t *testing.T, peer *mockPeer, msg lnwire.Message) {
t.Helper()
var msgSent lnwire.Message
select {
case msgSent = <-peer.sentMsgs:
case <-time.After(time.Second):
t.Fatalf("expected peer %x to send %T message", peer.PubKey(),
msg)
}
if !reflect.DeepEqual(msgSent, msg) {
t.Fatalf("expected peer %x to send message: %v\ngot: %v",
peer.PubKey(), spew.Sdump(msg), spew.Sdump(msgSent))
}
}
// assertActiveGossipTimestampRange is a helper function that ensures a peer has
// sent a lnwire.GossipTimestampRange message indicating that it would like to
// receive new graph updates.
func assertActiveGossipTimestampRange(t *testing.T, peer *mockPeer) {
t.Helper()
var msgSent lnwire.Message
select {
case msgSent = <-peer.sentMsgs:
case <-time.After(time.Second):
t.Fatalf("expected peer %x to send lnwire.GossipTimestampRange "+
"message", peer.PubKey())
}
msg, ok := msgSent.(*lnwire.GossipTimestampRange)
if !ok {
t.Fatalf("expected peer %x to send %T message", peer.PubKey(),
msg)
}
if msg.FirstTimestamp == 0 {
t.Fatalf("expected *lnwire.GossipTimestampRange message with " +
"non-zero FirstTimestamp")
}
if msg.TimestampRange == 0 {
t.Fatalf("expected *lnwire.GossipTimestampRange message with " +
"non-zero TimestampRange")
}
}
// assertSyncerStatus asserts that the gossip syncer for the given peer matches
// the expected sync state and type.
func assertSyncerStatus(t *testing.T, syncMgr *SyncManager, peer *mockPeer,
syncState syncerState, syncType SyncerType) {
t.Helper()
s, ok := syncMgr.GossipSyncer(peer.PubKey())
if !ok {
t.Fatalf("gossip syncer for peer %x not found", peer.PubKey())
}
// We'll check the status of our syncer within a WaitNoError call as some
// sync transitions might cause this to be racy.
err := lntest.WaitNoError(func() error {
state := s.syncState()
if s.syncState() != syncState {
return fmt.Errorf("expected syncState %v for peer "+
"%x, got %v", syncState, peer.PubKey(), state)
}
typ := s.SyncType()
if s.SyncType() != syncType {
return fmt.Errorf("expected syncType %v for peer "+
"%x, got %v", syncType, peer.PubKey(), typ)
}
return nil
}, time.Second)
if err != nil {
t.Fatal(err)
}
}
// assertTransitionToChansSynced asserts the transition of an ActiveSync
// GossipSyncer to its final chansSynced state.
func assertTransitionToChansSynced(t *testing.T, syncMgr *SyncManager,
peer *mockPeer, historicalSync bool) {
t.Helper()
s, ok := syncMgr.GossipSyncer(peer.PubKey())
if !ok {
t.Fatalf("gossip syncer for peer %x not found", peer.PubKey())
}
firstBlockHeight := uint32(startHeight)
if historicalSync {
firstBlockHeight = 0
}
assertMsgSent(t, peer, &lnwire.QueryChannelRange{
FirstBlockHeight: firstBlockHeight,
NumBlocks: math.MaxUint32 - firstBlockHeight,
})
s.ProcessQueryMsg(&lnwire.ReplyChannelRange{Complete: 1}, nil)
chanSeries := syncMgr.cfg.ChanSeries.(*mockChannelGraphTimeSeries)
select {
case <-chanSeries.filterReq:
chanSeries.filterResp <- nil
case <-time.After(2 * time.Second):
t.Fatal("expected to receive FilterKnownChanIDs request")
}
err := lntest.WaitNoError(func() error {
state := syncerState(atomic.LoadUint32(&s.state))
if state != chansSynced {
return fmt.Errorf("expected syncerState %v, got %v",
chansSynced, state)
}
return nil
}, time.Second)
if err != nil {
t.Fatal(err)
}
}
// assertPassiveSyncerTransition asserts that a gossip syncer goes through all
// of its expected steps when transitioning from passive to active.
func assertPassiveSyncerTransition(t *testing.T, syncMgr *SyncManager,
peer *mockPeer) {
t.Helper()
assertActiveGossipTimestampRange(t, peer)
assertTransitionToChansSynced(t, syncMgr, peer, false)
}
// assertActiveSyncerTransition asserts that a gossip syncer goes through all of
// its expected steps when transitioning from active to passive.
func assertActiveSyncerTransition(t *testing.T, syncMgr *SyncManager,
peer *mockPeer) {
t.Helper()
assertMsgSent(t, peer, &lnwire.GossipTimestampRange{
FirstTimestamp: uint32(zeroTimestamp.Unix()),
TimestampRange: 0,
})
assertSyncerStatus(t, syncMgr, peer, chansSynced, PassiveSync)
}

discovery/syncer.go

@@ -13,18 +13,51 @@ import (
"golang.org/x/time/rate"
)
// syncerState is an enum that represents the current state of the
// gossipSyncer. As the syncer is a state machine, we'll gate our actions
// based off of the current state and the next incoming message.
// SyncerType encapsulates the different types of syncing mechanisms for a
// gossip syncer.
type SyncerType uint8
const (
// ActiveSync denotes that a gossip syncer should exercise its default
// behavior. This includes reconciling the set of missing graph updates
// with the remote peer _and_ receiving new updates from them.
ActiveSync SyncerType = iota
// PassiveSync denotes that a gossip syncer:
//
// 1. Should not attempt to query the remote peer for graph updates.
// 2. Should respond to queries from the remote peer.
// 3. Should not receive new updates from the remote peer.
//
// They are started in a chansSynced state in order to accomplish their
// responsibilities above.
PassiveSync
)
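A hedged sketch, not taken from the diff, of the update filters the two types roughly correspond to on the wire (matching what the sync_manager tests assert): an ActiveSync peer applies a horizon starting now, while a transition to PassiveSync applies an empty, zero-range horizon so the remote peer forwards no new updates:

// ActiveSync: request everything from the current time onwards.
activeFilter := &lnwire.GossipTimestampRange{
	FirstTimestamp: uint32(time.Now().Unix()),
	TimestampRange: math.MaxUint32,
}

// PassiveSync transition: a zero-range horizon (cf. zeroTimestamp in
// the tests), so no new graph updates are forwarded to us.
passiveFilter := &lnwire.GossipTimestampRange{
	TimestampRange: 0,
}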
// String returns a human readable string describing the target SyncerType.
func (t SyncerType) String() string {
switch t {
case ActiveSync:
return "ActiveSync"
case PassiveSync:
return "PassiveSync"
default:
return fmt.Sprintf("unknown sync type %d", t)
}
}
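As a quick usage sketch (hedged; s is a hypothetical *GossipSyncer tracked by the gossiper), callers can branch on the exported type:
// Sketch only: branch on a syncer's current SyncerType.
switch s.SyncType() {
case ActiveSync:
// Already receiving new graph updates from this peer.
case PassiveSync:
// Only answering this peer's queries; no new updates flow in.
}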
// syncerState is an enum that represents the current state of the GossipSyncer.
// As the syncer is a state machine, we'll gate our actions based off of the
// current state and the next incoming message.
type syncerState uint32
const (
// syncingChans is the default state of the gossipSyncer. We start in
// syncingChans is the default state of the GossipSyncer. We start in
// this state when a new peer first connects and we don't yet know if
// we're fully synchronized.
syncingChans syncerState = iota
// waitingQueryRangeReply is the second main phase of the gossipSyncer.
// waitingQueryRangeReply is the second main phase of the GossipSyncer.
// We enter this state after we send out our first QueryChannelRange
// reply. We'll stay in this state until the remote party sends us a
// ReplyShortChanIDsEnd message that indicates they've responded to our
@ -33,19 +66,19 @@ const (
// chan ID's to us.
waitingQueryRangeReply
// queryNewChannels is the third main phase of the gossipSyncer. In
// queryNewChannels is the third main phase of the GossipSyncer. In
// this phase we'll send out all of our QueryShortChanIDs messages in
// response to the new channels that we don't yet know about.
queryNewChannels
// waitingQueryChanReply is the fourth main phase of the gossipSyncer.
// waitingQueryChanReply is the fourth main phase of the GossipSyncer.
// We enter this phase once we've sent off a query chunk to the remote
// peer. We'll stay in this phase until we receive a
// ReplyShortChanIDsEnd message which indicates that the remote party
// has responded to all of our requests.
waitingQueryChanReply
// chansSynced is the terminal stage of the gossipSyncer. Once we enter
// chansSynced is the terminal stage of the GossipSyncer. Once we enter
// this phase, we'll send out our update horizon, which filters out the
// set of channel updates that we're interested in. In this state,
// we'll be able to accept any outgoing messages from the
@ -54,17 +87,6 @@ const (
chansSynced
)
const (
// DefaultMaxUndelayedQueryReplies specifies how many gossip queries we
// will respond to immediately before starting to delay responses.
DefaultMaxUndelayedQueryReplies = 10
// DefaultDelayedQueryReplyInterval is the length of time we will wait
// before responding to gossip queries after replying to
// maxUndelayedQueryReplies queries.
DefaultDelayedQueryReplyInterval = 5 * time.Second
)
// String returns a human readable string describing the target syncerState.
func (s syncerState) String() string {
switch s {
@ -88,6 +110,26 @@ func (s syncerState) String() string {
}
}
const (
// DefaultMaxUndelayedQueryReplies specifies how many gossip queries we
// will respond to immediately before starting to delay responses.
DefaultMaxUndelayedQueryReplies = 10
// DefaultDelayedQueryReplyInterval is the length of time we will wait
// before responding to gossip queries after replying to
// maxUndelayedQueryReplies queries.
DefaultDelayedQueryReplyInterval = 5 * time.Second
// chanRangeQueryBuffer is the number of blocks back that we'll go when
// asking the remote peer for any channels they know of beyond
// our highest known channel ID.
chanRangeQueryBuffer = 144
// syncTransitionTimeout is the default duration that we'll wait when
// attempting to perform a sync transition.
syncTransitionTimeout = 5 * time.Second
)
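To make the buffer concrete, here is a hedged sketch of the start-height arithmetic that genChanRangeQuery performs further below (newestHeight is hypothetical):
// With chanRangeQueryBuffer = 144, a newest known channel at height
// 500000 yields a query starting at height 499856; a newest height at
// or below the buffer starts the query from genesis instead.
var startHeight uint32
if newestHeight > chanRangeQueryBuffer {
startHeight = newestHeight - chanRangeQueryBuffer
}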
var (
// encodingTypeToChunkSize maps an encoding type, to the max number of
// short chan ID's using the encoding type that we can fit into a
@ -98,21 +140,33 @@ var (
// ErrGossipSyncerExiting signals that the syncer has been killed.
ErrGossipSyncerExiting = errors.New("gossip syncer exiting")
// ErrSyncTransitionTimeout is an error returned when we've timed out
// attempting to perform a sync transition.
ErrSyncTransitionTimeout = errors.New("timed out attempting to " +
"transition sync type")
// zeroTimestamp is the timestamp we'll use when we want to indicate to
// peers that we do not want to receive any new graph updates.
zeroTimestamp time.Time
)
const (
// chanRangeQueryBuffer is the number of blocks back that we'll go when
// asking the remote peer for any channels they know of beyond
// our highest known channel ID.
chanRangeQueryBuffer = 144
)
// syncTransitionReq encapsulates a request for a gossip syncer sync transition.
type syncTransitionReq struct {
newSyncType SyncerType
errChan chan error
}
// gossipSyncerCfg is a struct that packages all the information a gossipSyncer
// gossipSyncerCfg is a struct that packages all the information a GossipSyncer
// needs to carry out its duties.
type gossipSyncerCfg struct {
// chainHash is the chain that this syncer is responsible for.
chainHash chainhash.Hash
// peerPub is the public key of the peer we're syncing with, serialized
// in compressed format.
peerPub [33]byte
// syncChanUpdates is a bool that indicates if we should request a
// continual channel update stream or not.
syncChanUpdates bool
@ -144,18 +198,27 @@ type gossipSyncerCfg struct {
delayedQueryReplyInterval time.Duration
}
// gossipSyncer is a struct that handles synchronizing the channel graph state
// with a remote peer. The gossipSyncer implements a state machine that will
// GossipSyncer is a struct that handles synchronizing the channel graph state
// with a remote peer. The GossipSyncer implements a state machine that will
// progressively ensure we're synchronized with the channel state of the remote
// node. Once both nodes have been synchronized, we'll use an update filter to
// filter out which messages should be sent to a remote peer based on their
// update horizon. If the update horizon isn't specified, then we won't send
// them any channel updates at all.
//
// TODO(roasbeef): modify to only sync from one peer at a time?
type gossipSyncer struct {
started uint32
stopped uint32
type GossipSyncer struct {
started sync.Once
stopped sync.Once
// state is the current state of the GossipSyncer.
//
// NOTE: This variable MUST be used atomically.
state uint32
// syncType denotes the SyncerType the gossip syncer is currently
// exercising.
//
// NOTE: This variable MUST be used atomically.
syncType uint32
// remoteUpdateHorizon is the update horizon of the remote peer. We'll
// use this to properly filter out any messages.
@ -165,10 +228,25 @@ type gossipSyncer struct {
// determine if we've already sent out our update.
localUpdateHorizon *lnwire.GossipTimestampRange
// state is the current state of the gossipSyncer.
//
// NOTE: This variable MUST be used atomically.
state uint32
// syncTransitionReqs is a channel through which new sync type
// transition requests will be sent. These requests should only be
// handled when the gossip syncer is in a chansSynced state to ensure
// its state machine behaves as expected.
syncTransitionReqs chan *syncTransitionReq
// historicalSyncReqs is a channel that serves as a signal for the
// gossip syncer to perform a historical sync. These can only be done
// once the gossip syncer is in a chansSynced state to ensure its state
// machine behaves as expected.
historicalSyncReqs chan struct{}
// genHistoricalChanRangeQuery, when true, signals to the gossip syncer
// that it should request all of the remote peer's known channel IDs
// starting from the genesis block of the chain. This can only
// happen if the gossip syncer receives a request to attempt a
// historical sync. It can be unset if the syncer ever transitions from
// PassiveSync to ActiveSync.
genHistoricalChanRangeQuery bool
// gossipMsgs is a channel that all messages from the target peer will
// be sent over.
@ -183,10 +261,6 @@ type gossipSyncer struct {
// state.
newChansToQuery []lnwire.ShortChannelID
// peerPub is the public key of the peer we're syncing with, serialized
// in compressed format.
peerPub [33]byte
cfg gossipSyncerCfg
// rateLimiter dictates the frequency with which we will reply to gossip
@ -195,15 +269,19 @@ type gossipSyncer struct {
// number of queries.
rateLimiter *rate.Limiter
// syncedSignal is a channel that, if set, will be closed when the
// GossipSyncer reaches its terminal chansSynced state.
syncedSignal chan struct{}
sync.Mutex
quit chan struct{}
wg sync.WaitGroup
}
// newGossiperSyncer returns a new instance of the gossipSyncer populated using
// newGossipSyncer returns a new instance of the GossipSyncer populated using
// the passed config.
func newGossiperSyncer(cfg gossipSyncerCfg) *gossipSyncer {
func newGossipSyncer(cfg gossipSyncerCfg) *GossipSyncer {
// If no parameter was specified for max undelayed query replies, set
// it to the default.
if cfg.maxUndelayedQueryReplies <= 0 {
@ -225,57 +303,48 @@ func newGossiperSyncer(cfg gossipSyncerCfg) *gossipSyncer {
interval, cfg.maxUndelayedQueryReplies,
)
return &gossipSyncer{
cfg: cfg,
rateLimiter: rateLimiter,
gossipMsgs: make(chan lnwire.Message, 100),
quit: make(chan struct{}),
return &GossipSyncer{
cfg: cfg,
rateLimiter: rateLimiter,
syncTransitionReqs: make(chan *syncTransitionReq),
historicalSyncReqs: make(chan struct{}),
gossipMsgs: make(chan lnwire.Message, 100),
quit: make(chan struct{}),
}
}
// Start starts the gossipSyncer and any goroutines that it needs to carry out
// Start starts the GossipSyncer and any goroutines that it needs to carry out
// its duties.
func (g *gossipSyncer) Start() error {
if !atomic.CompareAndSwapUint32(&g.started, 0, 1) {
return nil
}
func (g *GossipSyncer) Start() {
g.started.Do(func() {
log.Debugf("Starting GossipSyncer(%x)", g.cfg.peerPub[:])
log.Debugf("Starting gossipSyncer(%x)", g.peerPub[:])
g.wg.Add(1)
go g.channelGraphSyncer()
return nil
g.wg.Add(1)
go g.channelGraphSyncer()
})
}
// Stop signals the gossipSyncer for a graceful exit, then waits until it has
// Stop signals the GossipSyncer for a graceful exit, then waits until it has
// exited.
func (g *gossipSyncer) Stop() error {
if !atomic.CompareAndSwapUint32(&g.stopped, 0, 1) {
return nil
}
close(g.quit)
g.wg.Wait()
return nil
func (g *GossipSyncer) Stop() {
g.stopped.Do(func() {
close(g.quit)
g.wg.Wait()
})
}
// channelGraphSyncer is the main goroutine responsible for ensuring that we
// properly sync channel graph state with the remote peer, and also that we only
// send them messages which actually pass their defined update horizon.
func (g *gossipSyncer) channelGraphSyncer() {
func (g *GossipSyncer) channelGraphSyncer() {
defer g.wg.Done()
// TODO(roasbeef): also add ability to force transition back to syncing
// chans
// * needed if we want to sync chan state every few blocks?
for {
state := atomic.LoadUint32(&g.state)
log.Debugf("gossipSyncer(%x): state=%v", g.peerPub[:],
syncerState(state))
state := g.syncState()
syncType := g.SyncType()
log.Debugf("GossipSyncer(%x): state=%v, type=%v",
g.cfg.peerPub[:], state, syncType)
switch syncerState(state) {
// When we're in this state, we're trying to synchronize our
@ -286,7 +355,9 @@ func (g *gossipSyncer) channelGraphSyncer() {
case syncingChans:
// If we're in this state, then we'll send the remote
// peer our opening QueryChannelRange message.
queryRangeMsg, err := g.genChanRangeQuery()
queryRangeMsg, err := g.genChanRangeQuery(
g.genHistoricalChanRangeQuery,
)
if err != nil {
log.Errorf("unable to gen chan range "+
"query: %v", err)
@ -302,7 +373,7 @@ func (g *gossipSyncer) channelGraphSyncer() {
// With the message sent successfully, we'll transition
// into the next state where we wait for their reply.
atomic.StoreUint32(&g.state, uint32(waitingQueryRangeReply))
g.setSyncState(waitingQueryRangeReply)
// In this state, we've sent out our initial channel range
// query and are waiting for the final response from the remote
@ -359,13 +430,13 @@ func (g *gossipSyncer) channelGraphSyncer() {
// If this wasn't our last query, then we'll need to
// transition to our waiting state.
if !done {
atomic.StoreUint32(&g.state, uint32(waitingQueryChanReply))
g.setSyncState(waitingQueryChanReply)
continue
}
// If we're fully synchronized, then we can transition
// to our terminal state.
atomic.StoreUint32(&g.state, uint32(chansSynced))
g.setSyncState(chansSynced)
// In this state, we've just sent off a new query for channels
// that we don't yet know of. We'll remain in this state until
@ -382,7 +453,7 @@ func (g *gossipSyncer) channelGraphSyncer() {
// state to send of the remaining query chunks.
_, ok := msg.(*lnwire.ReplyShortChanIDsEnd)
if ok {
atomic.StoreUint32(&g.state, uint32(queryNewChannels))
g.setSyncState(queryNewChannels)
continue
}
@ -401,35 +472,30 @@ func (g *gossipSyncer) channelGraphSyncer() {
// This is our final terminal state where we'll only reply to
// any further queries by the remote peer.
case chansSynced:
g.Lock()
if g.syncedSignal != nil {
close(g.syncedSignal)
g.syncedSignal = nil
}
g.Unlock()
// If we haven't yet sent out our update horizon, and
// we want to receive real-time channel updates, we'll
// do so now.
if g.localUpdateHorizon == nil && g.cfg.syncChanUpdates {
// TODO(roasbeef): query DB for most recent
// update?
// We'll give an hour's room in our update
// horizon to ensure we don't miss any newer
// items.
updateHorizon := time.Now().Add(-time.Hour * 1)
log.Infof("gossipSyncer(%x): applying "+
"gossipFilter(start=%v)", g.peerPub[:],
updateHorizon)
g.localUpdateHorizon = &lnwire.GossipTimestampRange{
ChainHash: g.cfg.chainHash,
FirstTimestamp: uint32(updateHorizon.Unix()),
TimestampRange: math.MaxUint32,
}
err := g.cfg.sendToPeer(g.localUpdateHorizon)
if g.localUpdateHorizon == nil && syncType == ActiveSync {
err := g.sendGossipTimestampRange(
time.Now(), math.MaxUint32,
)
if err != nil {
log.Errorf("unable to send update "+
"horizon: %v", err)
log.Errorf("Unable to send update "+
"horizon to %x: %v",
g.cfg.peerPub, err)
}
}
// With our horizon set, we'll simply reply to any new
// message and exit if needed.
// messages or process any state transitions and exit if
// needed.
select {
case msg := <-g.gossipMsgs:
err := g.replyPeerQueries(msg)
@ -438,6 +504,12 @@ func (g *gossipSyncer) channelGraphSyncer() {
"query: %v", err)
}
case req := <-g.syncTransitionReqs:
req.errChan <- g.handleSyncTransition(req)
case <-g.historicalSyncReqs:
g.handleHistoricalSync()
case <-g.quit:
return
}
@ -445,19 +517,50 @@ func (g *gossipSyncer) channelGraphSyncer() {
}
}
// sendGossipTimestampRange constructs and sets a GossipTimestampRange for the
// syncer and sends it to the remote peer.
func (g *GossipSyncer) sendGossipTimestampRange(firstTimestamp time.Time,
timestampRange uint32) error {
endTimestamp := firstTimestamp.Add(
time.Duration(timestampRange) * time.Second,
)
log.Infof("GossipSyncer(%x): applying gossipFilter(start=%v, end=%v)",
g.cfg.peerPub[:], firstTimestamp, endTimestamp)
localUpdateHorizon := &lnwire.GossipTimestampRange{
ChainHash: g.cfg.chainHash,
FirstTimestamp: uint32(firstTimestamp.Unix()),
TimestampRange: timestampRange,
}
if err := g.cfg.sendToPeer(localUpdateHorizon); err != nil {
return err
}
if firstTimestamp == zeroTimestamp && timestampRange == 0 {
g.localUpdateHorizon = nil
} else {
g.localUpdateHorizon = localUpdateHorizon
}
return nil
}
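The two sync transition paths later in this file exercise the extremes of this helper; as a hedged illustration of the values they pass:
// ActiveSync: resume receiving all new updates from now on.
_ = g.sendGossipTimestampRange(time.Now(), math.MaxUint32)
// PassiveSync: mute all new updates; the zero timestamp with a zero
// range also clears localUpdateHorizon.
_ = g.sendGossipTimestampRange(zeroTimestamp, 0)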
// synchronizeChanIDs is called by the channelGraphSyncer when we need to query
// the remote peer for its known set of channel IDs within a particular block
// range. This method will be called continually until the entire range has
// been queried for with a response received. We'll chunk our requests as
// required to ensure they fit into a single message. We may re-enter this
// state in the case that chunking is required.
func (g *gossipSyncer) synchronizeChanIDs() (bool, error) {
func (g *GossipSyncer) synchronizeChanIDs() (bool, error) {
// If we're in this state yet there are no more new channels to query
// for, then we'll transition to our final synced state and return true
// to signal that we're fully synchronized.
if len(g.newChansToQuery) == 0 {
log.Infof("gossipSyncer(%x): no more chans to query",
g.peerPub[:])
log.Infof("GossipSyncer(%x): no more chans to query",
g.cfg.peerPub[:])
return true, nil
}
@ -479,8 +582,8 @@ func (g *gossipSyncer) synchronizeChanIDs() (bool, error) {
g.newChansToQuery = g.newChansToQuery[g.cfg.chunkSize:]
}
log.Infof("gossipSyncer(%x): querying for %v new channels",
g.peerPub[:], len(queryChunk))
log.Infof("GossipSyncer(%x): querying for %v new channels",
g.cfg.peerPub[:], len(queryChunk))
// With our chunk obtained, we'll send over our next query, then return
// false indicating that we're not yet fully synced.
@ -493,16 +596,16 @@ func (g *gossipSyncer) synchronizeChanIDs() (bool, error) {
return false, err
}
// processChanRangeReply is called each time the gossipSyncer receives a new
// processChanRangeReply is called each time the GossipSyncer receives a new
// reply to the initial range query to discover new channels that it didn't
// previously know of.
func (g *gossipSyncer) processChanRangeReply(msg *lnwire.ReplyChannelRange) error {
func (g *GossipSyncer) processChanRangeReply(msg *lnwire.ReplyChannelRange) error {
g.bufferedChanRangeReplies = append(
g.bufferedChanRangeReplies, msg.ShortChanIDs...,
)
log.Infof("gossipSyncer(%x): buffering chan range reply of size=%v",
g.peerPub[:], len(msg.ShortChanIDs))
log.Infof("GossipSyncer(%x): buffering chan range reply of size=%v",
g.cfg.peerPub[:], len(msg.ShortChanIDs))
// If this isn't the last response, then we can exit as we've already
// buffered the latest portion of the streaming reply.
@ -510,8 +613,8 @@ func (g *gossipSyncer) processChanRangeReply(msg *lnwire.ReplyChannelRange) erro
return nil
}
log.Infof("gossipSyncer(%x): filtering through %v chans", g.peerPub[:],
len(g.bufferedChanRangeReplies))
log.Infof("GossipSyncer(%x): filtering through %v chans",
g.cfg.peerPub[:], len(g.bufferedChanRangeReplies))
// Otherwise, this is the final response, so we'll now check to see
// which channels they know of that we don't.
@ -530,28 +633,31 @@ func (g *gossipSyncer) processChanRangeReply(msg *lnwire.ReplyChannelRange) erro
// If there aren't any channels that we don't know of, then we can
// switch straight to our terminal state.
if len(newChans) == 0 {
log.Infof("gossipSyncer(%x): remote peer has no new chans",
g.peerPub[:])
log.Infof("GossipSyncer(%x): remote peer has no new chans",
g.cfg.peerPub[:])
atomic.StoreUint32(&g.state, uint32(chansSynced))
g.setSyncState(chansSynced)
return nil
}
// Otherwise, we'll set the set of channels that we need to query for
// the next state, and also transition our state.
g.newChansToQuery = newChans
atomic.StoreUint32(&g.state, uint32(queryNewChannels))
g.setSyncState(queryNewChannels)
log.Infof("gossipSyncer(%x): starting query for %v new chans",
g.peerPub[:], len(newChans))
log.Infof("GossipSyncer(%x): starting query for %v new chans",
g.cfg.peerPub[:], len(newChans))
return nil
}
// genChanRangeQuery generates the initial message we'll send to the remote
// party when we're kicking off the channel graph synchronization upon
// connection.
func (g *gossipSyncer) genChanRangeQuery() (*lnwire.QueryChannelRange, error) {
// connection. The historicalQuery boolean can be used to generate a query from
// the genesis block of the chain.
func (g *GossipSyncer) genChanRangeQuery(
historicalQuery bool) (*lnwire.QueryChannelRange, error) {
// First, we'll query our channel graph time series for its highest
// known channel ID.
newestChan, err := g.cfg.channelSeries.HighestChanID(g.cfg.chainHash)
@ -559,23 +665,23 @@ func (g *gossipSyncer) genChanRangeQuery() (*lnwire.QueryChannelRange, error) {
return nil, err
}
// Once we have the chan ID of the newest, we'll obtain the block
// height of the channel, then subtract our default horizon to ensure
// we don't miss any channels. By default, we go back 1 day from the
// newest channel.
// Once we have the chan ID of the newest, we'll obtain the block height
// of the channel, then subtract our default horizon to ensure we don't
// miss any channels. By default, we go back 1 day from the newest
// channel, unless we're attempting a historical sync, where we'll
// actually start from the genesis block instead.
var startHeight uint32
switch {
case newestChan.BlockHeight <= chanRangeQueryBuffer:
case historicalQuery:
fallthrough
case newestChan.BlockHeight == 0:
case newestChan.BlockHeight <= chanRangeQueryBuffer:
startHeight = 0
default:
startHeight = uint32(newestChan.BlockHeight - chanRangeQueryBuffer)
}
log.Infof("gossipSyncer(%x): requesting new chans from height=%v "+
"and %v blocks after", g.peerPub[:], startHeight,
log.Infof("GossipSyncer(%x): requesting new chans from height=%v "+
"and %v blocks after", g.cfg.peerPub[:], startHeight,
math.MaxUint32-startHeight)
// Finally, we'll craft the channel range query, using our starting
@ -590,7 +696,7 @@ func (g *gossipSyncer) genChanRangeQuery() (*lnwire.QueryChannelRange, error) {
// replyPeerQueries is called in response to any query by the remote peer.
// We'll examine our state and send back our best response.
func (g *gossipSyncer) replyPeerQueries(msg lnwire.Message) error {
func (g *GossipSyncer) replyPeerQueries(msg lnwire.Message) error {
reservation := g.rateLimiter.Reserve()
delay := reservation.Delay()
@ -598,8 +704,8 @@ func (g *gossipSyncer) replyPeerQueries(msg lnwire.Message) error {
// responses back to the remote peer. This can help prevent DOS attacks
// where the remote peer spams us endlessly.
if delay > 0 {
log.Infof("gossipSyncer(%x): rate limiting gossip replies, "+
"responding in %s", g.peerPub[:], delay)
log.Infof("GossipSyncer(%x): rate limiting gossip replies, "+
"responding in %s", g.cfg.peerPub[:], delay)
select {
case <-time.After(delay):
@ -630,9 +736,9 @@ func (g *gossipSyncer) replyPeerQueries(msg lnwire.Message) error {
// meet the channel range, then chunk our responses to the remote node. We also
// ensure that our final fragment carries the "complete" bit to indicate the
// end of our streaming response.
func (g *gossipSyncer) replyChanRangeQuery(query *lnwire.QueryChannelRange) error {
log.Infof("gossipSyncer(%x): filtering chan range: start_height=%v, "+
"num_blocks=%v", g.peerPub[:], query.FirstBlockHeight,
func (g *GossipSyncer) replyChanRangeQuery(query *lnwire.QueryChannelRange) error {
log.Infof("GossipSyncer(%x): filtering chan range: start_height=%v, "+
"num_blocks=%v", g.cfg.peerPub[:], query.FirstBlockHeight,
query.NumBlocks)
// Next, we'll consult the time series to obtain the set of known
@ -666,16 +772,16 @@ func (g *gossipSyncer) replyChanRangeQuery(query *lnwire.QueryChannelRange) erro
if isFinalChunk {
channelChunk = channelRange[numChansSent:]
log.Infof("gossipSyncer(%x): sending final chan "+
"range chunk, size=%v", g.peerPub[:], len(channelChunk))
log.Infof("GossipSyncer(%x): sending final chan "+
"range chunk, size=%v", g.cfg.peerPub[:],
len(channelChunk))
} else {
// Otherwise, we'll only send off a fragment exactly
// sized to the proper chunk size.
channelChunk = channelRange[numChansSent : numChansSent+g.cfg.chunkSize]
log.Infof("gossipSyncer(%x): sending range chunk of "+
"size=%v", g.peerPub[:], len(channelChunk))
log.Infof("GossipSyncer(%x): sending range chunk of "+
"size=%v", g.cfg.peerPub[:], len(channelChunk))
}
// With our chunk assembled, we'll now send to the remote peer
@ -707,7 +813,7 @@ func (g *gossipSyncer) replyChanRangeQuery(query *lnwire.QueryChannelRange) erro
// node for information concerning a set of short channel ID's. Our response
// will be sent in a streaming chunked manner to ensure that we remain below
// the current transport level message size.
func (g *gossipSyncer) replyShortChanIDs(query *lnwire.QueryShortChanIDs) error {
func (g *GossipSyncer) replyShortChanIDs(query *lnwire.QueryShortChanIDs) error {
// Before responding, we'll check to ensure that the remote peer is
// querying for the same chain that we're on. If not, we'll send back a
// response with a complete value of zero to indicate we're on a
@ -724,13 +830,13 @@ func (g *gossipSyncer) replyShortChanIDs(query *lnwire.QueryShortChanIDs) error
}
if len(query.ShortChanIDs) == 0 {
log.Infof("gossipSyncer(%x): ignoring query for blank short chan ID's",
g.peerPub[:])
log.Infof("GossipSyncer(%x): ignoring query for blank short chan ID's",
g.cfg.peerPub[:])
return nil
}
log.Infof("gossipSyncer(%x): fetching chan anns for %v chans",
g.peerPub[:], len(query.ShortChanIDs))
log.Infof("GossipSyncer(%x): fetching chan anns for %v chans",
g.cfg.peerPub[:], len(query.ShortChanIDs))
// Now that we know we're on the same chain, we'll query the channel
// time series for the set of messages that we know of which satisfies
@ -766,7 +872,7 @@ func (g *gossipSyncer) replyShortChanIDs(query *lnwire.QueryShortChanIDs) error
// ApplyGossipFilter applies a gossiper filter sent by the remote node to the
// state machine. Once applied, we'll ensure that we don't forward any messages
// to the peer that aren't within the time range of the filter.
func (g *gossipSyncer) ApplyGossipFilter(filter *lnwire.GossipTimestampRange) error {
func (g *GossipSyncer) ApplyGossipFilter(filter *lnwire.GossipTimestampRange) error {
g.Lock()
g.remoteUpdateHorizon = filter
@ -787,8 +893,8 @@ func (g *gossipSyncer) ApplyGossipFilter(filter *lnwire.GossipTimestampRange) er
return err
}
log.Infof("gossipSyncer(%x): applying new update horizon: start=%v, "+
"end=%v, backlog_size=%v", g.peerPub[:], startTime, endTime,
log.Infof("GossipSyncer(%x): applying new update horizon: start=%v, "+
"end=%v, backlog_size=%v", g.cfg.peerPub[:], startTime, endTime,
len(newUpdatestoSend))
// If we don't have any to send, then we can return early.
@ -813,17 +919,19 @@ func (g *gossipSyncer) ApplyGossipFilter(filter *lnwire.GossipTimestampRange) er
// FilterGossipMsgs takes a set of gossip messages, and only send it to a peer
// iff the message is within the bounds of their set gossip filter. If the peer
// doesn't have a gossip filter set, then no messages will be forwarded.
func (g *gossipSyncer) FilterGossipMsgs(msgs ...msgWithSenders) {
func (g *GossipSyncer) FilterGossipMsgs(msgs ...msgWithSenders) {
// If the peer doesn't have an update horizon set, then we won't send
// it any new update messages.
if g.remoteUpdateHorizon == nil {
return
}
// If we've been signalled to exit, or are exiting, then we'll stop
// If we've been signaled to exit, or are exiting, then we'll stop
// short.
if atomic.LoadUint32(&g.stopped) == 1 {
select {
case <-g.quit:
return
default:
}
// TODO(roasbeef): need to ensure that peer still online...send msg to
@ -858,7 +966,8 @@ func (g *gossipSyncer) FilterGossipMsgs(msgs ...msgWithSenders) {
passesFilter := func(timeStamp uint32) bool {
t := time.Unix(int64(timeStamp), 0)
return t.After(startTime) && t.Before(endTime)
return t.Equal(startTime) ||
(t.After(startTime) && t.Before(endTime))
}
msgsToSend := make([]lnwire.Message, 0, len(msgs))
@ -866,7 +975,7 @@ func (g *gossipSyncer) FilterGossipMsgs(msgs ...msgWithSenders) {
// If the target peer is the peer that sent us this message,
// then we'll exit early as we don't need to filter this
// message.
if _, ok := msg.senders[g.peerPub]; ok {
if _, ok := msg.senders[g.cfg.peerPub]; ok {
continue
}
@ -920,8 +1029,8 @@ func (g *gossipSyncer) FilterGossipMsgs(msgs ...msgWithSenders) {
}
}
log.Tracef("gossipSyncer(%x): filtered gossip msgs: set=%v, sent=%v",
g.peerPub[:], len(msgs), len(msgsToSend))
log.Tracef("GossipSyncer(%x): filtered gossip msgs: set=%v, sent=%v",
g.cfg.peerPub[:], len(msgs), len(msgsToSend))
if len(msgsToSend) == 0 {
return
@ -932,7 +1041,7 @@ func (g *gossipSyncer) FilterGossipMsgs(msgs ...msgWithSenders) {
// ProcessQueryMsg is used by outside callers to pass new channel time series
// queries to the internal processing goroutine.
func (g *gossipSyncer) ProcessQueryMsg(msg lnwire.Message, peerQuit <-chan struct{}) {
func (g *GossipSyncer) ProcessQueryMsg(msg lnwire.Message, peerQuit <-chan struct{}) {
select {
case g.gossipMsgs <- msg:
case <-peerQuit:
@ -940,7 +1049,149 @@ func (g *gossipSyncer) ProcessQueryMsg(msg lnwire.Message, peerQuit <-chan struc
}
}
// SyncerState returns the current syncerState of the target gossipSyncer.
func (g *gossipSyncer) SyncState() syncerState {
// setSyncState sets the gossip syncer's state to the given state.
func (g *GossipSyncer) setSyncState(state syncerState) {
atomic.StoreUint32(&g.state, uint32(state))
}
// syncState returns the current syncerState of the target GossipSyncer.
func (g *GossipSyncer) syncState() syncerState {
return syncerState(atomic.LoadUint32(&g.state))
}
// ResetSyncedSignal returns a channel that will be closed in order to serve as
// a signal for when the GossipSyncer has reached its chansSynced state.
func (g *GossipSyncer) ResetSyncedSignal() chan struct{} {
g.Lock()
defer g.Unlock()
syncedSignal := make(chan struct{})
syncState := g.syncState()
if syncState == chansSynced {
close(syncedSignal)
return syncedSignal
}
g.syncedSignal = syncedSignal
return g.syncedSignal
}
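A minimal usage sketch, assuming the syncer has already been started:
select {
case <-s.ResetSyncedSignal():
// The syncer has reached its terminal chansSynced state.
case <-time.After(syncTransitionTimeout):
// The timeout here is an assumption; the peer may simply be slow.
}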
// ProcessSyncTransition sends a request to the gossip syncer to transition its
// sync type to a new one.
//
// NOTE: This can only be done once the gossip syncer has reached its final
// chansSynced state.
func (g *GossipSyncer) ProcessSyncTransition(newSyncType SyncerType) error {
errChan := make(chan error, 1)
select {
case g.syncTransitionReqs <- &syncTransitionReq{
newSyncType: newSyncType,
errChan: errChan,
}:
case <-time.After(syncTransitionTimeout):
return ErrSyncTransitionTimeout
case <-g.quit:
return ErrGossipSyncerExiting
}
select {
case err := <-errChan:
return err
case <-g.quit:
return ErrGossipSyncerExiting
}
}
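For example, a sketch of demoting a syncer during rotation (s is assumed to be a chansSynced *GossipSyncer):
if err := s.ProcessSyncTransition(PassiveSync); err != nil {
log.Errorf("unable to transition syncer to %v: %v", PassiveSync, err)
}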
// handleSyncTransition handles a new sync type transition request.
//
// NOTE: The gossip syncer might end up in a different sync state as a
// result of this transition.
func (g *GossipSyncer) handleSyncTransition(req *syncTransitionReq) error {
// Return early from any NOP sync transitions.
syncType := g.SyncType()
if syncType == req.newSyncType {
return nil
}
log.Debugf("GossipSyncer(%x): transitioning from %v to %v",
g.cfg.peerPub, syncType, req.newSyncType)
var (
firstTimestamp time.Time
timestampRange uint32
newState syncerState
)
switch req.newSyncType {
// If an active sync has been requested, then we should resume receiving
// new graph updates from the remote peer.
case ActiveSync:
firstTimestamp = time.Now()
timestampRange = math.MaxUint32
newState = syncingChans
// We'll set genHistoricalChanRangeQuery to false to ensure we
// don't perform another historical sync if we previously have.
g.genHistoricalChanRangeQuery = false
// If a PassiveSync transition has been requested, then we should no
// longer receive any new updates from the remote peer. We can do this
// by setting our update horizon to a range in the past ensuring no
// graph updates match the timestamp range.
case PassiveSync:
firstTimestamp = zeroTimestamp
timestampRange = 0
newState = chansSynced
default:
return fmt.Errorf("unhandled sync transition %v",
req.newSyncType)
}
err := g.sendGossipTimestampRange(firstTimestamp, timestampRange)
if err != nil {
return fmt.Errorf("unable to send local update horizon: %v", err)
}
g.setSyncState(newState)
g.setSyncType(req.newSyncType)
return nil
}
// setSyncType sets the gossip syncer's sync type to the given type.
func (g *GossipSyncer) setSyncType(syncType SyncerType) {
atomic.StoreUint32(&g.syncType, uint32(syncType))
}
// SyncType returns the current SyncerType of the target GossipSyncer.
func (g *GossipSyncer) SyncType() SyncerType {
return SyncerType(atomic.LoadUint32(&g.syncType))
}
// historicalSync sends a request to the gossip syncer to perform a historical
// sync.
//
// NOTE: This can only be done once the gossip syncer has reached its final
// chansSynced state.
func (g *GossipSyncer) historicalSync() error {
select {
case g.historicalSyncReqs <- struct{}{}:
return nil
case <-time.After(syncTransitionTimeout):
return ErrSyncTransitionTimeout
case <-g.quit:
return ErrGossiperShuttingDown
}
}
// handleHistoricalSync handles a request to the gossip syncer to perform a
// historical sync.
func (g *GossipSyncer) handleHistoricalSync() {
// We'll go back to our initial syncingChans state in order to request
// the remote peer to give us all of the channel IDs they know of
// starting from the genesis block.
g.genHistoricalChanRangeQuery = true
g.setSyncState(syncingChans)
}
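Taken together, a caller within this package (the SyncManager, for instance) might drive a historical sync and wait for completion along these lines; this is a sketch, and the one-minute bound is an assumption:
if err := s.historicalSync(); err != nil {
return err
}
select {
case <-s.ResetSyncedSignal():
// The syncer walked back through syncingChans and has returned to
// chansSynced with the remote peer's full channel history.
case <-time.After(time.Minute): // hypothetical bound
return ErrSyncTransitionTimeout
}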

View File

@ -13,7 +13,9 @@ import (
)
const (
defaultEncoding = lnwire.EncodingSortedPlain
defaultEncoding = lnwire.EncodingSortedPlain
latestKnownHeight = 1337
startHeight = latestKnownHeight - chanRangeQueryBuffer
)
var (
@ -116,21 +118,20 @@ var _ ChannelGraphTimeSeries = (*mockChannelGraphTimeSeries)(nil)
func newTestSyncer(hID lnwire.ShortChannelID,
encodingType lnwire.ShortChanIDEncoding, chunkSize int32,
) (chan []lnwire.Message, *gossipSyncer, *mockChannelGraphTimeSeries) {
) (chan []lnwire.Message, *GossipSyncer, *mockChannelGraphTimeSeries) {
msgChan := make(chan []lnwire.Message, 20)
cfg := gossipSyncerCfg{
syncChanUpdates: true,
channelSeries: newMockChannelGraphTimeSeries(hID),
encodingType: encodingType,
chunkSize: chunkSize,
channelSeries: newMockChannelGraphTimeSeries(hID),
encodingType: encodingType,
chunkSize: chunkSize,
sendToPeer: func(msgs ...lnwire.Message) error {
msgChan <- msgs
return nil
},
delayedQueryReplyInterval: 2 * time.Second,
}
syncer := newGossiperSyncer(cfg)
syncer := newGossipSyncer(cfg)
return msgChan, syncer, cfg.channelSeries.(*mockChannelGraphTimeSeries)
}
@ -140,7 +141,7 @@ func newTestSyncer(hID lnwire.ShortChannelID,
func TestGossipSyncerFilterGossipMsgsNoHorizon(t *testing.T) {
t.Parallel()
// First, we'll create a gossipSyncer instance with a canned sendToPeer
// First, we'll create a GossipSyncer instance with a canned sendToPeer
// message to allow us to intercept their potential sends.
msgChan, syncer, _ := newTestSyncer(
lnwire.NewShortChanIDFromInt(10), defaultEncoding,
@ -185,7 +186,7 @@ func unixStamp(a int64) uint32 {
func TestGossipSyncerFilterGossipMsgsAllInMemory(t *testing.T) {
t.Parallel()
// First, we'll create a gossipSyncer instance with a canned sendToPeer
// First, we'll create a GossipSyncer instance with a canned sendToPeer
// message to allow us to intercept their potential sends.
msgChan, syncer, chanSeries := newTestSyncer(
lnwire.NewShortChanIDFromInt(10), defaultEncoding,
@ -315,7 +316,7 @@ func TestGossipSyncerFilterGossipMsgsAllInMemory(t *testing.T) {
func TestGossipSyncerApplyGossipFilter(t *testing.T) {
t.Parallel()
// First, we'll create a gossipSyncer instance with a canned sendToPeer
// First, we'll create a GossipSyncer instance with a canned sendToPeer
// message to allow us to intercept their potential sends.
msgChan, syncer, chanSeries := newTestSyncer(
lnwire.NewShortChanIDFromInt(10), defaultEncoding,
@ -413,7 +414,7 @@ func TestGossipSyncerApplyGossipFilter(t *testing.T) {
func TestGossipSyncerReplyShortChanIDsWrongChainHash(t *testing.T) {
t.Parallel()
// First, we'll create a gossipSyncer instance with a canned sendToPeer
// First, we'll create a GossipSyncer instance with a canned sendToPeer
// message to allow us to intercept their potential sends.
msgChan, syncer, _ := newTestSyncer(
lnwire.NewShortChanIDFromInt(10), defaultEncoding,
@ -464,7 +465,7 @@ func TestGossipSyncerReplyShortChanIDsWrongChainHash(t *testing.T) {
func TestGossipSyncerReplyShortChanIDs(t *testing.T) {
t.Parallel()
// First, we'll create a gossipSyncer instance with a canned sendToPeer
// First, we'll create a GossipSyncer instance with a canned sendToPeer
// message to allow us to intercept their potential sends.
msgChan, syncer, chanSeries := newTestSyncer(
lnwire.NewShortChanIDFromInt(10), defaultEncoding,
@ -718,7 +719,7 @@ func TestGossipSyncerReplyChanRangeQueryNoNewChans(t *testing.T) {
func TestGossipSyncerGenChanRangeQuery(t *testing.T) {
t.Parallel()
// First, we'll create a gossipSyncer instance with a canned sendToPeer
// First, we'll create a GossipSyncer instance with a canned sendToPeer
// message to allow us to intercept their potential sends.
const startingHeight = 200
_, syncer, _ := newTestSyncer(
@ -729,7 +730,7 @@ func TestGossipSyncerGenChanRangeQuery(t *testing.T) {
// If we now ask the syncer to generate an initial range query, it
// should return a start height that's back chanRangeQueryBuffer
// blocks.
rangeQuery, err := syncer.genChanRangeQuery()
rangeQuery, err := syncer.genChanRangeQuery(false)
if err != nil {
t.Fatalf("unable to resp: %v", err)
}
@ -742,7 +743,22 @@ func TestGossipSyncerGenChanRangeQuery(t *testing.T) {
}
if rangeQuery.NumBlocks != math.MaxUint32-firstHeight {
t.Fatalf("wrong num blocks: expected %v, got %v",
rangeQuery.NumBlocks, math.MaxUint32-firstHeight)
math.MaxUint32-firstHeight, rangeQuery.NumBlocks)
}
// Generating a historical range query should result in a start height
// of 0.
rangeQuery, err = syncer.genChanRangeQuery(true)
if err != nil {
t.Fatalf("unable to resp: %v", err)
}
if rangeQuery.FirstBlockHeight != 0 {
t.Fatalf("incorrect chan range query: expected %v, %v", 0,
rangeQuery.FirstBlockHeight)
}
if rangeQuery.NumBlocks != math.MaxUint32 {
t.Fatalf("wrong num blocks: expected %v, got %v",
math.MaxUint32, rangeQuery.NumBlocks)
}
}
@ -753,7 +769,7 @@ func TestGossipSyncerGenChanRangeQuery(t *testing.T) {
func TestGossipSyncerProcessChanRangeReply(t *testing.T) {
t.Parallel()
// First, we'll create a gossipSyncer instance with a canned sendToPeer
// First, we'll create a GossipSyncer instance with a canned sendToPeer
// message to allow us to intercept their potential sends.
_, syncer, chanSeries := newTestSyncer(
lnwire.NewShortChanIDFromInt(10), defaultEncoding, defaultChunkSize,
@ -827,7 +843,7 @@ func TestGossipSyncerProcessChanRangeReply(t *testing.T) {
t.Fatalf("unable to process reply: %v", err)
}
if syncer.SyncState() != queryNewChannels {
if syncer.syncState() != queryNewChannels {
t.Fatalf("wrong state: expected %v instead got %v",
queryNewChannels, syncer.state)
}
@ -860,7 +876,7 @@ func TestGossipSyncerProcessChanRangeReply(t *testing.T) {
t.Fatalf("unable to process reply: %v", err)
}
if syncer.SyncState() != chansSynced {
if syncer.syncState() != chansSynced {
t.Fatalf("wrong state: expected %v instead got %v",
chansSynced, syncer.state)
}
@ -878,7 +894,7 @@ func TestGossipSyncerSynchronizeChanIDs(t *testing.T) {
// queries: two full chunks, and one lingering chunk.
const chunkSize = 2
// First, we'll create a gossipSyncer instance with a canned sendToPeer
// First, we'll create a GossipSyncer instance with a canned sendToPeer
// message to allow us to intercept their potential sends.
msgChan, syncer, _ := newTestSyncer(
lnwire.NewShortChanIDFromInt(10), defaultEncoding, chunkSize,
@ -997,7 +1013,7 @@ func TestGossipSyncerDelayDOS(t *testing.T) {
const numDelayedQueries = 2
const delayTolerance = time.Millisecond * 200
// First, we'll create two gossipSyncer instances with a canned
// First, we'll create two GossipSyncer instances with a canned
// sendToPeer message to allow us to intercept their potential sends.
startHeight := lnwire.ShortChannelID{
BlockHeight: 1144,
@ -1390,7 +1406,7 @@ func TestGossipSyncerRoutineSync(t *testing.T) {
// queries: two full chunks, and one lingering chunk.
const chunkSize = 2
// First, we'll create two gossipSyncer instances with a canned
// First, we'll create two GossipSyncer instances with a canned
// sendToPeer message to allow us to intercept their potential sends.
startHeight := lnwire.ShortChannelID{
BlockHeight: 1144,
@ -1730,7 +1746,7 @@ func TestGossipSyncerAlreadySynced(t *testing.T) {
// queries: two full chunks, and one lingering chunk.
const chunkSize = 2
// First, we'll create two gossipSyncer instances with a canned
// First, we'll create two GossipSyncer instances with a canned
// sendToPeer message to allow us to intercept their potential sends.
startHeight := lnwire.ShortChannelID{
BlockHeight: 1144,
@ -1941,3 +1957,236 @@ func TestGossipSyncerAlreadySynced(t *testing.T) {
}
}
}
// TestGossipSyncerSyncTransitions ensures that the gossip syncer properly
// carries out its duties when accepting a new sync transition request.
func TestGossipSyncerSyncTransitions(t *testing.T) {
t.Parallel()
assertMsgSent := func(t *testing.T, msgChan chan []lnwire.Message,
msg lnwire.Message) {
t.Helper()
var msgSent lnwire.Message
select {
case msgs := <-msgChan:
if len(msgs) != 1 {
t.Fatal("expected to send a single message at "+
"a time, got %d", len(msgs))
}
msgSent = msgs[0]
case <-time.After(time.Second):
t.Fatalf("expected to send %T message", msg)
}
if !reflect.DeepEqual(msgSent, msg) {
t.Fatalf("expected to send message: %v\ngot: %v",
spew.Sdump(msg), spew.Sdump(msgSent))
}
}
tests := []struct {
name string
entrySyncType SyncerType
finalSyncType SyncerType
assert func(t *testing.T, msgChan chan []lnwire.Message,
syncer *GossipSyncer)
}{
{
name: "active to passive",
entrySyncType: ActiveSync,
finalSyncType: PassiveSync,
assert: func(t *testing.T, msgChan chan []lnwire.Message,
g *GossipSyncer) {
// When transitioning from active to passive, we
// should expect to see a new local update
// horizon sent to the remote peer indicating
// that it would not like to receive any future
// updates.
assertMsgSent(t, msgChan, &lnwire.GossipTimestampRange{
FirstTimestamp: uint32(zeroTimestamp.Unix()),
TimestampRange: 0,
})
syncState := g.syncState()
if syncState != chansSynced {
t.Fatalf("expected syncerState %v, "+
"got %v", chansSynced,
syncState)
}
},
},
{
name: "passive to active",
entrySyncType: PassiveSync,
finalSyncType: ActiveSync,
assert: func(t *testing.T, msgChan chan []lnwire.Message,
g *GossipSyncer) {
// When transitioning from passive to active,
// we should expect to see a new local update
// horizon sent to the remote peer indicating
// that it would like to receive any future
// updates.
firstTimestamp := uint32(time.Now().Unix())
assertMsgSent(t, msgChan, &lnwire.GossipTimestampRange{
FirstTimestamp: firstTimestamp,
TimestampRange: math.MaxUint32,
})
// The local update horizon should be followed
// by a QueryChannelRange message sent to the
// remote peer requesting all channels it
// knows of from the highest height the syncer
// knows of.
assertMsgSent(t, msgChan, &lnwire.QueryChannelRange{
FirstBlockHeight: startHeight,
NumBlocks: math.MaxUint32 - startHeight,
})
syncState := g.syncState()
if syncState != waitingQueryRangeReply {
t.Fatalf("expected syncerState %v, "+
"got %v", waitingQueryRangeReply,
syncState)
}
},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
t.Parallel()
// We'll start each test by creating our syncer. We'll
// initialize it with a state of chansSynced, as that's
// the only time when it can process sync transitions.
msgChan, syncer, _ := newTestSyncer(
lnwire.ShortChannelID{
BlockHeight: latestKnownHeight,
},
defaultEncoding, defaultChunkSize,
)
syncer.setSyncState(chansSynced)
// We'll set the initial syncType to what the test
// demands.
syncer.setSyncType(test.entrySyncType)
// We'll then start the syncer in order to process the
// request.
syncer.Start()
defer syncer.Stop()
if err := syncer.ProcessSyncTransition(test.finalSyncType); err != nil {
t.Fatalf("unable to process sync transition: %v", err)
}
// The syncer should now have the expected final
// SyncerType that the test expects.
syncType := syncer.SyncType()
if syncType != test.finalSyncType {
t.Fatalf("expected syncType %v, got %v",
test.finalSyncType, syncType)
}
// Finally, we'll run a set of assertions for each test
// to ensure the syncer performed its expected duties
// after processing its sync transition.
test.assert(t, msgChan, syncer)
})
}
}
// TestGossipSyncerHistoricalSync tests that a gossip syncer can perform a
// historical sync with the remote peer.
func TestGossipSyncerHistoricalSync(t *testing.T) {
t.Parallel()
// We'll create a new gossip syncer and manually override its state to
// chansSynced. This is necessary as the syncer can only process
// historical sync requests in this state.
msgChan, syncer, _ := newTestSyncer(
lnwire.ShortChannelID{BlockHeight: latestKnownHeight},
defaultEncoding, defaultChunkSize,
)
syncer.setSyncType(PassiveSync)
syncer.setSyncState(chansSynced)
syncer.Start()
defer syncer.Stop()
if err := syncer.historicalSync(); err != nil {
t.Fatalf("unable to request historical sync: %v", err)
}
// We should expect to see a single lnwire.QueryChannelRange message be
// sent to the remote peer with a FirstBlockHeight of 0.
expectedMsg := &lnwire.QueryChannelRange{
FirstBlockHeight: 0,
NumBlocks: math.MaxUint32,
}
select {
case msgs := <-msgChan:
if len(msgs) != 1 {
t.Fatalf("expected to send a single "+
"lnwire.QueryChannelRange message, got %d",
len(msgs))
}
if !reflect.DeepEqual(msgs[0], expectedMsg) {
t.Fatalf("expected to send message: %v\ngot: %v",
spew.Sdump(expectedMsg), spew.Sdump(msgs[0]))
}
case <-time.After(time.Second):
t.Fatalf("expected to send a lnwire.QueryChannelRange message")
}
}
// TestGossipSyncerSyncedSignal ensures that we receive a signal when a gossip
// syncer reaches its terminal chansSynced state.
func TestGossipSyncerSyncedSignal(t *testing.T) {
t.Parallel()
// We'll create a new gossip syncer and manually override its state to
// chansSynced.
_, syncer, _ := newTestSyncer(
lnwire.NewShortChanIDFromInt(10), defaultEncoding,
defaultChunkSize,
)
syncer.setSyncState(chansSynced)
// We'll go ahead and request a signal to be notified of when it reaches
// this state.
signalChan := syncer.ResetSyncedSignal()
// Starting the gossip syncer should cause the signal to be delivered.
syncer.Start()
select {
case <-signalChan:
case <-time.After(time.Second):
t.Fatal("expected to receive chansSynced signal")
}
syncer.Stop()
// We'll try this again, but this time we'll request the signal after
// the syncer is active and has already reached its chansSynced state.
_, syncer, _ = newTestSyncer(
lnwire.NewShortChanIDFromInt(10), defaultEncoding,
defaultChunkSize,
)
syncer.setSyncState(chansSynced)
syncer.Start()
defer syncer.Stop()
signalChan = syncer.ResetSyncedSignal()
// The signal should be delivered immediately.
select {
case <-signalChan:
case <-time.After(time.Second):
t.Fatal("expected to receive chansSynced signal")
}
}

File diff suppressed because it is too large

View File

@ -1257,6 +1257,26 @@ message Peer {
/// Ping time to this peer
int64 ping_time = 9 [json_name = "ping_time"];
enum SyncType {
/**
Denotes that we cannot determine the peer's current sync type.
*/
UNKNOWN_SYNC = 0;
/**
Denotes that we are actively receiving new graph updates from the peer.
*/
ACTIVE_SYNC = 1;
/**
Denotes that we are not receiving new graph updates from the peer.
*/
PASSIVE_SYNC = 2;
}
// The type of sync we are currently performing with this peer.
SyncType sync_type = 10 [json_name = "sync_type"];
}
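On the client side, the new field surfaces through ListPeers. A short sketch against the generated Go bindings (connection setup elided; client is an assumed lnrpc.LightningClient):
resp, err := client.ListPeers(ctx, &lnrpc.ListPeersRequest{})
if err != nil {
return err
}
for _, p := range resp.Peers {
fmt.Printf("peer=%s sync_type=%v\n", p.PubKey, p.SyncType)
}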
message ListPeersRequest {

View File

@ -1305,6 +1305,16 @@
],
"default": "OPEN"
},
"PeerSyncType": {
"type": "string",
"enum": [
"UNKNOWN_SYNC",
"ACTIVE_SYNC",
"PASSIVE_SYNC"
],
"default": "UNKNOWN_SYNC",
"description": " - UNKNOWN_SYNC: *\nDenotes that we cannot determine the peer's current sync type.\n - ACTIVE_SYNC: *\nDenotes that we are actively receiving new graph updates from the peer.\n - PASSIVE_SYNC: *\nDenotes that we are not receiving new graph updates from the peer."
},
"PendingChannelsResponseClosedChannel": {
"type": "object",
"properties": {
@ -2856,6 +2866,10 @@
"type": "string",
"format": "int64",
"title": "/ Ping time to this peer"
},
"sync_type": {
"$ref": "#/definitions/PeerSyncType",
"description": "The type of sync we are currently performing with this peer."
}
}
},

peer.go
View File

@ -396,19 +396,16 @@ func (p *peer) initGossipSync() {
srvrLog.Infof("Negotiated chan series queries with %x",
p.pubKeyBytes[:])
// We'll only request channel updates from the remote peer if
// it's enabled in the config, or we're already getting updates
// from enough peers.
//
// TODO(roasbeef): craft s.t. we only get updates from a few
// peers
recvUpdates := !cfg.NoChanUpdates
// Register this peer's gossip syncer with the gossiper. This
// blocks synchronously to ensure the gossip syncer is
// registered with the gossiper before attempting to read
// messages from the remote peer.
p.server.authGossiper.InitSyncState(p, recvUpdates)
//
// TODO(wilmer): Only sync updates from non-channel peers. This
// requires an improved version of the current network
// bootstrapper to ensure we can find and connect to non-channel
// peers.
p.server.authGossiper.InitSyncState(p)
// If the remote peer has the initial sync feature bit set, then we'll
// begin the synchronization protocol to exchange authenticated channel

View File

@ -34,6 +34,7 @@ import (
"github.com/lightningnetwork/lnd/chanbackup"
"github.com/lightningnetwork/lnd/channeldb"
"github.com/lightningnetwork/lnd/channelnotifier"
"github.com/lightningnetwork/lnd/discovery"
"github.com/lightningnetwork/lnd/htlcswitch"
"github.com/lightningnetwork/lnd/input"
"github.com/lightningnetwork/lnd/invoices"
@ -2012,9 +2013,36 @@ func (r *rpcServer) ListPeers(ctx context.Context,
satRecv += int64(c.TotalMSatReceived.ToSatoshis())
}
nodePub := serverPeer.addr.IdentityKey.SerializeCompressed()
nodePub := serverPeer.PubKey()
// Retrieve the peer's sync type. If we don't currently have a
// syncer for the peer, then we'll default to a passive sync.
// This can happen if the RPC is called while a peer is
// initializing.
syncer, ok := r.server.authGossiper.SyncManager().GossipSyncer(
nodePub,
)
var lnrpcSyncType lnrpc.Peer_SyncType
if !ok {
rpcsLog.Warnf("Gossip syncer for peer=%x not found",
nodePub)
lnrpcSyncType = lnrpc.Peer_UNKNOWN_SYNC
} else {
syncType := syncer.SyncType()
switch syncType {
case discovery.ActiveSync:
lnrpcSyncType = lnrpc.Peer_ACTIVE_SYNC
case discovery.PassiveSync:
lnrpcSyncType = lnrpc.Peer_PASSIVE_SYNC
default:
return nil, fmt.Errorf("unhandled sync type %v",
syncType)
}
}
peer := &lnrpc.Peer{
PubKey: hex.EncodeToString(nodePub),
PubKey: hex.EncodeToString(nodePub[:]),
Address: serverPeer.conn.RemoteAddr().String(),
Inbound: serverPeer.inbound,
BytesRecv: atomic.LoadUint64(&serverPeer.bytesReceived),
@ -2022,6 +2050,7 @@ func (r *rpcServer) ListPeers(ctx context.Context,
SatSent: satSent,
SatRecv: satRecv,
PingTime: serverPeer.PingTime(),
SyncType: lnrpcSyncType,
}
resp.Peers = append(resp.Peers, peer)

View File

@ -636,10 +636,7 @@ func newServer(listenAddrs []net.Addr, chanDB *channeldb.DB, cc *chainControl,
return nil, fmt.Errorf("can't create router: %v", err)
}
chanSeries := discovery.NewChanSeries(
s.chanDB.ChannelGraph(),
)
chanSeries := discovery.NewChanSeries(s.chanDB.ChannelGraph())
gossipMessageStore, err := discovery.NewMessageStore(s.chanDB)
if err != nil {
return nil, err
@ -650,19 +647,23 @@ func newServer(listenAddrs []net.Addr, chanDB *channeldb.DB, cc *chainControl,
}
s.authGossiper = discovery.New(discovery.Config{
Router: s.chanRouter,
Notifier: s.cc.chainNotifier,
ChainHash: *activeNetParams.GenesisHash,
Broadcast: s.BroadcastMessage,
ChanSeries: chanSeries,
NotifyWhenOnline: s.NotifyWhenOnline,
NotifyWhenOffline: s.NotifyWhenOffline,
ProofMatureDelta: 0,
TrickleDelay: time.Millisecond * time.Duration(cfg.TrickleDelay),
RetransmitDelay: time.Minute * 30,
WaitingProofStore: waitingProofStore,
MessageStore: gossipMessageStore,
AnnSigner: s.nodeSigner,
Router: s.chanRouter,
Notifier: s.cc.chainNotifier,
ChainHash: *activeNetParams.GenesisHash,
Broadcast: s.BroadcastMessage,
ChanSeries: chanSeries,
NotifyWhenOnline: s.NotifyWhenOnline,
NotifyWhenOffline: s.NotifyWhenOffline,
ProofMatureDelta: 0,
TrickleDelay: time.Millisecond * time.Duration(cfg.TrickleDelay),
RetransmitDelay: time.Minute * 30,
WaitingProofStore: waitingProofStore,
MessageStore: gossipMessageStore,
AnnSigner: s.nodeSigner,
RotateTicker: ticker.New(discovery.DefaultSyncerRotationInterval),
HistoricalSyncTicker: ticker.New(discovery.DefaultHistoricalSyncInterval),
ActiveSyncerTimeoutTicker: ticker.New(discovery.DefaultActiveSyncerTimeout),
NumActiveSyncers: cfg.NumGraphSyncPeers,
},
s.identityPriv.PubKey(),
)
@ -2622,7 +2623,7 @@ func (s *server) peerTerminationWatcher(p *peer, ready chan struct{}) {
// We'll also inform the gossiper that this peer is no longer active,
// so we don't need to maintain sync state for it any longer.
s.authGossiper.PruneSyncState(pubKey)
s.authGossiper.PruneSyncState(p.PubKey())
// Tell the switch to remove all links associated with this peer.
// Passing nil as the target link indicates that all links associated