Merge pull request #9595 from yyforyongyu/fix-gossip-syncer

multi: fix flakes and gossip syncer
Oliver Gugger 2025-03-11 09:33:05 -06:00 committed by GitHub
commit 04c76101dd
6 changed files with 41 additions and 39 deletions


@@ -571,15 +571,11 @@ func (g *GossipSyncer) channelGraphSyncer() {
 			// First, we'll attempt to continue our channel
 			// synchronization by continuing to send off another
 			// query chunk.
-			done, err := g.synchronizeChanIDs()
-			if err != nil {
-				log.Errorf("Unable to sync chan IDs: %v", err)
-			}
+			done := g.synchronizeChanIDs()

 			// If this wasn't our last query, then we'll need to
 			// transition to our waiting state.
 			if !done {
-				g.setSyncState(waitingQueryChanReply)
 				continue
 			}
@@ -736,14 +732,15 @@ func (g *GossipSyncer) sendGossipTimestampRange(firstTimestamp time.Time,
 // been queried for with a response received. We'll chunk our requests as
 // required to ensure they fit into a single message. We may re-enter this
 // state in the case that chunking is required.
-func (g *GossipSyncer) synchronizeChanIDs() (bool, error) {
+func (g *GossipSyncer) synchronizeChanIDs() bool {
 	// If we're in this state yet there are no more new channels to query
 	// for, then we'll transition to our final synced state and return true
 	// to signal that we're fully synchronized.
 	if len(g.newChansToQuery) == 0 {
 		log.Infof("GossipSyncer(%x): no more chans to query",
 			g.cfg.peerPub[:])
-		return true, nil
+		return true
 	}

 	// Otherwise, we'll issue our next chunked query to receive replies
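The comment above describes the chunking contract: each pass through this state queries at most one message's worth of channel IDs and leaves the remainder for the next pass. A minimal, self-contained sketch of that slicing rule (hypothetical names; lnd derives the real chunk size from its gossiper configuration):

	package main

	import "fmt"

	// nextChunk returns at most chunkSize IDs to query now, plus the
	// remainder to re-enter the querying state with.
	func nextChunk(pending []uint64, chunkSize int) (chunk, rest []uint64) {
		if len(pending) <= chunkSize {
			// Everything left fits into a single message.
			return pending, nil
		}
		return pending[:chunkSize], pending[chunkSize:]
	}

	func main() {
		pending := []uint64{1, 2, 3, 4, 5}
		chunk, rest := nextChunk(pending, 2)
		fmt.Println(chunk, rest) // [1 2] [3 4 5]
	}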
@@ -767,6 +764,9 @@ func (g *GossipSyncer) synchronizeChanIDs() (bool, error) {
 	log.Infof("GossipSyncer(%x): querying for %v new channels",
 		g.cfg.peerPub[:], len(queryChunk))

+	// Change the state before sending the query msg.
+	g.setSyncState(waitingQueryChanReply)
+
 	// With our chunk obtained, we'll send over our next query, then return
 	// false indicating that we're not yet fully synced.
 	err := g.cfg.sendToPeer(&lnwire.QueryShortChanIDs{
@@ -774,8 +774,11 @@
 		EncodingType: lnwire.EncodingSortedPlain,
 		ShortChanIDs: queryChunk,
 	})
+	if err != nil {
+		log.Errorf("Unable to sync chan IDs: %v", err)
+	}

-	return false, err
+	return false
 }

 // isLegacyReplyChannelRange determines whether a ReplyChannelRange message is
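The reordering above, moving the transition to waitingQueryChanReply ahead of the sendToPeer call, is the substance of the fix: once the query is on the wire, a fast peer's reply can be processed before any code that runs after the send, so updating the state afterwards leaves a window where the reply is handled against stale state. A toy sketch of the pattern (hypothetical types, not lnd's actual GossipSyncer):

	package main

	import (
		"fmt"
		"sync"
	)

	type syncState int

	const (
		queryNewChannels syncState = iota
		waitingQueryChanReply
	)

	type toySyncer struct {
		mu    sync.Mutex
		state syncState
	}

	func (s *toySyncer) setState(st syncState) {
		s.mu.Lock()
		defer s.mu.Unlock()
		s.state = st
	}

	// handleReply fails if the reply races ahead of the state change.
	func (s *toySyncer) handleReply() error {
		s.mu.Lock()
		defer s.mu.Unlock()
		if s.state != waitingQueryChanReply {
			return fmt.Errorf("reply in unexpected state %v", s.state)
		}
		return nil
	}

	func main() {
		s := &toySyncer{state: queryNewChannels}
		reply := make(chan error, 1)

		// The "peer" answers as soon as the query is sent.
		sendToPeer := func() {
			go func() { reply <- s.handleReply() }()
		}

		// Correct order, mirroring the fix: change state, then send.
		s.setState(waitingQueryChanReply)
		sendToPeer()
		fmt.Println("reply handled, err =", <-reply)
	}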


@@ -1478,10 +1478,7 @@ func TestGossipSyncerSynchronizeChanIDs(t *testing.T) {
 	for i := 0; i < chunkSize*2; i += 2 {
 		// With our set up complete, we'll request a sync of chan IDs.
-		done, err := syncer.synchronizeChanIDs()
-		if err != nil {
-			t.Fatalf("unable to sync chan IDs: %v", err)
-		}
+		done := syncer.synchronizeChanIDs()

 		// At this point, we shouldn't yet be done as only 2 items
 		// should have been queried for.
@@ -1528,8 +1525,7 @@ func TestGossipSyncerSynchronizeChanIDs(t *testing.T) {
 	}

 	// If we issue another query, the syncer should tell us that it's done.
-	done, err := syncer.synchronizeChanIDs()
-	require.NoError(t, err, "unable to sync chan IDs")
+	done := syncer.synchronizeChanIDs()
 	if !done {
 		t.Fatalf("syncer should be finished!")
 	}


@@ -82,6 +82,9 @@
   could lead to our ChannelUpdate rate limiting logic being prematurely
   triggered.

+* [Fixed a bug](https://github.com/lightningnetwork/lnd/pull/9595) where the
+  initial graph sync query could fail due to inconsistent state.
+
 # New Features

 * [Support](https://github.com/lightningnetwork/lnd/pull/8390) for


@@ -438,12 +438,6 @@ func testForwardInterceptorRestart(ht *lntest.HarnessTest) {
 	require.Equal(ht, lntest.CustomRecordsWithUnendorsed(customRecords),
 		packet.InWireCustomRecords)

-	err = carolInterceptor.Send(&routerrpc.ForwardHtlcInterceptResponse{
-		IncomingCircuitKey: packet.IncomingCircuitKey,
-		Action:             actionResume,
-	})
-	require.NoError(ht, err, "failed to send request")
-
 	// And now we forward the payment at Carol, expecting only an
 	// endorsement signal in our incoming custom records.
 	packet = ht.ReceiveHtlcInterceptor(carolInterceptor)


@@ -8,6 +8,7 @@ import (
 	"github.com/lightningnetwork/lnd/htlcswitch"
 	"github.com/lightningnetwork/lnd/lnrpc"
 	"github.com/lightningnetwork/lnd/lntypes"
+	"github.com/lightningnetwork/lnd/lnutils"
 	"github.com/lightningnetwork/lnd/lnwire"
 	"google.golang.org/grpc/codes"
 	"google.golang.org/grpc/status"
@@ -60,6 +61,9 @@ func (r *forwardInterceptor) run() error {
 			return err
 		}

+		log.Tracef("Received packet from stream: %v",
+			lnutils.SpewLogClosure(resp))
+
 		if err := r.resolveFromClient(resp); err != nil {
 			return err
 		}
@@ -73,7 +77,8 @@ func (r *forwardInterceptor) run() error {
 func (r *forwardInterceptor) onIntercept(
 	htlc htlcswitch.InterceptedPacket) error {

-	log.Tracef("Sending intercepted packet to client %v", htlc)
+	log.Tracef("Sending intercepted packet to client %v",
+		lnutils.SpewLogClosure(htlc))

 	inKey := htlc.IncomingCircuit
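The lnutils.SpewLogClosure wrappers above defer the expensive spew dump until the trace log line is actually rendered. A simplified sketch of how such a helper can work; the local spewLogClosure here is a stand-in, assuming the real lnd helper wraps spew.Sdump in a fmt.Stringer:

	package main

	import (
		"fmt"

		"github.com/davecgh/go-spew/spew"
	)

	// logClosure defers an expensive String() rendering until a logger
	// actually formats the value with %v.
	type logClosure func() string

	func (c logClosure) String() string { return c() }

	// spewLogClosure wraps any value so spew.Sdump only runs if the log
	// line is emitted (e.g. when trace level is enabled).
	func spewLogClosure(v any) logClosure {
		return func() string { return spew.Sdump(v) }
	}

	func main() {
		pkt := struct{ Amount int64 }{Amount: 1000}

		// The closure's String() fires only when the formatter reads it.
		fmt.Printf("Received packet from stream: %v\n", spewLogClosure(pkt))
	}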


@@ -319,14 +319,20 @@ func TestReconnectSucceed(t *testing.T) {
 	// Close the old conn before reconnection.
 	require.NoError(t, proxy.serverConn.Close())

-	// Accept the connection inside a goroutine. We will also write some
-	// data so that the reconnection can succeed. We will mock three writes
-	// and two reads inside our proxy server,
-	// - write protocol info
-	// - read auth info
-	// - write auth challenge
-	// - read auth challenge
-	// - write OK
+	// Accept the connection inside a goroutine. When the client makes a
+	// reconnection, the message flow is:
+	// 1. the client sends the command PROTOCOLINFO to the server.
+	// 2. the server responds with its protocol version.
+	// 3. the client reads the response and sends the command AUTHENTICATE
+	//    to the server.
+	// 4. the server responds with the authentication info.
+	//
+	// From the server's PoV, we need to mock two reads and two writes
+	// inside the connection:
+	// 1. read the command PROTOCOLINFO sent from the client.
+	// 2. write protocol info so the client can read it.
+	// 3. read the command AUTHENTICATE sent from the client.
+	// 4. write auth challenge so the client can read it.
 	go func() {
 		// Accept the new connection.
 		server, err := proxy.server.Accept()
@@ -335,6 +341,11 @@
 		t.Logf("server listening on %v, client listening on %v",
 			server.LocalAddr(), server.RemoteAddr())

+		// Read the protocol command from the client.
+		buf := make([]byte, 65535)
+		_, err = server.Read(buf)
+		require.NoError(t, err)
+
 		// Write the protocol info.
 		resp := "250-PROTOCOLINFO 1\n" +
 			"250-AUTH METHODS=NULL\n" +
@@ -343,7 +354,6 @@
 		require.NoErrorf(t, err, "failed to write protocol info")

 		// Read the auth info from the client.
-		buf := make([]byte, 65535)
 		_, err = server.Read(buf)
 		require.NoError(t, err)
@@ -351,15 +361,6 @@
 		resp = "250 AUTHCHALLENGE SERVERHASH=fake\n"
 		_, err = server.Write([]byte(resp))
 		require.NoErrorf(t, err, "failed to write auth challenge")
-
-		// Read the auth challenge resp from the client.
-		_, err = server.Read(buf)
-		require.NoError(t, err)
-
-		// Write OK resp.
-		resp = "250 OK\n"
-		_, err = server.Write([]byte(resp))
-		require.NoErrorf(t, err, "failed to write response auth")
 	}()

 	// Reconnect should succeed.
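For context, here is a hypothetical, minimal client-side counterpart of the exchange the mock above serves, written with net/textproto. Treat the exact commands as a sketch: the mock replies with a SAFECOOKIE-style AUTHCHALLENGE, while this sketch simply authenticates with NULL auth, and the address is made up.

	package main

	import (
		"fmt"
		"net/textproto"
	)

	func main() {
		// Dial the (mock) Tor control port.
		conn, err := textproto.Dial("tcp", "127.0.0.1:9051")
		if err != nil {
			panic(err)
		}
		defer conn.Close()

		// 1. Ask the server which protocol/auth methods it supports.
		if err := conn.PrintfLine("PROTOCOLINFO 1"); err != nil {
			panic(err)
		}
		// 2. Read the multiline 250 response ("250-PROTOCOLINFO 1", ...).
		code, msg, err := conn.ReadResponse(250)
		if err != nil {
			panic(err)
		}
		fmt.Println(code, msg)

		// 3. Authenticate (NULL auth in this sketch).
		if err := conn.PrintfLine("AUTHENTICATE"); err != nil {
			panic(err)
		}
		// 4. Read the server's authentication response.
		code, msg, err = conn.ReadResponse(250)
		if err != nil {
			panic(err)
		}
		fmt.Println(code, msg)
	}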