discovery: recheck staleness after acquiring channelMtx

Ensure that the ChannelUpdate we are processing is not a duplicate before applying rate limiting logic. The demonstration test added in the prior commit is updated to assert the new, correct logic.
2025-03-04 01:36:24 +01:00 · 2025-03-03 14:09:25 +02:00 · 2025-03-03 14:09:25 +02:00 · a65baf3148
commit a65baf3148
parent d00c724678
2 changed files with 42 additions and 12 deletions
--- a/discovery/gossiper.go
+++ b/discovery/gossiper.go
@ -3142,6 +3142,23 @@ func (d *AuthenticatedGossiper) handleChanUpdate(nMsg *networkMsg,
 		edgeToUpdate = e2
 	}

+	// We did a staleness check before grabbing the channelMtx mutex, but we
+	// should do another one now that the mutex is obtained in-case a
+	// duplicate or newer ChannelUpdate was processed while this thread was
+	// waiting for the lock. Even though the DB `UpdateEdge` call later on
+	// would catch this too, we need to check it now so that our rate
+	// limiting logic does not get triggered by duplicate updates.
+	if edgeToUpdate != nil && !edgeToUpdate.LastUpdate.Before(timestamp) {
+		log.Debugf("Ignored stale edge policy for short_chan_id(%v): "+
+			"peer=%v, msg=%s, is_remote=%v", shortChanID,
+			nMsg.peer, nMsg.msg.MsgType(), nMsg.isRemote,
+		)
+
+		nMsg.err <- nil
+
+		return nil, true
+	}
+
 	log.Debugf("Validating ChannelUpdate: channel=%v, for node=%x, has "+
 		"edge policy=%v", chanInfo.ChannelID,
 		pubKey.SerializeCompressed(), edgeToUpdate != nil)
--- a/discovery/gossiper_test.go
+++ b/discovery/gossiper_test.go
@ -4055,12 +4055,12 @@ func TestBroadcastAnnsAfterGraphSynced(t *testing.T) {
 	assertBroadcast(chanAnn2, true, true)
 }

-// TestRateLimitDeDup demonstrates a bug that currently exists in the handling
-// of channel updates. It shows that if two identical channel updates are
-// received in quick succession, then both of them might be counted towards the
-// rate limit, even though only one of them should be.
+// TestRateLimitDeDup tests that if we get the same channel update in very
+// quick succession, then these updates should not be individually considered
+// in our rate limiting logic.
 //
-// NOTE: this will be fixed in an upcoming commit.
+// NOTE: this only tests the deduplication logic. The main rate limiting logic
+// is tested by TestRateLimitChannelUpdates.
 func TestRateLimitDeDup(t *testing.T) {
 	t.Parallel()

@ -4182,12 +4182,13 @@ func TestRateLimitDeDup(t *testing.T) {
 	// Now we can un-pause the thread that grabbed the mutex first.
 	close(pause)

-	// Currently, both updates make it to UpdateEdge.
+	// Only 1 call should have made it past the staleness check to the
+	// graph's UpdateEdge call.
 	err = wait.NoError(func() error {
 		count := ctx.router.getCallCount("UpdateEdge")

-		if count != 4 {
-			return fmt.Errorf("expected 4 calls to UpdateEdge, "+
+		if count != 3 {
+			return fmt.Errorf("expected 3 calls to UpdateEdge, "+
 				"got %v", count)
 		}

@ -4215,10 +4216,8 @@ func TestRateLimitDeDup(t *testing.T) {
 	// Show that the last update was broadcast.
 	assertRateLimit(false)

-	// We should be allowed to send another update now since only one of the
-	// above duplicates should count towards the rate limit.
-	// However, this is currently not the case, and so we will be rate
-	// limited early. This will be fixed in an upcoming commit.
+	// We should be allowed to send another update now since the rate limit
+	// has still not bee met.
 	update.Timestamp++
 	update.BaseFee++
 	require.NoError(t, signUpdate(remoteKeyPriv1, &update))
@ -4233,6 +4232,20 @@ func TestRateLimitDeDup(t *testing.T) {
 		t.Fatal("remote announcement not processed")
 	}

+	assertRateLimit(false)
+
+	// Our rate limit should be hit now, so a new update should not be
+	// broadcast.
+	select {
+	case err := <-ctx.gossiper.ProcessRemoteAnnouncement(
+		&update, nodePeer1,
+	):
+
+		require.NoError(t, err)
+	case <-time.After(time.Second):
+		t.Fatal("remote announcement not processed")
+	}
+
 	assertRateLimit(true)
 }