From ddd12eff9cefc8b67e96350d8f25c8be6158869b Mon Sep 17 00:00:00 2001 From: Olaoluwa Osuntokun Date: Thu, 3 May 2018 20:11:46 -0700 Subject: [PATCH] htlcswitch: move link trimming to link start up In this commit, we fix a race in the set of TestChannelLinkTrimCircuits* tests. Before this commit, we would trim the circuits in the htlcManager goroutine. However, this was problematic as the scheduling order of goroutines isn't predictable. Instead, we'll now trim the circuits in the Start method. Additionally, we fix a series of off-by-2 bugs in the tests themselves. --- htlcswitch/link.go | 56 +++++++++++++++++++---------------------- htlcswitch/link_test.go | 8 +++--- 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/htlcswitch/link.go b/htlcswitch/link.go index 2051561be..1be506c7d 100644 --- a/htlcswitch/link.go +++ b/htlcswitch/link.go @@ -385,6 +385,32 @@ func (l *channelLink) Start() error { l.mailBox.ResetMessages() l.overflowQueue.Start() + // Before launching the htlcManager messages, revert any circuits that + // were marked open in the switch's circuit map, but did not make it + // into a commitment txn. We use the next local htlc index as the cut + // off point, since all indexes below that are committed. This action + // is only performed if the link's final short channel ID has been + // assigned, otherwise we would try to trim the htlcs belonging to the + // all-zero, sourceHop ID. + if l.ShortChanID() != sourceHop { + localHtlcIndex, err := l.channel.NextLocalHtlcIndex() + if err != nil { + return fmt.Errorf("unable to retrieve next local "+ + "htlc index: %v", err) + } + + // NOTE: This is automatically done by the switch when it + // starts up, but is necessary to prevent inconsistencies in + // the case that the link flaps. This is a result of a link's + // life-cycle being shorter than that of the switch. + chanID := l.ShortChanID() + err = l.cfg.Circuits.TrimOpenCircuits(chanID, localHtlcIndex) + if err != nil { + return fmt.Errorf("unable to trim circuits above "+ + "local htlc index %d: %v", localHtlcIndex, err) + } + } + l.wg.Add(1) go l.htlcManager() @@ -726,36 +752,6 @@ func (l *channelLink) htlcManager() { log.Infof("HTLC manager for ChannelPoint(%v) started, "+ "bandwidth=%v", l.channel.ChannelPoint(), l.Bandwidth()) - // Before handling any messages, revert any circuits that were marked - // open in the switch's circuit map, but did not make it into a - // commitment txn. We use the next local htlc index as the cut off - // point, since all indexes below that are committed. This action is - // only performed if the link's final short channel ID has been - // assigned, otherwise we would try to trim the htlcs belonging to the - // all-zero, sourceHop ID. - if l.ShortChanID() != sourceHop { - localHtlcIndex, err := l.channel.NextLocalHtlcIndex() - if err != nil { - l.errorf("unable to retrieve next local htlc index: %v", - err) - l.fail(ErrInternalLinkFailure.Error()) - return - } - - // NOTE: This is automatically done by the switch when it starts - // up, but is necessary to prevent inconsistencies in the case - // that the link flaps. This is a result of a link's life-cycle - // being shorter than that of the switch. - chanID := l.ShortChanID() - err = l.cfg.Circuits.TrimOpenCircuits(chanID, localHtlcIndex) - if err != nil { - l.errorf("unable to trim circuits above local htlc "+ - "index %d: %v", localHtlcIndex, err) - l.fail(ErrInternalLinkFailure.Error()) - return - } - } - // TODO(roasbeef): need to call wipe chan whenever D/C? // If this isn't the first time that this channel link has been diff --git a/htlcswitch/link_test.go b/htlcswitch/link_test.go index 892d5ec97..3bd7c4d17 100644 --- a/htlcswitch/link_test.go +++ b/htlcswitch/link_test.go @@ -2528,7 +2528,7 @@ func TestChannelLinkTrimCircuitsPending(t *testing.T) { cleanUp = alice.restart(false) defer cleanUp() - alice.assertNumPendingNumOpenCircuits(4, 4) + alice.assertNumPendingNumOpenCircuits(4, 2) // Again, try to recommit all of our circuits. fwdActions = alice.commitCircuits(circuits) @@ -2715,7 +2715,7 @@ func TestChannelLinkTrimCircuitsNoCommit(t *testing.T) { cleanUp = alice.restart(false, hodl.Commit) defer cleanUp() - alice.assertNumPendingNumOpenCircuits(2, 2) + alice.assertNumPendingNumOpenCircuits(2, 0) // The first two HTLCs should have been reset in Alice's mailbox since // the switch was not shutdown. Knowing this the switch should drop the @@ -2735,6 +2735,7 @@ func TestChannelLinkTrimCircuitsNoCommit(t *testing.T) { assertLinkBandwidth(t, alice.link, aliceStartingBandwidth-halfHtlcs*(htlcAmt+htlcFee), ) + // Again, initiate another state transition by Alice to try and commit // the HTLCs. Since she is in hodl.Commit mode, this will fail, but the // circuits will be opened persistently. @@ -2805,7 +2806,7 @@ func TestChannelLinkTrimCircuitsNoCommit(t *testing.T) { cleanUp = alice.restart(false, hodl.Commit) defer cleanUp() - alice.assertNumPendingNumOpenCircuits(4, 2) + alice.assertNumPendingNumOpenCircuits(4, 0) // Now, try to commit all of known circuits. fwdActions = alice.commitCircuits(circuits) @@ -2815,6 +2816,7 @@ func TestChannelLinkTrimCircuitsNoCommit(t *testing.T) { if len(fwdActions.Fails) != halfHtlcs { t.Fatalf("expected %d packet to be failed", halfHtlcs) } + // The last two HTLCs will be dropped, as thought the circuits are // trimmed, the switch is aware that the HTLCs are still in Alice's // mailbox.