mirror of
https://github.com/lightningnetwork/lnd.git
synced 2025-03-04 09:48:19 +01:00
lnwallet+contractcourt: gracefully handle auto force close post data loss
In this commit, update the start up logic to gracefully handle a seemingly rare case. In this case, a peer detects local data loss with a set of active HTLCs. These HTLCs then eventually expire (they may or may not actually "exist"), causing a force close decision. Before this PR, this attempt would fail with a fatal error that can impede start up. To better handle such a scenario, we'll now catch the error when we fail to force close due to entering the DLP and instead terminate the state machine at the broadcast state. When a commitment transaction eventually confirms, we'll play it as normal. Fixes https://github.com/lightningnetwork/lnd/issues/7984
This commit is contained in:
parent
90effda090
commit
de54a603b7
5 changed files with 112 additions and 25 deletions
|
@ -1050,6 +1050,20 @@ func (c *ChannelArbitrator) stateStep(
|
|||
if err != nil {
|
||||
log.Errorf("ChannelArbitrator(%v): unable to "+
|
||||
"force close: %v", c.cfg.ChanPoint, err)
|
||||
|
||||
// We tried to force close (HTLC may be expiring from
|
||||
// our PoV, etc), but we think we've lost data. In this
|
||||
// case, we'll not force close, but terminate the state
|
||||
// machine here to wait to see what confirms on chain.
|
||||
if errors.Is(err, lnwallet.ErrForceCloseLocalDataLoss) {
|
||||
log.Error("ChannelArbitrator(%v): broadcast "+
|
||||
"failed due to local data loss, "+
|
||||
"waiting for on chain confimation...",
|
||||
c.cfg.ChanPoint)
|
||||
|
||||
return StateBroadcastCommit, nil, nil
|
||||
}
|
||||
|
||||
return StateError, closeTx, err
|
||||
}
|
||||
closeTx = closeSummary.CloseTx
|
||||
|
|
|
@ -286,17 +286,32 @@ func (c *chanArbTestCtx) Restart(restartClosure func(*chanArbTestCtx)) (*chanArb
|
|||
return newCtx, nil
|
||||
}
|
||||
|
||||
// testChanArbOpts is a struct that contains options that can be used to
|
||||
// initialize the channel arbitrator test context.
|
||||
type testChanArbOpts struct {
|
||||
forceCloseErr error
|
||||
arbCfg *ChannelArbitratorConfig
|
||||
}
|
||||
|
||||
// testChanArbOption applies custom settings to a channel arbitrator config for
|
||||
// testing purposes.
|
||||
type testChanArbOption func(cfg *ChannelArbitratorConfig)
|
||||
type testChanArbOption func(cfg *testChanArbOpts)
|
||||
|
||||
// remoteInitiatorOption sets the MarkChannelClosed function in the
|
||||
// Channel Arbitrator's config.
|
||||
// remoteInitiatorOption sets the MarkChannelClosed function in the Channel
|
||||
// Arbitrator's config.
|
||||
func withMarkClosed(markClosed func(*channeldb.ChannelCloseSummary,
|
||||
...channeldb.ChannelStatus) error) testChanArbOption {
|
||||
|
||||
return func(cfg *ChannelArbitratorConfig) {
|
||||
cfg.MarkChannelClosed = markClosed
|
||||
return func(cfg *testChanArbOpts) {
|
||||
cfg.arbCfg.MarkChannelClosed = markClosed
|
||||
}
|
||||
}
|
||||
|
||||
// withForceCloseErr is used to specify an error that should be returned when
|
||||
// the channel arb tries to force close a channel.
|
||||
func withForceCloseErr(err error) testChanArbOption {
|
||||
return func(opts *testChanArbOpts) {
|
||||
opts.forceCloseErr = err
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -386,7 +401,6 @@ func createTestChannelArbitrator(t *testing.T, log ArbitratorLog,
|
|||
resolvedChan <- struct{}{}
|
||||
return nil
|
||||
},
|
||||
Channel: &mockChannel{},
|
||||
MarkCommitmentBroadcasted: func(_ *wire.MsgTx, _ bool) error {
|
||||
return nil
|
||||
},
|
||||
|
@ -408,9 +422,17 @@ func createTestChannelArbitrator(t *testing.T, log ArbitratorLog,
|
|||
},
|
||||
}
|
||||
|
||||
testOpts := &testChanArbOpts{
|
||||
arbCfg: arbCfg,
|
||||
}
|
||||
|
||||
// Apply all custom options to the config struct.
|
||||
for _, option := range opts {
|
||||
option(arbCfg)
|
||||
option(testOpts)
|
||||
}
|
||||
|
||||
arbCfg.Channel = &mockChannel{
|
||||
forceCloseErr: testOpts.forceCloseErr,
|
||||
}
|
||||
|
||||
var cleanUp func()
|
||||
|
@ -2686,10 +2708,11 @@ func TestChannelArbitratorAnchors(t *testing.T) {
|
|||
)
|
||||
}
|
||||
|
||||
// TestChannelArbitratorStartAfterCommitmentRejected tests that when we run into
|
||||
// the case where our commitment tx is rejected by our bitcoin backend we still
|
||||
// continue to startup the arbitrator for a specific set of errors.
|
||||
func TestChannelArbitratorStartAfterCommitmentRejected(t *testing.T) {
|
||||
// TestChannelArbitratorStartForceCloseFail tests that when we run into the
|
||||
// case where our commitment tx is rejected by our bitcoin backend, or we fail
|
||||
// to force close, we still continue to startup the arbitrator for a
|
||||
// specific set of errors.
|
||||
func TestChannelArbitratorStartForceCloseFail(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
tests := []struct {
|
||||
|
@ -2698,6 +2721,10 @@ func TestChannelArbitratorStartAfterCommitmentRejected(t *testing.T) {
|
|||
// The specific error during broadcasting the transaction.
|
||||
broadcastErr error
|
||||
|
||||
// forceCloseErr is the error returned when we try to force the
|
||||
// channel.
|
||||
forceCloseErr error
|
||||
|
||||
// expected state when the startup of the arbitrator succeeds.
|
||||
expectedState ArbitratorState
|
||||
|
||||
|
@ -2726,6 +2753,16 @@ func TestChannelArbitratorStartAfterCommitmentRejected(t *testing.T) {
|
|||
expectedState: StateBroadcastCommit,
|
||||
expectedStartup: false,
|
||||
},
|
||||
|
||||
// We started after the DLP was triggered, and try to force
|
||||
// close. This is rejected as we can't force close with local
|
||||
// data loss. We should still be able to start up however.
|
||||
{
|
||||
name: "ignore force close local data loss",
|
||||
forceCloseErr: lnwallet.ErrForceCloseLocalDataLoss,
|
||||
expectedState: StateBroadcastCommit,
|
||||
expectedStartup: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
|
@ -2741,9 +2778,21 @@ func TestChannelArbitratorStartAfterCommitmentRejected(t *testing.T) {
|
|||
newStates: make(chan ArbitratorState, 5),
|
||||
state: StateBroadcastCommit,
|
||||
}
|
||||
chanArbCtx, err := createTestChannelArbitrator(t, log)
|
||||
require.NoError(t, err, "unable to create "+
|
||||
"ChannelArbitrator")
|
||||
|
||||
var testOpts []testChanArbOption
|
||||
if test.forceCloseErr != nil {
|
||||
testOpts = append(
|
||||
testOpts,
|
||||
withForceCloseErr(test.forceCloseErr),
|
||||
)
|
||||
}
|
||||
|
||||
chanArbCtx, err := createTestChannelArbitrator(
|
||||
t, log, testOpts...,
|
||||
)
|
||||
require.NoError(
|
||||
t, err, "unable to create ChannelArbitrator",
|
||||
)
|
||||
|
||||
chanArb := chanArbCtx.chanArb
|
||||
|
||||
|
@ -2753,11 +2802,14 @@ func TestChannelArbitratorStartAfterCommitmentRejected(t *testing.T) {
|
|||
|
||||
return test.broadcastErr
|
||||
}
|
||||
|
||||
err = chanArb.Start(nil)
|
||||
|
||||
if !test.expectedStartup {
|
||||
require.ErrorIs(t, err, test.broadcastErr)
|
||||
return
|
||||
}
|
||||
|
||||
require.NoError(t, err)
|
||||
|
||||
t.Cleanup(func() {
|
||||
|
@ -2765,8 +2817,17 @@ func TestChannelArbitratorStartAfterCommitmentRejected(t *testing.T) {
|
|||
})
|
||||
|
||||
// In case the startup succeeds we check that the state
|
||||
// is as expected.
|
||||
chanArbCtx.AssertStateTransitions(test.expectedState)
|
||||
// is as expected, we only check this if we didn't need
|
||||
// to advance from StateBroadcastCommit.
|
||||
if test.expectedState != StateBroadcastCommit {
|
||||
chanArbCtx.AssertStateTransitions(
|
||||
test.expectedState,
|
||||
)
|
||||
} else {
|
||||
// Otherwise, we expect the state to stay the
|
||||
// same.
|
||||
chanArbCtx.AssertState(test.expectedState)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -2800,6 +2861,8 @@ func assertResolverReport(t *testing.T, reports chan *channeldb.ResolverReport,
|
|||
|
||||
type mockChannel struct {
|
||||
anchorResolutions *lnwallet.AnchorResolutions
|
||||
|
||||
forceCloseErr error
|
||||
}
|
||||
|
||||
func (m *mockChannel) NewAnchorResolutions() (*lnwallet.AnchorResolutions,
|
||||
|
@ -2813,6 +2876,10 @@ func (m *mockChannel) NewAnchorResolutions() (*lnwallet.AnchorResolutions,
|
|||
}
|
||||
|
||||
func (m *mockChannel) ForceCloseChan() (*lnwallet.LocalForceCloseSummary, error) {
|
||||
if m.forceCloseErr != nil {
|
||||
return nil, m.forceCloseErr
|
||||
}
|
||||
|
||||
summary := &lnwallet.LocalForceCloseSummary{
|
||||
CloseTx: &wire.MsgTx{},
|
||||
HtlcResolutions: &lnwallet.HtlcResolutions{},
|
||||
|
|
|
@ -71,6 +71,10 @@ fails](https://github.com/lightningnetwork/lnd/pull/7876).
|
|||
retried](https://github.com/lightningnetwork/lnd/pull/7927) with an
|
||||
exponential back off.
|
||||
|
||||
* `lnd` [now properly handles a case where an erroneous force close attempt
|
||||
would impeded start up](https://github.com/lightningnetwork/lnd/pull/7985).
|
||||
|
||||
|
||||
# New Features
|
||||
## Functional Enhancements
|
||||
### Protocol Features
|
||||
|
|
|
@ -116,6 +116,12 @@ var (
|
|||
// ErrRevLogDataMissing is returned when a certain wanted optional field
|
||||
// in a revocation log entry is missing.
|
||||
ErrRevLogDataMissing = errors.New("revocation log data missing")
|
||||
|
||||
// ErrForceCloseLocalDataLoss is returned in the case a user (or
|
||||
// another sub-system) attempts to force close when we've detected that
|
||||
// we've likely lost data ourselves.
|
||||
ErrForceCloseLocalDataLoss = errors.New("cannot force close " +
|
||||
"channel with local data loss")
|
||||
)
|
||||
|
||||
// ErrCommitSyncLocalDataLoss is returned in the case that we receive a valid
|
||||
|
@ -124,7 +130,7 @@ var (
|
|||
// height. This means we have lost some critical data, and must fail the
|
||||
// channel and MUST NOT force close it. Instead we should wait for the remote
|
||||
// to force close it, such that we can attempt to sweep our funds. The
|
||||
// commitment point needed to sweep the remote's force close is encapsuled.
|
||||
// commitment point needed to sweep the remote's force close is encapsulated.
|
||||
type ErrCommitSyncLocalDataLoss struct {
|
||||
// ChannelPoint is the identifier for the channel that experienced data
|
||||
// loss.
|
||||
|
@ -7308,8 +7314,6 @@ type LocalForceCloseSummary struct {
|
|||
// outputs within the commitment transaction.
|
||||
//
|
||||
// TODO(roasbeef): all methods need to abort if in dispute state
|
||||
// TODO(roasbeef): method to generate CloseSummaries for when the remote peer
|
||||
// does a unilateral close
|
||||
func (lc *LightningChannel) ForceClose() (*LocalForceCloseSummary, error) {
|
||||
lc.Lock()
|
||||
defer lc.Unlock()
|
||||
|
@ -7318,8 +7322,9 @@ func (lc *LightningChannel) ForceClose() (*LocalForceCloseSummary, error) {
|
|||
// allow a force close, as it may be the case that we have a dated
|
||||
// version of the commitment, or this is actually a channel shell.
|
||||
if lc.channelState.HasChanStatus(channeldb.ChanStatusLocalDataLoss) {
|
||||
return nil, fmt.Errorf("cannot force close channel with "+
|
||||
"state: %v", lc.channelState.ChanStatus())
|
||||
return nil, fmt.Errorf("%w: channel_state=%v",
|
||||
ErrForceCloseLocalDataLoss,
|
||||
lc.channelState.ChanStatus())
|
||||
}
|
||||
|
||||
commitTx, err := lc.getSignedCommitTx()
|
||||
|
|
|
@ -7417,10 +7417,7 @@ func TestForceCloseFailLocalDataLoss(t *testing.T) {
|
|||
// channel, we should fail as it isn't safe to force close a
|
||||
// channel that isn't in the pure default state.
|
||||
_, err = aliceChannel.ForceClose()
|
||||
if err == nil {
|
||||
t.Fatalf("expected force close to fail due to non-default " +
|
||||
"chan state")
|
||||
}
|
||||
require.ErrorIs(t, err, ErrForceCloseLocalDataLoss)
|
||||
}
|
||||
|
||||
// TestForceCloseBorkedState tests that once we force close a channel, it's
|
||||
|
|
Loading…
Add table
Reference in a new issue