mirror of
https://github.com/lightningnetwork/lnd.git
synced 2025-03-04 17:55:36 +01:00
lnwallet+contractcourt: gracefully handle auto force close post data loss
In this commit, update the start up logic to gracefully handle a seemingly rare case. In this case, a peer detects local data loss with a set of active HTLCs. These HTLCs then eventually expire (they may or may not actually "exist"), causing a force close decision. Before this PR, this attempt would fail with a fatal error that can impede start up. To better handle such a scenario, we'll now catch the error when we fail to force close due to entering the DLP and instead terminate the state machine at the broadcast state. When a commitment transaction eventually confirms, we'll play it as normal. Fixes https://github.com/lightningnetwork/lnd/issues/7984
This commit is contained in:
parent
90effda090
commit
de54a603b7
5 changed files with 112 additions and 25 deletions
|
@ -1050,6 +1050,20 @@ func (c *ChannelArbitrator) stateStep(
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("ChannelArbitrator(%v): unable to "+
|
log.Errorf("ChannelArbitrator(%v): unable to "+
|
||||||
"force close: %v", c.cfg.ChanPoint, err)
|
"force close: %v", c.cfg.ChanPoint, err)
|
||||||
|
|
||||||
|
// We tried to force close (HTLC may be expiring from
|
||||||
|
// our PoV, etc), but we think we've lost data. In this
|
||||||
|
// case, we'll not force close, but terminate the state
|
||||||
|
// machine here to wait to see what confirms on chain.
|
||||||
|
if errors.Is(err, lnwallet.ErrForceCloseLocalDataLoss) {
|
||||||
|
log.Error("ChannelArbitrator(%v): broadcast "+
|
||||||
|
"failed due to local data loss, "+
|
||||||
|
"waiting for on chain confimation...",
|
||||||
|
c.cfg.ChanPoint)
|
||||||
|
|
||||||
|
return StateBroadcastCommit, nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
return StateError, closeTx, err
|
return StateError, closeTx, err
|
||||||
}
|
}
|
||||||
closeTx = closeSummary.CloseTx
|
closeTx = closeSummary.CloseTx
|
||||||
|
|
|
@ -286,17 +286,32 @@ func (c *chanArbTestCtx) Restart(restartClosure func(*chanArbTestCtx)) (*chanArb
|
||||||
return newCtx, nil
|
return newCtx, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// testChanArbOpts is a struct that contains options that can be used to
|
||||||
|
// initialize the channel arbitrator test context.
|
||||||
|
type testChanArbOpts struct {
|
||||||
|
forceCloseErr error
|
||||||
|
arbCfg *ChannelArbitratorConfig
|
||||||
|
}
|
||||||
|
|
||||||
// testChanArbOption applies custom settings to a channel arbitrator config for
|
// testChanArbOption applies custom settings to a channel arbitrator config for
|
||||||
// testing purposes.
|
// testing purposes.
|
||||||
type testChanArbOption func(cfg *ChannelArbitratorConfig)
|
type testChanArbOption func(cfg *testChanArbOpts)
|
||||||
|
|
||||||
// remoteInitiatorOption sets the MarkChannelClosed function in the
|
// remoteInitiatorOption sets the MarkChannelClosed function in the Channel
|
||||||
// Channel Arbitrator's config.
|
// Arbitrator's config.
|
||||||
func withMarkClosed(markClosed func(*channeldb.ChannelCloseSummary,
|
func withMarkClosed(markClosed func(*channeldb.ChannelCloseSummary,
|
||||||
...channeldb.ChannelStatus) error) testChanArbOption {
|
...channeldb.ChannelStatus) error) testChanArbOption {
|
||||||
|
|
||||||
return func(cfg *ChannelArbitratorConfig) {
|
return func(cfg *testChanArbOpts) {
|
||||||
cfg.MarkChannelClosed = markClosed
|
cfg.arbCfg.MarkChannelClosed = markClosed
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// withForceCloseErr is used to specify an error that should be returned when
|
||||||
|
// the channel arb tries to force close a channel.
|
||||||
|
func withForceCloseErr(err error) testChanArbOption {
|
||||||
|
return func(opts *testChanArbOpts) {
|
||||||
|
opts.forceCloseErr = err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -386,7 +401,6 @@ func createTestChannelArbitrator(t *testing.T, log ArbitratorLog,
|
||||||
resolvedChan <- struct{}{}
|
resolvedChan <- struct{}{}
|
||||||
return nil
|
return nil
|
||||||
},
|
},
|
||||||
Channel: &mockChannel{},
|
|
||||||
MarkCommitmentBroadcasted: func(_ *wire.MsgTx, _ bool) error {
|
MarkCommitmentBroadcasted: func(_ *wire.MsgTx, _ bool) error {
|
||||||
return nil
|
return nil
|
||||||
},
|
},
|
||||||
|
@ -408,9 +422,17 @@ func createTestChannelArbitrator(t *testing.T, log ArbitratorLog,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
testOpts := &testChanArbOpts{
|
||||||
|
arbCfg: arbCfg,
|
||||||
|
}
|
||||||
|
|
||||||
// Apply all custom options to the config struct.
|
// Apply all custom options to the config struct.
|
||||||
for _, option := range opts {
|
for _, option := range opts {
|
||||||
option(arbCfg)
|
option(testOpts)
|
||||||
|
}
|
||||||
|
|
||||||
|
arbCfg.Channel = &mockChannel{
|
||||||
|
forceCloseErr: testOpts.forceCloseErr,
|
||||||
}
|
}
|
||||||
|
|
||||||
var cleanUp func()
|
var cleanUp func()
|
||||||
|
@ -2686,10 +2708,11 @@ func TestChannelArbitratorAnchors(t *testing.T) {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestChannelArbitratorStartAfterCommitmentRejected tests that when we run into
|
// TestChannelArbitratorStartForceCloseFail tests that when we run into the
|
||||||
// the case where our commitment tx is rejected by our bitcoin backend we still
|
// case where our commitment tx is rejected by our bitcoin backend, or we fail
|
||||||
// continue to startup the arbitrator for a specific set of errors.
|
// to force close, we still continue to startup the arbitrator for a
|
||||||
func TestChannelArbitratorStartAfterCommitmentRejected(t *testing.T) {
|
// specific set of errors.
|
||||||
|
func TestChannelArbitratorStartForceCloseFail(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
|
@ -2698,6 +2721,10 @@ func TestChannelArbitratorStartAfterCommitmentRejected(t *testing.T) {
|
||||||
// The specific error during broadcasting the transaction.
|
// The specific error during broadcasting the transaction.
|
||||||
broadcastErr error
|
broadcastErr error
|
||||||
|
|
||||||
|
// forceCloseErr is the error returned when we try to force close the
|
||||||
|
// channel.
|
||||||
|
forceCloseErr error
|
||||||
|
|
||||||
// expected state when the startup of the arbitrator succeeds.
|
// expected state when the startup of the arbitrator succeeds.
|
||||||
expectedState ArbitratorState
|
expectedState ArbitratorState
|
||||||
|
|
||||||
|
@ -2726,6 +2753,16 @@ func TestChannelArbitratorStartAfterCommitmentRejected(t *testing.T) {
|
||||||
expectedState: StateBroadcastCommit,
|
expectedState: StateBroadcastCommit,
|
||||||
expectedStartup: false,
|
expectedStartup: false,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
// We started after the DLP was triggered, and try to force
|
||||||
|
// close. This is rejected as we can't force close with local
|
||||||
|
// data loss. We should still be able to start up however.
|
||||||
|
{
|
||||||
|
name: "ignore force close local data loss",
|
||||||
|
forceCloseErr: lnwallet.ErrForceCloseLocalDataLoss,
|
||||||
|
expectedState: StateBroadcastCommit,
|
||||||
|
expectedStartup: true,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
|
@ -2741,9 +2778,21 @@ func TestChannelArbitratorStartAfterCommitmentRejected(t *testing.T) {
|
||||||
newStates: make(chan ArbitratorState, 5),
|
newStates: make(chan ArbitratorState, 5),
|
||||||
state: StateBroadcastCommit,
|
state: StateBroadcastCommit,
|
||||||
}
|
}
|
||||||
chanArbCtx, err := createTestChannelArbitrator(t, log)
|
|
||||||
require.NoError(t, err, "unable to create "+
|
var testOpts []testChanArbOption
|
||||||
"ChannelArbitrator")
|
if test.forceCloseErr != nil {
|
||||||
|
testOpts = append(
|
||||||
|
testOpts,
|
||||||
|
withForceCloseErr(test.forceCloseErr),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
chanArbCtx, err := createTestChannelArbitrator(
|
||||||
|
t, log, testOpts...,
|
||||||
|
)
|
||||||
|
require.NoError(
|
||||||
|
t, err, "unable to create ChannelArbitrator",
|
||||||
|
)
|
||||||
|
|
||||||
chanArb := chanArbCtx.chanArb
|
chanArb := chanArbCtx.chanArb
|
||||||
|
|
||||||
|
@ -2753,11 +2802,14 @@ func TestChannelArbitratorStartAfterCommitmentRejected(t *testing.T) {
|
||||||
|
|
||||||
return test.broadcastErr
|
return test.broadcastErr
|
||||||
}
|
}
|
||||||
|
|
||||||
err = chanArb.Start(nil)
|
err = chanArb.Start(nil)
|
||||||
|
|
||||||
if !test.expectedStartup {
|
if !test.expectedStartup {
|
||||||
require.ErrorIs(t, err, test.broadcastErr)
|
require.ErrorIs(t, err, test.broadcastErr)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
t.Cleanup(func() {
|
t.Cleanup(func() {
|
||||||
|
@ -2765,8 +2817,17 @@ func TestChannelArbitratorStartAfterCommitmentRejected(t *testing.T) {
|
||||||
})
|
})
|
||||||
|
|
||||||
// In case the startup succeeds we check that the state
|
// In case the startup succeeds we check that the state
|
||||||
// is as expected.
|
// is as expected, we only check this if we didn't need
|
||||||
chanArbCtx.AssertStateTransitions(test.expectedState)
|
// to advance from StateBroadcastCommit.
|
||||||
|
if test.expectedState != StateBroadcastCommit {
|
||||||
|
chanArbCtx.AssertStateTransitions(
|
||||||
|
test.expectedState,
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
// Otherwise, we expect the state to stay the
|
||||||
|
// same.
|
||||||
|
chanArbCtx.AssertState(test.expectedState)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2800,6 +2861,8 @@ func assertResolverReport(t *testing.T, reports chan *channeldb.ResolverReport,
|
||||||
|
|
||||||
type mockChannel struct {
|
type mockChannel struct {
|
||||||
anchorResolutions *lnwallet.AnchorResolutions
|
anchorResolutions *lnwallet.AnchorResolutions
|
||||||
|
|
||||||
|
forceCloseErr error
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *mockChannel) NewAnchorResolutions() (*lnwallet.AnchorResolutions,
|
func (m *mockChannel) NewAnchorResolutions() (*lnwallet.AnchorResolutions,
|
||||||
|
@ -2813,6 +2876,10 @@ func (m *mockChannel) NewAnchorResolutions() (*lnwallet.AnchorResolutions,
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *mockChannel) ForceCloseChan() (*lnwallet.LocalForceCloseSummary, error) {
|
func (m *mockChannel) ForceCloseChan() (*lnwallet.LocalForceCloseSummary, error) {
|
||||||
|
if m.forceCloseErr != nil {
|
||||||
|
return nil, m.forceCloseErr
|
||||||
|
}
|
||||||
|
|
||||||
summary := &lnwallet.LocalForceCloseSummary{
|
summary := &lnwallet.LocalForceCloseSummary{
|
||||||
CloseTx: &wire.MsgTx{},
|
CloseTx: &wire.MsgTx{},
|
||||||
HtlcResolutions: &lnwallet.HtlcResolutions{},
|
HtlcResolutions: &lnwallet.HtlcResolutions{},
|
||||||
|
|
|
@ -71,6 +71,10 @@ fails](https://github.com/lightningnetwork/lnd/pull/7876).
|
||||||
retried](https://github.com/lightningnetwork/lnd/pull/7927) with an
|
retried](https://github.com/lightningnetwork/lnd/pull/7927) with an
|
||||||
exponential back off.
|
exponential back off.
|
||||||
|
|
||||||
|
* `lnd` [now properly handles a case where an erroneous force close attempt
|
||||||
|
would impede start up](https://github.com/lightningnetwork/lnd/pull/7985).
|
||||||
|
|
||||||
|
|
||||||
# New Features
|
# New Features
|
||||||
## Functional Enhancements
|
## Functional Enhancements
|
||||||
### Protocol Features
|
### Protocol Features
|
||||||
|
|
|
@ -116,6 +116,12 @@ var (
|
||||||
// ErrRevLogDataMissing is returned when a certain wanted optional field
|
// ErrRevLogDataMissing is returned when a certain wanted optional field
|
||||||
// in a revocation log entry is missing.
|
// in a revocation log entry is missing.
|
||||||
ErrRevLogDataMissing = errors.New("revocation log data missing")
|
ErrRevLogDataMissing = errors.New("revocation log data missing")
|
||||||
|
|
||||||
|
// ErrForceCloseLocalDataLoss is returned in the case a user (or
|
||||||
|
// another sub-system) attempts to force close when we've detected that
|
||||||
|
// we've likely lost data ourselves.
|
||||||
|
ErrForceCloseLocalDataLoss = errors.New("cannot force close " +
|
||||||
|
"channel with local data loss")
|
||||||
)
|
)
|
||||||
|
|
||||||
// ErrCommitSyncLocalDataLoss is returned in the case that we receive a valid
|
// ErrCommitSyncLocalDataLoss is returned in the case that we receive a valid
|
||||||
|
@ -124,7 +130,7 @@ var (
|
||||||
// height. This means we have lost some critical data, and must fail the
|
// height. This means we have lost some critical data, and must fail the
|
||||||
// channel and MUST NOT force close it. Instead we should wait for the remote
|
// channel and MUST NOT force close it. Instead we should wait for the remote
|
||||||
// to force close it, such that we can attempt to sweep our funds. The
|
// to force close it, such that we can attempt to sweep our funds. The
|
||||||
// commitment point needed to sweep the remote's force close is encapsuled.
|
// commitment point needed to sweep the remote's force close is encapsulated.
|
||||||
type ErrCommitSyncLocalDataLoss struct {
|
type ErrCommitSyncLocalDataLoss struct {
|
||||||
// ChannelPoint is the identifier for the channel that experienced data
|
// ChannelPoint is the identifier for the channel that experienced data
|
||||||
// loss.
|
// loss.
|
||||||
|
@ -7308,8 +7314,6 @@ type LocalForceCloseSummary struct {
|
||||||
// outputs within the commitment transaction.
|
// outputs within the commitment transaction.
|
||||||
//
|
//
|
||||||
// TODO(roasbeef): all methods need to abort if in dispute state
|
// TODO(roasbeef): all methods need to abort if in dispute state
|
||||||
// TODO(roasbeef): method to generate CloseSummaries for when the remote peer
|
|
||||||
// does a unilateral close
|
|
||||||
func (lc *LightningChannel) ForceClose() (*LocalForceCloseSummary, error) {
|
func (lc *LightningChannel) ForceClose() (*LocalForceCloseSummary, error) {
|
||||||
lc.Lock()
|
lc.Lock()
|
||||||
defer lc.Unlock()
|
defer lc.Unlock()
|
||||||
|
@ -7318,8 +7322,9 @@ func (lc *LightningChannel) ForceClose() (*LocalForceCloseSummary, error) {
|
||||||
// allow a force close, as it may be the case that we have a dated
|
// allow a force close, as it may be the case that we have a dated
|
||||||
// version of the commitment, or this is actually a channel shell.
|
// version of the commitment, or this is actually a channel shell.
|
||||||
if lc.channelState.HasChanStatus(channeldb.ChanStatusLocalDataLoss) {
|
if lc.channelState.HasChanStatus(channeldb.ChanStatusLocalDataLoss) {
|
||||||
return nil, fmt.Errorf("cannot force close channel with "+
|
return nil, fmt.Errorf("%w: channel_state=%v",
|
||||||
"state: %v", lc.channelState.ChanStatus())
|
ErrForceCloseLocalDataLoss,
|
||||||
|
lc.channelState.ChanStatus())
|
||||||
}
|
}
|
||||||
|
|
||||||
commitTx, err := lc.getSignedCommitTx()
|
commitTx, err := lc.getSignedCommitTx()
|
||||||
|
|
|
@ -7417,10 +7417,7 @@ func TestForceCloseFailLocalDataLoss(t *testing.T) {
|
||||||
// channel, we should fail as it isn't safe to force close a
|
// channel, we should fail as it isn't safe to force close a
|
||||||
// channel that isn't in the pure default state.
|
// channel that isn't in the pure default state.
|
||||||
_, err = aliceChannel.ForceClose()
|
_, err = aliceChannel.ForceClose()
|
||||||
if err == nil {
|
require.ErrorIs(t, err, ErrForceCloseLocalDataLoss)
|
||||||
t.Fatalf("expected force close to fail due to non-default " +
|
|
||||||
"chan state")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestForceCloseBorkedState tests that once we force close a channel, it's
|
// TestForceCloseBorkedState tests that once we force close a channel, it's
|
||||||
|
|
Loading…
Add table
Reference in a new issue