mirror of
https://github.com/ElementsProject/lightning.git
synced 2025-02-21 14:24:09 +01:00
channeld: treat all incoming errors as "soft", so we retry.
We still close the channel if we *send* an error, but we seem to have hit another case where LND sends an error which seems transient, so this will make a best-effort attempt to preserve our channel in that case. Some tests had to be modified, since they don't terminate as they did previously :( Changelog-Changed: quirks: We'll now reconnect and retry if we get an error on an established channel. This works around lnd sending error messages that may be non-fatal. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
parent
839909d2cf
commit
1d0c433dc4
10 changed files with 41 additions and 30 deletions
|
@ -1824,7 +1824,9 @@ static void peer_in(struct peer *peer, const u8 *msg)
|
|||
return;
|
||||
}
|
||||
|
||||
if (handle_peer_gossip_or_error(peer->pps, &peer->channel_id, msg))
|
||||
/* Since LND seems to send errors which aren't actually fatal events,
|
||||
* we treat errors here as soft. */
|
||||
if (handle_peer_gossip_or_error(peer->pps, &peer->channel_id, true, msg))
|
||||
return;
|
||||
|
||||
/* Must get funding_locked before almost anything. */
|
||||
|
@ -2322,7 +2324,8 @@ static void peer_reconnect(struct peer *peer,
|
|||
do {
|
||||
clean_tmpctx();
|
||||
msg = sync_crypto_read(tmpctx, peer->pps);
|
||||
} while (handle_peer_gossip_or_error(peer->pps, &peer->channel_id, msg)
|
||||
} while (handle_peer_gossip_or_error(peer->pps, &peer->channel_id, true,
|
||||
msg)
|
||||
|| capture_premature_msg(&premature_msgs, msg));
|
||||
|
||||
if (peer->channel->option_static_remotekey) {
|
||||
|
|
|
@ -103,7 +103,7 @@ static u8 *closing_read_peer_msg(const tal_t *ctx,
|
|||
handle_gossip_msg(pps, take(msg));
|
||||
continue;
|
||||
}
|
||||
if (!handle_peer_gossip_or_error(pps, channel_id, msg))
|
||||
if (!handle_peer_gossip_or_error(pps, channel_id, false, msg))
|
||||
return msg;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -52,25 +52,15 @@ void peer_failed(struct per_peer_state *pps,
|
|||
/* We're failing because peer sent us an error message */
|
||||
void peer_failed_received_errmsg(struct per_peer_state *pps,
|
||||
const char *desc,
|
||||
const struct channel_id *channel_id)
|
||||
const struct channel_id *channel_id,
|
||||
bool soft_error)
|
||||
{
|
||||
static const struct channel_id all_channels;
|
||||
u8 *msg;
|
||||
bool sync_error;
|
||||
|
||||
/* <+roasbeef> rusty: sync error can just be a timing thing
|
||||
* <+roasbeef> andn doesn't always mean that we can't continue forwrd,
|
||||
* or y'all sent the wrong info
|
||||
*/
|
||||
/* So while LND is sitting in the corner eating paint, back off. */
|
||||
sync_error = strstr(desc, "sync error");
|
||||
if (sync_error)
|
||||
status_unusual("Peer said '%s' so we'll come back later",
|
||||
desc);
|
||||
|
||||
if (!channel_id)
|
||||
channel_id = &all_channels;
|
||||
msg = towire_status_peer_error(NULL, channel_id, desc, sync_error, pps,
|
||||
msg = towire_status_peer_error(NULL, channel_id, desc, soft_error, pps,
|
||||
NULL);
|
||||
peer_billboard(true, "Received error from peer: %s", desc);
|
||||
peer_fatal_continue(take(msg), pps);
|
||||
|
|
|
@ -22,7 +22,9 @@ void peer_failed(struct per_peer_state *pps,
|
|||
* channel_id means all channels. */
|
||||
void peer_failed_received_errmsg(struct per_peer_state *pps,
|
||||
const char *desc,
|
||||
const struct channel_id *channel_id) NORETURN;
|
||||
const struct channel_id *channel_id,
|
||||
bool soft_error)
|
||||
NORETURN;
|
||||
|
||||
/* I/O error */
|
||||
void peer_failed_connection_lost(void) NORETURN;
|
||||
|
|
|
@ -148,6 +148,7 @@ bool handle_timestamp_filter(struct per_peer_state *pps, const u8 *msg TAKES)
|
|||
|
||||
bool handle_peer_gossip_or_error(struct per_peer_state *pps,
|
||||
const struct channel_id *channel_id,
|
||||
bool soft_error,
|
||||
const u8 *msg TAKES)
|
||||
{
|
||||
char *err;
|
||||
|
@ -176,7 +177,8 @@ bool handle_peer_gossip_or_error(struct per_peer_state *pps,
|
|||
if (err)
|
||||
peer_failed_received_errmsg(pps, err,
|
||||
all_channels
|
||||
? NULL : channel_id);
|
||||
? NULL : channel_id,
|
||||
soft_error);
|
||||
|
||||
/* Ignore unknown channel errors. */
|
||||
goto handled;
|
||||
|
|
|
@ -58,6 +58,7 @@ bool is_wrong_channel(const u8 *msg, const struct channel_id *expected,
|
|||
* handle_peer_gossip_or_error - simple handler for all the above cases.
|
||||
* @pps: per-peer state.
|
||||
* @channel_id: the channel id of the current channel.
|
||||
* @soft_error: tell lightningd that incoming error is non-fatal.
|
||||
* @msg: the peer message (only taken if returns true).
|
||||
*
|
||||
* This returns true if it handled the packet: a gossip packet (forwarded
|
||||
|
@ -66,6 +67,7 @@ bool is_wrong_channel(const u8 *msg, const struct channel_id *expected,
|
|||
*/
|
||||
bool handle_peer_gossip_or_error(struct per_peer_state *pps,
|
||||
const struct channel_id *channel_id,
|
||||
bool soft_error,
|
||||
const u8 *msg TAKES);
|
||||
|
||||
/**
|
||||
|
|
|
@ -432,7 +432,7 @@ static u8 *opening_negotiate_msg(const tal_t *ctx, struct state *state,
|
|||
wire_sync_write(REQ_FD, take(msg));
|
||||
}
|
||||
peer_failed_received_errmsg(state->pps, err,
|
||||
NULL);
|
||||
NULL, false);
|
||||
}
|
||||
negotiation_aborted(state, am_funder,
|
||||
tal_fmt(tmpctx, "They sent error %s",
|
||||
|
@ -1283,7 +1283,7 @@ static u8 *handle_peer_in(struct state *state)
|
|||
|
||||
/* Handles standard cases, and legal unknown ones. */
|
||||
if (handle_peer_gossip_or_error(state->pps,
|
||||
&state->channel_id, msg))
|
||||
&state->channel_id, false, msg))
|
||||
return NULL;
|
||||
|
||||
sync_crypto_write(state->pps,
|
||||
|
|
|
@ -1608,7 +1608,7 @@ def test_shutdown(node_factory):
|
|||
|
||||
@flaky
|
||||
@unittest.skipIf(not DEVELOPER, "needs to set upfront_shutdown_script")
|
||||
def test_option_upfront_shutdown_script(node_factory, bitcoind):
|
||||
def test_option_upfront_shutdown_script(node_factory, bitcoind, executor):
|
||||
l1 = node_factory.get_node(start=False)
|
||||
# Insist on upfront script we're not going to match.
|
||||
l1.daemon.env["DEV_OPENINGD_UPFRONT_SHUTDOWN_SCRIPT"] = "76a91404b61f7dc1ea0dc99424464cc4064dc564d91e8988ac"
|
||||
|
@ -1618,14 +1618,16 @@ def test_option_upfront_shutdown_script(node_factory, bitcoind):
|
|||
l1.rpc.connect(l2.info['id'], 'localhost', l2.port)
|
||||
l1.fund_channel(l2, 1000000, False)
|
||||
|
||||
l1.rpc.close(l2.info['id'])
|
||||
# This will block, as l12 will send an error but l2 will retry.
|
||||
fut = executor.submit(l1.rpc.close, l2.info['id'])
|
||||
|
||||
# l2 will close unilaterally when it dislikes shutdown script.
|
||||
l1.daemon.wait_for_log(r'received ERROR.*scriptpubkey .* is not as agreed upfront \(76a91404b61f7dc1ea0dc99424464cc4064dc564d91e8988ac\)')
|
||||
l1.daemon.wait_for_log(r'scriptpubkey .* is not as agreed upfront \(76a91404b61f7dc1ea0dc99424464cc4064dc564d91e8988ac\)')
|
||||
|
||||
# Clear channel.
|
||||
wait_for(lambda: len(bitcoind.rpc.getrawmempool()) != 0)
|
||||
bitcoind.generate_block(1)
|
||||
fut.result(TIMEOUT)
|
||||
wait_for(lambda: [c['state'] for c in only_one(l1.rpc.listpeers()['peers'])['channels']] == ['ONCHAIN'])
|
||||
wait_for(lambda: [c['state'] for c in only_one(l2.rpc.listpeers()['peers'])['channels']] == ['ONCHAIN'])
|
||||
|
||||
|
@ -1636,7 +1638,7 @@ def test_option_upfront_shutdown_script(node_factory, bitcoind):
|
|||
l2.rpc.close(l1.info['id'])
|
||||
|
||||
# l2 will close unilaterally when it dislikes shutdown script.
|
||||
l1.daemon.wait_for_log(r'received ERROR.*scriptpubkey .* is not as agreed upfront \(76a91404b61f7dc1ea0dc99424464cc4064dc564d91e8988ac\)')
|
||||
l1.daemon.wait_for_log(r'scriptpubkey .* is not as agreed upfront \(76a91404b61f7dc1ea0dc99424464cc4064dc564d91e8988ac\)')
|
||||
|
||||
# Clear channel.
|
||||
wait_for(lambda: len(bitcoind.rpc.getrawmempool()) != 0)
|
||||
|
|
|
@ -943,6 +943,7 @@ def test_funding_by_utxos(node_factory, bitcoind):
|
|||
l1.rpc.fundchannel(l3.info["id"], int(0.01 * 10**8), utxos=utxos)
|
||||
|
||||
|
||||
@unittest.skipIf(not DEVELOPER, "needs dev_forget_channel")
|
||||
def test_funding_external_wallet_corners(node_factory, bitcoind):
|
||||
l1 = node_factory.get_node()
|
||||
l2 = node_factory.get_node()
|
||||
|
@ -1000,6 +1001,9 @@ def test_funding_external_wallet_corners(node_factory, bitcoind):
|
|||
# Canceld channel after fundchannel_complete
|
||||
assert l1.rpc.fundchannel_cancel(l2.info['id'])['cancelled']
|
||||
|
||||
# l2 won't give up, since it considers error "soft".
|
||||
l2.rpc.dev_forget_channel(l1.info['id'])
|
||||
|
||||
l1.rpc.connect(l2.info['id'], 'localhost', l2.port)
|
||||
l1.rpc.fundchannel_start(l2.info['id'], amount)['funding_address']
|
||||
assert l1.rpc.fundchannel_complete(l2.info['id'], prep['txid'], txout)['commitments_secured']
|
||||
|
@ -1431,7 +1435,7 @@ def test_update_fee(node_factory, bitcoind):
|
|||
|
||||
|
||||
@unittest.skipIf(not DEVELOPER, "needs DEVELOPER=1")
|
||||
def test_fee_limits(node_factory):
|
||||
def test_fee_limits(node_factory, bitcoind):
|
||||
l1, l2 = node_factory.line_graph(2, opts={'dev-max-fee-multiplier': 5, 'may_reconnect': True}, fundchannel=True)
|
||||
|
||||
# L1 asks for stupid low fee (will actually hit the floor of 253)
|
||||
|
@ -1439,11 +1443,13 @@ def test_fee_limits(node_factory):
|
|||
l1.set_feerates((15, 15, 15), False)
|
||||
l1.start()
|
||||
|
||||
l1.daemon.wait_for_log('Peer permanent failure in CHANNELD_NORMAL: channeld: received ERROR channel .*: update_fee 253 outside range 1875-75000')
|
||||
l1.daemon.wait_for_log('Peer transient failure in CHANNELD_NORMAL: channeld: .*: update_fee 253 outside range 1875-75000')
|
||||
# Make sure the resolution of this one doesn't interfere with the next!
|
||||
# Note: may succeed, may fail with insufficient fee, depending on how
|
||||
# bitcoind feels!
|
||||
l1.daemon.wait_for_log('sendrawtx exit')
|
||||
l2.daemon.wait_for_log('sendrawtx exit')
|
||||
bitcoind.generate_block(1)
|
||||
sync_blockheight(bitcoind, [l1, l2])
|
||||
|
||||
# Trying to open a channel with too low a fee-rate is denied
|
||||
l4 = node_factory.get_node()
|
||||
|
|
|
@ -248,16 +248,20 @@ def test_pay_disconnect(node_factory, bitcoind):
|
|||
l1.set_feerates((10**6, 1000**6, 1000**6), False)
|
||||
|
||||
# Wait for l1 notice
|
||||
l1.daemon.wait_for_log(r'Peer permanent failure in CHANNELD_NORMAL: channeld: received ERROR channel .*: update_fee \d+ outside range 1875-75000')
|
||||
l1.daemon.wait_for_log(r'Peer transient failure in CHANNELD_NORMAL: channeld: .*: update_fee \d+ outside range 1875-75000')
|
||||
|
||||
# l2 fails hard.
|
||||
l2.daemon.wait_for_log('sendrawtx exit')
|
||||
bitcoind.generate_block(1, wait_for_mempool=1)
|
||||
sync_blockheight(bitcoind, [l1, l2])
|
||||
|
||||
# Should fail due to permenant channel fail
|
||||
with pytest.raises(RpcError, match=r'failed: WIRE_UNKNOWN_NEXT_PEER \(First peer not ready\)'):
|
||||
with pytest.raises(RpcError, match=r'WIRE_UNKNOWN_NEXT_PEER'):
|
||||
l1.rpc.sendpay(route, rhash)
|
||||
|
||||
assert not l1.daemon.is_in_log('Payment is still in progress')
|
||||
|
||||
# After it sees block, someone should close channel.
|
||||
bitcoind.generate_block(1)
|
||||
l1.daemon.wait_for_log('ONCHAIN')
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue