diff --git a/lightningd/channel.c b/lightningd/channel.c
index b190aec5a..83adfc86a 100644
--- a/lightningd/channel.c
+++ b/lightningd/channel.c
@@ -929,9 +929,9 @@ void channel_set_billboard(struct channel *channel, bool perm, const char *str)
         }
 }
 
-static void err_and_reconnect(struct channel *channel,
-                              const char *why,
-                              u32 seconds_before_reconnect)
+static void channel_err(struct channel *channel,
+                        const char *why,
+                        u32 seconds_before_reconnect /* FIXME: use this! */)
 {
         log_info(channel->log, "Peer transient failure in %s: %s",
                  channel_state_name(channel), why);
@@ -946,29 +946,23 @@ static void err_and_reconnect(struct channel *channel,
 #endif
 
         channel_set_owner(channel, NULL);
-
-        /* Their address only useful if we connected to them */
-        try_reconnect(channel, channel->peer, seconds_before_reconnect,
-                      channel->peer->connected_incoming
-                      ? NULL
-                      : &channel->peer->addr);
 }
 
-void channel_fail_reconnect_later(struct channel *channel, const char *fmt, ...)
+void channel_fail_transient_delayreconnect(struct channel *channel, const char *fmt, ...)
 {
         va_list ap;
 
         va_start(ap, fmt);
-        err_and_reconnect(channel, tal_vfmt(tmpctx, fmt, ap), 60);
+        channel_err(channel, tal_vfmt(tmpctx, fmt, ap), 60);
         va_end(ap);
 }
 
-void channel_fail_reconnect(struct channel *channel, const char *fmt, ...)
+void channel_fail_transient(struct channel *channel, const char *fmt, ...)
 {
         va_list ap;
 
         va_start(ap, fmt);
-        err_and_reconnect(channel, tal_vfmt(tmpctx, fmt, ap), 1);
+        channel_err(channel, tal_vfmt(tmpctx, fmt, ap), 1);
         va_end(ap);
 }
 
diff --git a/lightningd/channel.h b/lightningd/channel.h
index c872bd1cc..a005f8017 100644
--- a/lightningd/channel.h
+++ b/lightningd/channel.h
@@ -374,11 +374,11 @@ const char *channel_state_str(enum channel_state state);
 void channel_set_owner(struct channel *channel, struct subd *owner);
 
 /* Channel has failed, but can try again. */
-void channel_fail_reconnect(struct channel *channel,
+void channel_fail_transient(struct channel *channel,
                             const char *fmt, ...) PRINTF_FMT(2,3);
 /* Channel has failed, but can try again after a minute. */
-void channel_fail_reconnect_later(struct channel *channel,
-                                  const char *fmt,...) PRINTF_FMT(2,3);
+void channel_fail_transient_delayreconnect(struct channel *channel,
+                                           const char *fmt,...) PRINTF_FMT(2,3);
 
 /* Channel has failed, give up on it. */
 void channel_fail_permanent(struct channel *channel,
diff --git a/lightningd/channel_control.c b/lightningd/channel_control.c
index 48d4f1dd7..d7bde69d8 100644
--- a/lightningd/channel_control.c
+++ b/lightningd/channel_control.c
@@ -319,7 +319,7 @@ static void peer_got_shutdown(struct channel *channel, const u8 *msg)
                                                           &channel->peer->id,
                                                           channel->peer->connectd_counter,
                                                           warning)));
-                channel_fail_reconnect(channel, "Bad shutdown scriptpubkey %s",
+                channel_fail_transient(channel, "Bad shutdown scriptpubkey %s",
                                        tal_hex(tmpctx, scriptpubkey));
                 return;
         }
@@ -638,8 +638,10 @@ bool peer_start_channeld(struct channel *channel,
         if (!channel->owner) {
                 log_broken(channel->log, "Could not subdaemon channel: %s",
                            strerror(errno));
-                channel_fail_reconnect_later(channel,
-                                             "Failed to subdaemon channel");
+                /* Disconnect it. */
+                subd_send_msg(ld->connectd,
+                              take(towire_connectd_discard_peer(NULL, &channel->peer->id,
+                                                                channel->peer->connectd_counter)));
                 return false;
         }
 
diff --git a/lightningd/closing_control.c b/lightningd/closing_control.c
index 47b9e39e4..057df16b7 100644
--- a/lightningd/closing_control.c
+++ b/lightningd/closing_control.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -377,8 +378,10 @@ void peer_start_closingd(struct channel *channel, struct peer_fd *peer_fd)
         if (!channel->owner) {
                 log_broken(channel->log, "Could not subdaemon closing: %s",
                            strerror(errno));
-                channel_fail_reconnect_later(channel,
-                                             "Failed to subdaemon closing");
+                /* Disconnect it. */
+                subd_send_msg(ld->connectd,
+                              take(towire_connectd_discard_peer(NULL, &channel->peer->id,
+                                                                channel->peer->connectd_counter)));
                 return;
         }
 
diff --git a/lightningd/dual_open_control.c b/lightningd/dual_open_control.c
index 09f41fd75..ba8321f06 100644
--- a/lightningd/dual_open_control.c
+++ b/lightningd/dual_open_control.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -46,14 +47,11 @@ static void channel_disconnect(struct channel *channel,
         log_(channel->log, level, NULL, false, "%s", desc);
 
         channel_cleanup_commands(channel, desc);
-        if (!reconnect)
-                channel_set_owner(channel, NULL);
-        else
-                channel_fail_reconnect(channel, "%s: %s",
-                                       channel->owner ?
-                                       channel->owner->name :
-                                       "dualopend-dead",
-                                       desc);
+        channel_fail_transient(channel, "%s: %s",
+                               channel->owner ?
+                               channel->owner->name :
+                               "dualopend-dead",
+                               desc);
 }
 
 void channel_unsaved_close_conn(struct channel *channel, const char *why)
@@ -1179,6 +1177,7 @@ wallet_commit_channel(struct lightningd *ld,
 {
         struct amount_msat our_msat, lease_fee_msat;
         struct channel_inflight *inflight;
+        bool any_active = peer_any_active_channel(channel->peer, NULL);
 
         if (!amount_sat_to_msat(&our_msat, our_funding)) {
                 log_broken(channel->log, "Unable to convert funds");
@@ -1292,6 +1291,14 @@ wallet_commit_channel(struct lightningd *ld,
                                      channel->push);
         wallet_inflight_add(ld->wallet, inflight);
 
+        /* We might have disconnected and decided we didn't need to
+         * reconnect because no channels are active.  But the subd
+         * just made it active! */
+        if (!any_active && channel->peer->connected == PEER_DISCONNECTED) {
+                try_reconnect(channel->peer, channel->peer, 1,
+                              &channel->peer->addr);
+        }
+
         return inflight;
 }
 
@@ -1348,13 +1355,13 @@ static void handle_peer_wants_to_close(struct subd *dualopend,
                                  "Bad shutdown scriptpubkey %s",
                                  tal_hex(tmpctx, scriptpubkey));
 
-                /* Get connectd to send warning, and then allow reconnect. */
+                /* Get connectd to send warning, and kill subd. */
                 subd_send_msg(ld->connectd,
                               take(towire_connectd_peer_final_msg(NULL,
                                                                   &channel->peer->id,
                                                                   channel->peer->connectd_counter,
                                                                   warning)));
-                channel_fail_reconnect(channel, "Bad shutdown scriptpubkey %s",
+                channel_fail_transient(channel, "Bad shutdown scriptpubkey %s",
                                        tal_hex(tmpctx, scriptpubkey));
                 return;
         }
@@ -3408,8 +3415,10 @@ bool peer_restart_dualopend(struct peer *peer,
         if (!channel->owner) {
                 log_broken(channel->log, "Could not subdaemon channel: %s",
                            strerror(errno));
-                channel_fail_reconnect_later(channel,
-                                             "Failed to subdaemon channel");
+                /* Disconnect it. */
+                subd_send_msg(peer->ld->connectd,
+                              take(towire_connectd_discard_peer(NULL, &channel->peer->id,
+                                                                channel->peer->connectd_counter)));
                 return false;
         }
 
diff --git a/lightningd/opening_control.c b/lightningd/opening_control.c
index ec7ea81c5..effdd07d9 100644
--- a/lightningd/opening_control.c
+++ b/lightningd/opening_control.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -100,6 +101,7 @@ wallet_commit_channel(struct lightningd *ld,
         u32 lease_start_blockheight = 0; /* No leases on v1 */
         struct short_channel_id *alias_local;
         struct timeabs timestamp;
+        bool any_active = peer_any_active_channel(uc->peer, NULL);
 
         /* We cannot both be the fundee *and* have a `fundchannel_start`
          * command running!
@@ -233,6 +235,15 @@ wallet_commit_channel(struct lightningd *ld,
                                   channel->state_change_cause,
                                   "new channel opened");
 
+
+        /* We might have disconnected and decided we didn't need to
+         * reconnect because no channels are active.  But the subd
+         * just made it active! */
+        if (!any_active && channel->peer->connected == PEER_DISCONNECTED) {
+                try_reconnect(channel->peer, channel->peer, 1,
+                              &channel->peer->addr);
+        }
+
         return channel;
 }
 
diff --git a/lightningd/peer_control.c b/lightningd/peer_control.c
index 3e0f78074..66a1a27fd 100644
--- a/lightningd/peer_control.c
+++ b/lightningd/peer_control.c
@@ -166,7 +166,7 @@ static void peer_channels_cleanup(struct lightningd *ld,
                 c = channels[i];
                 if (channel_active(c)) {
                         channel_cleanup_commands(c, "Disconnected");
-                        channel_fail_reconnect(c, "Disconnected");
+                        channel_fail_transient(c, "Disconnected");
                 } else if (channel_unsaved(c)) {
                         channel_unsaved_close_conn(c, "Disconnected");
                 }
@@ -357,7 +357,7 @@ void channel_errmsg(struct channel *channel,
         /* No peer_fd means a subd crash or disconnection. */
         if (!peer_fd) {
                 /* If the channel is unsaved, we forget it */
-                channel_fail_reconnect(channel, "%s: %s",
+                channel_fail_transient(channel, "%s: %s",
                                        channel->owner->name, desc);
                 return;
         }
@@ -371,8 +371,8 @@ void channel_errmsg(struct channel *channel,
          * and we would close the channel on them.  We now support warnings
          * for this case. */
         if (warning) {
-                channel_fail_reconnect_later(channel, "%s WARNING: %s",
-                                             channel->owner->name, desc);
+                channel_fail_transient_delayreconnect(channel, "%s WARNING: %s",
+                                                      channel->owner->name, desc);
                 return;
         }
 
@@ -1731,9 +1731,21 @@ static enum watch_result funding_depth_cb(struct lightningd *ld,
 
         } else if (!short_channel_id_eq(channel->scid, &scid) &&
                    !is_stub_scid(channel->scid)) {
-                /* This normally restarts channeld, initialized with updated scid
+                /* Send warning: that will make connectd disconnect, and then we'll
+                 * try to reconnect. */
+                u8 *warning = towire_warningfmt(tmpctx, &channel->cid,
+                                                "short_channel_id changed to %s (was %s)",
+                                                short_channel_id_to_str(tmpctx, &scid),
+                                                short_channel_id_to_str(tmpctx, channel->scid));
+                if (channel->peer->connected != PEER_DISCONNECTED)
+                        subd_send_msg(ld->connectd,
+                                      take(towire_connectd_peer_final_msg(NULL,
+                                                                          &channel->peer->id,
+                                                                          channel->peer->connectd_counter,
+                                                                          warning)));
+                /* When we restart channeld, it will be initialized with updated scid
                  * and also adds it (at least our halve_chan) to rtable. */
-                channel_fail_reconnect(channel,
+                channel_fail_transient_delayreconnect(channel,
                         "short_channel_id changed to %s (was %s)",
                         short_channel_id_to_str(tmpctx, &scid),
                         short_channel_id_to_str(tmpctx, channel->scid));
diff --git a/lightningd/test/run-invoice-select-inchan.c b/lightningd/test/run-invoice-select-inchan.c
index 05f66fe13..699564d66 100644
--- a/lightningd/test/run-invoice-select-inchan.c
+++ b/lightningd/test/run-invoice-select-inchan.c
@@ -68,14 +68,14 @@ void channel_fail_permanent(struct channel *channel UNNEEDED,
                             const char *fmt UNNEEDED, ...)
 { fprintf(stderr, "channel_fail_permanent called!\n"); abort(); }
-/* Generated stub for channel_fail_reconnect */
-void channel_fail_reconnect(struct channel *channel UNNEEDED,
+/* Generated stub for channel_fail_transient */
+void channel_fail_transient(struct channel *channel UNNEEDED,
                             const char *fmt UNNEEDED, ...)
-{ fprintf(stderr, "channel_fail_reconnect called!\n"); abort(); }
-/* Generated stub for channel_fail_reconnect_later */
-void channel_fail_reconnect_later(struct channel *channel UNNEEDED,
-                                  const char *fmt UNNEEDED,...)
-{ fprintf(stderr, "channel_fail_reconnect_later called!\n"); abort(); }
+{ fprintf(stderr, "channel_fail_transient called!\n"); abort(); }
+/* Generated stub for channel_fail_transient_delayreconnect */
+void channel_fail_transient_delayreconnect(struct channel *channel UNNEEDED,
+                                           const char *fmt UNNEEDED,...)
+{ fprintf(stderr, "channel_fail_transient_delayreconnect called!\n"); abort(); }
 /* Generated stub for channel_has_htlc_in */
 struct htlc_in *channel_has_htlc_in(struct channel *channel UNNEEDED)
 { fprintf(stderr, "channel_has_htlc_in called!\n"); abort(); }
diff --git a/tests/test_closing.py b/tests/test_closing.py
index d8f8f58f7..763008a22 100644
--- a/tests/test_closing.py
+++ b/tests/test_closing.py
@@ -3667,8 +3667,7 @@ We send an HTLC, and peer unilaterally closes: do we close upstream?
     with pytest.raises(RpcError, match=r'WIRE_TEMPORARY_CHANNEL_FAILURE \(reply from remote\)'):
         l1.rpc.waitsendpay(ph2, timeout=TIMEOUT)
 
-    # l3 closes unilaterally.
-    wait_for(lambda: only_one(l3.rpc.listpeers(l2.info['id'])['peers'])['connected'] is False)
+    # Make l3 close unilaterally.
     l3.rpc.close(l2.info['id'], 1)
 
     l3.daemon.wait_for_log('sendrawtransaction')
diff --git a/tests/test_connection.py b/tests/test_connection.py
index 0106efcde..be7eb5069 100644
--- a/tests/test_connection.py
+++ b/tests/test_connection.py
@@ -451,7 +451,8 @@ def test_disconnect_opener(node_factory):
         l1.rpc.connect(l2.info['id'], 'localhost', l2.port)
         with pytest.raises(RpcError):
             l1.rpc.fundchannel(l2.info['id'], 25000)
-        assert l1.rpc.getpeer(l2.info['id']) is None
+        # At the first disconnect point the peer vanishes; at later ones it just disconnects
+        wait_for(lambda: all([p['connected'] is False for p in l1.rpc.listpeers()['peers']]))
 
     # This one will succeed.
     l1.rpc.connect(l2.info['id'], 'localhost', l2.port)
@@ -495,7 +496,8 @@ def test_disconnect_fundee(node_factory):
         l1.rpc.connect(l2.info['id'], 'localhost', l2.port)
         with pytest.raises(RpcError):
             l1.rpc.fundchannel(l2.info['id'], 25000)
-        assert l1.rpc.getpeer(l2.info['id']) is None
+        # At the first disconnect point the peer vanishes; at later ones it just disconnects
+        wait_for(lambda: all([p['connected'] is False for p in l1.rpc.listpeers()['peers']]))
 
     # This one will succeed.
     l1.rpc.connect(l2.info['id'], 'localhost', l2.port)
@@ -541,8 +543,8 @@ def test_disconnect_fundee_v2(node_factory):
         l1.rpc.fundchannel(l2.info['id'], 25000)
 
     # Should still only have one peer!
-    assert len(l1.rpc.listpeers()) == 1
-    assert len(l2.rpc.listpeers()) == 1
+    assert len(l1.rpc.listpeers()['peers']) == 1
+    assert len(l2.rpc.listpeers()['peers']) == 1
 
 
 @pytest.mark.developer
@@ -564,8 +566,8 @@ def test_disconnect_half_signed(node_factory):
         l1.rpc.fundchannel(l2.info['id'], 25000)
 
     # Peer remembers, opener doesn't.
-    assert l1.rpc.getpeer(l2.info['id']) is None
-    assert l2.rpc.getpeer(l1.info['id'])['id'] == l1.info['id']
+    wait_for(lambda: l1.rpc.listpeers(l2.info['id'])['peers'] == [])
+    assert len(only_one(l2.rpc.listpeers(l1.info['id'])['peers'])['channels']) == 1
 
 
 @pytest.mark.developer
@@ -3606,7 +3608,8 @@ def test_upgrade_statickey_onchaind(node_factory, executor, bitcoind):
     l2.wait_for_onchaind_broadcast('OUR_PENALTY_TX',
                                    'THEIR_REVOKED_UNILATERAL/DELAYED_CHEAT_OUTPUT_TO_THEM')
     bitcoind.generate_block(100)
-    wait_for(lambda: len(l2.rpc.listpeers()['peers']) == 0)
+    # This works even if they disconnect and listpeers() is empty:
+    wait_for(lambda: all([p['channels'] == [] for p in l2.rpc.listpeers()['peers']]))
 
     # TEST 2: Cheat from post-upgrade.
     node_factory.join_nodes([l1, l2])
@@ -3630,7 +3633,8 @@ def test_upgrade_statickey_onchaind(node_factory, executor, bitcoind):
     l2.wait_for_onchaind_broadcast('OUR_PENALTY_TX',
                                    'THEIR_REVOKED_UNILATERAL/DELAYED_CHEAT_OUTPUT_TO_THEM')
     bitcoind.generate_block(100)
-    wait_for(lambda: len(l2.rpc.listpeers()['peers']) == 0)
+    # This works even if they disconnect and listpeers() is empty:
+    wait_for(lambda: all([p['channels'] == [] for p in l2.rpc.listpeers()['peers']]))
 
     # TEST 3: Unilateral close from pre-upgrade
     node_factory.join_nodes([l1, l2])
@@ -3658,7 +3662,8 @@ def test_upgrade_statickey_onchaind(node_factory, executor, bitcoind):
     bitcoind.generate_block(5)
 
     bitcoind.generate_block(100, wait_for_mempool=1)
-    wait_for(lambda: len(l2.rpc.listpeers()['peers']) == 0)
+    # This works even if they disconnect and listpeers() is empty:
+    wait_for(lambda: all([p['channels'] == [] for p in l2.rpc.listpeers()['peers']]))
 
     # TEST 4: Unilateral close from post-upgrade
     node_factory.join_nodes([l1, l2])
@@ -3683,7 +3688,8 @@ def test_upgrade_statickey_onchaind(node_factory, executor, bitcoind):
     bitcoind.generate_block(5)
 
     bitcoind.generate_block(100, wait_for_mempool=1)
-    wait_for(lambda: len(l2.rpc.listpeers()['peers']) == 0)
+    # This works even if they disconnect and listpeers() is empty:
+    wait_for(lambda: all([p['channels'] == [] for p in l2.rpc.listpeers()['peers']]))
 
 
 @unittest.skipIf(not EXPERIMENTAL_FEATURES, "upgrade protocol not available")
diff --git a/tests/test_misc.py b/tests/test_misc.py
index 630e110f6..b1946f1f8 100644
--- a/tests/test_misc.py
+++ b/tests/test_misc.py
@@ -1095,7 +1095,8 @@ def test_funding_reorg_private(node_factory, bitcoind):
     opts = {'funding-confirms': 2, 'rescan': 10, 'may_reconnect': True,
             'allow_bad_gossip': True,
             # gossipd send lightning update for original channel.
-            'allow_broken_log': True}
+            'allow_broken_log': True,
+            'allow_warning': True}
     l1, l2 = node_factory.line_graph(2, fundchannel=False, opts=opts)
     l1.fundwallet(10000000)
     sync_blockheight(bitcoind, [l1])  # height 102
@@ -1138,7 +1139,8 @@ def test_funding_reorg_remote_lags(node_factory, bitcoind):
     """Nodes may disagree about short_channel_id before channel announcement
     """
     # may_reconnect so channeld will restart; bad gossip can happen due to reorg
-    opts = {'funding-confirms': 1, 'may_reconnect': True, 'allow_bad_gossip': True}
+    opts = {'funding-confirms': 1, 'may_reconnect': True, 'allow_bad_gossip': True,
+            'allow_warning': True}
     l1, l2 = node_factory.line_graph(2, fundchannel=False, opts=opts)
     l1.fundwallet(10000000)
     sync_blockheight(bitcoind, [l1])  # height 102
diff --git a/tests/test_opening.py b/tests/test_opening.py
index 29f3ee54c..ad01674e5 100644
--- a/tests/test_opening.py
+++ b/tests/test_opening.py
@@ -129,10 +129,6 @@ def test_multifunding_v2_best_effort(node_factory, bitcoind):
         for node in node_list:
             node.daemon.wait_for_log(r'to CLOSINGD_COMPLETE')
 
-        # Make sure disconnections are complete
-        if not failed_sign:
-            wait_for(lambda: all([c['connected'] is False for c in l1.rpc.listpeers()['peers']]))
-
         # With 2 down, it will fail to fund channel
         l2.stop()
         l3.stop()
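
The subtlest change in this patch is the guard added to both wallet_commit_channel() implementations: whether the peer had any active channel is snapshotted *before* the commit, because connectd may have dropped a peer that had nothing active, with nobody left to schedule a reconnect. The sketch below is a compilable toy of that logic, not the real lightningd code: this struct peer, peer_any_active_channel() and try_reconnect() are simplified stand-ins for the actual types and helpers.

        /* Toy model of the reconnect guard; assumes simplified stand-in
         * types rather than lightningd's real API. */
        #include <stdbool.h>
        #include <stdio.h>

        enum connection_state { PEER_CONNECTED, PEER_DISCONNECTED };

        struct peer {
                enum connection_state connected;
                bool has_active_channel; /* stand-in for walking the channel list */
        };

        /* Assumption: models peer_any_active_channel(peer, NULL). */
        static bool peer_any_active_channel(const struct peer *peer)
        {
                return peer->has_active_channel;
        }

        /* Assumption: models try_reconnect(ctx, peer, seconds, addr). */
        static void try_reconnect(struct peer *peer, int seconds)
        {
                printf("scheduling reconnect in %d second(s)\n", seconds);
        }

        static void commit_channel(struct peer *peer)
        {
                /* Snapshot *before* the commit: is anything active yet? */
                bool any_active = peer_any_active_channel(peer);

                /* ... the commit itself makes the channel active ... */
                peer->has_active_channel = true;

                /* If we had earlier decided not to reconnect because nothing
                 * was active, that decision is now stale: reconnect. */
                if (!any_active && peer->connected == PEER_DISCONNECTED)
                        try_reconnect(peer, 1);
        }

        int main(void)
        {
                struct peer p = { PEER_DISCONNECTED, false };
                commit_channel(&p); /* prints the reconnect message */
                return 0;
        }

Taking the snapshot before the commit is what makes the check meaningful: testing peer_any_active_channel() afterwards would always be true, and the reconnect would never fire.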
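
The funding_depth_cb() hunk likewise depends on ordering: the warning goes out via connectd, which disconnects the peer as a side effect, and only then is the channel marked transiently failed so the delayed reconnect restarts channeld with the new scid. A minimal toy of that ordering follows, again with stand-in types rather than the real connectd wire API.

        /* Toy model of warn-then-fail ordering; the printf calls stand in
         * for subd messages to connectd and for killing the subdaemon. */
        #include <stdio.h>

        enum connection_state { PEER_CONNECTED, PEER_DISCONNECTED };

        struct peer {
                enum connection_state connected;
        };

        /* Assumption: stands in for subd_send_msg(ld->connectd,
         * take(towire_connectd_peer_final_msg(...))): connectd sends the
         * warning, then disconnects the peer itself. */
        static void warn_via_connectd(struct peer *peer, const char *warning)
        {
                printf("connectd: send \"%s\", then disconnect\n", warning);
                peer->connected = PEER_DISCONNECTED;
        }

        /* Assumption: stands in for channel_fail_transient_delayreconnect():
         * kill the subdaemon; reconnection happens later, after a delay. */
        static void fail_transient_delayreconnect(const char *why)
        {
                printf("transient failure: %s\n", why);
        }

        static void scid_changed(struct peer *peer)
        {
                /* Only warn a peer we can still reach. */
                if (peer->connected != PEER_DISCONNECTED)
                        warn_via_connectd(peer, "short_channel_id changed");

                /* Restarting channeld re-initializes it with the new scid. */
                fail_transient_delayreconnect("short_channel_id changed");
        }

        int main(void)
        {
                struct peer p = { PEER_CONNECTED };
                scid_changed(&p);
                return 0;
        }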