From f7a890ca35350ad3bb03d937512324a3073d71f1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 26 Jul 2019 11:41:18 +0930 Subject: [PATCH] lightningd: add slow_reconnect flag for transient failure. We normally reconnect after 1 second: have a flag to say wait for 60. This will be used in the next patch which handles "soft" errors. Signed-off-by: Rusty Russell Header from folded patch 'channel_fail_transient_slowretry.patch': fixup! lightningd: add slow_reconnect flag for transient failure. @ZmnSCPxj points out that function is unsafe, since omitting the bool parameter still compiled. Make it two separate functions, each with a distinctive name so every caller has to be fixed. Signed-off-by: Rusty Russell --- lightningd/channel.c | 34 +++++++++++++++++++++++----------- lightningd/channel.h | 8 ++++++-- lightningd/channel_control.c | 3 ++- lightningd/closing_control.c | 3 ++- lightningd/connect_control.c | 2 +- lightningd/peer_control.c | 12 ++++++------ 6 files changed, 40 insertions(+), 22 deletions(-) diff --git a/lightningd/channel.c b/lightningd/channel.c index f441a9c89..e47a85161 100644 --- a/lightningd/channel.c +++ b/lightningd/channel.c @@ -420,17 +420,12 @@ void channel_set_billboard(struct channel *channel, bool perm, const char *str) } } -void channel_fail_transient(struct channel *channel, const char *fmt, ...) +static void err_and_reconnect(struct channel *channel, + const char *why, + u32 seconds_before_reconnect) { - va_list ap; - const char *why; - - va_start(ap, fmt); - why = tal_vfmt(channel, fmt, ap); - va_end(ap); log_info(channel->log, "Peer transient failure in %s: %s", channel_state_name(channel), why); - tal_free(why); #if DEVELOPER if (dev_disconnect_permanent(channel->peer->ld)) { @@ -441,7 +436,24 @@ void channel_fail_transient(struct channel *channel, const char *fmt, ...) channel_set_owner(channel, NULL); - /* Reconnect after 1 second: prevents some spurious reconnects - * during tests. */ - delay_then_reconnect(channel, 1, &channel->peer->addr); + delay_then_reconnect(channel, seconds_before_reconnect, + &channel->peer->addr); +} + +void channel_fail_reconnect_later(struct channel *channel, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + err_and_reconnect(channel, tal_vfmt(tmpctx, fmt, ap), 60); + va_end(ap); +} + +void channel_fail_reconnect(struct channel *channel, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + err_and_reconnect(channel, tal_vfmt(tmpctx, fmt, ap), 1); + va_end(ap); } diff --git a/lightningd/channel.h b/lightningd/channel.h index 7dce3a5bc..bb78abff3 100644 --- a/lightningd/channel.h +++ b/lightningd/channel.h @@ -175,8 +175,12 @@ const char *channel_state_str(enum channel_state state); void channel_set_owner(struct channel *channel, struct subd *owner); /* Channel has failed, but can try again. */ -PRINTF_FMT(2,3) void channel_fail_transient(struct channel *channel, - const char *fmt,...); +PRINTF_FMT(2,3) void channel_fail_reconnect(struct channel *channel, + const char *fmt, ...); +/* Channel has failed, but can try again after a minute. */ +PRINTF_FMT(2,3) void channel_fail_reconnect_later(struct channel *channel, + const char *fmt,...); + /* Channel has failed, give up on it. */ void channel_fail_permanent(struct channel *channel, const char *fmt, ...); /* Permanent error, but due to internal problems, not peer. */ diff --git a/lightningd/channel_control.c b/lightningd/channel_control.c index 4c8c6f017..f98cd3c06 100644 --- a/lightningd/channel_control.c +++ b/lightningd/channel_control.c @@ -317,7 +317,8 @@ void peer_start_channeld(struct channel *channel, if (!channel->owner) { log_unusual(channel->log, "Could not subdaemon channel: %s", strerror(errno)); - channel_fail_transient(channel, "Failed to subdaemon channel"); + channel_fail_reconnect_later(channel, + "Failed to subdaemon channel"); return; } diff --git a/lightningd/closing_control.c b/lightningd/closing_control.c index b7eb56221..0429249ac 100644 --- a/lightningd/closing_control.c +++ b/lightningd/closing_control.c @@ -191,7 +191,8 @@ void peer_start_closingd(struct channel *channel, if (!channel->owner) { log_unusual(channel->log, "Could not subdaemon closing: %s", strerror(errno)); - channel_fail_transient(channel, "Failed to subdaemon closing"); + channel_fail_reconnect_later(channel, + "Failed to subdaemon closing"); return; } diff --git a/lightningd/connect_control.c b/lightningd/connect_control.c index 28951fcaf..9cc16dd97 100644 --- a/lightningd/connect_control.c +++ b/lightningd/connect_control.c @@ -278,7 +278,7 @@ static void peer_please_disconnect(struct lightningd *ld, const u8 *msg) if (uc) kill_uncommitted_channel(uc, "Reconnected"); else if (c) - channel_fail_transient(c, "Reconnected"); + channel_fail_reconnect(c, "Reconnected"); } static unsigned connectd_msg(struct subd *connectd, const u8 *msg, const int *fds) diff --git a/lightningd/peer_control.c b/lightningd/peer_control.c index f2cf1b628..98430f7f9 100644 --- a/lightningd/peer_control.c +++ b/lightningd/peer_control.c @@ -396,7 +396,7 @@ void channel_errmsg(struct channel *channel, /* No per_peer_state means a subd crash or disconnection. */ if (!pps) { - channel_fail_transient(channel, "%s: %s", + channel_fail_reconnect(channel, "%s: %s", channel->owner->name, desc); return; } @@ -1022,10 +1022,10 @@ static enum watch_result funding_depth_cb(struct lightningd *ld, } else if (!short_channel_id_eq(channel->scid, &scid)) { /* This normally restarts channeld, initialized with updated scid * and also adds it (at least our halve_chan) to rtable. */ - channel_fail_transient(channel, - "short_channel_id changed to %s (was %s)", - short_channel_id_to_str(tmpctx, &scid), - short_channel_id_to_str(tmpctx, channel->scid)); + channel_fail_reconnect(channel, + "short_channel_id changed to %s (was %s)", + short_channel_id_to_str(tmpctx, &scid), + short_channel_id_to_str(tmpctx, channel->scid)); *channel->scid = scid; wallet_channel_save(ld->wallet, channel); @@ -1373,7 +1373,7 @@ static struct command_result *json_disconnect(struct command *cmd, channel = peer_active_channel(peer); if (channel) { if (*force) { - channel_fail_transient(channel, + channel_fail_reconnect(channel, "disconnect command force=true"); return command_success(cmd, json_stream_success(cmd)); }