lightningd: disconnect on *any* transient error, except abort

Previously we only forced a disconnect when HTLC addition was too slow; now
disconnecting is the default for every transient failure.  dual-open's txabort
is excluded, however.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
Rusty Russell 2023-10-02 09:29:48 +10:30
parent 8dcfe1a75c
commit fe0959fd31
7 changed files with 27 additions and 25 deletions

View file

@ -1036,7 +1036,7 @@ void channel_set_billboard(struct channel *channel, bool perm, const char *str)
}
}
static void channel_err(struct channel *channel, const char *why)
static void channel_err(struct channel *channel, bool disconnect, const char *why)
{
/* Nothing to do if channel isn't actually owned! */
if (!channel->owner)
@ -1053,14 +1053,22 @@ static void channel_err(struct channel *channel, const char *why)
}
channel_set_owner(channel, NULL);
/* Force a disconnect in case the issue is with TCP */
if (disconnect && channel->peer->ld->connectd) {
const struct peer *peer = channel->peer;
subd_send_msg(peer->ld->connectd,
take(towire_connectd_discard_peer(NULL, &peer->id,
peer->connectd_counter)));
}
}
void channel_fail_transient(struct channel *channel, const char *fmt, ...)
void channel_fail_transient(struct channel *channel, bool disconnect, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
channel_err(channel, tal_vfmt(tmpctx, fmt, ap));
channel_err(channel, disconnect, tal_vfmt(tmpctx, fmt, ap));
va_end(ap);
}

View file

@ -413,9 +413,10 @@ bool channel_state_closish(enum channel_state channel_state);
void channel_set_owner(struct channel *channel, struct subd *owner);
/* Channel has failed, but can try again. */
/* Channel has failed, but can try again. Usually, set disconnect to true. */
void channel_fail_transient(struct channel *channel,
const char *fmt, ...) PRINTF_FMT(2,3);
bool disconnect,
const char *fmt, ...) PRINTF_FMT(3, 4);
/* Channel has failed, give up on it. */
void channel_fail_permanent(struct channel *channel,

View file

@ -922,7 +922,7 @@ static void peer_got_shutdown(struct channel *channel, const u8 *msg)
&channel->peer->id,
channel->peer->connectd_counter,
warning)));
channel_fail_transient(channel, "Bad shutdown scriptpubkey %s",
channel_fail_transient(channel, true, "Bad shutdown scriptpubkey %s",
tal_hex(tmpctx, scriptpubkey));
return;
}

View file

@ -48,7 +48,7 @@ static void channel_disconnect(struct channel *channel,
log_(channel->log, level, NULL, false, "%s", desc);
channel_cleanup_commands(channel, desc);
channel_fail_transient(channel, "%s: %s",
channel_fail_transient(channel, true, "%s: %s",
channel->owner ?
channel->owner->name :
"dualopend-dead",
@ -1415,7 +1415,7 @@ static void handle_peer_wants_to_close(struct subd *dualopend,
&channel->peer->id,
channel->peer->connectd_counter,
warning)));
channel_fail_transient(channel, "Bad shutdown scriptpubkey %s",
channel_fail_transient(channel, true, "Bad shutdown scriptpubkey %s",
tal_hex(tmpctx, scriptpubkey));
return;
}
@ -3607,7 +3607,7 @@ static void dualopen_errmsg(struct channel *channel,
/* No peer_fd means a subd crash or disconnection. */
if (!peer_fd) {
/* If the channel is unsaved, we forget it */
channel_fail_transient(channel, "%s: %s",
channel_fail_transient(channel, true, "%s: %s",
channel->owner->name, desc);
return;
}
@ -3621,7 +3621,8 @@ static void dualopen_errmsg(struct channel *channel,
* and we would close the channel on them. We now support warnings
* for this case. */
if (warning || aborted) {
channel_fail_transient(channel, "%s %s: %s",
/* We *don't* hang up if they aborted: that's fine! */
channel_fail_transient(channel, !aborted, "%s %s: %s",
channel->owner->name,
warning ? "WARNING" : "ABORTED",
desc);

View file

@ -178,7 +178,7 @@ static void peer_channels_cleanup(struct lightningd *ld,
c = channels[i];
if (channel_active(c)) {
channel_cleanup_commands(c, "Disconnected");
channel_fail_transient(c, "Disconnected");
channel_fail_transient(c, true, "Disconnected");
} else if (channel_unsaved(c)) {
channel_unsaved_close_conn(c, "Disconnected");
}
@ -391,7 +391,7 @@ void channel_errmsg(struct channel *channel,
/* No peer_fd means a subd crash or disconnection. */
if (!peer_fd) {
/* If the channel is unsaved, we forget it */
channel_fail_transient(channel, "%s: %s",
channel_fail_transient(channel, true, "%s: %s",
channel->owner->name, desc);
return;
}
@ -405,7 +405,7 @@ void channel_errmsg(struct channel *channel,
* would recover after a reconnect. So we downgrade, but snark
* about it in the logs. */
if (!err_for_them && strends(desc, "internal error")) {
channel_fail_transient(channel, "%s: %s",
channel_fail_transient(channel, true, "%s: %s",
channel->owner->name,
"lnd sent 'internal error':"
" let's give it some space");
@ -414,7 +414,7 @@ void channel_errmsg(struct channel *channel,
/* This is us, sending a warning. */
if (warning) {
channel_fail_transient(channel, "%s sent %s",
channel_fail_transient(channel, true, "%s sent %s",
channel->owner->name,
desc);
return;
@ -1922,7 +1922,7 @@ static enum watch_result funding_depth_cb(struct lightningd *ld,
warning)));
/* When we restart channeld, it will be initialized with updated scid
* and also adds it (at least our halve_chan) to rtable. */
channel_fail_transient(channel,
channel_fail_transient(channel, true,
"short_channel_id changed to %s (was %s)",
short_channel_id_to_str(tmpctx, &scid),
short_channel_id_to_str(tmpctx, channel->scid));

View file

@ -590,17 +590,8 @@ static void htlc_offer_timeout(struct htlc_out *out)
log_unusual(channel->owner->log,
"Adding HTLC %"PRIu64" too slow: killing connection",
out->key.id);
tal_free(channel->owner);
channel_set_billboard(channel, false,
channel_fail_transient(channel, true,
"Adding HTLC timed out: killed connection");
/* Force a disconnect in case the issue is with TCP */
if (channel->peer->ld->connectd) {
const struct peer *peer = channel->peer;
subd_send_msg(peer->ld->connectd,
take(towire_connectd_discard_peer(NULL, &peer->id,
peer->connectd_counter)));
}
}
/* Returns failmsg, or NULL on success. */

View file

@ -80,6 +80,7 @@ void channel_fail_permanent(struct channel *channel UNNEEDED,
{ fprintf(stderr, "channel_fail_permanent called!\n"); abort(); }
/* Generated stub for channel_fail_transient */
void channel_fail_transient(struct channel *channel UNNEEDED,
bool disconnect UNNEEDED,
const char *fmt UNNEEDED, ...)
{ fprintf(stderr, "channel_fail_transient called!\n"); abort(); }
/* Generated stub for channel_has_htlc_in */