lightningd: attach HTLC timeout to htlc itself, fix gratuitous disconnect bug.

We set the timeout on first HTLC, but didn't clear it if that HTLC failed.

It's saner to have a per-HTLC timeout (since that's what it is!) and
also our timer infra is specially coded to scale approximately infinitely so
trying to optimize this is vastly premature.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Changelog-Fixed: Protocol: We would sometimes gratuitously disconnect 30 seconds after an HTLC failed.
This commit is contained in:
Rusty Russell 2021-05-21 11:45:54 +09:30
parent 11180e7aa1
commit 33736b860a
7 changed files with 22 additions and 19 deletions

View File

@ -214,7 +214,6 @@ struct channel *new_unsaved_channel(struct peer *peer,
/* A zero value database id means it's not saved in the database yet */
channel->dbid = 0;
channel->error = NULL;
channel->htlc_timeout = NULL;
channel->openchannel_signed_cmd = NULL;
channel->state = DUALOPEND_OPEN_INIT;
channel->owner = NULL;
@ -345,7 +344,6 @@ struct channel *new_channel(struct peer *peer, u64 dbid,
channel->dbid = dbid;
channel->unsaved_dbid = 0;
channel->error = NULL;
channel->htlc_timeout = NULL;
channel->open_attempt = NULL;
channel->openchannel_signed_cmd = NULL;
if (their_shachain)

View File

@ -132,9 +132,6 @@ struct channel {
struct amount_msat msat_to_us_min;
struct amount_msat msat_to_us_max;
/* Timer we use in case they don't add an HTLC in a timely manner. */
struct oneshot *htlc_timeout;
/* Last tx they gave us. */
struct bitcoin_tx *last_tx;
enum wallet_tx_type last_tx_type;

View File

@ -297,6 +297,7 @@ struct htlc_out *new_htlc_out(const tal_t *ctx,
hout->failmsg = NULL;
hout->failonion = NULL;
hout->preimage = NULL;
hout->timeout = NULL;
if (blinding)
hout->blinding = tal_dup(hout, struct pubkey, blinding);

View File

@ -93,6 +93,9 @@ struct htlc_out {
/* Blinding to send alongside, if any. */
struct pubkey *blinding;
/* Timer we use in case they don't add an HTLC in a timely manner. */
struct oneshot *timeout;
};
static inline const struct htlc_key *keyof_htlc_in(const struct htlc_in *in)

View File

@ -600,17 +600,22 @@ static void rcvd_htlc_reply(struct subd *subd, const u8 *msg, const int *fds UNU
/* When channeld includes it in commitment, we'll make it persistent. */
}
static void htlc_offer_timeout(struct channel *channel)
static void htlc_offer_timeout(struct htlc_out *out)
{
/* Unset this in case we reconnect and start again. */
channel->htlc_timeout = NULL;
struct channel *channel = out->key.channel;
out->timeout = NULL;
/* Otherwise, timer would be removed. */
assert(out->hstate == SENT_ADD_HTLC);
/* If owner died, we should already be taken care of. */
if (!channel->owner || channel->state != CHANNELD_NORMAL)
return;
log_unusual(channel->owner->log,
"Adding HTLC too slow: killing connection");
"Adding HTLC %"PRIu64" too slow: killing connection",
out->key.id);
tal_free(channel->owner);
channel_set_billboard(channel, false,
"Adding HTLC timed out: killed connection");
@ -659,12 +664,14 @@ const u8 *send_htlc_out(const tal_t *ctx,
partid, in);
tal_add_destructor(*houtp, destroy_hout_subd_died);
/* Give channel 30 seconds to commit (first) htlc. */
if (!out->htlc_timeout && !IFDEV(out->peer->ld->dev_no_htlc_timeout, 0))
out->htlc_timeout = new_reltimer(out->peer->ld->timers,
out, time_from_sec(30),
/* Give channel 30 seconds to commit this htlc. */
if (!IFDEV(out->peer->ld->dev_no_htlc_timeout, 0)) {
(*houtp)->timeout = new_reltimer(out->peer->ld->timers,
*houtp, time_from_sec(30),
htlc_offer_timeout,
out);
*houtp);
}
msg = towire_channeld_offer_htlc(out, amount, cltv, payment_hash,
onion_routing_packet, blinding);
subd_req(out->peer->ld, out->owner, take(msg), -1, 0, rcvd_htlc_reply,
@ -1627,8 +1634,8 @@ static bool update_out_htlc(struct channel *channel,
/* First transition into commitment; now it outlives peer. */
if (newstate == SENT_ADD_COMMIT) {
tal_del_destructor(hout, destroy_hout_subd_died);
hout->timeout = tal_free(hout->timeout);
tal_steal(ld, hout);
} else if (newstate == RCVD_REMOVE_ACK_REVOCATION) {
remove_htlc_out(channel, hout);
}
@ -1721,8 +1728,6 @@ void peer_sending_commitsig(struct channel *channel, const u8 *msg)
struct lightningd *ld = channel->peer->ld;
struct penalty_base *pbase;
channel->htlc_timeout = tal_free(channel->htlc_timeout);
if (!fromwire_channeld_sending_commitsig(msg, msg,
&commitnum,
&pbase,

View File

@ -3286,7 +3286,6 @@ def test_openchannel_init_alternate(node_factory, executor):
fut.result(10)
@pytest.mark.xfail(strict=True)
def test_htlc_failed_noclose(node_factory):
"""Test a bug where the htlc timeout would kick in even if the HTLC failed"""
l1, l2 = node_factory.line_graph(2)

View File

@ -1418,7 +1418,7 @@ def test_htlc_send_timeout(node_factory, bitcoind, compat):
assert not l2.daemon.is_in_log(r'{}-.*channeld.*: \[IN\] 0013'.format(l3.info['id']))
assert not l2.daemon.is_in_log(r'{}-.*channeld.*: \[OUT\] 0084'.format(l3.info['id']))
# L2 killed the channel with l3 because it was too slow.
l2.daemon.wait_for_log('{}-.*channeld-.*Adding HTLC too slow: killing connection'.format(l3.info['id']))
l2.daemon.wait_for_log('{}-.*channeld-.*Adding HTLC 0 too slow: killing connection'.format(l3.info['id']))
def test_ipv4_and_ipv6(node_factory):