From 6cc9f37cab26a01bcc93ccda2fcc048a55e869a1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 23 Mar 2022 06:56:29 +1030 Subject: [PATCH] connectd: handle connect vs closing race better. We would return success from connect even though the peer was closing; this is technically correct but fairly undesirable. Better is to pass every connect attempt to connectd, and have it block if the peer is exiting (and retry), otherwise tell us it's already connected. Signed-off-by: Rusty Russell --- connectd/connectd.c | 9 ++++++++- connectd/connectd_wire.csv | 4 ++++ lightningd/connect_control.c | 31 +++++++++++++++++++------------ 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/connectd/connectd.c b/connectd/connectd.c index cb19a78fa..0ca58c4a2 100644 --- a/connectd/connectd.c +++ b/connectd/connectd.c @@ -1793,8 +1793,14 @@ static void try_connect_peer(struct daemon *daemon, /* If it's exiting now, we've raced: reconnect after */ if (tal_count(existing->subds) != 0 && existing->to_peer - && !existing->ready_to_die) + && !existing->ready_to_die) { + /* Tell it it's already connected so it doesn't + * wait forever. */ + daemon_conn_send(daemon->master, + take(towire_connectd_peer_already_connected + (NULL, id))); return; + } } /* If we're trying to connect it right now, that's OK. */ @@ -2030,6 +2036,7 @@ static struct io_plan *recv_req(struct io_conn *conn, case WIRE_CONNECTD_INIT_REPLY: case WIRE_CONNECTD_ACTIVATE_REPLY: case WIRE_CONNECTD_PEER_CONNECTED: + case WIRE_CONNECTD_PEER_ALREADY_CONNECTED: case WIRE_CONNECTD_RECONNECTED: case WIRE_CONNECTD_CONNECT_FAILED: case WIRE_CONNECTD_DEV_MEMLEAK_REPLY: diff --git a/connectd/connectd_wire.csv b/connectd/connectd_wire.csv index 577c882cd..a247252f6 100644 --- a/connectd/connectd_wire.csv +++ b/connectd/connectd_wire.csv @@ -81,6 +81,10 @@ msgdata,connectd_peer_final_msg,id,node_id, msgdata,connectd_peer_final_msg,len,u16, msgdata,connectd_peer_final_msg,msg,u8,len +# connectd->master: You said to connect, but we already were. +msgtype,connectd_peer_already_connected,2007 +msgdata,connectd_peer_already_connected,id,node_id, + # master -> connectd: do you have a memleak? msgtype,connectd_dev_memleak,2033 diff --git a/lightningd/connect_control.c b/lightningd/connect_control.c index 321f9c2a2..0c031c491 100644 --- a/lightningd/connect_control.c +++ b/lightningd/connect_control.c @@ -95,7 +95,6 @@ static struct command_result *json_connect(struct command *cmd, const char *name; struct wireaddr_internal *addr; const char *err_msg; - struct peer *peer; if (!param(cmd, buffer, params, p_req("id", param_tok, (const jsmntok_t **) &idtok), @@ -137,17 +136,6 @@ static struct command_result *json_connect(struct command *cmd, "Can't specify port without host"); } - /* If we know about peer, see if it's already connected. */ - peer = peer_by_id(cmd->ld, &id); - if (peer && peer->connected) { - log_debug(cmd->ld->log, "Already connected via %s", - type_to_string(tmpctx, struct wireaddr_internal, - &peer->addr)); - return connect_cmd_succeed(cmd, peer, - peer->connected_incoming, - &peer->addr); - } - /* Was there parseable host name? */ if (name) { /* Is there a port? */ @@ -325,6 +313,21 @@ void connect_succeeded(struct lightningd *ld, const struct peer *peer, } } +static void peer_already_connected(struct lightningd *ld, const u8 *msg) +{ + struct node_id id; + struct peer *peer; + + if (!fromwire_connectd_peer_already_connected(msg, &id)) + fatal("Bad msg %s from connectd", tal_hex(tmpctx, msg)); + + peer = peer_by_id(ld, &id); + if (peer) + connect_succeeded(ld, peer, + peer->connected_incoming, + &peer->addr); +} + static void peer_please_disconnect(struct lightningd *ld, const u8 *msg) { struct node_id id; @@ -436,6 +439,10 @@ static unsigned connectd_msg(struct subd *connectd, const u8 *msg, const int *fd peer_connected(connectd->ld, msg, fds[0]); break; + case WIRE_CONNECTD_PEER_ALREADY_CONNECTED: + peer_already_connected(connectd->ld, msg); + break; + case WIRE_CONNECTD_CONNECT_FAILED: connect_failed(connectd->ld, msg); break;