connectd: handle connect vs closing race better.

We would return success from connect even though the peer was closing;
this is technically correct but fairly undesirable.  Better is to pass
every connect attempt to connectd, and have it block if the peer is
exiting (and retry), otherwise tell us it's already connected.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
Rusty Russell 2022-03-23 06:56:29 +10:30
parent 10e36e073c
commit 6cc9f37cab
3 changed files with 31 additions and 13 deletions

View file

@ -1793,8 +1793,14 @@ static void try_connect_peer(struct daemon *daemon,
/* If it's exiting now, we've raced: reconnect after */
if (tal_count(existing->subds) != 0
&& existing->to_peer
&& !existing->ready_to_die)
&& !existing->ready_to_die) {
/* Tell it it's already connected so it doesn't
* wait forever. */
daemon_conn_send(daemon->master,
take(towire_connectd_peer_already_connected
(NULL, id)));
return;
}
}
/* If we're trying to connect it right now, that's OK. */
@ -2030,6 +2036,7 @@ static struct io_plan *recv_req(struct io_conn *conn,
case WIRE_CONNECTD_INIT_REPLY:
case WIRE_CONNECTD_ACTIVATE_REPLY:
case WIRE_CONNECTD_PEER_CONNECTED:
case WIRE_CONNECTD_PEER_ALREADY_CONNECTED:
case WIRE_CONNECTD_RECONNECTED:
case WIRE_CONNECTD_CONNECT_FAILED:
case WIRE_CONNECTD_DEV_MEMLEAK_REPLY:

View file

@ -81,6 +81,10 @@ msgdata,connectd_peer_final_msg,id,node_id,
msgdata,connectd_peer_final_msg,len,u16,
msgdata,connectd_peer_final_msg,msg,u8,len
# connectd->master: You said to connect, but we already were.
msgtype,connectd_peer_already_connected,2007
msgdata,connectd_peer_already_connected,id,node_id,
# master -> connectd: do you have a memleak?
msgtype,connectd_dev_memleak,2033

1 #include <bitcoin/block.h>
81 msgdata,connectd_ping,len,u16, msgtype,connectd_ping,2030
82 msgtype,connectd_ping_reply,2130 msgdata,connectd_ping,id,node_id,
83 # False if we there was already a ping in progress. msgdata,connectd_ping,num_pong_bytes,u16,
84 msgdata,connectd_ping,len,u16,
85 msgtype,connectd_ping_reply,2130
86 # False if we there was already a ping in progress.
87 msgdata,connectd_ping_reply,sent,bool,
88 msgdata,connectd_ping_reply,sent,bool, # 0 == no pong expected, otherwise length of pong.
89 # 0 == no pong expected, otherwise length of pong. msgdata,connectd_ping_reply,totlen,u16,
90 msgdata,connectd_ping_reply,totlen,u16, # We tell lightningd we got an onionmsg

View file

@ -95,7 +95,6 @@ static struct command_result *json_connect(struct command *cmd,
const char *name;
struct wireaddr_internal *addr;
const char *err_msg;
struct peer *peer;
if (!param(cmd, buffer, params,
p_req("id", param_tok, (const jsmntok_t **) &idtok),
@ -137,17 +136,6 @@ static struct command_result *json_connect(struct command *cmd,
"Can't specify port without host");
}
/* If we know about peer, see if it's already connected. */
peer = peer_by_id(cmd->ld, &id);
if (peer && peer->connected) {
log_debug(cmd->ld->log, "Already connected via %s",
type_to_string(tmpctx, struct wireaddr_internal,
&peer->addr));
return connect_cmd_succeed(cmd, peer,
peer->connected_incoming,
&peer->addr);
}
/* Was there parseable host name? */
if (name) {
/* Is there a port? */
@ -325,6 +313,21 @@ void connect_succeeded(struct lightningd *ld, const struct peer *peer,
}
}
static void peer_already_connected(struct lightningd *ld, const u8 *msg)
{
struct node_id id;
struct peer *peer;
if (!fromwire_connectd_peer_already_connected(msg, &id))
fatal("Bad msg %s from connectd", tal_hex(tmpctx, msg));
peer = peer_by_id(ld, &id);
if (peer)
connect_succeeded(ld, peer,
peer->connected_incoming,
&peer->addr);
}
static void peer_please_disconnect(struct lightningd *ld, const u8 *msg)
{
struct node_id id;
@ -436,6 +439,10 @@ static unsigned connectd_msg(struct subd *connectd, const u8 *msg, const int *fd
peer_connected(connectd->ld, msg, fds[0]);
break;
case WIRE_CONNECTD_PEER_ALREADY_CONNECTED:
peer_already_connected(connectd->ld, msg);
break;
case WIRE_CONNECTD_CONNECT_FAILED:
connect_failed(connectd->ld, msg);
break;