pytest: test dropping transient connections.

Requires a hack to exhaust connectd's fds, forcing it to close a transient connection.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Author: Rusty Russell <rusty@rustcorp.com.au>
Date:   2024-05-14 14:01:44 +09:30
parent 8268df9a4b
commit a9b7402910
5 changed files with 79 additions and 0 deletions
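The hack itself is small: keep open(2)ing /dev/null until the call fails with EMFILE, and leak the results so every later open()/socket()/accept() in the process fails the same way. A minimal standalone sketch of that trick, outside connectd (names here are illustrative, not from the patch):

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>

    /* Deliberately leak fds until the per-process limit
     * (RLIMIT_NOFILE) is hit; open() then fails with EMFILE. */
    static void exhaust_fds(void)
    {
            while (open("/dev/null", O_RDONLY) >= 0)
                    ;
            if (errno != EMFILE)
                    fprintf(stderr, "unexpected failure: %s\n", strerror(errno));
    }

    int main(void)
    {
            exhaust_fds();
            /* From here on, any syscall needing a new fd fails. */
            if (open("/dev/null", O_RDONLY) < 0 && errno == EMFILE)
                    printf("fds exhausted, as the test requires\n");
            return 0;
    }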


@@ -436,6 +436,10 @@ void close_random_connection(struct daemon *daemon)
		if (!c->transient)
			continue;
		/* This could be the one caller is trying right now */
		if (!c->conn)
			continue;
		status_debug("due to stress, closing transient connect attempt to %s",
			     fmt_node_id(tmpctx, &c->id));
		/* This tells destructor why it was closed */

@@ -1979,6 +1983,12 @@ static char *fd_mode_str(int fd)
 static void dev_report_fds(struct daemon *daemon, const u8 *msg)
 {
	bool found_chr_fd = false;

	/* Not only would this get upset with all the /dev/null,
	 * our symbol code fails if it can't open files */
	if (daemon->dev_exhausted_fds)
		return;

	for (int fd = 3; fd < 4096; fd++) {
		bool listener;
		const struct io_conn *c;

@@ -2035,6 +2045,19 @@ static void dev_report_fds(struct daemon *daemon, const u8 *msg)
	}
}

static void dev_exhaust_fds(struct daemon *daemon, const u8 *msg)
{
	int fd;

	while ((fd = open("/dev/null", O_RDONLY)) >= 0);
	if (errno != EMFILE)
		status_failed(STATUS_FAIL_INTERNAL_ERROR, "dev_exhaust_fds got %s",
			      strerror(errno));
	status_unusual("dev_exhaust_fds: expect failures");
	daemon->dev_exhausted_fds = true;
}

static struct io_plan *recv_peer_connect_subd(struct io_conn *conn,
					      const u8 *msg,
					      int fd,

@@ -2116,6 +2139,12 @@ static struct io_plan *recv_req(struct io_conn *conn,
			goto out;
		}
		/* Fall thru */
	case WIRE_CONNECTD_DEV_EXHAUST_FDS:
		if (daemon->developer) {
			dev_exhaust_fds(daemon, msg);
			goto out;
		}
		/* Fall thru */
	/* We send these, we don't receive them */
	case WIRE_CONNECTD_INIT_REPLY:
	case WIRE_CONNECTD_ACTIVATE_REPLY:

@@ -2210,6 +2239,7 @@ int main(int argc, char *argv[])
	daemon->shutting_down = false;
	daemon->dev_suppress_gossip = false;
	daemon->custom_msgs = NULL;
	daemon->dev_exhausted_fds = false;

	/* stdin == control */
	daemon->master = daemon_conn_new(daemon, STDIN_FILENO, recv_req, NULL,
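For context on the fd scan in dev_report_fds above: a standard way to test whether a descriptor number is live is fcntl(F_GETFD), which fails with EBADF on a closed fd. A minimal standalone sketch of that technique (fd_is_open is an illustrative name, not a helper from this patch):

    #include <fcntl.h>
    #include <stdio.h>

    /* A descriptor is open iff fcntl(F_GETFD) succeeds;
     * on a closed fd it fails with EBADF. */
    static int fd_is_open(int fd)
    {
            return fcntl(fd, F_GETFD) != -1;
    }

    int main(void)
    {
            /* Scan the same range dev_report_fds does. */
            for (int fd = 3; fd < 4096; fd++) {
                    if (fd_is_open(fd))
                            printf("fd %d is open\n", fd);
            }
            return 0;
    }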


@@ -264,6 +264,8 @@ struct daemon {
	bool dev_suppress_gossip;
	/* dev_disconnect file */
	int dev_disconnect_fd;
	/* Did we exhaust fds? If so, skip dev_report_fds */
	bool dev_exhausted_fds;
};

/* Called by io_tor_connect once it has a connection out. */


@@ -163,3 +163,5 @@ msgtype,connectd_start_shutdown_reply,2131
# master -> connect: stop sending gossip.
msgtype,connectd_dev_suppress_gossip,2032
# master -> connect: waste all your fds.
msgtype,connectd_dev_exhaust_fds,2036
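This csv line is input to CLN's wire generator (tools/generate-wire.py); for a message with no fields, marshalling reduces to the 16-bit message type. A hedged sketch of roughly what gets generated (the real code is produced by the generator; its exact shape is assumed here):

    /* Sketch of generated marshalling for a field-less message. */
    u8 *towire_connectd_dev_exhaust_fds(const tal_t *ctx)
    {
            u8 *p = tal_arr(ctx, u8, 0);
            towire_u16(&p, WIRE_CONNECTD_DEV_EXHAUST_FDS);  /* = 2036 */
            return p;
    }

    bool fromwire_connectd_dev_exhaust_fds(const void *p)
    {
            const u8 *cursor = p;
            size_t plen = tal_bytelen(p);

            if (fromwire_u16(&cursor, &plen) != WIRE_CONNECTD_DEV_EXHAUST_FDS)
                    return false;
            return cursor != NULL;
    }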



@@ -611,6 +611,7 @@ static unsigned connectd_msg(struct subd *connectd, const u8 *msg, const int *fd
	case WIRE_CONNECTD_CUSTOMMSG_OUT:
	case WIRE_CONNECTD_START_SHUTDOWN:
	case WIRE_CONNECTD_SET_CUSTOMMSGS:
	case WIRE_CONNECTD_DEV_EXHAUST_FDS:
	/* This is a reply, so never gets through to here. */
	case WIRE_CONNECTD_INIT_REPLY:
	case WIRE_CONNECTD_ACTIVATE_REPLY:

@@ -932,3 +933,26 @@ static const struct json_command dev_report_fds = {
	.dev_only = true,
};
AUTODATA(json_command, &dev_report_fds);

static struct command_result *json_dev_connectd_exhaust_fds(struct command *cmd,
							    const char *buffer,
							    const jsmntok_t *obj UNNEEDED,
							    const jsmntok_t *params)
{
	if (!param(cmd, buffer, params, NULL))
		return command_param_failed();

	subd_send_msg(cmd->ld->connectd,
		      take(towire_connectd_dev_exhaust_fds(NULL)));
	return command_success(cmd, json_stream_success(cmd));
}

static const struct json_command dev_connectd_exhaust_fds = {
	"dev-connectd-exhaust-fds",
	"developer",
	json_dev_connectd_exhaust_fds,
	"Make connectd run out of file descriptors",
	.dev_only = true,
};
AUTODATA(json_command, &dev_connectd_exhaust_fds);
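The new command takes no parameters and simply forwards the wire message to connectd; being marked .dev_only, it exists only when lightningd runs in developer mode. From the Python test suite it is invoked as l1.rpc.dev_connectd_exhaust_fds(), as the test below shows (pyln-client maps the hyphens to underscores).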


@@ -4596,3 +4596,24 @@ def test_wss_proxy(node_factory):
        msg = lconn.read_message()
        if int.from_bytes(msg[0:2], 'big') == 19:
            break


def test_connect_transient(node_factory):
    l1, l2, l3, l4 = node_factory.get_nodes(4, opts={'may_reconnect': True})

    # This is not transient, because they have a channel
    node_factory.join_nodes([l1, l2])

    # Make sure it reconnects once it has a channel.
    l1.rpc.disconnect(l2.info['id'], force=True)
    l1.rpc.connect(l2.info['id'], 'localhost', l2.port)

    # This has no channel, and thus is a transient.
    l1.rpc.connect(l3.info['id'], 'localhost', l3.port)

    l1.rpc.dev_connectd_exhaust_fds()

    # Connecting to l4 will discard connection to l3!
    l1.rpc.connect(l4.info['id'], 'localhost', l4.port)
    assert l1.rpc.listpeers(l3.info['id'])['peers'] == []
    assert l1.daemon.is_in_log(fr"due to stress, randomly closing peer {l3.info['id']} \(score 0\)")
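The final connect to l4 needs a fresh socket fd. With connectd's fd table exhausted, it sheds the one connection it can afford to lose: the transient, channel-less l3 (score 0), leaving channel peer l2 intact; the listpeers and log assertions confirm exactly that.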