mirror of
https://github.com/ElementsProject/lightning.git
synced 2024-11-19 09:54:16 +01:00
channeld: close an unimportant connection when fds get low.
We use a crude heuristic: if we were trying to contact them, it's a "deliberate" connection, and should be preserved. Changelog-Changed: connectd: prioritize peers with channels (and log!) if we run low on file descriptors. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
parent
6a648fd2bc
commit
d3dbcf03fa
@ -120,46 +120,6 @@ static struct connecting *find_connecting(struct daemon *daemon,
|
|||||||
return connecting_htable_get(daemon->connecting, id);
|
return connecting_htable_get(daemon->connecting, id);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*~ Once we've connected out, we disable the callback which would cause us to
|
|
||||||
* to try the next address. */
|
|
||||||
static void connected_out_to_peer(struct daemon *daemon,
|
|
||||||
struct io_conn *conn,
|
|
||||||
const struct node_id *id)
|
|
||||||
{
|
|
||||||
struct connecting *connect = find_connecting(daemon, id);
|
|
||||||
|
|
||||||
/* We allocate 'conn' as a child of 'connect': we don't want to free
|
|
||||||
* it just yet though. tal_steal() it onto the permanent 'daemon'
|
|
||||||
* struct. */
|
|
||||||
tal_steal(daemon, conn);
|
|
||||||
|
|
||||||
/* We only allow one outgoing attempt at a time */
|
|
||||||
assert(connect->conn == conn);
|
|
||||||
|
|
||||||
/* Don't call destroy_io_conn, since we're done. */
|
|
||||||
io_set_finish(conn, NULL, NULL);
|
|
||||||
|
|
||||||
/* Now free the 'connecting' struct. */
|
|
||||||
tal_free(connect);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*~ Once they've connected in, stop trying to connect out (if we were). */
|
|
||||||
static void peer_connected_in(struct daemon *daemon,
|
|
||||||
struct io_conn *conn,
|
|
||||||
const struct node_id *id)
|
|
||||||
{
|
|
||||||
struct connecting *connect = find_connecting(daemon, id);
|
|
||||||
|
|
||||||
if (!connect)
|
|
||||||
return;
|
|
||||||
|
|
||||||
/* Don't call destroy_io_conn, since we're done. */
|
|
||||||
io_set_finish(connect->conn, NULL, NULL);
|
|
||||||
|
|
||||||
/* Now free the 'connecting' struct since we succeeded. */
|
|
||||||
tal_free(connect);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*~ When we free a peer, we remove it from the daemon's hashtable.
|
/*~ When we free a peer, we remove it from the daemon's hashtable.
|
||||||
* We also call this manually if we want to elegantly drain peer's
|
* We also call this manually if we want to elegantly drain peer's
|
||||||
* queues. */
|
* queues. */
|
||||||
@ -192,6 +152,7 @@ static struct peer *new_peer(struct daemon *daemon,
|
|||||||
const u8 *their_features,
|
const u8 *their_features,
|
||||||
enum is_websocket is_websocket,
|
enum is_websocket is_websocket,
|
||||||
struct io_conn *conn STEALS,
|
struct io_conn *conn STEALS,
|
||||||
|
bool deliberate_connection,
|
||||||
int *fd_for_subd)
|
int *fd_for_subd)
|
||||||
{
|
{
|
||||||
struct peer *peer = tal(daemon, struct peer);
|
struct peer *peer = tal(daemon, struct peer);
|
||||||
@ -208,6 +169,7 @@ static struct peer *new_peer(struct daemon *daemon,
|
|||||||
peer->peer_outq = msg_queue_new(peer, false);
|
peer->peer_outq = msg_queue_new(peer, false);
|
||||||
peer->last_recv_time = time_now();
|
peer->last_recv_time = time_now();
|
||||||
peer->is_websocket = is_websocket;
|
peer->is_websocket = is_websocket;
|
||||||
|
peer->deliberate_connection = deliberate_connection;
|
||||||
peer->dev_writes_enabled = NULL;
|
peer->dev_writes_enabled = NULL;
|
||||||
peer->dev_read_enabled = true;
|
peer->dev_read_enabled = true;
|
||||||
|
|
||||||
@ -239,6 +201,8 @@ struct io_plan *peer_connected(struct io_conn *conn,
|
|||||||
size_t depender, missing;
|
size_t depender, missing;
|
||||||
int subd_fd;
|
int subd_fd;
|
||||||
bool option_gossip_queries;
|
bool option_gossip_queries;
|
||||||
|
struct connecting *connect;
|
||||||
|
bool deliberate_connection;
|
||||||
|
|
||||||
/* We remove any previous connection immediately, on the assumption it's dead */
|
/* We remove any previous connection immediately, on the assumption it's dead */
|
||||||
peer = peer_htable_get(daemon->peers, id);
|
peer = peer_htable_get(daemon->peers, id);
|
||||||
@ -278,20 +242,33 @@ struct io_plan *peer_connected(struct io_conn *conn,
|
|||||||
return io_write_wire(conn, take(msg), io_close_cb, NULL);
|
return io_write_wire(conn, take(msg), io_close_cb, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* We've successfully connected. */
|
/* We've successfully connected! */
|
||||||
if (incoming)
|
|
||||||
peer_connected_in(daemon, conn, id);
|
|
||||||
else
|
|
||||||
connected_out_to_peer(daemon, conn, id);
|
|
||||||
|
|
||||||
if (find_connecting(daemon, id))
|
/* Were we trying to connect deliberately? (Always true for outbound connections!) */
|
||||||
status_failed(STATUS_FAIL_INTERNAL_ERROR,
|
connect = find_connecting(daemon, id);
|
||||||
"After %s connection on %p, still trying to connect conn %p?",
|
if (!incoming) {
|
||||||
incoming ? "incoming" : "outgoing",
|
/* We allocated 'conn' as a child of 'connect': we don't want
|
||||||
conn, find_connecting(daemon, id)->conn);
|
* to free it just yet though. tal_steal() it onto the
|
||||||
|
* permanent 'daemon' struct. */
|
||||||
|
tal_steal(daemon, conn);
|
||||||
|
|
||||||
|
/* We only allow one outgoing attempt at a time */
|
||||||
|
assert(connect->conn == conn);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (connect) {
|
||||||
|
deliberate_connection = true;
|
||||||
|
/*~ Now we've connected, disable the callback which would
|
||||||
|
* cause us to try the next address on failure. */
|
||||||
|
io_set_finish(connect->conn, NULL, NULL);
|
||||||
|
tal_free(connect);
|
||||||
|
} else {
|
||||||
|
deliberate_connection = false;
|
||||||
|
}
|
||||||
|
|
||||||
/* This contains the per-peer state info; gossipd fills in pps->gs */
|
/* This contains the per-peer state info; gossipd fills in pps->gs */
|
||||||
peer = new_peer(daemon, id, cs, their_features, is_websocket, conn, &subd_fd);
|
peer = new_peer(daemon, id, cs, their_features, is_websocket, conn,
|
||||||
|
deliberate_connection, &subd_fd);
|
||||||
/* Only takes over conn if it succeeds. */
|
/* Only takes over conn if it succeeds. */
|
||||||
if (!peer)
|
if (!peer)
|
||||||
return io_close(conn);
|
return io_close(conn);
|
||||||
@ -415,6 +392,51 @@ static struct io_plan *conn_in(struct io_conn *conn,
|
|||||||
handshake_in_success, daemon);
|
handshake_in_success, daemon);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* How much is peer worth (when considering disconnet)? */
|
||||||
|
static size_t peer_score(const struct peer *peer)
|
||||||
|
{
|
||||||
|
#define PEER_SCORE_MAX 2
|
||||||
|
|
||||||
|
/* We deliberately connected to it? Highest prio */
|
||||||
|
if (peer->deliberate_connection)
|
||||||
|
return 2;
|
||||||
|
/* It has subds now? Higher prio */
|
||||||
|
if (tal_count(peer->subds))
|
||||||
|
return 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*~ When file descriptors are exhausted, we might be better to try to
|
||||||
|
* free an existing connection, rather than ignoring new ones. */
|
||||||
|
void close_random_connection(struct daemon *daemon)
|
||||||
|
{
|
||||||
|
struct peer *peer, *best_peer = NULL;
|
||||||
|
size_t best_peer_score = PEER_SCORE_MAX + 1;
|
||||||
|
struct peer_htable_iter it;
|
||||||
|
|
||||||
|
/* Prefer ones with no subds (just chatting), or failing that,
|
||||||
|
* ones we didn't deliberately connect to. */
|
||||||
|
peer = peer_htable_pick(daemon->peers, pseudorand_u64(), &it);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < peer_htable_count(daemon->peers); i++) {
|
||||||
|
size_t score = peer_score(peer);
|
||||||
|
if (score < best_peer_score) {
|
||||||
|
best_peer = peer;
|
||||||
|
best_peer_score = score;
|
||||||
|
/* Don't continue if we can't improve! */
|
||||||
|
if (best_peer_score == 0)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
peer = peer_htable_next(daemon->peers, &it);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (best_peer) {
|
||||||
|
status_debug("due to stress, randomly closing peer %s (score %zu)",
|
||||||
|
fmt_node_id(tmpctx, &best_peer->id), best_peer_score);
|
||||||
|
io_close(best_peer->to_peer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*~ When we get a direct connection in we set up its network address
|
/*~ When we get a direct connection in we set up its network address
|
||||||
* then call handshake.c to set up the crypto state. */
|
* then call handshake.c to set up the crypto state. */
|
||||||
static struct io_plan *connection_in(struct io_conn *conn,
|
static struct io_plan *connection_in(struct io_conn *conn,
|
||||||
@ -430,6 +452,8 @@ static struct io_plan *connection_in(struct io_conn *conn,
|
|||||||
strerror(errno));
|
strerror(errno));
|
||||||
accept_logged = true;
|
accept_logged = true;
|
||||||
}
|
}
|
||||||
|
/* Maybe free up some fds by closing something. */
|
||||||
|
close_random_connection(daemon);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1610,10 +1634,15 @@ static void try_connect_peer(struct daemon *daemon,
|
|||||||
struct wireaddr_internal *addrs;
|
struct wireaddr_internal *addrs;
|
||||||
bool use_proxy = daemon->always_use_proxy;
|
bool use_proxy = daemon->always_use_proxy;
|
||||||
struct connecting *connect;
|
struct connecting *connect;
|
||||||
|
struct peer *peer;
|
||||||
|
|
||||||
/* Already existing? Must have crossed over, it'll know soon. */
|
/* Already existing? Must have crossed over, it'll know soon. */
|
||||||
if (peer_htable_get(daemon->peers, id))
|
peer = peer_htable_get(daemon->peers, id);
|
||||||
|
if (peer) {
|
||||||
|
/* Note now that we explicitly tried to connect */
|
||||||
|
peer->deliberate_connection = true;
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
/* If we're trying to connect it right now, that's OK. */
|
/* If we're trying to connect it right now, that's OK. */
|
||||||
if ((connect = find_connecting(daemon, id))) {
|
if ((connect = find_connecting(daemon, id))) {
|
||||||
|
@ -90,6 +90,9 @@ struct peer {
|
|||||||
/* Last time we received traffic */
|
/* Last time we received traffic */
|
||||||
struct timeabs last_recv_time;
|
struct timeabs last_recv_time;
|
||||||
|
|
||||||
|
/* Were we explicitly told to connect to this peer? */
|
||||||
|
bool deliberate_connection;
|
||||||
|
|
||||||
bool dev_read_enabled;
|
bool dev_read_enabled;
|
||||||
/* If non-NULL, this counts down; 0 means disable */
|
/* If non-NULL, this counts down; 0 means disable */
|
||||||
u32 *dev_writes_enabled;
|
u32 *dev_writes_enabled;
|
||||||
@ -270,4 +273,6 @@ struct io_plan *peer_connected(struct io_conn *conn,
|
|||||||
/* Removes peer from hash table, tells gossipd and lightningd. */
|
/* Removes peer from hash table, tells gossipd and lightningd. */
|
||||||
void destroy_peer(struct peer *peer);
|
void destroy_peer(struct peer *peer);
|
||||||
|
|
||||||
|
/* Remove a random connection, when under stress. */
|
||||||
|
void close_random_connection(struct daemon *daemon);
|
||||||
#endif /* LIGHTNING_CONNECTD_CONNECTD_H */
|
#endif /* LIGHTNING_CONNECTD_CONNECTD_H */
|
||||||
|
@ -1288,6 +1288,8 @@ void peer_connect_subd(struct daemon *daemon, const u8 *msg, int fd)
|
|||||||
strerror(errno));
|
strerror(errno));
|
||||||
recvfd_logged = true;
|
recvfd_logged = true;
|
||||||
}
|
}
|
||||||
|
/* Maybe free up some fds by closing something. */
|
||||||
|
close_random_connection(daemon);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user