From 59e75f1b2c09f791757a3044138799608f6453dc Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 21 May 2019 16:43:26 +0930 Subject: [PATCH] gossipd: reply to large listchannels in parts. This has two effects: most importantly, it avoids the problem where lightningd creates an 800MB JSON blob in response to listchannels, which causes OOM on the Raspberry Pi (our previous max allocation was 832MB). This works because lightning-cli can start draining the JSON while we're filling the buffer, so we end up with a max allocation of 68MB. But despite being less efficient (multiple queries to gossipd), it actually speeds things up due to the parallelism: MCP with -O3 -flto before vs after: -listchannels_sec:8.980000-9.330000(9.206+/-0.14) +listchannels_sec:7.500000-7.830000(7.656+/-0.11) Signed-off-by: Rusty Russell --- gossipd/gossip_wire.csv | 2 ++ gossipd/gossipd.c | 21 +++++++---- lightningd/gossip_control.c | 69 ++++++++++++++++++++++++++----------- 3 files changed, 65 insertions(+), 27 deletions(-) diff --git a/gossipd/gossip_wire.csv b/gossipd/gossip_wire.csv index f636dfab6..169dfdde3 100644 --- a/gossipd/gossip_wire.csv +++ b/gossipd/gossip_wire.csv @@ -45,8 +45,10 @@ gossip_getroute_reply,,hops,num_hops*struct route_hop gossip_getchannels_request,3007 gossip_getchannels_request,,short_channel_id,?struct short_channel_id gossip_getchannels_request,,source,?struct node_id +gossip_getchannels_request,,prev,?struct short_channel_id gossip_getchannels_reply,3107 +gossip_getchannels_reply,,complete,bool gossip_getchannels_reply,,num_channels,u32 gossip_getchannels_reply,,nodes,num_channels*struct gossip_getchannels_entry diff --git a/gossipd/gossipd.c b/gossipd/gossipd.c index b7e962ef1..9461be5ca 100644 --- a/gossipd/gossipd.c +++ b/gossipd/gossipd.c @@ -2133,11 +2133,13 @@ static struct io_plan *getchannels_req(struct io_conn *conn, u8 *out; const struct gossip_getchannels_entry **entries; struct chan *chan; - struct short_channel_id 
*scid, *prev; struct node_id *source; + bool complete = true; /* Note: scid is marked optional in gossip_wire.csv */ - if (!fromwire_gossip_getchannels_request(msg, msg, &scid, &source)) + if (!fromwire_gossip_getchannels_request(msg, msg, &scid, &source, + &prev)) master_badmsg(WIRE_GOSSIP_GETCHANNELS_REQUEST, msg); entries = tal_arr(tmpctx, const struct gossip_getchannels_entry *, 0); @@ -2161,15 +2163,20 @@ static struct io_plan *getchannels_req(struct io_conn *conn, u64 idx; /* For the more general case, we just iterate through every - * short channel id. */ - for (chan = uintmap_first(&daemon->rstate->chanmap, &idx); - chan; - chan = uintmap_after(&daemon->rstate->chanmap, &idx)) { + * short channel id, starting with previous if any (there is + * no scid 0). */ + idx = prev ? prev->u64 : 0; + while ((chan = uintmap_after(&daemon->rstate->chanmap, &idx))) { append_channel(daemon->rstate, &entries, chan, NULL); + /* Limit how many we do at once. */ + if (tal_count(entries) == 4096) { + complete = false; + break; + } } } - out = towire_gossip_getchannels_reply(NULL, entries); + out = towire_gossip_getchannels_reply(NULL, complete, entries); daemon_conn_send(daemon->master, take(out)); return daemon_conn_read_next(conn, daemon->master); } diff --git a/lightningd/gossip_control.c b/lightningd/gossip_control.c index 665db4438..48eee1f85 100644 --- a/lightningd/gossip_control.c +++ b/lightningd/gossip_control.c @@ -390,30 +390,51 @@ static void json_add_halfchan(struct json_stream *response, json_object_end(response); } +struct listchannels_info { + struct command *cmd; + struct json_stream *response; + struct short_channel_id *id; + struct node_id *source; +}; + /* Called upon receiving a getchannels_reply from `gossipd` */ static void json_listchannels_reply(struct subd *gossip UNUSED, const u8 *reply, - const int *fds UNUSED, struct command *cmd) + const int *fds UNUSED, + struct listchannels_info *linfo) { size_t i; struct gossip_getchannels_entry **entries; - 
struct json_stream *response; + bool complete; - if (!fromwire_gossip_getchannels_reply(reply, reply, &entries)) { - was_pending(command_fail(cmd, LIGHTNINGD, - "Invalid reply from gossipd")); + if (!fromwire_gossip_getchannels_reply(reply, reply, + &complete, &entries)) { + /* Shouldn't happen: just end json stream. */ + log_broken(linfo->cmd->ld->log, "Invalid reply from gossipd"); + was_pending(command_raw_complete(linfo->cmd, linfo->response)); return; } - response = json_stream_success(cmd); - json_object_start(response, NULL); - json_array_start(response, "channels"); for (i = 0; i < tal_count(entries); i++) { - json_add_halfchan(response, entries[i], 0); - json_add_halfchan(response, entries[i], 1); + json_add_halfchan(linfo->response, entries[i], 0); + json_add_halfchan(linfo->response, entries[i], 1); + } + + /* More coming? Ask from this point on.. */ + if (!complete) { + u8 *req; + assert(tal_count(entries) != 0); + req = towire_gossip_getchannels_request(linfo->cmd, + linfo->id, + linfo->source, + &entries[i-1] + ->short_channel_id); + subd_req(linfo->cmd->ld->gossip, linfo->cmd->ld->gossip, + req, -1, 0, json_listchannels_reply, linfo); + } else { + json_array_end(linfo->response); + json_object_end(linfo->response); + was_pending(command_success(linfo->cmd, linfo->response)); } - json_array_end(response); - json_object_end(response); - was_pending(command_success(cmd, response)); } static struct command_result *json_listchannels(struct command *cmd, @@ -422,21 +443,29 @@ static struct command_result *json_listchannels(struct command *cmd, const jsmntok_t *params) { u8 *req; - struct short_channel_id *id; - struct node_id *source; + struct listchannels_info *linfo = tal(cmd, struct listchannels_info); + linfo->cmd = cmd; if (!param(cmd, buffer, params, - p_opt("short_channel_id", param_short_channel_id, &id), - p_opt("source", param_node_id, &source), + p_opt("short_channel_id", param_short_channel_id, &linfo->id), + p_opt("source", param_node_id, 
&linfo->source), NULL)) return command_param_failed(); - if (id && source) + if (linfo->id && linfo->source) return command_fail(cmd, JSONRPC2_INVALID_PARAMS, "Cannot specify both source and short_channel_id"); - req = towire_gossip_getchannels_request(cmd, id, source); + + /* Start JSON response, then we stream. */ + linfo->response = json_stream_success(cmd); + json_object_start(linfo->response, NULL); + json_array_start(linfo->response, "channels"); + + req = towire_gossip_getchannels_request(cmd, linfo->id, linfo->source, + NULL); subd_req(cmd->ld->gossip, cmd->ld->gossip, - req, -1, 0, json_listchannels_reply, cmd); + req, -1, 0, json_listchannels_reply, linfo); + return command_still_pending(cmd); }