mirror of
https://github.com/ElementsProject/lightning.git
synced 2025-03-03 18:57:06 +01:00
gossipd: don't send a peer to master with half-written or half-read packet.
In this case, it was a gossip message half-sent, when we asked the peer to be released. Fix the problem in general by making send_peer_with_fds() wait until after the next packet. test_routing_gossip/lightning-4/log: b'lightning_openingd(8738): TRACE: First per_commit_point = 02e2ff759ed70c71f154695eade1983664a72546ebc552861f844bff5ea5b933bf' b'lightning_openingd(8738): TRACE: Failed hdr decrypt with rn=11' b'lightning_openingd(8738): STATUS_FAIL_PEER_IO: Reading accept_channel: Success' test_routing_gossip/lightning-5/log: b'lightning_gossipd(8461): UPDATE WIRE_GOSSIP_PEER_NONGOSSIP' b'lightning_gossipd(8461): UPDATE WIRE_GOSSIP_PEER_NONGOSSIP' b'lightningd(8308): Failed to get netaddr for outgoing: Transport endpoint is not connected' The problem occurs here on release, but could be on any place where we hand a peer over when using ccan/io. Note the other case (channel.c). Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
parent
4e14185961
commit
954a3990fa
2 changed files with 86 additions and 25 deletions
|
@ -2107,6 +2107,11 @@ static void gossip_gone(struct io_conn *unused, struct daemon_conn *dc)
|
|||
"Gossip connection closed");
|
||||
}
|
||||
|
||||
/* FIXME: This doesn't cover partly read packets! We could be halfway
|
||||
* through receiving a gossip msg, for example. We'll simply reconnect
|
||||
* in this case, but the real fix is to wean off ccan/io here, as it doesn't
|
||||
* buy us anything: a poll for read on gossipfd, masterfd and peerfd then acting
|
||||
* synchronous would be a simpler model. */
|
||||
static void send_shutdown_complete(struct peer *peer)
|
||||
{
|
||||
const u8 *msg;
|
||||
|
|
106
gossipd/gossip.c
106
gossipd/gossip.c
|
@ -126,6 +126,9 @@ struct peer {
|
|||
|
||||
/* If we die, should we reach again? */
|
||||
bool reach_again;
|
||||
|
||||
/* Waiting to send_peer_with_fds to master? */
|
||||
const u8 *send_to_master;
|
||||
};
|
||||
|
||||
struct addrhint {
|
||||
|
@ -140,7 +143,7 @@ struct addrhint {
|
|||
/* FIXME: Reorder */
|
||||
static struct io_plan *peer_start_gossip(struct io_conn *conn,
|
||||
struct peer *peer);
|
||||
static void send_peer_with_fds(struct peer *peer, const u8 *msg);
|
||||
static bool send_peer_with_fds(struct peer *peer, const u8 *msg);
|
||||
static void wake_pkt_out(struct peer *peer);
|
||||
static void try_reach_peer(struct daemon *daemon, const struct pubkey *id);
|
||||
|
||||
|
@ -191,6 +194,7 @@ static struct peer *new_peer(const tal_t *ctx,
|
|||
peer->daemon = daemon;
|
||||
peer->local = true;
|
||||
peer->reach_again = false;
|
||||
peer->send_to_master = NULL;
|
||||
peer->num_pings_outstanding = 0;
|
||||
peer->broadcast_index = 0;
|
||||
msg_queue_init(&peer->peer_out, peer);
|
||||
|
@ -295,9 +299,11 @@ static struct io_plan *peer_init_received(struct io_conn *conn,
|
|||
* gossipfd closed (forget_peer) or reconnect. */
|
||||
peer_finalized(peer);
|
||||
|
||||
/* We will not have anything queued, since we're not duplex. */
|
||||
msg = towire_gossip_peer_connected(peer, &peer->id, &peer->pcs.cs,
|
||||
peer->gfeatures, peer->lfeatures);
|
||||
send_peer_with_fds(peer, msg);
|
||||
if (!send_peer_with_fds(peer, msg))
|
||||
return io_close(conn);
|
||||
|
||||
/* Start the gossip flowing. */
|
||||
/* FIXME: This is a bit wasteful in the common case where master
|
||||
|
@ -398,10 +404,49 @@ static void handle_pong(struct peer *peer, const u8 *pong)
|
|||
tal_len(pong))));
|
||||
}
|
||||
|
||||
/* If master asks us to release peer, we attach this destructor in case it
|
||||
* dies while we're waiting for it to finish IO */
|
||||
static void fail_release(struct peer *peer)
|
||||
{
|
||||
u8 *msg = towire_gossipctl_release_peer_replyfail(peer);
|
||||
daemon_conn_send(&peer->daemon->master, take(msg));
|
||||
}
|
||||
|
||||
static struct io_plan *wait_until_ready_for_master(struct io_conn *conn,
|
||||
struct peer *peer)
|
||||
{
|
||||
/* One of these is always true, since we've just finished read/write */
|
||||
if (!peer_in_started(conn, &peer->pcs)
|
||||
&& !peer_out_started(conn, &peer->pcs)) {
|
||||
if (send_peer_with_fds(peer, take(peer->send_to_master))) {
|
||||
/* In case we set this earlier. */
|
||||
tal_del_destructor(peer, fail_release);
|
||||
peer->send_to_master = NULL;
|
||||
return io_close_taken_fd(conn);
|
||||
} else
|
||||
return io_close(conn);
|
||||
}
|
||||
|
||||
/* Don't do any more I/O. */
|
||||
return io_wait(conn, peer, wait_until_ready_for_master, peer);
|
||||
}
|
||||
|
||||
static struct io_plan *peer_msgin(struct io_conn *conn,
|
||||
struct peer *peer, u8 *msg);
|
||||
|
||||
/* Wrapper around peer_read_message: don't read another if we want to
|
||||
* pass up to master */
|
||||
static struct io_plan *peer_next_in(struct io_conn *conn, struct peer *peer)
|
||||
{
|
||||
if (peer->send_to_master)
|
||||
return wait_until_ready_for_master(conn, peer);
|
||||
|
||||
return peer_read_message(conn, &peer->pcs, peer_msgin);
|
||||
}
|
||||
|
||||
static struct io_plan *peer_msgin(struct io_conn *conn,
|
||||
struct peer *peer, u8 *msg)
|
||||
{
|
||||
u8 *s;
|
||||
enum wire_type t = fromwire_peektype(msg);
|
||||
|
||||
switch (t) {
|
||||
|
@ -415,15 +460,15 @@ static struct io_plan *peer_msgin(struct io_conn *conn,
|
|||
case WIRE_NODE_ANNOUNCEMENT:
|
||||
case WIRE_CHANNEL_UPDATE:
|
||||
handle_gossip_msg(peer->daemon->rstate, msg);
|
||||
return peer_read_message(conn, &peer->pcs, peer_msgin);
|
||||
return peer_next_in(conn, peer);
|
||||
|
||||
case WIRE_PING:
|
||||
handle_ping(peer, msg);
|
||||
return peer_read_message(conn, &peer->pcs, peer_msgin);
|
||||
return peer_next_in(conn, peer);
|
||||
|
||||
case WIRE_PONG:
|
||||
handle_pong(peer, msg);
|
||||
return peer_read_message(conn, &peer->pcs, peer_msgin);
|
||||
return peer_next_in(conn, peer);
|
||||
|
||||
case WIRE_OPEN_CHANNEL:
|
||||
case WIRE_CHANNEL_REESTABLISH:
|
||||
|
@ -443,13 +488,15 @@ static struct io_plan *peer_msgin(struct io_conn *conn,
|
|||
case WIRE_REVOKE_AND_ACK:
|
||||
case WIRE_INIT:
|
||||
/* Not our place to handle this, so we punt */
|
||||
s = towire_gossip_peer_nongossip(msg, &peer->id,
|
||||
&peer->pcs.cs,
|
||||
peer->gfeatures,
|
||||
peer->lfeatures,
|
||||
msg);
|
||||
send_peer_with_fds(peer, take(s));
|
||||
return io_close_taken_fd(conn);
|
||||
peer->send_to_master
|
||||
= towire_gossip_peer_nongossip(peer, &peer->id,
|
||||
&peer->pcs.cs,
|
||||
peer->gfeatures,
|
||||
peer->lfeatures,
|
||||
msg);
|
||||
|
||||
/* This will wait. */
|
||||
return peer_next_in(conn, peer);
|
||||
}
|
||||
|
||||
/* BOLT #1:
|
||||
|
@ -463,7 +510,7 @@ static struct io_plan *peer_msgin(struct io_conn *conn,
|
|||
} else
|
||||
peer_error(peer, "Unknown packet %u", t);
|
||||
|
||||
return peer_read_message(conn, &peer->pcs, peer_msgin);
|
||||
return peer_next_in(conn, peer);
|
||||
}
|
||||
|
||||
/* Wake up the outgoing direction of the connection and write any
|
||||
|
@ -495,6 +542,10 @@ static struct io_plan *peer_pkt_out(struct io_conn *conn, struct peer *peer)
|
|||
peer_pkt_out);
|
||||
}
|
||||
|
||||
/* Do we want to send this peer to the master daemon? */
|
||||
if (peer->send_to_master)
|
||||
return wait_until_ready_for_master(conn, peer);
|
||||
|
||||
/* If we're supposed to be sending gossip, do so now. */
|
||||
if (peer->gossip_sync) {
|
||||
struct queued_message *next;
|
||||
|
@ -518,7 +569,7 @@ static struct io_plan *peer_start_gossip(struct io_conn *conn, struct peer *peer
|
|||
{
|
||||
wake_pkt_out(peer);
|
||||
return io_duplex(conn,
|
||||
peer_read_message(conn, &peer->pcs, peer_msgin),
|
||||
peer_next_in(conn, peer),
|
||||
peer_pkt_out(conn, peer));
|
||||
}
|
||||
|
||||
|
@ -554,7 +605,7 @@ static void forget_peer(struct io_conn *conn, struct daemon_conn *dc)
|
|||
|
||||
/* When a peer is to be owned by another daemon, we create a socket
|
||||
* pair to send/receive gossip from it */
|
||||
static void send_peer_with_fds(struct peer *peer, const u8 *msg)
|
||||
static bool send_peer_with_fds(struct peer *peer, const u8 *msg)
|
||||
{
|
||||
int fds[2];
|
||||
|
||||
|
@ -564,7 +615,7 @@ static void send_peer_with_fds(struct peer *peer, const u8 *msg)
|
|||
|
||||
/* FIXME: Send error to peer? */
|
||||
/* Peer will be freed when caller closes conn. */
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Now we talk to socket to get to peer's owner daemon. */
|
||||
|
@ -583,6 +634,7 @@ static void send_peer_with_fds(struct peer *peer, const u8 *msg)
|
|||
|
||||
/* Don't get confused: we can't use this any more. */
|
||||
peer->fd = -1;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -678,7 +730,7 @@ static struct io_plan *handle_peer(struct io_conn *conn, struct daemon *daemon,
|
|||
|
||||
static struct io_plan *release_peer(struct io_conn *conn, struct daemon *daemon,
|
||||
const u8 *msg)
|
||||
{
|
||||
{
|
||||
struct pubkey id;
|
||||
struct peer *peer;
|
||||
|
||||
|
@ -686,20 +738,24 @@ static struct io_plan *release_peer(struct io_conn *conn, struct daemon *daemon,
|
|||
master_badmsg(WIRE_GOSSIPCTL_RELEASE_PEER, msg);
|
||||
|
||||
peer = find_peer(daemon, &id);
|
||||
if (!peer || !peer->local) {
|
||||
status_trace("release_peer: peer %s not %s",
|
||||
type_to_string(trc, struct pubkey, &id),
|
||||
peer ? "local" : "found");
|
||||
if (!peer || !peer->local || peer->send_to_master) {
|
||||
/* This can happen with dying peers, or reconnect */
|
||||
status_trace("release_peer: peer %s %s",
|
||||
type_to_string(trc, struct pubkey, &id),
|
||||
!peer ? "not found"
|
||||
: !peer->send_to_master ? "already releasing"
|
||||
: "not local");
|
||||
msg = towire_gossipctl_release_peer_replyfail(msg);
|
||||
daemon_conn_send(&daemon->master, take(msg));
|
||||
} else {
|
||||
msg = towire_gossipctl_release_peer_reply(msg,
|
||||
msg = towire_gossipctl_release_peer_reply(peer,
|
||||
&peer->pcs.cs,
|
||||
peer->gfeatures,
|
||||
peer->lfeatures);
|
||||
send_peer_with_fds(peer, take(msg));
|
||||
io_close_taken_fd(peer->conn);
|
||||
peer->send_to_master = msg;
|
||||
|
||||
/* Wake output, in case it's idle. */
|
||||
msg_wake(&peer->peer_out);
|
||||
}
|
||||
return daemon_conn_read_next(conn, &daemon->master);
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue