gossipd: zombify inactive channels instead of pruning

Though BOLT 7 says a channel may be pruned when one side becomes inactive
and fails to refresh their channel_update, in practice, the
channel_announcement can be difficult to recover if deleted entirely.
Here the channel_announcement is tagged as a zombie so that gossip_store
consumers may safely ignore it, while it is still retained in case the channel
comes back online in the future. Node_announcements and channel_updates may
also be retained in the same way until the channel is ready to be
resurrected.

Changelog-Fixed: Pruned channels are more reliably restored.
This commit is contained in:
Alex Myers 2022-12-16 12:38:23 -06:00 committed by Rusty Russell
parent 6bff10cd40
commit 1bae8cd28a
8 changed files with 322 additions and 10 deletions

View file

@ -620,6 +620,9 @@ static bool map_catchup(struct gossmap *map, size_t *num_rejected)
if (be32_to_cpu(ghdr.len) & GOSSIP_STORE_LEN_DELETED_BIT)
continue;
if (be32_to_cpu(ghdr.len) & GOSSIP_STORE_LEN_ZOMBIE_BIT)
continue;
/* Partial write, this can happen. */
if (map->map_end + reclen > map->map_size)
break;

View file

@ -645,6 +645,96 @@ void gossip_store_mark_channel_deleted(struct gossip_store *gs,
0, false, false, false, NULL);
}
/* Flags a stored channel_announcement as a zombie by setting the zombie bit
 * in the big-endian length word located at bcast->index in the store file. */
void gossip_store_mark_channel_zombie(struct gossip_store *gs,
				      struct broadcastable *bcast)
{
	const u32 offset = bcast->index;
	beint32_t hdr_len;

	/* Should never get here during loading! */
	assert(gs->writable);
	assert(offset);

#if DEVELOPER
	{
		/* Sanity check: the record at this offset really is a
		 * channel_announcement. */
		const u8 *stored = gossip_store_get(tmpctx, gs, offset);
		assert(fromwire_peektype(stored) == WIRE_CHANNEL_ANNOUNCEMENT);
	}
#endif

	/* Fetch the current length word so its other flag bits survive. */
	if (pread(gs->fd, &hdr_len, sizeof(hdr_len), offset)
	    != sizeof(hdr_len)) {
		status_failed(STATUS_FAIL_INTERNAL_ERROR,
			      "Failed reading len to zombie channel @%u: %s",
			      offset, strerror(errno));
	}

	/* The record must not already carry the deleted flag. */
	assert(!(be32_to_cpu(hdr_len) & GOSSIP_STORE_LEN_DELETED_BIT));

	/* Set the zombie flag and write the length word back in place. */
	hdr_len = hdr_len | cpu_to_be32(GOSSIP_STORE_LEN_ZOMBIE_BIT);
	if (pwrite(gs->fd, &hdr_len, sizeof(hdr_len), offset)
	    != sizeof(hdr_len)) {
		status_failed(STATUS_FAIL_INTERNAL_ERROR,
			      "Failed writing len to zombie channel @%u: %s",
			      offset, strerror(errno));
	}
}
/* Flags a stored channel_update as a zombie by setting the zombie bit in the
 * big-endian length word located at bcast->index in the store file. */
void gossip_store_mark_cupdate_zombie(struct gossip_store *gs,
				      struct broadcastable *bcast)
{
	const u32 offset = bcast->index;
	beint32_t hdr_len;

	/* Should never get here during loading! */
	assert(gs->writable);
	assert(offset);

#if DEVELOPER
	{
		/* Sanity check: the record at this offset really is a
		 * channel_update. */
		const u8 *stored = gossip_store_get(tmpctx, gs, offset);
		assert(fromwire_peektype(stored) == WIRE_CHANNEL_UPDATE);
	}
#endif

	/* Fetch the current length word so its other flag bits survive. */
	if (pread(gs->fd, &hdr_len, sizeof(hdr_len), offset)
	    != sizeof(hdr_len)) {
		status_failed(STATUS_FAIL_INTERNAL_ERROR,
			      "Failed reading len to zombie channel update @%u: %s",
			      offset, strerror(errno));
	}

	/* The record must not already carry the deleted flag. */
	assert(!(be32_to_cpu(hdr_len) & GOSSIP_STORE_LEN_DELETED_BIT));

	/* Set the zombie flag and write the length word back in place. */
	hdr_len = hdr_len | cpu_to_be32(GOSSIP_STORE_LEN_ZOMBIE_BIT);
	if (pwrite(gs->fd, &hdr_len, sizeof(hdr_len), offset)
	    != sizeof(hdr_len)) {
		status_failed(STATUS_FAIL_INTERNAL_ERROR,
			      "Failed writing len to zombie channel update @%u: %s",
			      offset, strerror(errno));
	}
}
/* Marks the length field of a node_announcement with the zombie flag bit.
 *
 * @gs:    the gossip store; must be writable (i.e. not still loading).
 * @bcast: broadcastable whose non-zero index is the store offset of the
 *         node_announcement record.
 *
 * Calls status_failed() if the length word cannot be read or written. */
void gossip_store_mark_nannounce_zombie(struct gossip_store *gs,
					struct broadcastable *bcast)
{
	beint32_t belen;
	u32 index = bcast->index;

	/* Should never get here during loading! */
	assert(gs->writable);
	assert(index);

#if DEVELOPER
	const u8 *msg = gossip_store_get(tmpctx, gs, index);
	assert(fromwire_peektype(msg) == WIRE_NODE_ANNOUNCEMENT);
#endif

	/* Read the existing length word so its other flag bits are kept. */
	if (pread(gs->fd, &belen, sizeof(belen), index) != sizeof(belen))
		status_failed(STATUS_FAIL_INTERNAL_ERROR,
			      "Failed reading len to zombie node announcement @%u: %s",
			      index, strerror(errno));

	assert((be32_to_cpu(belen) & GOSSIP_STORE_LEN_DELETED_BIT) == 0);
	belen |= cpu_to_be32(GOSSIP_STORE_LEN_ZOMBIE_BIT);

	/* The message names the record type actually being written (it used
	 * to say "channel update", copied from the sibling function). */
	if (pwrite(gs->fd, &belen, sizeof(belen), index) != sizeof(belen))
		status_failed(STATUS_FAIL_INTERNAL_ERROR,
			      "Failed writing len to zombie node announcement @%u: %s",
			      index, strerror(errno));
}
const u8 *gossip_store_get(const tal_t *ctx,
struct gossip_store *gs,
u64 offset)

View file

@ -66,6 +66,20 @@ void gossip_store_delete(struct gossip_store *gs,
void gossip_store_mark_channel_deleted(struct gossip_store *gs,
const struct short_channel_id *scid);
/*
* Marks the length field of a channel announcement with a zombie flag bit.
* This allows the channel_announcement to be retained in the store while
* waiting for channel updates to reactivate it.
*
* bcast->index is the store offset of the record and must be non-zero; the
* store must be writable.
*/
void gossip_store_mark_channel_zombie(struct gossip_store *gs,
struct broadcastable *bcast);
/*
* Same as above, but for a channel_update record: sets the zombie flag bit in
* the length field of the channel_update found at bcast->index.
*/
void gossip_store_mark_cupdate_zombie(struct gossip_store *gs,
struct broadcastable *bcast);
/*
* Same as above, but for a node_announcement record: sets the zombie flag bit
* in the length field of the node_announcement found at bcast->index.
*/
void gossip_store_mark_nannounce_zombie(struct gossip_store *gs,
struct broadcastable *bcast);
/**
* Direct store accessor: loads gossip msg back from store.
*

View file

@ -322,17 +322,30 @@ static void handle_local_private_channel(struct daemon *daemon, const u8 *msg)
u8 *features;
struct short_channel_id scid;
const u8 *cannounce;
struct chan *zombie;
if (!fromwire_gossipd_local_private_channel(msg, msg,
&id, &capacity, &scid,
&features))
master_badmsg(WIRE_GOSSIPD_LOCAL_PRIVATE_CHANNEL, msg);
status_debug("received private channel announcement from channeld for %s",
type_to_string(tmpctx, struct short_channel_id, &scid));
cannounce = private_channel_announcement(tmpctx,
&scid,
&daemon->id,
&id,
features);
/* If there is already a zombie announcement for this channel in the
* store we can disregard this one. */
zombie = get_channel(daemon->rstate, &scid);
if (zombie && (zombie->half[0].zombie || zombie->half[1].zombie)){
status_debug("received channel announcement for %s,"
" but it is a zombie; discarding",
type_to_string(tmpctx, struct short_channel_id,
&scid));
return;
}
if (!routing_add_private_channel(daemon->rstate, &id, capacity,
cannounce, 0)) {

View file

@ -3,6 +3,7 @@
#include <bitcoin/script.h>
#include <ccan/array_size/array_size.h>
#include <ccan/tal/str/str.h>
#include <common/gossip_store.h>
#include <common/memleak.h>
#include <common/pseudorand.h>
#include <common/status.h>
@ -394,6 +395,25 @@ static bool node_has_public_channels(struct node *node)
return false;
}
/* A channel counts as a zombie as soon as either of its two directions
 * carries the zombie flag. */
static bool is_chan_zombie(struct chan *chan)
{
	return chan->half[0].zombie || chan->half[1].zombie;
}
static bool is_node_zombie(struct node* node)
{
struct chan_map_iter i;
struct chan *c;
for (c = first_chan(node, &i); c; c = next_chan(node, &i)) {
if (!is_chan_zombie(c))
return false;
}
return true;
}
/* We can *send* a channel_announce for a channel attached to this node:
* we only send once we have a channel_update. */
static bool node_has_broadcastable_channels(struct node *node)
@ -404,8 +424,8 @@ static bool node_has_broadcastable_channels(struct node *node)
for (c = first_chan(node, &i); c; c = next_chan(node, &i)) {
if (!is_chan_public(c))
continue;
if (is_halfchan_defined(&c->half[0])
|| is_halfchan_defined(&c->half[1]))
if ((is_halfchan_defined(&c->half[0])
|| is_halfchan_defined(&c->half[1])) && !is_chan_zombie(c))
return true;
}
return false;
@ -444,6 +464,7 @@ static void force_node_announce_rexmit(struct routing_state *rstate,
node->bcast.timestamp,
is_local,
false,
false,
NULL);
if (node->rgraph.index == initial_bcast_index){
node->rgraph.index = node->bcast.index;
@ -457,6 +478,7 @@ static void force_node_announce_rexmit(struct routing_state *rstate,
node->rgraph.timestamp,
is_local,
false,
false,
NULL);
}
}
@ -553,6 +575,7 @@ static void init_half_chan(struct routing_state *rstate,
broadcastable_init(&c->bcast);
broadcastable_init(&c->rgraph);
c->tokens = TOKEN_MAX;
c->zombie = false;
}
static void bad_gossip_order(const u8 *msg,
@ -824,6 +847,7 @@ static void add_channel_announce_to_broadcast(struct routing_state *rstate,
chan->bcast.timestamp,
is_local,
false,
false,
addendum);
rstate->local_channel_announced |= is_local;
}
@ -859,8 +883,9 @@ static void delete_chan_messages_from_store(struct routing_state *rstate,
static void remove_channel_from_store(struct routing_state *rstate,
struct chan *chan)
{
/* Put in tombstone marker */
gossip_store_mark_channel_deleted(rstate->gs, &chan->scid);
/* Put in tombstone marker. Zombie channels will have one already. */
if (!is_chan_zombie(chan))
gossip_store_mark_channel_deleted(rstate->gs, &chan->scid);
/* Now delete old entries. */
delete_chan_messages_from_store(rstate, chan);
@ -1290,6 +1315,31 @@ static void delete_spam_update(struct routing_state *rstate,
hc->rgraph.timestamp = hc->bcast.timestamp;
}
/* Restore the node_announcements of both endpoints of a resurrected channel.
 *
 * For each node of @chan that has a node_announcement in the store, the most
 * recent copy (rgraph, which may be a spam entry) is read back, the existing
 * store entries are deleted, and the message is re-added with the zombie and
 * spam flags clear.  Afterwards bcast and rgraph both refer to the new entry.
 *
 * Nodes that have no stored node_announcement are left untouched. */
static void resurrect_nannouncements(struct routing_state *rstate,
				     struct chan *chan)
{
	for (int i = 0; i < 2; i++) {
		struct node *node = chan->nodes[i];
		const u8 *zombie_nann;

		/* No node_announcement stored for this node: nothing to read
		 * back or delete.  zombify_channel() applies the same check
		 * before flagging a node's announcement. */
		if (!node->bcast.index)
			continue;

		/* Use the most recent announcement (could be spam.) */
		zombie_nann = gossip_store_get(tmpctx, rstate->gs,
					       node->rgraph.index);

		/* If there was a spam entry, delete them both. */
		if (node->bcast.index != node->rgraph.index)
			gossip_store_delete(rstate->gs, &node->bcast,
					    WIRE_NODE_ANNOUNCEMENT);
		gossip_store_delete(rstate->gs, &node->rgraph,
				    WIRE_NODE_ANNOUNCEMENT);

		/* Re-add without the zombie flag; both broadcastables now
		 * point at the single fresh entry. */
		node->bcast.index = gossip_store_add(rstate->gs, zombie_nann,
						     node->rgraph.timestamp,
						     local_direction(rstate,
								     chan, NULL),
						     false, false, NULL);
		node->bcast.timestamp = node->rgraph.timestamp;
		node->rgraph.index = node->bcast.index;
	}
}
bool routing_add_channel_update(struct routing_state *rstate,
const u8 *update TAKES,
u32 index,
@ -1312,6 +1362,7 @@ bool routing_add_channel_update(struct routing_state *rstate,
u8 direction;
struct amount_sat sat;
bool spam;
bool zombie;
/* Make sure we own msg, even if we don't save it. */
if (taken(update))
@ -1332,6 +1383,7 @@ bool routing_add_channel_update(struct routing_state *rstate,
if (chan) {
uc = NULL;
sat = chan->sat;
zombie = is_chan_zombie(chan);
} else {
/* Maybe announcement was waiting for this update? */
uc = get_unupdated_channel(rstate, &short_channel_id);
@ -1339,6 +1391,7 @@ bool routing_add_channel_update(struct routing_state *rstate,
return false;
}
sat = uc->sat;
zombie = false;
}
/* Reject update if the `htlc_maximum_msat` is greater
@ -1468,6 +1521,75 @@ bool routing_add_channel_update(struct routing_state *rstate,
return true;
}
/* Handle resurrection of zombie channels if the other side of the
* zombie channel has a recent timestamp. */
if (zombie && timestamp_reasonable(rstate,
chan->half[!direction].bcast.timestamp)) {
status_peer_debug(peer ? &peer->id : NULL,
"Resurrecting zombie channel %s.",
type_to_string(tmpctx,
struct short_channel_id,
&chan->scid));
const u8 *zombie_announcement = NULL;
const u8 *zombie_addendum = NULL;
const u8 *zombie_update[2] = {NULL, NULL};
/* Resurrection is a careful process. First delete the zombie-
* flagged channel_announcement which has already been
* tombstoned, and re-add to the store without zombie flag. */
zombie_announcement = gossip_store_get(tmpctx, rstate->gs,
chan->bcast.index);
u32 offset = tal_count(zombie_announcement) +
sizeof(struct gossip_hdr);
/* The channel_announcement addendum reminds us of its size. */
zombie_addendum = gossip_store_get(tmpctx, rstate->gs,
chan->bcast.index + offset);
gossip_store_delete(rstate->gs, &chan->bcast,
is_chan_public(chan)
? WIRE_CHANNEL_ANNOUNCEMENT
: WIRE_GOSSIP_STORE_PRIVATE_CHANNEL);
chan->bcast.index =
gossip_store_add(rstate->gs, zombie_announcement,
chan->bcast.timestamp,
local_direction(rstate, chan, NULL),
false, false, zombie_addendum);
/* Deletion of the old addendum is optional. */
/* This opposing channel_update has been stashed away. Now that
* there are two valid updates, this one gets restored. */
/* FIXME: Handle spam case probably needs a helper f'n */
zombie_update[0] = gossip_store_get(tmpctx, rstate->gs,
chan->half[!direction].bcast.index);
if (chan->half[!direction].bcast.index != chan->half[!direction].rgraph.index)
/* Don't forget the spam channel_update */
zombie_update[1] = gossip_store_get(tmpctx, rstate->gs,
chan->half[!direction].rgraph.index);
gossip_store_delete(rstate->gs, &chan->half[!direction].bcast,
is_chan_public(chan)
? WIRE_CHANNEL_UPDATE
: WIRE_GOSSIP_STORE_PRIVATE_UPDATE);
chan->half[!direction].bcast.index =
gossip_store_add(rstate->gs, zombie_update[0],
chan->half[!direction].bcast.timestamp,
local_direction(rstate, chan, NULL),
false, false, NULL);
if (zombie_update[1])
chan->half[!direction].rgraph.index =
gossip_store_add(rstate->gs, zombie_update[1],
chan->half[!direction].rgraph.timestamp,
local_direction(rstate, chan, NULL),
false, true, NULL);
else
chan->half[!direction].rgraph.index = chan->half[!direction].bcast.index;
/* If we caught any node_announcements for fully zombie nodes
* (no remaining active channels) handle those as well. */
resurrect_nannouncements(rstate, chan);
/* It's a miracle! */
chan->half[0].zombie = false;
chan->half[1].zombie = false;
zombie = false;
}
/* If we're loading from store, this means we don't re-add to store. */
if (index) {
if (!spam)
@ -1477,7 +1599,7 @@ bool routing_add_channel_update(struct routing_state *rstate,
hc->rgraph.index
= gossip_store_add(rstate->gs, update, timestamp,
local_direction(rstate, chan, NULL),
spam, NULL);
zombie, spam, NULL);
if (hc->bcast.timestamp > rstate->last_timestamp
&& hc->bcast.timestamp < time_now().ts.tv_sec)
rstate->last_timestamp = hc->bcast.timestamp;
@ -1673,7 +1795,8 @@ bool routing_add_node_announcement(struct routing_state *rstate,
node = get_node(rstate, &node_id);
if (node == NULL || !node_has_broadcastable_channels(node)) {
if (node == NULL || (!node_has_broadcastable_channels(node) &&
!is_node_zombie(node))) {
struct pending_node_announce *pna;
/* BOLT #7:
*
@ -1797,7 +1920,7 @@ bool routing_add_node_announcement(struct routing_state *rstate,
= gossip_store_add(rstate->gs, msg, timestamp,
node_id_eq(&node_id,
&rstate->local_id),
false, spam, NULL);
is_node_zombie(node), spam, NULL);
if (node->bcast.timestamp > rstate->last_timestamp
&& node->bcast.timestamp < time_now().ts.tv_sec)
rstate->last_timestamp = node->bcast.timestamp;
@ -1903,6 +2026,43 @@ u8 *handle_node_announcement(struct routing_state *rstate, const u8 *node_ann,
return NULL;
}
/* Turn an active channel into a zombie: flag its channel_announcement and
 * channel_updates as zombies in the gossip_store, write the channel-deleted
 * marker for gossip_store consumers, and flag the node_announcement of any
 * endpoint whose channels are now all zombies. */
static void zombify_channel(struct gossip_store *gs, struct chan *channel)
{
	assert(!is_chan_zombie(channel));

	/* The announcement first, then the deletion marker for consumers. */
	gossip_store_mark_channel_zombie(gs, &channel->bcast);
	gossip_store_mark_channel_deleted(gs, &channel->scid);

	for (int dir = 0; dir < 2; dir++) {
		struct half_chan *hc = &channel->half[dir];

		hc->zombie = true;
		/* No channel_update stored for this direction. */
		if (!hc->bcast.index)
			continue;

		gossip_store_mark_cupdate_zombie(gs, &hc->bcast);
		/* Channel may also have a spam entry */
		if (hc->rgraph.index != hc->bcast.index)
			gossip_store_mark_cupdate_zombie(gs, &hc->rgraph);
	}

	status_debug("Channel %s zombified",
		     type_to_string(tmpctx, struct short_channel_id,
				    &channel->scid));

	/* If one of the nodes has no remaining active channels, the
	 * node_announcement should also be stashed. */
	for (int side = 0; side < 2; side++) {
		struct node *n = channel->nodes[side];

		if (is_node_zombie(n) && n->bcast.index) {
			/* A separate spam announcement is flagged as well. */
			if (n->rgraph.index != n->bcast.index)
				gossip_store_mark_nannounce_zombie(gs,
								   &n->rgraph);
			gossip_store_mark_nannounce_zombie(gs, &n->bcast);
			status_debug("Node %s zombified",
				     type_to_string(tmpctx, struct node_id,
						    &n->id));
		}
	}
}
void route_prune(struct routing_state *rstate)
{
u64 now = gossip_time_now(rstate).ts.tv_sec;
@ -1918,6 +2078,9 @@ void route_prune(struct routing_state *rstate)
/* Local-only? Don't prune. */
if (!is_chan_public(chan))
continue;
/* These have been pruned already */
if (is_chan_zombie(chan))
continue;
/* BOLT #7:
* - if the `timestamp` of the latest `channel_update` in
@ -1953,10 +2116,12 @@ void route_prune(struct routing_state *rstate)
}
}
/* Now free all the chans and maybe even nodes. */
/* Any channels missing an update are now considered zombies. They may
* come back later, in which case the channel_announcement needs to be
* stashed away for later use. If all remaining channels for a node are
* zombies, the node is zombified too. */
for (size_t i = 0; i < tal_count(pruned); i++) {
remove_channel_from_store(rstate, pruned[i]);
free_chan(rstate, pruned[i]);
zombify_channel(rstate->gs, pruned[i]);
}
}

View file

@ -29,6 +29,9 @@ struct half_chan {
/* Token bucket */
u8 tokens;
/* Disabled channel waiting for a channel_update from both sides. */
bool zombie;
};
struct chan {

View file

@ -86,6 +86,18 @@ const u8 *gossip_store_get_private_update(const tal_t *ctx UNNEEDED,
void gossip_store_mark_channel_deleted(struct gossip_store *gs UNNEEDED,
const struct short_channel_id *scid UNNEEDED)
{ fprintf(stderr, "gossip_store_mark_channel_deleted called!\n"); abort(); }
/* Generated stub for gossip_store_mark_channel_zombie */
/* Not expected to be reached by this test: prints a notice to stderr and
 * aborts if it is ever called. */
void gossip_store_mark_channel_zombie(struct gossip_store *gs UNNEEDED,
struct broadcastable *bcast UNNEEDED)
{ fprintf(stderr, "gossip_store_mark_channel_zombie called!\n"); abort(); }
/* Generated stub for gossip_store_mark_cupdate_zombie */
/* Same behaviour: notice on stderr, then abort. */
void gossip_store_mark_cupdate_zombie(struct gossip_store *gs UNNEEDED,
struct broadcastable *bcast UNNEEDED)
{ fprintf(stderr, "gossip_store_mark_cupdate_zombie called!\n"); abort(); }
/* Generated stub for gossip_store_mark_nannounce_zombie */
/* Same behaviour: notice on stderr, then abort. */
void gossip_store_mark_nannounce_zombie(struct gossip_store *gs UNNEEDED,
struct broadcastable *bcast UNNEEDED)
{ fprintf(stderr, "gossip_store_mark_nannounce_zombie called!\n"); abort(); }
/* Generated stub for gossip_store_new */
struct gossip_store *gossip_store_new(struct routing_state *rstate UNNEEDED,
struct list_head *peers UNNEEDED)

View file

@ -57,6 +57,18 @@ const u8 *gossip_store_get_private_update(const tal_t *ctx UNNEEDED,
void gossip_store_mark_channel_deleted(struct gossip_store *gs UNNEEDED,
const struct short_channel_id *scid UNNEEDED)
{ fprintf(stderr, "gossip_store_mark_channel_deleted called!\n"); abort(); }
/* Generated stub for gossip_store_mark_channel_zombie */
/* Not expected to be reached by this test: prints a notice to stderr and
 * aborts if it is ever called. */
void gossip_store_mark_channel_zombie(struct gossip_store *gs UNNEEDED,
struct broadcastable *bcast UNNEEDED)
{ fprintf(stderr, "gossip_store_mark_channel_zombie called!\n"); abort(); }
/* Generated stub for gossip_store_mark_cupdate_zombie */
/* Same behaviour: notice on stderr, then abort. */
void gossip_store_mark_cupdate_zombie(struct gossip_store *gs UNNEEDED,
struct broadcastable *bcast UNNEEDED)
{ fprintf(stderr, "gossip_store_mark_cupdate_zombie called!\n"); abort(); }
/* Generated stub for gossip_store_mark_nannounce_zombie */
/* Same behaviour: notice on stderr, then abort. */
void gossip_store_mark_nannounce_zombie(struct gossip_store *gs UNNEEDED,
struct broadcastable *bcast UNNEEDED)
{ fprintf(stderr, "gossip_store_mark_nannounce_zombie called!\n"); abort(); }
/* Generated stub for memleak_add_helper_ */
void memleak_add_helper_(const tal_t *p UNNEEDED, void (*cb)(struct htable *memtable UNNEEDED,
const tal_t *)){ }