mirror of
https://github.com/ElementsProject/lightning.git
synced 2025-02-22 22:45:27 +01:00
gossipd: check that gossmap code sees updates from gossip_store writes.
After analyzing various weird cases where we ended up with duplicate gossip_store entries, it could be explained by us not fully processing the gossip store. It's not clear that my assumptions that we would always see our own writes are true: technically this may require an fsync(). So we now add the check, and do an fsync and try again. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> Changelog-Fixed: gossipd: more sanity checks that we are correctly updating the gossip_store file.
This commit is contained in:
parent
8156c83e11
commit
2b4b1479ed
5 changed files with 80 additions and 1 deletions
|
@ -1851,3 +1851,17 @@ int gossmap_fd(const struct gossmap *map)
|
|||
{
|
||||
return map->fd;
|
||||
}
|
||||
|
||||
const u8 *gossmap_fetch_tail(const tal_t *ctx, const struct gossmap *map)
|
||||
{
|
||||
size_t len;
|
||||
u8 *p;
|
||||
|
||||
/* Shouldn't happen... */
|
||||
if (map->map_end > map->map_size)
|
||||
return NULL;
|
||||
len = map->map_size - map->map_end;
|
||||
p = tal_arr(ctx, u8, len);
|
||||
map_copy(map, map->map_size, p, len);
|
||||
return p;
|
||||
}
|
||||
|
|
|
@ -304,4 +304,7 @@ u64 gossmap_lengths(const struct gossmap *map, u64 *total);
|
|||
|
||||
/* Debugging: connectd wants to enumerate fds */
|
||||
int gossmap_fd(const struct gossmap *map);
|
||||
|
||||
/* Fetch unprocessed part of gossmap */
|
||||
const u8 *gossmap_fetch_tail(const tal_t *ctx, const struct gossmap *map);
|
||||
#endif /* LIGHTNING_COMMON_GOSSMAP_H */
|
||||
|
|
|
@ -408,7 +408,7 @@ struct gossip_store *gossip_store_new(const tal_t *ctx,
|
|||
return gs;
|
||||
}
|
||||
|
||||
static void gossip_store_fsync(const struct gossip_store *gs)
|
||||
void gossip_store_fsync(const struct gossip_store *gs)
|
||||
{
|
||||
if (fsync(gs->fd) != 0)
|
||||
status_failed(STATUS_FAIL_INTERNAL_ERROR,
|
||||
|
|
|
@ -110,5 +110,6 @@ void gossip_store_set_timestamp(struct gossip_store *gs, u64 offset, u32 timesta
|
|||
* For debugging.
|
||||
*/
|
||||
u64 gossip_store_len_written(const struct gossip_store *gs);
|
||||
void gossip_store_fsync(const struct gossip_store *gs);
|
||||
|
||||
#endif /* LIGHTNING_GOSSIPD_GOSSIP_STORE_H */
|
||||
|
|
|
@ -1365,9 +1365,70 @@ void gossmap_manage_channel_spent(struct gossmap_manage *gm,
|
|||
}
|
||||
}
|
||||
|
||||
/* Fetch the part of the gossmap we didn't process via read() */
|
||||
static const u8 *fetch_tail_fd(const tal_t *ctx,
|
||||
int gossmap_fd,
|
||||
u64 map_used, u64 map_size)
|
||||
{
|
||||
size_t len;
|
||||
ssize_t r;
|
||||
u8 *p;
|
||||
|
||||
/* Shouldn't happen... */
|
||||
if (map_used > map_size)
|
||||
return NULL;
|
||||
len = map_size - map_used;
|
||||
p = tal_arrz(ctx, u8, len);
|
||||
r = pread(gossmap_fd, p, len, map_used);
|
||||
if (r != len)
|
||||
status_broken("Partial read on gossmap EOF (%zi vs %zu)",
|
||||
r, len);
|
||||
return p;
|
||||
}
|
||||
|
||||
struct gossmap *gossmap_manage_get_gossmap(struct gossmap_manage *gm)
|
||||
{
|
||||
u64 map_used, map_size, written_len;
|
||||
|
||||
gossmap_refresh(gm->raw_gossmap);
|
||||
|
||||
/* Sanity check that we see everything we wrote. */
|
||||
map_used = gossmap_lengths(gm->raw_gossmap, &map_size);
|
||||
written_len = gossip_store_len_written(gm->gs);
|
||||
|
||||
if (map_size != written_len) {
|
||||
status_broken("gossmap size %"PRIu64" != written size %"PRIu64,
|
||||
map_size, written_len);
|
||||
/* Push harder! */
|
||||
gossip_store_fsync(gm->gs);
|
||||
gossmap_refresh(gm->raw_gossmap);
|
||||
|
||||
/* Sanity check that we see everything we wrote. */
|
||||
map_used = gossmap_lengths(gm->raw_gossmap, &map_size);
|
||||
written_len = gossip_store_len_written(gm->gs);
|
||||
if (map_used != written_len || map_size != map_used)
|
||||
status_failed(STATUS_FAIL_INTERNAL_ERROR,
|
||||
"gossmap read inconsistent even after sync"
|
||||
" used=%"PRIu64" seen=%"PRIu64" written=%"PRIu64,
|
||||
map_used, map_size, written_len);
|
||||
} else if (map_size != map_used) {
|
||||
const u8 *remainder_fd, *remainder_mmap;
|
||||
|
||||
/* Sigh. Did gossmap see something different (via mmap)
|
||||
* from what we see via read? It's possible it's caught up
|
||||
* now, but just in case, log BOTH */
|
||||
remainder_mmap = gossmap_fetch_tail(tmpctx, gm->raw_gossmap);
|
||||
remainder_fd = fetch_tail_fd(tmpctx,
|
||||
gossmap_fd(gm->raw_gossmap),
|
||||
map_used, map_size);
|
||||
status_failed(STATUS_FAIL_INTERNAL_ERROR,
|
||||
"Gossmap failed to process entire gossip_store: "
|
||||
"at %"PRIu64" of %"PRIu64" remaining_mmap=%s remaining_fd=%s",
|
||||
map_used, map_size,
|
||||
tal_hex(tmpctx, remainder_mmap),
|
||||
tal_hex(tmpctx, remainder_fd));
|
||||
}
|
||||
|
||||
return gm->raw_gossmap;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue