mirror of
https://github.com/ElementsProject/lightning.git
synced 2025-02-23 06:55:13 +01:00
gossipd: check that gossmap code sees updates from gossip_store writes.
After analyzing various weird cases where we ended up with duplicate gossip_store entries, it could be explained by us not fully processing the gossip store. It's not clear that my assumptions that we would always see our own writes are true: technically this may require an fsync(). So we now add the check, and do an fsync and try again. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> Changelog-Fixed: gossipd: more sanity checks that we are correctly updating the gossip_store file.
This commit is contained in:
parent
8156c83e11
commit
2b4b1479ed
5 changed files with 80 additions and 1 deletions
|
@ -1851,3 +1851,17 @@ int gossmap_fd(const struct gossmap *map)
|
||||||
{
|
{
|
||||||
return map->fd;
|
return map->fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const u8 *gossmap_fetch_tail(const tal_t *ctx, const struct gossmap *map)
|
||||||
|
{
|
||||||
|
size_t len;
|
||||||
|
u8 *p;
|
||||||
|
|
||||||
|
/* Shouldn't happen... */
|
||||||
|
if (map->map_end > map->map_size)
|
||||||
|
return NULL;
|
||||||
|
len = map->map_size - map->map_end;
|
||||||
|
p = tal_arr(ctx, u8, len);
|
||||||
|
map_copy(map, map->map_size, p, len);
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
|
@ -304,4 +304,7 @@ u64 gossmap_lengths(const struct gossmap *map, u64 *total);
|
||||||
|
|
||||||
/* Debugging: connectd wants to enumerate fds */
|
/* Debugging: connectd wants to enumerate fds */
|
||||||
int gossmap_fd(const struct gossmap *map);
|
int gossmap_fd(const struct gossmap *map);
|
||||||
|
|
||||||
|
/* Fetch unprocessed part of gossmap */
|
||||||
|
const u8 *gossmap_fetch_tail(const tal_t *ctx, const struct gossmap *map);
|
||||||
#endif /* LIGHTNING_COMMON_GOSSMAP_H */
|
#endif /* LIGHTNING_COMMON_GOSSMAP_H */
|
||||||
|
|
|
@ -408,7 +408,7 @@ struct gossip_store *gossip_store_new(const tal_t *ctx,
|
||||||
return gs;
|
return gs;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void gossip_store_fsync(const struct gossip_store *gs)
|
void gossip_store_fsync(const struct gossip_store *gs)
|
||||||
{
|
{
|
||||||
if (fsync(gs->fd) != 0)
|
if (fsync(gs->fd) != 0)
|
||||||
status_failed(STATUS_FAIL_INTERNAL_ERROR,
|
status_failed(STATUS_FAIL_INTERNAL_ERROR,
|
||||||
|
|
|
@ -110,5 +110,6 @@ void gossip_store_set_timestamp(struct gossip_store *gs, u64 offset, u32 timesta
|
||||||
* For debugging.
|
* For debugging.
|
||||||
*/
|
*/
|
||||||
u64 gossip_store_len_written(const struct gossip_store *gs);
|
u64 gossip_store_len_written(const struct gossip_store *gs);
|
||||||
|
void gossip_store_fsync(const struct gossip_store *gs);
|
||||||
|
|
||||||
#endif /* LIGHTNING_GOSSIPD_GOSSIP_STORE_H */
|
#endif /* LIGHTNING_GOSSIPD_GOSSIP_STORE_H */
|
||||||
|
|
|
@ -1365,9 +1365,70 @@ void gossmap_manage_channel_spent(struct gossmap_manage *gm,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Fetch the part of the gossmap we didn't process via read() */
|
||||||
|
static const u8 *fetch_tail_fd(const tal_t *ctx,
|
||||||
|
int gossmap_fd,
|
||||||
|
u64 map_used, u64 map_size)
|
||||||
|
{
|
||||||
|
size_t len;
|
||||||
|
ssize_t r;
|
||||||
|
u8 *p;
|
||||||
|
|
||||||
|
/* Shouldn't happen... */
|
||||||
|
if (map_used > map_size)
|
||||||
|
return NULL;
|
||||||
|
len = map_size - map_used;
|
||||||
|
p = tal_arrz(ctx, u8, len);
|
||||||
|
r = pread(gossmap_fd, p, len, map_used);
|
||||||
|
if (r != len)
|
||||||
|
status_broken("Partial read on gossmap EOF (%zi vs %zu)",
|
||||||
|
r, len);
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
struct gossmap *gossmap_manage_get_gossmap(struct gossmap_manage *gm)
|
struct gossmap *gossmap_manage_get_gossmap(struct gossmap_manage *gm)
|
||||||
{
|
{
|
||||||
|
u64 map_used, map_size, written_len;
|
||||||
|
|
||||||
gossmap_refresh(gm->raw_gossmap);
|
gossmap_refresh(gm->raw_gossmap);
|
||||||
|
|
||||||
|
/* Sanity check that we see everything we wrote. */
|
||||||
|
map_used = gossmap_lengths(gm->raw_gossmap, &map_size);
|
||||||
|
written_len = gossip_store_len_written(gm->gs);
|
||||||
|
|
||||||
|
if (map_size != written_len) {
|
||||||
|
status_broken("gossmap size %"PRIu64" != written size %"PRIu64,
|
||||||
|
map_size, written_len);
|
||||||
|
/* Push harder! */
|
||||||
|
gossip_store_fsync(gm->gs);
|
||||||
|
gossmap_refresh(gm->raw_gossmap);
|
||||||
|
|
||||||
|
/* Sanity check that we see everything we wrote. */
|
||||||
|
map_used = gossmap_lengths(gm->raw_gossmap, &map_size);
|
||||||
|
written_len = gossip_store_len_written(gm->gs);
|
||||||
|
if (map_used != written_len || map_size != map_used)
|
||||||
|
status_failed(STATUS_FAIL_INTERNAL_ERROR,
|
||||||
|
"gossmap read inconsistent even after sync"
|
||||||
|
" used=%"PRIu64" seen=%"PRIu64" written=%"PRIu64,
|
||||||
|
map_used, map_size, written_len);
|
||||||
|
} else if (map_size != map_used) {
|
||||||
|
const u8 *remainder_fd, *remainder_mmap;
|
||||||
|
|
||||||
|
/* Sigh. Did gossmap see something different (via mmap)
|
||||||
|
* from what we see via read? It's possible it's caught up
|
||||||
|
* now, but just in case, log BOTH */
|
||||||
|
remainder_mmap = gossmap_fetch_tail(tmpctx, gm->raw_gossmap);
|
||||||
|
remainder_fd = fetch_tail_fd(tmpctx,
|
||||||
|
gossmap_fd(gm->raw_gossmap),
|
||||||
|
map_used, map_size);
|
||||||
|
status_failed(STATUS_FAIL_INTERNAL_ERROR,
|
||||||
|
"Gossmap failed to process entire gossip_store: "
|
||||||
|
"at %"PRIu64" of %"PRIu64" remaining_mmap=%s remaining_fd=%s",
|
||||||
|
map_used, map_size,
|
||||||
|
tal_hex(tmpctx, remainder_mmap),
|
||||||
|
tal_hex(tmpctx, remainder_fd));
|
||||||
|
}
|
||||||
|
|
||||||
return gm->raw_gossmap;
|
return gm->raw_gossmap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue