core-lightning/common/memleak.c

376 lines
9.3 KiB
C
Raw Normal View History

/* Hello friends!
*
* You found me! This is the inner, deep magic. Right here.
*
* To help development, we have a complete set of routines to scan for
* tal-memory leaks (valgrind will detect non-tal memory leaks at exit,
* but tal hierarchies tends to get freed at exit, so we need something
* more sophisticated).
*
* Memleak detection is only active if DEVELOPER is set. It does several
* things:
* 1. attaches a backtrace list to every allocation, so we can tell
* where it came from.
* 2. when memleak_find_allocations() is called, walks the entire tal
* tree and saves a pointer to all the objects it finds, with
* a few internal exceptions (including everything under 'tmpctx').
* It then calls registered helpers, which can remove opaque things
* and handles notleak() objects.
* 3. provides a routine to access any remaining pointers in the
* table: these are the leaks.
*/
#include "config.h"
#include <backtrace.h>
#include <ccan/cast/cast.h>
#include <ccan/crypto/siphash24/siphash24.h>
#include <ccan/htable/htable.h>
#include <ccan/intmap/intmap.h>
#include <ccan/tal/str/str.h>
#include <common/memleak.h>
#include <common/utils.h>
struct backtrace_state *backtrace_state;
#if DEVELOPER
static bool memleak_track;
struct memleak_helper {
void (*cb)(struct htable *memtable, const tal_t *);
};
void *notleak_(void *ptr, bool plus_children)
{
const char *name;
/* If we're not tracking, don't do anything. */
if (!memleak_track)
return cast_const(void *, ptr);
/* We use special tal names to mark notleak */
name = tal_name(ptr);
if (!name)
name = "";
if (plus_children)
name = tal_fmt(tmpctx, "%s **NOTLEAK_IGNORE_CHILDREN**",
name);
else
name = tal_fmt(tmpctx, "%s **NOTLEAK**", name);
tal_set_name(ptr, name);
return cast_const(void *, ptr);
}
static size_t hash_ptr(const void *elem, void *unused UNNEEDED)
{
static struct siphash_seed seed;
return siphash24(&seed, &elem, sizeof(elem));
}
static bool pointer_referenced(struct htable *memtable, const void *p)
{
return htable_del(memtable, hash_ptr(p, NULL), p);
}
static void children_into_htable(const void *exclude1, const void *exclude2,
struct htable *memtable, const tal_t *p)
{
const tal_t *i;
for (i = tal_first(p); i; i = tal_next(i)) {
const char *name = tal_name(i);
if (i == exclude1 || i == exclude2)
continue;
if (name) {
/* Don't add backtrace objects. */
if (streq(name, "backtrace"))
continue;
/* Don't add our own memleak_helpers */
if (strends(name, "struct memleak_helper"))
continue;
/* Don't add tal_link objects */
if (strends(name, "struct link")
|| strends(name, "struct linkable"))
continue;
/* ccan/io allocates pollfd array, always array. */
if (strends(name, "struct pollfd[]") && !tal_parent(i))
continue;
if (strends(name, "struct io_plan *[]") && !tal_parent(i))
continue;
/* Don't add tmpctx. */
if (streq(name, "tmpctx"))
continue;
}
htable_add(memtable, hash_ptr(i, NULL), i);
children_into_htable(exclude1, exclude2, memtable, i);
}
}
static void scan_for_pointers(struct htable *memtable,
const tal_t *p, size_t bytelen)
{
size_t i, n;
/* Search for (aligned) pointers. */
n = bytelen / sizeof(void *);
for (i = 0; i < n; i++) {
void *ptr;
memcpy(&ptr, (char *)p + i * sizeof(void *), sizeof(ptr));
if (pointer_referenced(memtable, ptr))
scan_for_pointers(memtable, ptr, tal_bytelen(ptr));
}
}
void memleak_remove_region(struct htable *memtable,
const void *ptr, size_t bytelen)
{
pointer_referenced(memtable, ptr);
scan_for_pointers(memtable, ptr, bytelen);
}
static void remove_with_children(struct htable *memtable, const tal_t *p)
{
const tal_t *i;
pointer_referenced(memtable, p);
for (i = tal_first(p); i; i = tal_next(i))
remove_with_children(memtable, i);
}
/* memleak can't see inside hash tables, so do them manually */
void memleak_remove_htable(struct htable *memtable, const struct htable *ht)
{
struct htable_iter i;
const void *p;
for (p = htable_first(ht, &i); p; p = htable_next(ht, &i))
memleak_remove_region(memtable, p, tal_bytelen(p));
}
/* FIXME: If uintmap used tal, this wouldn't be necessary! */
void memleak_remove_intmap_(struct htable *memtable, const struct intmap *m)
{
void *p;
intmap_index_t i;
for (p = intmap_first_(m, &i); p; p = intmap_after_(m, &i))
memleak_remove_region(memtable, p, tal_bytelen(p));
}
static bool handle_strmap(const char *member, void *p, void *memtable_)
{
struct htable *memtable = memtable_;
/* membername may *not* be a tal ptr, but it can be! */
pointer_referenced(memtable, member);
memleak_remove_region(memtable, p, tal_bytelen(p));
/* Keep going */
return true;
}
/* FIXME: If strmap used tal, this wouldn't be necessary! */
void memleak_remove_strmap_(struct htable *memtable, const struct strmap *m)
{
strmap_iterate_(m, handle_strmap, memtable);
}
static bool ptr_match(const void *candidate, void *ptr)
{
return candidate == ptr;
}
const void *memleak_get(struct htable *memtable, const uintptr_t **backtrace)
{
struct htable_iter it;
const tal_t *i, *p;
/* Remove memtable itself */
pointer_referenced(memtable, memtable);
i = htable_first(memtable, &it);
if (!i)
return NULL;
/* Delete from table (avoids parenting loops) */
htable_delval(memtable, &it);
/* Find ancestor, which is probably source of leak. */
for (p = tal_parent(i);
htable_get(memtable, hash_ptr(p, NULL), ptr_match, p);
i = p, p = tal_parent(i));
/* Delete all children */
remove_with_children(memtable, i);
/* Does it have a child called "backtrace"? */
for (*backtrace = tal_first(i);
*backtrace;
*backtrace = tal_next(*backtrace)) {
if (tal_name(*backtrace)
&& streq(tal_name(*backtrace), "backtrace"))
break;
}
return i;
}
static int append_bt(void *data, uintptr_t pc)
{
uintptr_t *bt = data;
if (bt[0] == 32)
return 1;
bt[bt[0]++] = pc;
return 0;
}
static void add_backtrace(tal_t *parent UNUSED, enum tal_notify_type type UNNEEDED,
void *child)
{
uintptr_t *bt = tal_arrz_label(child, uintptr_t, 32, "backtrace");
/* First serves as counter. */
bt[0] = 1;
backtrace_simple(backtrace_state, 2, append_bt, NULL, bt);
tal_add_notifier(child, TAL_NOTIFY_ADD_CHILD, add_backtrace);
}
static void add_backtrace_notifiers(const tal_t *root)
{
tal_add_notifier(root, TAL_NOTIFY_ADD_CHILD, add_backtrace);
for (tal_t *i = tal_first(root); i; i = tal_next(i))
add_backtrace_notifiers(i);
}
void memleak_add_helper_(const tal_t *p,
void (*cb)(struct htable *memtable, const tal_t *))
{
struct memleak_helper *mh = tal(p, struct memleak_helper);
mh->cb = cb;
}
/* Handle allocations marked with helpers or notleak() */
static void call_memleak_helpers(struct htable *memtable, const tal_t *p)
{
const tal_t *i;
for (i = tal_first(p); i; i = tal_next(i)) {
const char *name = tal_name(i);
memleak: make sure we catch children which are also "notleak". By not recursing into "notleak" children, we could miss other "notleak" pointers: ``` 2022-03-15T05:25:33.9759500Z lightningd-1: 2022-03-15T05:00:15.998Z **BROKEN** connectd: MEMLEAK: 0x55b29c3d0cc8 2022-03-15T05:25:33.9759901Z lightningd-1: 2022-03-15T05:00:16.003Z **BROKEN** connectd: label=common/timeout.c:22:struct oneshot **NOTLEAK** 2022-03-15T05:25:33.9760195Z lightningd-1: 2022-03-15T05:00:16.003Z **BROKEN** connectd: backtrace: 2022-03-15T05:25:33.9760555Z lightningd-1: 2022-03-15T05:00:16.003Z **BROKEN** connectd: ccan/ccan/tal/tal.c:442 (tal_alloc_) 2022-03-15T05:25:33.9760905Z lightningd-1: 2022-03-15T05:00:16.003Z **BROKEN** connectd: common/timeout.c:22 (new_reltimer_) 2022-03-15T05:25:33.9761272Z lightningd-1: 2022-03-15T05:00:16.003Z **BROKEN** connectd: connectd/multiplex.c:558 (set_closing_timer) 2022-03-15T05:25:33.9761647Z lightningd-1: 2022-03-15T05:00:16.003Z **BROKEN** connectd: connectd/multiplex.c:586 (write_to_peer) 2022-03-15T05:25:33.9761986Z lightningd-1: 2022-03-15T05:00:16.003Z **BROKEN** connectd: ccan/ccan/io/io.c:59 (next_plan) 2022-03-15T05:25:33.9762335Z lightningd-1: 2022-03-15T05:00:16.004Z **BROKEN** connectd: ccan/ccan/io/io.c:435 (io_do_always) 2022-03-15T05:25:33.9762692Z lightningd-1: 2022-03-15T05:00:16.004Z **BROKEN** connectd: ccan/ccan/io/poll.c:304 (handle_always) 2022-03-15T05:25:33.9763035Z lightningd-1: 2022-03-15T05:00:16.004Z **BROKEN** connectd: ccan/ccan/io/poll.c:385 (io_loop) 2022-03-15T05:25:33.9763376Z lightningd-1: 2022-03-15T05:00:16.004Z **BROKEN** connectd: connectd/connectd.c:2131 (main) 2022-03-15T05:25:33.9763645Z lightningd-1: 2022-03-15T05:00:16.004Z **BROKEN** connectd: parents: 2022-03-15T05:25:33.9764020Z lightningd-1: 2022-03-15T05:00:16.004Z **BROKEN** connectd: ccan/ccan/io/io.c:91:struct io_conn **NOTLEAK** 2022-03-15T05:25:33.9764471Z lightningd-1: 2022-03-15T05:00:16.004Z **BROKEN** connectd: connectd/connectd.c:2104:struct daemon ``` Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2022-03-16 00:09:19 +01:00
if (name) {
if (strends(name, "struct memleak_helper")) {
const struct memleak_helper *mh = i;
mh->cb(memtable, p);
} else if (strends(name, " **NOTLEAK**")
|| strends(name, "_notleak")) {
pointer_referenced(memtable, i);
memleak_remove_region(memtable, i,
tal_bytelen(i));
} else if (strends(name,
" **NOTLEAK_IGNORE_CHILDREN**")) {
remove_with_children(memtable, i);
memleak_remove_region(memtable, i,
tal_bytelen(i));
}
}
memleak: make sure we catch children which are also "notleak". By not recursing into "notleak" children, we could miss other "notleak" pointers: ``` 2022-03-15T05:25:33.9759500Z lightningd-1: 2022-03-15T05:00:15.998Z **BROKEN** connectd: MEMLEAK: 0x55b29c3d0cc8 2022-03-15T05:25:33.9759901Z lightningd-1: 2022-03-15T05:00:16.003Z **BROKEN** connectd: label=common/timeout.c:22:struct oneshot **NOTLEAK** 2022-03-15T05:25:33.9760195Z lightningd-1: 2022-03-15T05:00:16.003Z **BROKEN** connectd: backtrace: 2022-03-15T05:25:33.9760555Z lightningd-1: 2022-03-15T05:00:16.003Z **BROKEN** connectd: ccan/ccan/tal/tal.c:442 (tal_alloc_) 2022-03-15T05:25:33.9760905Z lightningd-1: 2022-03-15T05:00:16.003Z **BROKEN** connectd: common/timeout.c:22 (new_reltimer_) 2022-03-15T05:25:33.9761272Z lightningd-1: 2022-03-15T05:00:16.003Z **BROKEN** connectd: connectd/multiplex.c:558 (set_closing_timer) 2022-03-15T05:25:33.9761647Z lightningd-1: 2022-03-15T05:00:16.003Z **BROKEN** connectd: connectd/multiplex.c:586 (write_to_peer) 2022-03-15T05:25:33.9761986Z lightningd-1: 2022-03-15T05:00:16.003Z **BROKEN** connectd: ccan/ccan/io/io.c:59 (next_plan) 2022-03-15T05:25:33.9762335Z lightningd-1: 2022-03-15T05:00:16.004Z **BROKEN** connectd: ccan/ccan/io/io.c:435 (io_do_always) 2022-03-15T05:25:33.9762692Z lightningd-1: 2022-03-15T05:00:16.004Z **BROKEN** connectd: ccan/ccan/io/poll.c:304 (handle_always) 2022-03-15T05:25:33.9763035Z lightningd-1: 2022-03-15T05:00:16.004Z **BROKEN** connectd: ccan/ccan/io/poll.c:385 (io_loop) 2022-03-15T05:25:33.9763376Z lightningd-1: 2022-03-15T05:00:16.004Z **BROKEN** connectd: connectd/connectd.c:2131 (main) 2022-03-15T05:25:33.9763645Z lightningd-1: 2022-03-15T05:00:16.004Z **BROKEN** connectd: parents: 2022-03-15T05:25:33.9764020Z lightningd-1: 2022-03-15T05:00:16.004Z **BROKEN** connectd: ccan/ccan/io/io.c:91:struct io_conn **NOTLEAK** 2022-03-15T05:25:33.9764471Z lightningd-1: 2022-03-15T05:00:16.004Z **BROKEN** connectd: connectd/connectd.c:2104:struct daemon ``` Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2022-03-16 00:09:19 +01:00
/* Recurse down looking for "notleak" children */
call_memleak_helpers(memtable, i);
}
}
struct htable *memleak_find_allocations(const tal_t *ctx,
const void *exclude1,
const void *exclude2)
{
struct htable *memtable = tal(ctx, struct htable);
htable_init(memtable, hash_ptr, NULL);
pytest: don't test for memleaks under valgrind. The next patch perturbed things enough that we suddenly started getting (with --track-origins=yes): Valgrind error file: valgrind-errors.120470 ==120470== Use of uninitialised value of size 8 ==120470== at 0x14EBD5: htable_val (htable.c:150) ==120470== by 0x14EC3C: htable_firstval_ (htable.c:165) ==120470== by 0x14F583: htable_del_ (htable.c:349) ==120470== by 0x11825D: pointer_referenced (memleak.c:65) ==120470== by 0x118485: scan_for_pointers (memleak.c:121) ==120470== by 0x118500: memleak_remove_region (memleak.c:130) ==120470== by 0x118A30: call_memleak_helpers (memleak.c:257) ==120470== by 0x118A8B: call_memleak_helpers (memleak.c:262) ==120470== by 0x118A8B: call_memleak_helpers (memleak.c:262) ==120470== by 0x118B25: memleak_find_allocations (memleak.c:278) ==120470== by 0x10EB12: closing_dev_memleak (closingd.c:584) ==120470== by 0x10F3E2: main (closingd.c:783) ==120470== Uninitialised value was created by a heap allocation ==120470== at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so) ==120470== by 0x1604E8: allocate (tal.c:250) ==120470== by 0x160AA9: tal_alloc_ (tal.c:428) ==120470== by 0x119BE0: new_per_peer_state (per_peer_state.c:24) ==120470== by 0x11A101: fromwire_per_peer_state (per_peer_state.c:95) ==120470== by 0x10FB7C: fromwire_closingd_init (closingd_wiregen.c:103) ==120470== by 0x10ED15: main (closingd.c:626) ==120470== This is because there is uninitialized padding at the end of struct peer_state. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2020-09-23 03:37:04 +02:00
if (memleak_track) {
/* First, add all pointers off NULL to table. */
children_into_htable(exclude1, exclude2, memtable, NULL);
pytest: don't test for memleaks under valgrind. The next patch perturbed things enough that we suddenly started getting (with --track-origins=yes): Valgrind error file: valgrind-errors.120470 ==120470== Use of uninitialised value of size 8 ==120470== at 0x14EBD5: htable_val (htable.c:150) ==120470== by 0x14EC3C: htable_firstval_ (htable.c:165) ==120470== by 0x14F583: htable_del_ (htable.c:349) ==120470== by 0x11825D: pointer_referenced (memleak.c:65) ==120470== by 0x118485: scan_for_pointers (memleak.c:121) ==120470== by 0x118500: memleak_remove_region (memleak.c:130) ==120470== by 0x118A30: call_memleak_helpers (memleak.c:257) ==120470== by 0x118A8B: call_memleak_helpers (memleak.c:262) ==120470== by 0x118A8B: call_memleak_helpers (memleak.c:262) ==120470== by 0x118B25: memleak_find_allocations (memleak.c:278) ==120470== by 0x10EB12: closing_dev_memleak (closingd.c:584) ==120470== by 0x10F3E2: main (closingd.c:783) ==120470== Uninitialised value was created by a heap allocation ==120470== at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so) ==120470== by 0x1604E8: allocate (tal.c:250) ==120470== by 0x160AA9: tal_alloc_ (tal.c:428) ==120470== by 0x119BE0: new_per_peer_state (per_peer_state.c:24) ==120470== by 0x11A101: fromwire_per_peer_state (per_peer_state.c:95) ==120470== by 0x10FB7C: fromwire_closingd_init (closingd_wiregen.c:103) ==120470== by 0x10ED15: main (closingd.c:626) ==120470== This is because there is uninitialized padding at the end of struct peer_state. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2020-09-23 03:37:04 +02:00
/* Iterate and call helpers to eliminate hard-to-get references. */
call_memleak_helpers(memtable, NULL);
}
tal_add_destructor(memtable, htable_clear);
return memtable;
}
void memleak_init(void)
{
memleak_track = true;
if (backtrace_state)
add_backtrace_notifiers(NULL);
}
static int dump_syminfo(void *data, uintptr_t pc UNUSED,
const char *filename, int lineno,
const char *function)
{
void PRINTF_FMT(1,2) (*print)(const char *fmt, ...) = data;
/* This can happen in backtraces. */
if (!filename || !function)
return 0;
print(" %s:%u (%s)", filename, lineno, function);
return 0;
}
static void dump_leak_backtrace(const uintptr_t *bt,
void PRINTF_FMT(1,2)
(*print)(const char *fmt, ...))
{
if (!bt)
return;
/* First one serves as counter. */
print(" backtrace:");
for (size_t i = 1; i < bt[0]; i++) {
backtrace_pcinfo(backtrace_state,
bt[i], dump_syminfo,
NULL, print);
}
}
bool dump_memleak(struct htable *memtable,
void PRINTF_FMT(1,2) (*print)(const char *fmt, ...))
{
const tal_t *i;
const uintptr_t *backtrace;
bool found_leak = false;
while ((i = memleak_get(memtable, &backtrace)) != NULL) {
print("MEMLEAK: %p", i);
if (tal_name(i))
print(" label=%s", tal_name(i));
dump_leak_backtrace(backtrace, print);
print(" parents:");
for (tal_t *p = tal_parent(i); p; p = tal_parent(p)) {
print(" %s", tal_name(p));
p = tal_parent(p);
}
found_leak = true;
}
return found_leak;
}
#else /* !DEVELOPER */
void *notleak_(void *ptr, bool plus_children UNNEEDED)
{
return ptr;
}
#endif /* !DEVELOPER */