lightningd: enable io logging on subdaemons iff we're going to print it.

This simplifies our tests, too, since we don't need a magic option to
enable io logging in subdaemons.

Note that test_bad_onion still takes too long, due to a separate minor
bug, so that's marked and left dev-only for now.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
Rusty Russell 2019-11-18 10:57:17 +10:30
parent 0607f998d1
commit 4fc498f901
7 changed files with 40 additions and 52 deletions

View File

@ -18,7 +18,7 @@
static int status_fd = -1;
static struct daemon_conn *status_conn;
volatile bool logging_io = false;
static bool was_logging_io = false;
static bool was_logging_io;
/* If we're more than this many msgs deep, don't add debug messages. */
#define TRACE_QUEUE_LIMIT 20
@ -33,6 +33,9 @@ static void setup_logging_sighandler(void)
{
struct sigaction act;
/* Could have been set to true by --log-io arg. */
was_logging_io = logging_io;
memset(&act, 0, sizeof(act));
act.sa_handler = got_sigusr1;
act.sa_flags = SA_RESTART;
@ -55,10 +58,6 @@ void status_setup_sync(int fd)
assert(!status_conn);
status_fd = fd;
setup_logging_sighandler();
#if DEVELOPER
logging_io = (getenv("LIGHTNINGD_DEV_LOG_IO") != NULL);
report_logging_io("LIGHTNINGD_DEV_LOG_IO");
#endif
}
static void destroy_daemon_conn(struct daemon_conn *dc UNUSED)
@ -75,10 +74,6 @@ void status_setup_async(struct daemon_conn *master)
tal_add_destructor(master, destroy_daemon_conn);
setup_logging_sighandler();
#if DEVELOPER
logging_io = (getenv("LIGHTNINGD_DEV_LOG_IO") != NULL);
report_logging_io("LIGHTNINGD_DEV_LOG_IO");
#endif
}
void status_send(const u8 *msg TAKES)

View File

@ -542,7 +542,7 @@ class LightningD(TailableProc):
class LightningNode(object):
def __init__(self, node_id, lightning_dir, bitcoind, executor, may_fail=False,
may_reconnect=False, allow_broken_log=False,
allow_bad_gossip=False, db=None, port=None, disconnect=None, random_hsm=None, log_all_io=None, options=None, **kwargs):
allow_bad_gossip=False, db=None, port=None, disconnect=None, random_hsm=None, options=None, **kwargs):
self.bitcoin = bitcoind
self.executor = executor
self.may_fail = may_fail
@ -564,10 +564,6 @@ class LightningNode(object):
with open(self.daemon.disconnect_file, "w") as f:
f.write("\n".join(disconnect))
self.daemon.opts["dev-disconnect"] = "dev_disconnect"
if log_all_io:
assert DEVELOPER
self.daemon.env["LIGHTNINGD_DEV_LOG_IO"] = "1"
self.daemon.opts["log-level"] = "io"
if DEVELOPER:
self.daemon.opts["dev-fail-on-subdaemon-fail"] = None
self.daemon.env["LIGHTNINGD_DEV_MEMLEAK"] = "1"
@ -931,7 +927,6 @@ class NodeFactory(object):
'allow_broken_log',
'may_reconnect',
'random_hsm',
'log_all_io',
'feerates',
'wait_for_bitcoind_sync',
'allow_bad_gossip'

View File

@ -137,7 +137,9 @@ static void close_taken_fds(va_list *ap)
/* We use sockets, not pipes, because fds are bidir. */
static int subd(const char *dir, const char *name,
const char *debug_subdaemon,
int *msgfd, int dev_disconnect_fd, va_list *ap)
int *msgfd, int dev_disconnect_fd,
bool io_logging,
va_list *ap)
{
int childmsg[2], execfail[2];
pid_t childpid;
@ -161,7 +163,7 @@ static int subd(const char *dir, const char *name,
int fdnum = 3, i, stdin_is_now = STDIN_FILENO;
long max;
size_t num_args;
char *args[] = { NULL, NULL, NULL, NULL };
char *args[] = { NULL, NULL, NULL, NULL, NULL };
close(childmsg[0]);
close(execfail[0]);
@ -200,6 +202,8 @@ static int subd(const char *dir, const char *name,
num_args = 0;
args[num_args++] = path_join(NULL, dir, name);
if (io_logging)
args[num_args++] = "--log-io";
#if DEVELOPER
if (dev_disconnect_fd != -1)
args[num_args++] = tal_fmt(NULL, "--dev-disconnect=%i", dev_disconnect_fd);
@ -622,33 +626,42 @@ static struct subd *new_subd(struct lightningd *ld,
int msg_fd;
const char *debug_subd = NULL;
int disconnect_fd = -1;
const char *shortname;
assert(name != NULL);
/* This part of the name is a bit redundant for logging */
if (strstarts(name, "lightning_"))
shortname = name + strlen("lightning_");
else
shortname = name;
if (base_log) {
sd->log = new_log(sd, ld->log_book, node_id,
"%s-%s", shortname, log_prefix(base_log));
} else {
sd->log = new_log(sd, ld->log_book, node_id, "%s", shortname);
}
#if DEVELOPER
debug_subd = ld->dev_debug_subprocess;
disconnect_fd = ld->dev_disconnect_fd;
#endif /* DEVELOPER */
sd->pid = subd(ld->daemon_dir, name, debug_subd,
&msg_fd, disconnect_fd, ap);
&msg_fd, disconnect_fd,
/* We only turn on subdaemon io logging if we're going
* to print it: too stressful otherwise! */
log_print_level(sd->log) < LOG_DBG,
ap);
if (sd->pid == (pid_t)-1) {
log_unusual(ld->log, "subd %s failed: %s",
name, strerror(errno));
return tal_free(sd);
}
sd->ld = ld;
/* This part of the name is a bit redundant for logging */
if (strstarts(name, "lightning_"))
name += strlen("lightning_");
if (base_log) {
sd->log = new_log(sd, ld->log_book, node_id,
"%s-%s", name, log_prefix(base_log));
} else {
sd->log = new_log(sd, ld->log_book, node_id, "%s", name);
}
sd->name = name;
sd->name = shortname;
sd->must_not_exit = false;
sd->talks_to_peer = talks_to_peer;
sd->msgname = msgname;

View File

@ -124,6 +124,9 @@ void log_backtrace_print(const char *fmt UNNEEDED, ...)
/* Generated stub for log_prefix */
const char *log_prefix(const struct log *log UNNEEDED)
{ fprintf(stderr, "log_prefix called!\n"); abort(); }
/* Generated stub for log_print_level */
enum log_level log_print_level(struct log *log UNNEEDED)
{ fprintf(stderr, "log_print_level called!\n"); abort(); }
/* Generated stub for log_status_msg */
bool log_status_msg(struct log *log UNNEEDED,
const struct node_id *node_id UNNEEDED,

View File

@ -1811,9 +1811,9 @@ def test_funder_simple_reconnect(node_factory, bitcoind):
@unittest.skipIf(os.getenv('TEST_DB_PROVIDER', 'sqlite3') != 'sqlite3', "sqlite3-specific DB rollback")
@unittest.skipIf(not DEVELOPER, "needs LIGHTNINGD_DEV_LOG_IO")
def test_dataloss_protection(node_factory, bitcoind):
l1 = node_factory.get_node(may_reconnect=True, log_all_io=True,
l1 = node_factory.get_node(may_reconnect=True, options={'log-level': 'io'},
feerates=(7500, 7500, 7500))
l2 = node_factory.get_node(may_reconnect=True, log_all_io=True,
l2 = node_factory.get_node(may_reconnect=True, options={'log-level': 'io'},
feerates=(7500, 7500, 7500), allow_broken_log=True)
lf = expected_features()

View File

@ -123,9 +123,6 @@ def test_announce_address(node_factory, bitcoind):
scid = l1.fund_channel(l2, 10**6)
bitcoind.generate_block(5)
# Activate IO logging for l1.
subprocess.run(['kill', '-USR1', l1.subd_pid('channeld')])
l1.wait_channel_active(scid)
l2.wait_channel_active(scid)
@ -483,12 +480,6 @@ def test_gossip_no_empty_announcements(node_factory, bitcoind):
l2.rpc.connect(l3.info['id'], 'localhost', l3.port)
l3.rpc.connect(l4.info['id'], 'localhost', l4.port)
# Turn on IO logging for openingd (make sure it's ready!)
l1.daemon.wait_for_log('openingd-.*: Handed peer, entering loop')
subprocess.run(['kill', '-USR1', l1.subd_pid('openingd')])
l2.daemon.wait_for_log(r'{}-.*openingd-chan #.: Handed peer, entering loop'.format(l3.info['id']))
subprocess.run(['kill', '-USR1', l2.subd_pid('openingd', l3.info['id'])])
# Make an announced-but-not-updated channel.
l3.fund_channel(l4, 10**5)
bitcoind.generate_block(5)
@ -1021,7 +1012,7 @@ def test_gossip_store_load_amount_truncated(node_factory):
def test_node_reannounce(node_factory, bitcoind):
"Test that we reannounce a node when parameters change"
l1, l2 = node_factory.line_graph(2, opts={'may_reconnect': True,
'log_all_io': True})
'log-level': 'io'})
bitcoind.generate_block(5)
# Wait for node_announcement for l1.
@ -1100,9 +1091,6 @@ def test_gossip_notices_close(node_factory, bitcoind):
allow_bad_gossip=True)
l2, l3 = node_factory.line_graph(2)
l1.rpc.connect(l2.info['id'], 'localhost', l2.port)
# FIXME: sending SIGUSR1 immediately may kill it before handler installed.
l1.daemon.wait_for_log('Handed peer, entering loop')
subprocess.run(['kill', '-USR1', l1.subd_pid('openingd')])
bitcoind.generate_block(5)
@ -1527,7 +1515,7 @@ def test_gossip_no_backtalk(node_factory):
l1, l2 = node_factory.line_graph(2, wait_for_announce=True)
# This connects, gets gossip, but should *not* play it back.
l3 = node_factory.get_node(log_all_io=True)
l3 = node_factory.get_node(options={'log-level': 'io'})
l3.rpc.connect(l2.info['id'], 'localhost', l2.port)
# Will get channel_announce, then two channel_update and two node_announcement

View File

@ -658,9 +658,6 @@ def test_io_logging(node_factory, executor):
pid2 = l2.subd_pid('channeld')
l2.daemon.wait_for_log(' to CHANNELD_NORMAL')
# Send it sigusr1: should turn on logging.
subprocess.run(['kill', '-USR1', pid1])
fut = executor.submit(l1.pay, l2, 200000000)
# WIRE_UPDATE_ADD_HTLC = 128 = 0x0080
@ -1194,9 +1191,6 @@ def test_htlc_send_timeout(node_factory, bitcoind):
l1.fund_channel(l2, 10**6)
chanid2 = l2.fund_channel(l3, 10**6)
subprocess.run(['kill', '-USR1', l1.subd_pid('channeld')])
subprocess.run(['kill', '-USR1', l2.subd_pid('channeld')])
# Make sure channels get announced.
bitcoind.generate_block(5)
@ -1453,11 +1447,11 @@ def test_check_command(node_factory):
sock.close()
@unittest.skipIf(not DEVELOPER, "need log_all_io")
@unittest.skipIf(not DEVELOPER, "FIXME: without DEVELOPER=1 we timeout")
def test_bad_onion(node_factory, bitcoind):
"""Test that we get a reasonable error from sendpay when an onion is bad"""
l1, l2, l3, l4 = node_factory.line_graph(4, wait_for_announce=True,
opts={'log_all_io': True})
opts={'log-level': 'io'})
h = l4.rpc.invoice(123000, 'test_bad_onion', 'description')['payment_hash']
route = l1.rpc.getroute(l4.info['id'], 123000, 1)['route']