mirror of
https://github.com/ElementsProject/lightning.git
synced 2025-01-01 03:24:41 +01:00
5c949e3116
We get some memleak reports because ld owns the subd, but once the peer/channel is freed, there's no reference for the brief time until the subd exits. This happens for both opening and closingd. For openingd, the peer owns it, for others (including dualopend) the channel owns it. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
1010 lines
24 KiB
C
1010 lines
24 KiB
C
#include "config.h"
|
|
#include <ccan/closefrom/closefrom.h>
|
|
#include <ccan/err/err.h>
|
|
#include <ccan/io/fdpass/fdpass.h>
|
|
#include <ccan/mem/mem.h>
|
|
#include <ccan/noerr/noerr.h>
|
|
#include <ccan/tal/path/path.h>
|
|
#include <ccan/tal/str/str.h>
|
|
#include <common/memleak.h>
|
|
#include <common/peer_status_wiregen.h>
|
|
#include <common/status_wiregen.h>
|
|
#include <common/version.h>
|
|
#include <db/exec.h>
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <lightningd/lightningd.h>
|
|
#include <lightningd/log_status.h>
|
|
#include <lightningd/peer_fd.h>
|
|
#include <lightningd/subd.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/wait.h>
|
|
#include <wire/wire_io.h>
|
|
|
|
void maybe_subd_child(struct lightningd *ld, int childpid, int wstatus)
|
|
{
|
|
struct subd *sd;
|
|
|
|
list_for_each(&ld->subds, sd, list) {
|
|
if (sd->pid == childpid)
|
|
sd->wstatus = tal_dup(sd, int, &wstatus);
|
|
}
|
|
}
|
|
|
|
/* Carefully move fd *@from to @to: on success *from set to to */
|
|
static bool move_fd(int *from, int to)
|
|
{
|
|
assert(*from >= 0);
|
|
|
|
/* dup2 with same arguments may be a no-op, but
|
|
* the later close would make the fd invalid.
|
|
* Handle this edge case.
|
|
*/
|
|
if (*from == to)
|
|
return true;
|
|
|
|
if (dup2(*from, to) == -1)
|
|
return false;
|
|
|
|
/* dup2 does not duplicate flags, copy it here.
|
|
* This should be benign; the only POSIX-defined
|
|
* flag is FD_CLOEXEC, and we only use it rarely.
|
|
*/
|
|
if (fcntl(to, F_SETFD, fcntl(*from, F_GETFD)) < 0)
|
|
return false;
|
|
|
|
close(*from);
|
|
*from = to;
|
|
return true;
|
|
}
|
|
|
|
/* Returns index of fds which is == this fd, or -1 */
|
|
static int fd_used(int **fds, size_t num_fds, int fd)
|
|
{
|
|
for (size_t i = 0; i < num_fds; i++) {
|
|
if (*fds[i] == fd)
|
|
return i;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
/* Move an series of fd pointers into 0, 1, ... */
|
|
static bool shuffle_fds(int **fds, size_t num_fds)
|
|
{
|
|
/* If we need to move an fd out the way, this is a good place to start
|
|
* looking */
|
|
size_t next_free_fd = num_fds;
|
|
for (size_t i = 0; i < num_fds; i++) {
|
|
int in_the_way;
|
|
|
|
/* Already in the right place? Great! */
|
|
if (*fds[i] == i)
|
|
continue;
|
|
/* Is something we care about in the way? */
|
|
in_the_way = fd_used(fds + i, num_fds - i, i);
|
|
if (in_the_way != -1) {
|
|
/* Find a high-numbered unused fd. */
|
|
while (fd_used(fds + i, num_fds - i, next_free_fd) != -1)
|
|
next_free_fd++;
|
|
/* Trick: in_the_way is offset by i! */
|
|
if (!move_fd(fds[i + in_the_way], next_free_fd))
|
|
return false;
|
|
next_free_fd++;
|
|
}
|
|
|
|
/* Now there should be nothing in the way. */
|
|
assert(fd_used(fds, num_fds, i) == -1);
|
|
if (!move_fd(fds[i], i))
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
struct subd_req {
|
|
struct list_node list;
|
|
|
|
/* Callback for a reply. */
|
|
int type;
|
|
void (*replycb)(struct subd *, const u8 *, const int *, void *);
|
|
void *replycb_data;
|
|
|
|
size_t num_reply_fds;
|
|
/* If non-NULL, this is here to disable replycb */
|
|
void *disabler;
|
|
};
|
|
|
|
static void destroy_subd_req(struct subd_req *sr)
|
|
{
|
|
list_del(&sr->list);
|
|
/* Don't disable once we're freed! */
|
|
if (sr->disabler)
|
|
tal_free(sr->disabler);
|
|
}
|
|
|
|
/* Called when the callback is disabled because caller was freed. */
|
|
static void ignore_reply(struct subd *sd, const u8 *msg UNUSED, const int *fds,
|
|
void *arg UNUSED)
|
|
{
|
|
size_t i;
|
|
|
|
log_debug(sd->log, "IGNORING REPLY");
|
|
for (i = 0; i < tal_count(fds); i++)
|
|
close(fds[i]);
|
|
}
|
|
|
|
static void disable_cb(void *disabler UNUSED, struct subd_req *sr)
|
|
{
|
|
sr->replycb = ignore_reply;
|
|
sr->disabler = NULL;
|
|
}
|
|
|
|
static struct subd_req *add_req(const tal_t *ctx,
|
|
struct subd *sd, int type, size_t num_fds_in,
|
|
void (*replycb)(struct subd *, const u8 *, const int *,
|
|
void *),
|
|
void *replycb_data)
|
|
{
|
|
struct subd_req *sr = tal(sd, struct subd_req);
|
|
|
|
sr->type = type;
|
|
sr->replycb = replycb;
|
|
sr->replycb_data = replycb_data;
|
|
sr->num_reply_fds = num_fds_in;
|
|
|
|
/* We don't allocate sr off ctx, because we still have to handle the
|
|
* case where ctx is freed between request and reply. Hence this
|
|
* trick. */
|
|
if (ctx) {
|
|
sr->disabler = notleak(tal(ctx, char));
|
|
tal_add_destructor2(sr->disabler, disable_cb, sr);
|
|
} else
|
|
sr->disabler = NULL;
|
|
assert(strends(sd->msgname(sr->type + SUBD_REPLY_OFFSET), "_REPLY"));
|
|
|
|
/* Keep in FIFO order: we sent in order, so replies will be too. */
|
|
list_add_tail(&sd->reqs, &sr->list);
|
|
tal_add_destructor(sr, destroy_subd_req);
|
|
|
|
return sr;
|
|
}
|
|
|
|
/* Caller must free. */
|
|
static struct subd_req *get_req(struct subd *sd, int reply_type)
|
|
{
|
|
struct subd_req *sr;
|
|
|
|
list_for_each(&sd->reqs, sr, list) {
|
|
if (sr->type + SUBD_REPLY_OFFSET == reply_type)
|
|
return sr;
|
|
/* If it's a fail, and that's a valid type. */
|
|
if (sr->type + SUBD_REPLYFAIL_OFFSET == reply_type
|
|
&& strends(sd->msgname(reply_type), "_REPLYFAIL")) {
|
|
sr->num_reply_fds = 0;
|
|
return sr;
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
static void close_taken_fds(va_list *ap)
|
|
{
|
|
int *fd;
|
|
|
|
while ((fd = va_arg(*ap, int *)) != NULL) {
|
|
if (taken(fd) && *fd >= 0) {
|
|
close(*fd);
|
|
*fd = -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* We use sockets, not pipes, because fds are bidir. */
|
|
static int subd(const char *path, const char *name,
|
|
const char *debug_subdaemon,
|
|
int *msgfd,
|
|
bool io_logging,
|
|
va_list *ap)
|
|
{
|
|
int childmsg[2], execfail[2];
|
|
pid_t childpid;
|
|
int err, *fd;
|
|
|
|
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, childmsg) != 0)
|
|
goto fail;
|
|
|
|
if (pipe(execfail) != 0)
|
|
goto close_msgfd_fail;
|
|
|
|
if (fcntl(execfail[1], F_SETFD, fcntl(execfail[1], F_GETFD)
|
|
| FD_CLOEXEC) < 0)
|
|
goto close_execfail_fail;
|
|
|
|
childpid = fork();
|
|
if (childpid < 0)
|
|
goto close_execfail_fail;
|
|
|
|
if (childpid == 0) {
|
|
size_t num_args;
|
|
char *args[] = { NULL, NULL, NULL, NULL };
|
|
int **fds = tal_arr(tmpctx, int *, 3);
|
|
int stdoutfd = STDOUT_FILENO, stderrfd = STDERR_FILENO;
|
|
|
|
close(childmsg[0]);
|
|
close(execfail[0]);
|
|
|
|
/* msg = STDIN (0) */
|
|
fds[0] = &childmsg[1];
|
|
/* These are untouched */
|
|
fds[1] = &stdoutfd;
|
|
fds[2] = &stderrfd;
|
|
|
|
while ((fd = va_arg(*ap, int *)) != NULL) {
|
|
assert(*fd != -1);
|
|
tal_arr_expand(&fds, fd);
|
|
}
|
|
|
|
/* Finally, the fd to report exec errors on */
|
|
tal_arr_expand(&fds, &execfail[1]);
|
|
|
|
if (!shuffle_fds(fds, tal_count(fds)))
|
|
goto child_errno_fail;
|
|
|
|
/* Make (fairly!) sure all other fds are closed. */
|
|
closefrom(tal_count(fds));
|
|
|
|
num_args = 0;
|
|
args[num_args++] = tal_strdup(NULL, path);
|
|
if (io_logging)
|
|
args[num_args++] = "--log-io";
|
|
#if DEVELOPER
|
|
if (debug_subdaemon && strends(name, debug_subdaemon))
|
|
args[num_args++] = "--debugger";
|
|
#endif
|
|
execv(args[0], args);
|
|
|
|
child_errno_fail:
|
|
err = errno;
|
|
/* Gcc's warn-unused-result fail. */
|
|
if (write(execfail[1], &err, sizeof(err))) {
|
|
;
|
|
}
|
|
exit(127);
|
|
}
|
|
|
|
close(childmsg[1]);
|
|
close(execfail[1]);
|
|
|
|
if (ap)
|
|
close_taken_fds(ap);
|
|
|
|
/* Child will close this without writing on successful exec. */
|
|
if (read(execfail[0], &err, sizeof(err)) == sizeof(err)) {
|
|
close(execfail[0]);
|
|
waitpid(childpid, NULL, 0);
|
|
errno = err;
|
|
return -1;
|
|
}
|
|
close(execfail[0]);
|
|
*msgfd = childmsg[0];
|
|
return childpid;
|
|
|
|
close_execfail_fail:
|
|
close_noerr(execfail[0]);
|
|
close_noerr(execfail[1]);
|
|
close_msgfd_fail:
|
|
close_noerr(childmsg[0]);
|
|
close_noerr(childmsg[1]);
|
|
fail:
|
|
if (ap)
|
|
close_taken_fds(ap);
|
|
return -1;
|
|
}
|
|
|
|
static struct io_plan *sd_msg_read(struct io_conn *conn, struct subd *sd);
|
|
|
|
static void mark_freed(struct subd *unused UNUSED, bool *freed)
|
|
{
|
|
*freed = true;
|
|
}
|
|
|
|
static struct io_plan *sd_msg_reply(struct io_conn *conn, struct subd *sd,
|
|
struct subd_req *sr)
|
|
{
|
|
int type = fromwire_peektype(sd->msg_in);
|
|
bool freed = false;
|
|
int *fds_in;
|
|
|
|
log_debug(sd->log, "REPLY %s with %zu fds",
|
|
sd->msgname(type), tal_count(sd->fds_in));
|
|
|
|
/* Callback could free sd! Make sure destroy_subd() won't free conn */
|
|
sd->conn = NULL;
|
|
|
|
/* We want to free the msg_in, unless they tal_steal() it. */
|
|
tal_steal(tmpctx, sd->msg_in);
|
|
|
|
/* And we need to free sr after this too. */
|
|
tal_steal(tmpctx, sr);
|
|
/* In case they free sd, don't deref. */
|
|
list_del_init(&sr->list);
|
|
|
|
/* Free this array after, too. */
|
|
fds_in = tal_steal(tmpctx, sd->fds_in);
|
|
sd->fds_in = NULL;
|
|
|
|
/* Find out if they freed it. */
|
|
tal_add_destructor2(sd, mark_freed, &freed);
|
|
sr->replycb(sd, sd->msg_in, fds_in, sr->replycb_data);
|
|
|
|
if (freed)
|
|
return io_close(conn);
|
|
|
|
tal_del_destructor2(sd, mark_freed, &freed);
|
|
|
|
/* Restore conn ptr. */
|
|
sd->conn = conn;
|
|
return io_read_wire(conn, sd, &sd->msg_in, sd_msg_read, sd);
|
|
}
|
|
|
|
static struct io_plan *read_fds(struct io_conn *conn, struct subd *sd)
|
|
{
|
|
if (sd->num_fds_in_read == tal_count(sd->fds_in)) {
|
|
size_t i;
|
|
|
|
/* Don't trust subd to set it blocking. */
|
|
for (i = 0; i < tal_count(sd->fds_in); i++)
|
|
io_fd_block(sd->fds_in[i], true);
|
|
return sd_msg_read(conn, sd);
|
|
}
|
|
return io_recv_fd(conn, &sd->fds_in[sd->num_fds_in_read++],
|
|
read_fds, sd);
|
|
}
|
|
|
|
static struct io_plan *sd_collect_fds(struct io_conn *conn, struct subd *sd,
|
|
size_t num_fds)
|
|
{
|
|
assert(!sd->fds_in);
|
|
sd->fds_in = tal_arr(sd, int, num_fds);
|
|
sd->num_fds_in_read = 0;
|
|
return read_fds(conn, sd);
|
|
}
|
|
|
|
static void subdaemon_malformed_msg(struct subd *sd, const u8 *msg)
|
|
{
|
|
log_broken(sd->log, "%i: malformed message '%.s'",
|
|
fromwire_peektype(msg),
|
|
tal_hex(msg, msg));
|
|
|
|
#if DEVELOPER
|
|
if (sd->ld->dev_subdaemon_fail)
|
|
exit(1);
|
|
#endif
|
|
}
|
|
|
|
static bool log_status_fail(struct subd *sd, const u8 *msg)
|
|
{
|
|
const char *name = NULL;
|
|
enum status_failreason failreason;
|
|
char *desc;
|
|
|
|
if (!fromwire_status_fail(msg, msg, &failreason, &desc))
|
|
return false;
|
|
|
|
/* No 'default:' here so gcc gives warning if a new type added */
|
|
switch (failreason) {
|
|
case STATUS_FAIL_MASTER_IO:
|
|
name = "STATUS_FAIL_MASTER_IO";
|
|
break;
|
|
case STATUS_FAIL_HSM_IO:
|
|
name = "STATUS_FAIL_HSM_IO";
|
|
break;
|
|
case STATUS_FAIL_GOSSIP_IO:
|
|
name = "STATUS_FAIL_GOSSIP_IO";
|
|
break;
|
|
case STATUS_FAIL_INTERNAL_ERROR:
|
|
name = "STATUS_FAIL_INTERNAL_ERROR";
|
|
break;
|
|
}
|
|
/* fromwire_status_fail() guarantees it's one of those... */
|
|
assert(name);
|
|
|
|
log_broken(sd->log, "%s: %s", name, desc);
|
|
|
|
#if DEVELOPER
|
|
if (sd->ld->dev_subdaemon_fail)
|
|
exit(1);
|
|
#endif
|
|
return true;
|
|
}
|
|
|
|
static bool handle_peer_error(struct subd *sd, const u8 *msg, int fds[1])
|
|
{
|
|
void *channel = sd->channel;
|
|
struct channel_id channel_id;
|
|
char *desc;
|
|
struct peer_fd *peer_fd;
|
|
u8 *err_for_them;
|
|
bool warning;
|
|
|
|
if (!fromwire_status_peer_error(msg, msg,
|
|
&channel_id, &desc, &warning,
|
|
&err_for_them))
|
|
return false;
|
|
|
|
peer_fd = new_peer_fd_arr(msg, fds);
|
|
|
|
/* Don't free sd; we may be about to free channel. */
|
|
sd->channel = NULL;
|
|
sd->errcb(channel, peer_fd, &channel_id, desc, warning, err_for_them);
|
|
return true;
|
|
}
|
|
|
|
static bool handle_set_billboard(struct subd *sd, const u8 *msg)
|
|
{
|
|
bool perm;
|
|
char *happenings;
|
|
|
|
if (!fromwire_status_peer_billboard(msg, msg, &perm, &happenings))
|
|
return false;
|
|
|
|
sd->billboardcb(sd->channel, perm, happenings);
|
|
return true;
|
|
}
|
|
|
|
static bool handle_version(struct subd *sd, const u8 *msg)
|
|
{
|
|
char *ver;
|
|
|
|
if (!fromwire_status_version(msg, msg, &ver))
|
|
return false;
|
|
|
|
if (!streq(ver, version())) {
|
|
log_broken(sd->log, "version '%s' not '%s': restarting",
|
|
ver, version());
|
|
sd->ld->try_reexec = true;
|
|
/* Return us to toplevel lightningd.c */
|
|
io_break(sd->ld);
|
|
return false;
|
|
}
|
|
|
|
sd->rcvd_version = true;
|
|
/* In case there are outgoing msgs, we can send now. */
|
|
msg_wake(sd->outq);
|
|
|
|
return true;
|
|
}
|
|
|
|
static struct io_plan *sd_msg_read(struct io_conn *conn, struct subd *sd)
|
|
{
|
|
int type = fromwire_peektype(sd->msg_in);
|
|
struct subd_req *sr;
|
|
struct db *db = sd->ld->wallet->db;
|
|
struct io_plan *plan;
|
|
unsigned int i;
|
|
bool freed = false;
|
|
|
|
/* Everything we do, we wrap in a database transaction */
|
|
db_begin_transaction(db);
|
|
|
|
if (type == -1)
|
|
goto malformed;
|
|
|
|
/* First, check for replies. */
|
|
sr = get_req(sd, type);
|
|
if (sr) {
|
|
if (sr->num_reply_fds && sd->fds_in == NULL) {
|
|
plan = sd_collect_fds(conn, sd, sr->num_reply_fds);
|
|
goto out;
|
|
}
|
|
|
|
assert(sr->num_reply_fds == tal_count(sd->fds_in));
|
|
plan = sd_msg_reply(conn, sd, sr);
|
|
goto out;
|
|
}
|
|
|
|
/* If not stolen, we'll free this later. */
|
|
tal_steal(tmpctx, sd->msg_in);
|
|
|
|
/* We handle status messages ourselves. */
|
|
switch ((enum status_wire)type) {
|
|
case WIRE_STATUS_LOG:
|
|
case WIRE_STATUS_IO:
|
|
if (!log_status_msg(sd->log, sd->node_id, sd->msg_in))
|
|
goto malformed;
|
|
goto next;
|
|
case WIRE_STATUS_FAIL:
|
|
if (!log_status_fail(sd, sd->msg_in))
|
|
goto malformed;
|
|
goto close;
|
|
case WIRE_STATUS_PEER_CONNECTION_LOST:
|
|
if (!sd->channel)
|
|
goto malformed;
|
|
log_info(sd->log, "Peer connection lost");
|
|
goto close;
|
|
case WIRE_STATUS_PEER_BILLBOARD:
|
|
if (!sd->channel)
|
|
goto malformed;
|
|
if (!handle_set_billboard(sd, sd->msg_in))
|
|
goto malformed;
|
|
goto next;
|
|
case WIRE_STATUS_VERSION:
|
|
if (!handle_version(sd, sd->msg_in))
|
|
goto close;
|
|
goto next;
|
|
}
|
|
|
|
if (sd->channel) {
|
|
switch ((enum peer_status_wire)type) {
|
|
case WIRE_STATUS_PEER_ERROR:
|
|
/* We expect 1 fd after this */
|
|
if (!sd->fds_in) {
|
|
/* Don't free msg_in: we go around again. */
|
|
tal_steal(sd, sd->msg_in);
|
|
plan = sd_collect_fds(conn, sd, 1);
|
|
goto out;
|
|
}
|
|
if (!handle_peer_error(sd, sd->msg_in, sd->fds_in))
|
|
goto malformed;
|
|
goto close;
|
|
}
|
|
}
|
|
|
|
/* Might free sd (if returns negative); save/restore sd->conn */
|
|
sd->conn = NULL;
|
|
tal_add_destructor2(sd, mark_freed, &freed);
|
|
|
|
i = sd->msgcb(sd, sd->msg_in, sd->fds_in);
|
|
if (freed)
|
|
goto close;
|
|
tal_del_destructor2(sd, mark_freed, &freed);
|
|
|
|
sd->conn = conn;
|
|
|
|
if (i != 0) {
|
|
/* Don't ask for fds twice! */
|
|
assert(!sd->fds_in);
|
|
/* Don't free msg_in: we go around again. */
|
|
tal_steal(sd, sd->msg_in);
|
|
plan = sd_collect_fds(conn, sd, i);
|
|
goto out;
|
|
}
|
|
|
|
next:
|
|
sd->msg_in = NULL;
|
|
sd->fds_in = tal_free(sd->fds_in);
|
|
|
|
plan = io_read_wire(conn, sd, &sd->msg_in, sd_msg_read, sd);
|
|
goto out;
|
|
|
|
malformed:
|
|
subdaemon_malformed_msg(sd, sd->msg_in);
|
|
close:
|
|
plan = io_close(conn);
|
|
out:
|
|
db_commit_transaction(db);
|
|
return plan;
|
|
}
|
|
|
|
|
|
static void destroy_subd(struct subd *sd)
|
|
{
|
|
int status;
|
|
bool fail_if_subd_fails;
|
|
|
|
fail_if_subd_fails = IFDEV(sd->ld->dev_subdaemon_fail, false);
|
|
list_del_from(&sd->ld->subds, &sd->list);
|
|
|
|
/* lightningd may have already done waitpid() */
|
|
if (sd->wstatus != NULL) {
|
|
status = *sd->wstatus;
|
|
} else {
|
|
switch (waitpid(sd->pid, &status, WNOHANG)) {
|
|
case 0:
|
|
/* If it's an essential daemon, don't kill: we want the
|
|
* exit status */
|
|
if (!sd->must_not_exit) {
|
|
log_debug(sd->log,
|
|
"Status closed, but not exited. Killing");
|
|
kill(sd->pid, SIGKILL);
|
|
}
|
|
waitpid(sd->pid, &status, 0);
|
|
fail_if_subd_fails = false;
|
|
break;
|
|
case -1:
|
|
log_broken(sd->log, "Status closed, but waitpid %i says %s",
|
|
sd->pid, strerror(errno));
|
|
status = -1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (fail_if_subd_fails && WIFSIGNALED(status)) {
|
|
log_broken(sd->log, "Subdaemon %s killed with signal %i",
|
|
sd->name, WTERMSIG(status));
|
|
exit(1);
|
|
}
|
|
|
|
/* In case we're freed manually, such as channel_fail_permanent */
|
|
if (sd->conn)
|
|
sd->conn = tal_free(sd->conn);
|
|
|
|
/* Peer still attached? */
|
|
if (sd->channel) {
|
|
/* Don't loop back when we fail it. */
|
|
void *channel = sd->channel;
|
|
struct db *db = sd->ld->wallet->db;
|
|
bool outer_transaction;
|
|
|
|
/* Clear any transient messages in billboard */
|
|
sd->billboardcb(channel, false, NULL);
|
|
sd->channel = NULL;
|
|
|
|
/* We can be freed both inside msg handling, or spontaneously. */
|
|
outer_transaction = db_in_transaction(db);
|
|
if (!outer_transaction)
|
|
db_begin_transaction(db);
|
|
if (sd->errcb)
|
|
sd->errcb(channel, NULL, NULL,
|
|
tal_fmt(sd, "Owning subdaemon %s died (%i)",
|
|
sd->name, status),
|
|
false, NULL);
|
|
if (!outer_transaction)
|
|
db_commit_transaction(db);
|
|
}
|
|
|
|
if (sd->must_not_exit) {
|
|
if (WIFEXITED(status))
|
|
errx(1, "%s failed (exit status %i), exiting.",
|
|
sd->name, WEXITSTATUS(status));
|
|
errx(1, "%s failed (signal %u), exiting.",
|
|
sd->name, WTERMSIG(status));
|
|
}
|
|
}
|
|
|
|
static struct io_plan *msg_send_next(struct io_conn *conn, struct subd *sd)
|
|
{
|
|
const u8 *msg;
|
|
int fd;
|
|
|
|
/* Don't send if we haven't read version! */
|
|
if (!sd->rcvd_version)
|
|
return msg_queue_wait(conn, sd->outq, msg_send_next, sd);
|
|
|
|
/* Nothing to do? Wait for msg_enqueue. */
|
|
msg = msg_dequeue(sd->outq);
|
|
if (!msg)
|
|
return msg_queue_wait(conn, sd->outq, msg_send_next, sd);
|
|
|
|
fd = msg_extract_fd(sd->outq, msg);
|
|
if (fd >= 0) {
|
|
tal_free(msg);
|
|
return io_send_fd(conn, fd, true, msg_send_next, sd);
|
|
}
|
|
return io_write_wire(conn, take(msg), msg_send_next, sd);
|
|
}
|
|
|
|
static struct io_plan *msg_setup(struct io_conn *conn, struct subd *sd)
|
|
{
|
|
return io_duplex(conn,
|
|
io_read_wire(conn, sd, &sd->msg_in, sd_msg_read, sd),
|
|
msg_send_next(conn, sd));
|
|
}
|
|
|
|
static struct subd *new_subd(const tal_t *ctx,
|
|
struct lightningd *ld,
|
|
const char *name,
|
|
void *channel,
|
|
const struct node_id *node_id,
|
|
struct log *base_log,
|
|
bool talks_to_peer,
|
|
const char *(*msgname)(int msgtype),
|
|
unsigned int (*msgcb)(struct subd *,
|
|
const u8 *, const int *fds),
|
|
void (*errcb)(void *channel,
|
|
struct peer_fd *peer_fd,
|
|
const struct channel_id *channel_id,
|
|
const char *desc,
|
|
bool warning,
|
|
const u8 *err_for_them),
|
|
void (*billboardcb)(void *channel,
|
|
bool perm,
|
|
const char *happenings),
|
|
va_list *ap)
|
|
{
|
|
struct subd *sd = tal(ctx, struct subd);
|
|
int msg_fd;
|
|
const char *debug_subd = NULL;
|
|
const char *shortname;
|
|
|
|
assert(name != NULL);
|
|
|
|
/* This part of the name is a bit redundant for logging */
|
|
if (strstarts(name, "lightning_"))
|
|
shortname = name + strlen("lightning_");
|
|
else
|
|
shortname = name;
|
|
|
|
if (base_log) {
|
|
sd->log = new_log(sd, ld->log_book, node_id,
|
|
"%s-%s", shortname, log_prefix(base_log));
|
|
} else {
|
|
sd->log = new_log(sd, ld->log_book, node_id, "%s", shortname);
|
|
}
|
|
|
|
#if DEVELOPER
|
|
debug_subd = ld->dev_debug_subprocess;
|
|
#endif /* DEVELOPER */
|
|
|
|
const char *path = subdaemon_path(tmpctx, ld, name);
|
|
|
|
sd->pid = subd(path, name, debug_subd,
|
|
&msg_fd,
|
|
/* We only turn on subdaemon io logging if we're going
|
|
* to print it: too stressful otherwise! */
|
|
log_print_level(sd->log) < LOG_DBG,
|
|
ap);
|
|
if (sd->pid == (pid_t)-1) {
|
|
log_unusual(ld->log, "subd %s failed: %s",
|
|
name, strerror(errno));
|
|
return tal_free(sd);
|
|
}
|
|
sd->ld = ld;
|
|
|
|
sd->name = shortname;
|
|
sd->must_not_exit = false;
|
|
sd->talks_to_peer = talks_to_peer;
|
|
sd->msgname = msgname;
|
|
assert(msgname);
|
|
sd->msgcb = msgcb;
|
|
assert(msgcb);
|
|
sd->errcb = errcb;
|
|
sd->billboardcb = billboardcb;
|
|
sd->fds_in = NULL;
|
|
sd->outq = msg_queue_new(sd, true);
|
|
sd->wstatus = NULL;
|
|
list_add(&ld->subds, &sd->list);
|
|
tal_add_destructor(sd, destroy_subd);
|
|
list_head_init(&sd->reqs);
|
|
sd->channel = channel;
|
|
sd->rcvd_version = false;
|
|
sd->node_id = tal_dup_or_null(sd, struct node_id, node_id);
|
|
|
|
/* conn actually owns daemon: we die when it does. */
|
|
sd->conn = io_new_conn(ld, msg_fd, msg_setup, sd);
|
|
tal_steal(sd->conn, sd);
|
|
|
|
log_peer_debug(sd->log, node_id, "pid %u, msgfd %i", sd->pid, msg_fd);
|
|
|
|
/* Clear any old transient message. */
|
|
if (billboardcb)
|
|
billboardcb(sd->channel, false, NULL);
|
|
return sd;
|
|
}
|
|
|
|
struct subd *new_global_subd(struct lightningd *ld,
|
|
const char *name,
|
|
const char *(*msgname)(int msgtype),
|
|
unsigned int (*msgcb)(struct subd *, const u8 *,
|
|
const int *fds),
|
|
...)
|
|
{
|
|
va_list ap;
|
|
struct subd *sd;
|
|
|
|
va_start(ap, msgcb);
|
|
sd = new_subd(ld, ld, name, NULL, NULL, NULL, false,
|
|
msgname, msgcb, NULL, NULL, &ap);
|
|
va_end(ap);
|
|
|
|
sd->must_not_exit = true;
|
|
return sd;
|
|
}
|
|
|
|
struct subd *new_channel_subd_(const tal_t *ctx,
|
|
struct lightningd *ld,
|
|
const char *name,
|
|
void *channel,
|
|
const struct node_id *node_id,
|
|
struct log *base_log,
|
|
bool talks_to_peer,
|
|
const char *(*msgname)(int msgtype),
|
|
unsigned int (*msgcb)(struct subd *, const u8 *,
|
|
const int *fds),
|
|
void (*errcb)(void *channel,
|
|
struct peer_fd *peer_fd,
|
|
const struct channel_id *channel_id,
|
|
const char *desc,
|
|
bool warning,
|
|
const u8 *err_for_them),
|
|
void (*billboardcb)(void *channel, bool perm,
|
|
const char *happenings),
|
|
...)
|
|
{
|
|
va_list ap;
|
|
struct subd *sd;
|
|
|
|
va_start(ap, billboardcb);
|
|
sd = new_subd(ctx, ld, name, channel, node_id, base_log,
|
|
talks_to_peer, msgname, msgcb, errcb, billboardcb, &ap);
|
|
va_end(ap);
|
|
return sd;
|
|
}
|
|
|
|
void subd_send_msg(struct subd *sd, const u8 *msg_out)
|
|
{
|
|
u16 type = fromwire_peektype(msg_out);
|
|
/* FIXME: We should use unique upper bits for each daemon, then
|
|
* have generate-wire.py add them, just assert here. */
|
|
assert(!strstarts(sd->msgname(type), "INVALID"));
|
|
msg_enqueue(sd->outq, msg_out);
|
|
}
|
|
|
|
void subd_send_fd(struct subd *sd, int fd)
|
|
{
|
|
msg_enqueue_fd(sd->outq, fd);
|
|
}
|
|
|
|
struct subd_req *subd_req_(const tal_t *ctx,
|
|
struct subd *sd,
|
|
const u8 *msg_out,
|
|
int fd_out, size_t num_fds_in,
|
|
void (*replycb)(struct subd *, const u8 *, const int *, void *),
|
|
void *replycb_data)
|
|
{
|
|
/* Grab type now in case msg_out is taken() */
|
|
int type = fromwire_peektype(msg_out);
|
|
|
|
subd_send_msg(sd, msg_out);
|
|
if (fd_out >= 0)
|
|
subd_send_fd(sd, fd_out);
|
|
|
|
return add_req(ctx, sd, type, num_fds_in, replycb, replycb_data);
|
|
}
|
|
|
|
/* SIGALRM terminates by default: we just want it to interrupt waitpid(),
|
|
* which is implied by "handling" it. */
|
|
static void discard_alarm(int sig UNNEEDED)
|
|
{
|
|
}
|
|
|
|
struct subd *subd_shutdown(struct subd *sd, unsigned int seconds)
|
|
{
|
|
struct sigaction sa, old;
|
|
|
|
log_debug(sd->log, "Shutting down");
|
|
|
|
tal_del_destructor(sd, destroy_subd);
|
|
|
|
/* This should make it exit; steal so it stays around. */
|
|
tal_steal(sd->ld, sd);
|
|
sd->conn = tal_free(sd->conn);
|
|
|
|
/* Set up alarm to wake us up if child doesn't exit. */
|
|
sa.sa_handler = discard_alarm;
|
|
sigemptyset(&sa.sa_mask);
|
|
sa.sa_flags = 0;
|
|
sigaction(SIGALRM, &sa, &old);
|
|
alarm(seconds);
|
|
|
|
if (waitpid(sd->pid, NULL, 0) > 0) {
|
|
alarm(0);
|
|
sigaction(SIGALRM, &old, NULL);
|
|
list_del_from(&sd->ld->subds, &sd->list);
|
|
return tal_free(sd);
|
|
}
|
|
|
|
sigaction(SIGALRM, &old, NULL);
|
|
/* Didn't die? This will kill it harder */
|
|
sd->must_not_exit = false;
|
|
destroy_subd(sd);
|
|
return tal_free(sd);
|
|
}
|
|
|
|
void subd_shutdown_remaining(struct lightningd *ld)
|
|
{
|
|
struct subd *subd;
|
|
|
|
/* We give them a second to finish exiting, before we kill
|
|
* them in destroy_subd() */
|
|
sleep(1);
|
|
|
|
while ((subd = list_top(&ld->subds, struct subd, list)) != NULL) {
|
|
/* Destructor removes from list */
|
|
io_close(subd->conn);
|
|
}
|
|
}
|
|
|
|
void subd_release_channel(struct subd *owner, const void *channel)
|
|
{
|
|
/* If owner is a per-peer-daemon, and not already freeing itself... */
|
|
if (owner->channel) {
|
|
assert(owner->channel == channel);
|
|
owner->channel = NULL;
|
|
tal_free(owner);
|
|
}
|
|
}
|
|
|
|
#if DEVELOPER
|
|
char *opt_subd_dev_disconnect(const char *optarg, struct lightningd *ld)
|
|
{
|
|
ld->dev_disconnect_fd = open(optarg, O_RDONLY);
|
|
if (ld->dev_disconnect_fd < 0)
|
|
return tal_fmt(ld, "Could not open --dev-disconnect=%s: %s",
|
|
optarg, strerror(errno));
|
|
return NULL;
|
|
}
|
|
|
|
/* If test specified that this disconnection should cause permanent failure */
|
|
bool dev_disconnect_permanent(struct lightningd *ld)
|
|
{
|
|
char permfail[strlen("PERMFAIL")];
|
|
int r;
|
|
|
|
if (ld->dev_disconnect_fd == -1)
|
|
return false;
|
|
|
|
r = read(ld->dev_disconnect_fd, permfail, sizeof(permfail));
|
|
if (r < 0)
|
|
fatal("Reading dev_disconnect file: %s", strerror(errno));
|
|
|
|
if (memeq(permfail, r, "permfail", strlen("permfail")))
|
|
return true;
|
|
|
|
/* Nope, restore. */
|
|
if (lseek(ld->dev_disconnect_fd, -r, SEEK_CUR) < 0) {
|
|
fatal("lseek failure");
|
|
}
|
|
return false;
|
|
}
|
|
#endif /* DEVELOPER */
|
|
|
|
/* Ugly helper to get full pathname of the current binary. */
|
|
const char *find_my_abspath(const tal_t *ctx, const char *argv0)
|
|
{
|
|
char *me;
|
|
|
|
/* A command containing / is run relative to the current directory,
|
|
* not searched through the path. The shell sets argv0 to the command
|
|
* run, though something else could set it to a arbitrary value and
|
|
* this logic would be wrong. */
|
|
if (strchr(argv0, PATH_SEP)) {
|
|
const char *path;
|
|
/* Absolute paths are easy. */
|
|
if (strstarts(argv0, PATH_SEP_STR))
|
|
path = argv0;
|
|
/* It contains a '/', it's relative to current dir. */
|
|
else
|
|
path = path_join(tmpctx, path_cwd(tmpctx), argv0);
|
|
|
|
me = path_canon(ctx, path);
|
|
if (!me || access(me, X_OK) != 0)
|
|
errx(1, "I cannot find myself at %s based on my name %s",
|
|
path, argv0);
|
|
} else {
|
|
/* No /, search path */
|
|
char **pathdirs;
|
|
const char *pathenv = getenv("PATH");
|
|
size_t i;
|
|
|
|
/* This replicates the standard shell path search algorithm */
|
|
if (!pathenv)
|
|
errx(1, "Cannot find myself: no $PATH set");
|
|
|
|
pathdirs = tal_strsplit(tmpctx, pathenv, ":", STR_NO_EMPTY);
|
|
me = NULL;
|
|
for (i = 0; pathdirs[i]; i++) {
|
|
/* This returns NULL if it doesn't exist. */
|
|
me = path_canon(ctx,
|
|
path_join(tmpctx, pathdirs[i], argv0));
|
|
if (me && access(me, X_OK) == 0)
|
|
break;
|
|
/* Nope, try again. */
|
|
me = tal_free(me);
|
|
}
|
|
if (!me)
|
|
errx(1, "Cannot find %s in $PATH", argv0);
|
|
}
|
|
|
|
return me;
|
|
}
|