mirror of
https://github.com/ElementsProject/lightning.git
synced 2024-11-19 09:54:16 +01:00
peer_fail: differentiate transient and permanent failures.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
parent
66d55334b0
commit
ffa5e1c52c
@ -38,8 +38,7 @@ static void peer_bad_message(struct subd *gossip, const u8 *msg)
|
||||
log_debug(gossip->log, "Peer %s gave bad msg %s",
|
||||
type_to_string(msg, struct pubkey, &peer->id),
|
||||
tal_hex(msg, msg));
|
||||
peer_fail(peer, "Bad message %s during gossip phase",
|
||||
gossip_wire_type_name(fromwire_peektype(msg)));
|
||||
peer_fail_permanent(peer, msg);
|
||||
}
|
||||
|
||||
static void peer_failed(struct subd *gossip, const u8 *msg)
|
||||
@ -57,10 +56,7 @@ static void peer_failed(struct subd *gossip, const u8 *msg)
|
||||
if (!peer)
|
||||
fatal("Gossip gave bad peerid %"PRIu64, unique_id);
|
||||
|
||||
log_unusual(gossip->log, "Peer %s failed: %.*s",
|
||||
type_to_string(msg, struct pubkey, &peer->id),
|
||||
(int)tal_len(err), (const char *)err);
|
||||
peer_fail(peer, "Error during gossip phase");
|
||||
peer_fail_permanent(peer, msg);
|
||||
}
|
||||
|
||||
static void peer_nongossip(struct subd *gossip, const u8 *msg,
|
||||
|
@ -84,12 +84,58 @@ static void peer_reconnect(struct peer *peer)
|
||||
try_reconnect, peer);
|
||||
}
|
||||
|
||||
void peer_fail(struct peer *peer, const char *fmt, ...)
|
||||
/* Called by peer_fail_permanent() for a peer that persists after a
 * permanent failure.  Currently a no-op placeholder. */
static void drop_to_chain(struct peer *peer)
{
	/* FIXME: Implement. */
}
|
||||
|
||||
void peer_fail_permanent(struct peer *peer, const u8 *msg)
|
||||
{
|
||||
/* BOLT #1:
|
||||
*
|
||||
* The channel is referred to by `channel_id` unless `channel_id` is
|
||||
* zero (ie. all bytes zero), in which case it refers to all
|
||||
* channels. */
|
||||
static const struct channel_id all_channels;
|
||||
|
||||
log_unusual(peer->log, "Peer permanent failure in %s: %.*s",
|
||||
peer_state_name(peer->state),
|
||||
(int)tal_len(msg), (char *)msg);
|
||||
peer->error = towire_error(peer, &all_channels, msg);
|
||||
|
||||
/* In case we reconnected in the meantime. */
|
||||
if (peer->fd != -1) {
|
||||
/* FIXME: We should retransmit error if this happens. */
|
||||
close(peer->fd);
|
||||
}
|
||||
|
||||
if (peer_persists(peer))
|
||||
drop_to_chain(peer);
|
||||
else
|
||||
tal_free(peer);
|
||||
return;
|
||||
}
|
||||
|
||||
void peer_internal_error(struct peer *peer, const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
|
||||
va_start(ap, fmt);
|
||||
log_info(peer->log, "Peer failure in %s: ",
|
||||
log_broken(peer->log, "Peer internal error %s: ",
|
||||
peer_state_name(peer->state));
|
||||
logv_add(peer->log, fmt, ap);
|
||||
va_end(ap);
|
||||
|
||||
peer_fail_permanent(peer,
|
||||
take((u8 *)tal_strdup(peer, "Internal error")));
|
||||
}
|
||||
|
||||
void peer_fail_transient(struct peer *peer, const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
|
||||
va_start(ap, fmt);
|
||||
log_info(peer->log, "Peer transient failure in %s: ",
|
||||
peer_state_name(peer->state));
|
||||
logv_add(peer->log, fmt, ap);
|
||||
va_end(ap);
|
||||
@ -133,6 +179,15 @@ static bool peer_start_channeld_hsmfd(struct subd *hsm, const u8 *resp,
|
||||
const int *fds,
|
||||
struct peer *peer);
|
||||
|
||||
/* Send (encrypted) error message, then close. */
|
||||
static struct io_plan *send_error(struct io_conn *conn, struct peer *peer)
|
||||
{
|
||||
struct peer_crypto_state *pcs = tal(conn, struct peer_crypto_state);
|
||||
init_peer_crypto_state(peer, pcs);
|
||||
pcs->cs = *peer->cs;
|
||||
return peer_write_message(conn, pcs, peer->error, (void *)io_close_cb);
|
||||
}
|
||||
|
||||
/* Returns true if we consider this a reconnection. */
|
||||
static bool peer_reconnected(struct lightningd *ld,
|
||||
const struct pubkey *id,
|
||||
@ -150,6 +205,16 @@ static bool peer_reconnected(struct lightningd *ld,
|
||||
tal_free(peer->cs);
|
||||
peer->cs = tal_dup(peer, struct crypto_state, cs);
|
||||
|
||||
/* BOLT #2:
|
||||
*
|
||||
* On reconnection, if a channel is in an error state, the node SHOULD
|
||||
* retransmit the error packet and ignore any other packets for that
|
||||
* channel, and the following requirements do not apply. */
|
||||
if (peer->error) {
|
||||
io_new_conn(peer, fd, send_error, peer);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* We need this for init */
|
||||
peer->reconnected = true;
|
||||
|
||||
@ -282,6 +347,7 @@ void add_peer(struct lightningd *ld, u64 unique_id,
|
||||
/* Fresh peer. */
|
||||
peer = tal(ld, struct peer);
|
||||
peer->ld = ld;
|
||||
peer->error = NULL;
|
||||
peer->unique_id = unique_id;
|
||||
peer->owner = NULL;
|
||||
peer->id = *id;
|
||||
@ -370,7 +436,10 @@ static void peer_owner_finished(struct subd *subd, int status)
|
||||
}
|
||||
|
||||
subd->peer->owner = NULL;
|
||||
peer_fail(subd->peer, "Owning subdaemon %s died (%i)",
|
||||
|
||||
/* Don't do a transient error if it's already perm failed. */
|
||||
if (!subd->peer->error)
|
||||
peer_fail_transient(subd->peer, "Owning subdaemon %s died (%i)",
|
||||
subd->name, status);
|
||||
}
|
||||
|
||||
@ -946,7 +1015,7 @@ static bool peer_start_channeld_hsmfd(struct subd *hsm, const u8 *resp,
|
||||
if (!peer->owner) {
|
||||
log_unusual(peer->log, "Could not subdaemon channel: %s",
|
||||
strerror(errno));
|
||||
peer_fail(peer, "Failed to subdaemon channel");
|
||||
peer_fail_transient(peer, "Failed to subdaemon channel");
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1098,12 +1167,15 @@ static bool opening_funder_finished(struct subd *opening, const u8 *resp,
|
||||
bitcoin_txid(fc->funding_tx, fc->peer->funding_txid);
|
||||
|
||||
if (!structeq(fc->peer->funding_txid, &funding_txid)) {
|
||||
peer_fail(fc->peer, "Funding txid mismatch:"
|
||||
" satoshi %"PRIu64" change %"PRIu64" changeidx %u"
|
||||
peer_internal_error(fc->peer,
|
||||
"Funding txid mismatch:"
|
||||
" satoshi %"PRIu64" change %"PRIu64
|
||||
" changeidx %u"
|
||||
" localkey %s remotekey %s",
|
||||
fc->peer->funding_satoshi,
|
||||
fc->change, fc->change_keyindex,
|
||||
type_to_string(fc, struct pubkey, &local_fundingkey),
|
||||
type_to_string(fc, struct pubkey,
|
||||
&local_fundingkey),
|
||||
type_to_string(fc, struct pubkey,
|
||||
&channel_info->remote_fundingkey));
|
||||
return false;
|
||||
@ -1112,7 +1184,7 @@ static bool opening_funder_finished(struct subd *opening, const u8 *resp,
|
||||
/* We should have sent and received the first commitsig */
|
||||
if (!peer_save_commitsig_received(fc->peer, 0)
|
||||
|| !peer_save_commitsig_sent(fc->peer, 0)) {
|
||||
peer_fail(fc->peer, "Saving commitsig failed");
|
||||
peer_internal_error(fc->peer, "Saving commitsig failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1246,10 +1318,12 @@ void peer_fundee_open(struct peer *peer, const u8 *from_peer)
|
||||
/* Note: gossipd handles unknown packets, so we don't have to worry
|
||||
* about ignoring odd ones here. */
|
||||
if (fromwire_peektype(from_peer) != WIRE_OPEN_CHANNEL) {
|
||||
char *msg = tal_fmt(peer, "Bad message %i (%s) before opening",
|
||||
fromwire_peektype(from_peer),
|
||||
wire_type_name(fromwire_peektype(from_peer)));
|
||||
log_unusual(peer->log, "Strange message to exit gossip: %u",
|
||||
fromwire_peektype(from_peer));
|
||||
peer_fail(peer, "Bad message during gossiping: %s",
|
||||
tal_hex(peer, from_peer));
|
||||
peer_fail_permanent(peer, (u8 *)take(msg));
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1260,7 +1334,7 @@ void peer_fundee_open(struct peer *peer, const u8 *from_peer)
|
||||
take(&peer->fd), &peer->gossip_client_fd,
|
||||
NULL);
|
||||
if (!peer->owner) {
|
||||
peer_fail(peer, "Failed to subdaemon opening: %s",
|
||||
peer_fail_transient(peer, "Failed to subdaemon opening: %s",
|
||||
strerror(errno));
|
||||
return;
|
||||
}
|
||||
@ -1294,7 +1368,8 @@ void peer_fundee_open(struct peer *peer, const u8 *from_peer)
|
||||
|
||||
/* Careful here! Their message could push us overlength! */
|
||||
if (tal_len(msg) >= 65536) {
|
||||
peer_fail(peer, "Unacceptably long open_channel");
|
||||
char *err = tal_strdup(peer, "Unacceptably long open_channel");
|
||||
peer_fail_permanent(peer, (u8 *)take(err));
|
||||
return;
|
||||
}
|
||||
subd_req(peer, peer->owner, take(msg), -1, 1,
|
||||
@ -1343,7 +1418,7 @@ static bool gossip_peer_released(struct subd *gossip,
|
||||
take(&fc->peer->fd),
|
||||
&fc->peer->gossip_client_fd, NULL);
|
||||
if (!opening) {
|
||||
peer_fail(fc->peer, "Failed to subdaemon opening: %s",
|
||||
peer_fail_transient(fc->peer, "Failed to subdaemon opening: %s",
|
||||
strerror(errno));
|
||||
return true;
|
||||
}
|
||||
|
@ -24,6 +24,9 @@ struct peer {
|
||||
/* ID of peer */
|
||||
struct pubkey id;
|
||||
|
||||
/* Error message (iff in error state) */
|
||||
u8 *error;
|
||||
|
||||
/* Their shachain. */
|
||||
struct shachain their_shachain;
|
||||
|
||||
@ -140,8 +143,13 @@ void peer_fundee_open(struct peer *peer, const u8 *msg);
|
||||
void add_peer(struct lightningd *ld, u64 unique_id,
|
||||
int fd, const struct pubkey *id,
|
||||
const struct crypto_state *cs);
|
||||
/* Peer has failed. */
|
||||
PRINTF_FMT(2,3) void peer_fail(struct peer *peer, const char *fmt, ...);
|
||||
|
||||
/* Peer has failed, but try to reconnect. */
|
||||
PRINTF_FMT(2,3) void peer_fail_transient(struct peer *peer, const char *fmt,...);
|
||||
/* Peer has failed, give up on it. */
|
||||
void peer_fail_permanent(struct peer *peer, const u8 *msg);
|
||||
/* Permanent error, but due to internal problems, not peer. */
|
||||
void peer_internal_error(struct peer *peer, const char *fmt, ...);
|
||||
|
||||
const char *peer_state_name(enum peer_state state);
|
||||
void peer_set_condition(struct peer *peer, enum peer_state oldstate,
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <ccan/build_assert/build_assert.h>
|
||||
#include <ccan/mem/mem.h>
|
||||
#include <ccan/tal/str/str.h>
|
||||
#include <daemon/chaintopology.h>
|
||||
#include <daemon/invoice.h>
|
||||
#include <daemon/log.h>
|
||||
@ -334,7 +335,7 @@ static bool rcvd_htlc_reply(struct subd *subd, const u8 *msg, const int *fds,
|
||||
&hout->key.id,
|
||||
&failure_code,
|
||||
&failurestr)) {
|
||||
log_broken(subd->log, "Bad channel_offer_htlc_reply");
|
||||
peer_internal_error(subd->peer, "Bad channel_offer_htlc_reply");
|
||||
tal_free(hout);
|
||||
return false;
|
||||
}
|
||||
@ -345,7 +346,8 @@ static bool rcvd_htlc_reply(struct subd *subd, const u8 *msg, const int *fds,
|
||||
}
|
||||
|
||||
if (find_htlc_out(&subd->ld->htlcs_out, hout->key.peer, hout->key.id)) {
|
||||
log_broken(subd->log, "Bad offer_htlc_reply HTLC id %"PRIu64
|
||||
peer_internal_error(subd->peer,
|
||||
"Bad offer_htlc_reply HTLC id %"PRIu64
|
||||
" is a duplicate",
|
||||
hout->key.id);
|
||||
tal_free(hout);
|
||||
@ -530,7 +532,8 @@ static bool state_update_ok(struct peer *peer,
|
||||
expected = SENT_REMOVE_COMMIT;
|
||||
|
||||
if (newstate != expected) {
|
||||
log_broken(peer->log, "HTLC %s %"PRIu64" invalid update %s->%s",
|
||||
peer_internal_error(peer,
|
||||
"HTLC %s %"PRIu64" invalid update %s->%s",
|
||||
dir, htlc_id,
|
||||
htlc_state_name(oldstate),
|
||||
htlc_state_name(newstate));
|
||||
@ -582,7 +585,7 @@ static bool peer_accepted_htlc(struct peer *peer,
|
||||
|
||||
hin = find_htlc_in(&peer->ld->htlcs_in, peer, id);
|
||||
if (!hin) {
|
||||
log_broken(peer->log,
|
||||
peer_internal_error(peer,
|
||||
"peer_got_revoke unknown htlc %"PRIu64, id);
|
||||
return false;
|
||||
}
|
||||
@ -595,7 +598,7 @@ static bool peer_accepted_htlc(struct peer *peer,
|
||||
sizeof(hin->onion_routing_packet));
|
||||
if (!op) {
|
||||
if (!memeqzero(&hin->shared_secret, sizeof(hin->shared_secret))){
|
||||
log_broken(peer->log,
|
||||
peer_internal_error(peer,
|
||||
"bad onion in got_revoke: %s",
|
||||
tal_hexstr(peer, hin->onion_routing_packet,
|
||||
sizeof(hin->onion_routing_packet)));
|
||||
@ -665,7 +668,7 @@ static bool peer_fulfilled_our_htlc(struct peer *peer,
|
||||
|
||||
hout = find_htlc_out(&peer->ld->htlcs_out, peer, fulfilled->id);
|
||||
if (!hout) {
|
||||
log_broken(peer->log,
|
||||
peer_internal_error(peer,
|
||||
"fulfilled_our_htlc unknown htlc %"PRIu64,
|
||||
fulfilled->id);
|
||||
return false;
|
||||
@ -694,7 +697,7 @@ static bool peer_failed_our_htlc(struct peer *peer,
|
||||
|
||||
hout = find_htlc_out(&peer->ld->htlcs_out, peer, failed->id);
|
||||
if (!hout) {
|
||||
log_broken(peer->log,
|
||||
peer_internal_error(peer,
|
||||
"failed_our_htlc unknown htlc %"PRIu64,
|
||||
failed->id);
|
||||
return false;
|
||||
@ -763,7 +766,7 @@ static bool update_in_htlc(struct peer *peer, u64 id, enum htlc_state newstate)
|
||||
|
||||
hin = find_htlc_in(&peer->ld->htlcs_in, peer, id);
|
||||
if (!hin) {
|
||||
log_broken(peer->log, "Can't find in HTLC %"PRIu64, id);
|
||||
peer_internal_error(peer, "Can't find in HTLC %"PRIu64, id);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -782,7 +785,7 @@ static bool update_out_htlc(struct peer *peer, u64 id, enum htlc_state newstate)
|
||||
|
||||
hout = find_htlc_out(&peer->ld->htlcs_out, peer, id);
|
||||
if (!hout) {
|
||||
log_broken(peer->log, "Can't find out HTLC %"PRIu64, id);
|
||||
peer_internal_error(peer, "Can't find out HTLC %"PRIu64, id);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -825,14 +828,14 @@ int peer_sending_commitsig(struct peer *peer, const u8 *msg)
|
||||
&commitnum,
|
||||
&changed_htlcs,
|
||||
&commit_sig, &htlc_sigs)) {
|
||||
log_broken(peer->log, "bad channel_sending_commitsig %s",
|
||||
peer_internal_error(peer, "bad channel_sending_commitsig %s",
|
||||
tal_hex(peer, msg));
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i = 0; i < tal_count(changed_htlcs); i++) {
|
||||
if (!changed_htlc(peer, changed_htlcs + i)) {
|
||||
log_broken(peer->log,
|
||||
peer_internal_error(peer,
|
||||
"channel_sending_commitsig: update failed");
|
||||
return -1;
|
||||
}
|
||||
@ -848,7 +851,7 @@ int peer_sending_commitsig(struct peer *peer, const u8 *msg)
|
||||
|
||||
if (num_local_added != 0) {
|
||||
if (maxid != peer->next_htlc_id + num_local_added - 1) {
|
||||
log_broken(peer->log,
|
||||
peer_internal_error(peer,
|
||||
"channel_sending_commitsig:"
|
||||
" Added %"PRIu64", maxid now %"PRIu64
|
||||
" from %"PRIu64,
|
||||
@ -944,7 +947,7 @@ static bool peer_sending_revocation(struct peer *peer,
|
||||
bool peer_save_commitsig_received(struct peer *peer, u64 commitnum)
|
||||
{
|
||||
if (commitnum != peer->num_commits_received) {
|
||||
log_broken(peer->log,
|
||||
peer_internal_error(peer,
|
||||
"channel_got_commitsig: expected commitnum %"PRIu64
|
||||
" got %"PRIu64,
|
||||
peer->num_commits_received, commitnum);
|
||||
@ -961,7 +964,7 @@ bool peer_save_commitsig_received(struct peer *peer, u64 commitnum)
|
||||
bool peer_save_commitsig_sent(struct peer *peer, u64 commitnum)
|
||||
{
|
||||
if (commitnum != peer->num_commits_sent) {
|
||||
log_broken(peer->log,
|
||||
peer_internal_error(peer,
|
||||
"channel_sent_commitsig: expected commitnum %"PRIu64
|
||||
" got %"PRIu64,
|
||||
peer->num_commits_sent, commitnum);
|
||||
@ -996,7 +999,8 @@ int peer_got_commitsig(struct peer *peer, const u8 *msg)
|
||||
&fulfilled,
|
||||
&failed,
|
||||
&changed)) {
|
||||
log_broken(peer->log, "bad fromwire_channel_got_commitsig %s",
|
||||
peer_internal_error(peer,
|
||||
"bad fromwire_channel_got_commitsig %s",
|
||||
tal_hex(peer, msg));
|
||||
return -1;
|
||||
}
|
||||
@ -1026,7 +1030,7 @@ int peer_got_commitsig(struct peer *peer, const u8 *msg)
|
||||
|
||||
for (i = 0; i < tal_count(changed); i++) {
|
||||
if (!changed_htlc(peer, &changed[i])) {
|
||||
log_broken(peer->log,
|
||||
peer_internal_error(peer,
|
||||
"got_commitsig: update failed");
|
||||
return -1;
|
||||
}
|
||||
@ -1068,7 +1072,7 @@ int peer_got_revoke(struct peer *peer, const u8 *msg)
|
||||
&revokenum, &per_commitment_secret,
|
||||
&next_per_commitment_point,
|
||||
&changed)) {
|
||||
log_broken(peer->log, "bad fromwire_channel_got_revoke %s",
|
||||
peer_internal_error(peer, "bad fromwire_channel_got_revoke %s",
|
||||
tal_hex(peer, msg));
|
||||
return -1;
|
||||
}
|
||||
@ -1087,7 +1091,7 @@ int peer_got_revoke(struct peer *peer, const u8 *msg)
|
||||
return -1;
|
||||
} else {
|
||||
if (!changed_htlc(peer, &changed[i])) {
|
||||
log_broken(peer->log,
|
||||
peer_internal_error(peer,
|
||||
"got_revoke: update failed");
|
||||
return -1;
|
||||
}
|
||||
@ -1095,13 +1099,13 @@ int peer_got_revoke(struct peer *peer, const u8 *msg)
|
||||
}
|
||||
|
||||
if (revokenum >= (1ULL << 48)) {
|
||||
log_broken(peer->log, "got_revoke: too many txs %"PRIu64,
|
||||
peer_internal_error(peer, "got_revoke: too many txs %"PRIu64,
|
||||
revokenum);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (revokenum != peer->num_revocations_received) {
|
||||
log_broken(peer->log, "got_revoke: expected %"PRIu64
|
||||
peer_internal_error(peer, "got_revoke: expected %"PRIu64
|
||||
" got %"PRIu64,
|
||||
peer->num_revocations_received, revokenum);
|
||||
return -1;
|
||||
@ -1115,10 +1119,12 @@ int peer_got_revoke(struct peer *peer, const u8 *msg)
|
||||
if (!shachain_add_hash(&peer->their_shachain,
|
||||
shachain_index(revokenum),
|
||||
&per_commitment_secret)) {
|
||||
peer_fail(peer, "Bad per_commitment_secret %s for %"PRIu64,
|
||||
char *err = tal_fmt(peer,
|
||||
"Bad per_commitment_secret %s for %"PRIu64,
|
||||
type_to_string(msg, struct sha256,
|
||||
&per_commitment_secret),
|
||||
revokenum);
|
||||
peer_fail_permanent(peer, take((u8 *)err));
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user