Ensure all HTLCs for a claimed payment are claimed on startup

While the HTLC-claim process happens across all MPP parts under one
lock, this doesn't imply that they are claimed fully atomically on
disk. Ultimately, an application can crash after persisting one
`ChannelMonitorUpdate` out of multiple monitor updates needed for
the full claim.

Previously, this would leave us in a very bad state - because of
the all-channels-available check in `claim_funds` we'd refuse to
claim the payment again on restart (even though the
`PaymentReceived` event will be passed to the user again), and we'd
end up having partially claimed the payment!

The fix for the consistency part of this issue is pretty
straightforward - just check for this condition on startup and
complete the claim across all channels/`ChannelMonitor`s if we
detect it.

This still leaves us in a confused state from the perspective of
the user, however - we've actually claimed a payment but when they
call `claim_funds` we return `false` indicating it could not be
claimed.
This commit is contained in:
Matt Corallo 2022-04-18 15:42:11 +00:00
parent bd1e20d49e
commit 28c70ac506
5 changed files with 252 additions and 5 deletions

View file

@ -1085,7 +1085,8 @@ impl<Signer: Sign> ChannelMonitor<Signer> {
self.inner.lock().unwrap().provide_latest_holder_commitment_tx(holder_commitment_tx, htlc_outputs).map_err(|_| ()) self.inner.lock().unwrap().provide_latest_holder_commitment_tx(holder_commitment_tx, htlc_outputs).map_err(|_| ())
} }
#[cfg(test)] /// This is used to provide payment preimage(s) out-of-band during startup without updating the
/// off-chain state with a new commitment transaction.
pub(crate) fn provide_payment_preimage<B: Deref, F: Deref, L: Deref>( pub(crate) fn provide_payment_preimage<B: Deref, F: Deref, L: Deref>(
&self, &self,
payment_hash: &PaymentHash, payment_hash: &PaymentHash,
@ -1631,6 +1632,10 @@ impl<Signer: Sign> ChannelMonitor<Signer> {
res res
} }
/// Returns a copy of the payment preimages currently stored in this monitor, keyed by
/// payment hash.
///
/// The map is cloned while the inner lock is held, so the caller receives an independent
/// snapshot and does not keep the monitor locked.
pub(crate) fn get_stored_preimages(&self) -> HashMap<PaymentHash, PaymentPreimage> {
    let inner = self.inner.lock().unwrap();
    inner.payment_preimages.clone()
}
} }
/// Compares a broadcasted commitment transaction's HTLCs with those in the latest state, /// Compares a broadcasted commitment transaction's HTLCs with those in the latest state,

View file

@ -1703,6 +1703,28 @@ impl<Signer: Sign> Channel<Signer> {
make_funding_redeemscript(&self.get_holder_pubkeys().funding_pubkey, self.counterparty_funding_pubkey()) make_funding_redeemscript(&self.get_holder_pubkeys().funding_pubkey, self.counterparty_funding_pubkey())
} }
/// Claims an HTLC while we are disconnected from the peer, discarding the resulting
/// ChannelMonitorUpdate instead of handing it back to the caller.
///
/// Because the update is dropped, the ChannelMonitor for this channel MUST be given the
/// preimage out-of-band (i.e. without calling [`crate::chain::Watch::update_channel`]).
///
/// The caller must ensure the peer is disconnected, so the HTLC claim lands in the holding
/// cell rather than being sent right away.
///
/// # Panics
///
/// Panics if none of the `AwaitingRemoteRevoke`, `PeerDisconnected` or
/// `MonitorUpdateFailed` state bits is set, or if the claim produced a message rather than
/// being queued in the holding cell.
pub fn claim_htlc_while_disconnected_dropping_mon_update<L: Deref>(
    &mut self, htlc_id_arg: u64, payment_preimage_arg: PaymentPreimage, logger: &L,
) where
    L::Target: Logger,
{
    // This must mirror the holding-cell condition in `get_update_fulfill_htlc`: only in
    // one of these states is the claim queued instead of turned into a message.
    assert!(self.channel_state & (ChannelState::AwaitingRemoteRevoke as u32 | ChannelState::PeerDisconnected as u32 | ChannelState::MonitorUpdateFailed as u32) != 0);

    // Snapshot the monitor update ID and restore it afterwards, so that any ID change made
    // by `get_update_fulfill_htlc` is forgotten along with the dropped update.
    let saved_update_id = self.latest_monitor_update_id;
    let claim_result = self.get_update_fulfill_htlc(htlc_id_arg, payment_preimage_arg, logger);
    self.latest_monitor_update_id = saved_update_id;

    match claim_result {
        UpdateFulfillFetch::NewClaim { msg, .. } => {
            assert!(msg.is_none()); // The HTLC must have ended up in the holding cell.
        },
        _ => {},
    }
}
fn get_update_fulfill_htlc<L: Deref>(&mut self, htlc_id_arg: u64, payment_preimage_arg: PaymentPreimage, logger: &L) -> UpdateFulfillFetch where L::Target: Logger { fn get_update_fulfill_htlc<L: Deref>(&mut self, htlc_id_arg: u64, payment_preimage_arg: PaymentPreimage, logger: &L) -> UpdateFulfillFetch where L::Target: Logger {
// Either ChannelFunded got set (which means it won't be unset) or there is no way any // Either ChannelFunded got set (which means it won't be unset) or there is no way any
// caller thought we could have something claimed (cause we wouldn't have accepted in an // caller thought we could have something claimed (cause we wouldn't have accepted in an
@ -1765,6 +1787,10 @@ impl<Signer: Sign> Channel<Signer> {
}; };
if (self.channel_state & (ChannelState::AwaitingRemoteRevoke as u32 | ChannelState::PeerDisconnected as u32 | ChannelState::MonitorUpdateFailed as u32)) != 0 { if (self.channel_state & (ChannelState::AwaitingRemoteRevoke as u32 | ChannelState::PeerDisconnected as u32 | ChannelState::MonitorUpdateFailed as u32)) != 0 {
// Note that this condition is the same as the assertion in
// `claim_htlc_while_disconnected_dropping_mon_update` and must match exactly -
// `claim_htlc_while_disconnected_dropping_mon_update` would not work correctly if we
// do not get into this branch.
for pending_update in self.holding_cell_htlc_updates.iter() { for pending_update in self.holding_cell_htlc_updates.iter() {
match pending_update { match pending_update {
&HTLCUpdateAwaitingACK::ClaimHTLC { htlc_id, .. } => { &HTLCUpdateAwaitingACK::ClaimHTLC { htlc_id, .. } => {

View file

@ -6698,7 +6698,7 @@ impl<'a, Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
// payments which are still in-flight via their on-chain state. // payments which are still in-flight via their on-chain state.
// We only rebuild the pending payments map if we were most recently serialized by // We only rebuild the pending payments map if we were most recently serialized by
// 0.0.102+ // 0.0.102+
for (_, monitor) in args.channel_monitors { for (_, monitor) in args.channel_monitors.iter() {
if by_id.get(&monitor.get_funding_txo().0.to_channel_id()).is_none() { if by_id.get(&monitor.get_funding_txo().0.to_channel_id()).is_none() {
for (htlc_source, htlc) in monitor.get_pending_outbound_htlcs() { for (htlc_source, htlc) in monitor.get_pending_outbound_htlcs() {
if let HTLCSource::OutboundRoute { payment_id, session_priv, path, payment_secret, .. } = htlc_source { if let HTLCSource::OutboundRoute { payment_id, session_priv, path, payment_secret, .. } = htlc_source {
@ -6824,6 +6824,38 @@ impl<'a, Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
} }
} }
// Complete any claim that was only partially persisted before shutdown: if a
// ChannelMonitor already stores the preimage for a payment which is still present in
// `claimable_htlcs`, apply the claim to every HTLC of that payment.
for (_, monitor) in args.channel_monitors.iter() {
    for (payment_hash, payment_preimage) in monitor.get_stored_preimages() {
        // `remove` ensures each payment is handled at most once, even if several monitors
        // hold its preimage.
        if let Some(partially_claimed) = claimable_htlcs.remove(&payment_hash) {
            log_info!(args.logger, "Re-claiming HTLCs with payment hash {} due to partial-claim.", log_bytes!(payment_hash.0));
            for claimable_htlc in partially_claimed.1 {
                // Add a holding-cell claim of the payment to the Channel, which should be
                // applied ~immediately on peer reconnection. Because it won't generate a
                // new commitment transaction we can just provide the payment preimage to
                // the corresponding ChannelMonitor and nothing else.
                //
                // We do so directly instead of via the normal ChannelMonitor update
                // procedure as the ChainMonitor hasn't yet been initialized, implying
                // we're not allowed to call it directly yet. Further, we do the update
                // without incrementing the ChannelMonitor update ID as there isn't any
                // reason to.
                // If we were to generate a new ChannelMonitor update ID here and then
                // crash before the user finishes block connect we'd end up force-closing
                // this channel as well. On the flip side, there's no harm in restarting
                // without the new monitor persisted - we'll end up right back here on
                // restart.
                let previous_channel_id = claimable_htlc.prev_hop.outpoint.to_channel_id();
                if let Some(channel) = by_id.get_mut(&previous_channel_id) {
                    channel.claim_htlc_while_disconnected_dropping_mon_update(claimable_htlc.prev_hop.htlc_id, payment_preimage, &args.logger);
                }
                // The monitor is updated independently of the channel lookup above, so it
                // receives the preimage even when no matching entry exists in `by_id`.
                if let Some(previous_hop_monitor) = args.channel_monitors.get(&claimable_htlc.prev_hop.outpoint) {
                    previous_hop_monitor.provide_payment_preimage(&payment_hash, &payment_preimage, &args.tx_broadcaster, &args.fee_estimator, &args.logger);
                }
            }
        }
    }
}
let channel_manager = ChannelManager { let channel_manager = ChannelManager {
genesis_hash, genesis_hash,
fee_estimator: args.fee_estimator, fee_estimator: args.fee_estimator,

View file

@ -1476,7 +1476,7 @@ pub fn send_along_route_with_secret<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>,
payment_id payment_id
} }
pub fn pass_along_path<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_path: &[&Node<'a, 'b, 'c>], recv_value: u64, our_payment_hash: PaymentHash, our_payment_secret: Option<PaymentSecret>, ev: MessageSendEvent, payment_received_expected: bool, expected_preimage: Option<PaymentPreimage>) { pub fn do_pass_along_path<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_path: &[&Node<'a, 'b, 'c>], recv_value: u64, our_payment_hash: PaymentHash, our_payment_secret: Option<PaymentSecret>, ev: MessageSendEvent, payment_received_expected: bool, clear_recipient_events: bool, expected_preimage: Option<PaymentPreimage>) {
let mut payment_event = SendEvent::from_event(ev); let mut payment_event = SendEvent::from_event(ev);
let mut prev_node = origin_node; let mut prev_node = origin_node;
@ -1489,7 +1489,7 @@ pub fn pass_along_path<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_path
expect_pending_htlcs_forwardable!(node); expect_pending_htlcs_forwardable!(node);
if idx == expected_path.len() - 1 { if idx == expected_path.len() - 1 && clear_recipient_events {
let events_2 = node.node.get_and_clear_pending_events(); let events_2 = node.node.get_and_clear_pending_events();
if payment_received_expected { if payment_received_expected {
assert_eq!(events_2.len(), 1); assert_eq!(events_2.len(), 1);
@ -1513,7 +1513,7 @@ pub fn pass_along_path<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_path
} else { } else {
assert!(events_2.is_empty()); assert!(events_2.is_empty());
} }
} else { } else if idx != expected_path.len() - 1 {
let mut events_2 = node.node.get_and_clear_pending_msg_events(); let mut events_2 = node.node.get_and_clear_pending_msg_events();
assert_eq!(events_2.len(), 1); assert_eq!(events_2.len(), 1);
check_added_monitors!(node, 1); check_added_monitors!(node, 1);
@ -1525,6 +1525,10 @@ pub fn pass_along_path<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_path
} }
} }
/// Convenience wrapper around [`do_pass_along_path`] which always asks it to fetch and check
/// the final recipient's pending events (`clear_recipient_events = true`).
pub fn pass_along_path<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_path: &[&Node<'a, 'b, 'c>], recv_value: u64, our_payment_hash: PaymentHash, our_payment_secret: Option<PaymentSecret>, ev: MessageSendEvent, payment_received_expected: bool, expected_preimage: Option<PaymentPreimage>) {
    let clear_recipient_events = true;
    do_pass_along_path(
        origin_node, expected_path, recv_value, our_payment_hash, our_payment_secret, ev,
        payment_received_expected, clear_recipient_events, expected_preimage,
    );
}
pub fn pass_along_route<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_route: &[&[&Node<'a, 'b, 'c>]], recv_value: u64, our_payment_hash: PaymentHash, our_payment_secret: PaymentSecret) { pub fn pass_along_route<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_route: &[&[&Node<'a, 'b, 'c>]], recv_value: u64, our_payment_hash: PaymentHash, our_payment_secret: PaymentSecret) {
let mut events = origin_node.node.get_and_clear_pending_msg_events(); let mut events = origin_node.node.get_and_clear_pending_msg_events();
assert_eq!(events.len(), expected_route.len()); assert_eq!(events.len(), expected_route.len());

View file

@ -9843,6 +9843,186 @@ fn test_keysend_payments_to_private_node() {
claim_payment(&nodes[0], &path, test_preimage); claim_payment(&nodes[0], &path, test_preimage);
} }
/// Sends a two-part MPP payment from nodes[0] to nodes[3], has nodes[3] claim it, and then
/// restarts nodes[3] from a ChannelManager serialized *before* the claim.
///
/// When `persist_both_monitors` is `true` both of nodes[3]'s ChannelMonitors are serialized
/// after the claim; when `false` only the `chan_id_persisted` monitor is, and the
/// `chan_id_not_persisted` monitor is restored from its pre-claim copy.
fn do_test_partial_claim_before_restart(persist_both_monitors: bool) {
// Test what happens if a node receives an MPP payment, claims it, but crashes before
// persisting the ChannelManager. If `persist_both_monitors` is false, also crash after only
// updating one of the two channels' ChannelMonitors. As a result, on startup, we'll (a) still
// have the PaymentReceived event, (b) have one (or two) channel(s) that goes on chain with the
// HTLC preimage in them, and (c) optionally have one channel that is live off-chain but does
// not have the preimage tied to the still-pending HTLC.
//
// To get to the correct state, on startup we should propagate the preimage to the
// still-off-chain channel, claiming the HTLC as soon as the peer connects, with the monitor
// receiving the preimage without a state update.
let chanmon_cfgs = create_chanmon_cfgs(4);
let node_cfgs = create_node_cfgs(4, &chanmon_cfgs);
let node_chanmgrs = create_node_chanmgrs(4, &node_cfgs, &[None, None, None, None]);
// Declared up front (and assigned later) so these values outlive the `nodes` that will
// borrow them after the simulated restart.
let persister: test_utils::TestPersister;
let new_chain_monitor: test_utils::TestChainMonitor;
let nodes_3_deserialized: ChannelManager<EnforcingSigner, &test_utils::TestChainMonitor, &test_utils::TestBroadcaster, &test_utils::TestKeysInterface, &test_utils::TestFeeEstimator, &test_utils::TestLogger>;
let mut nodes = create_network(4, &node_cfgs, &node_chanmgrs);
// Topology: nodes[0] reaches nodes[3] over two disjoint two-hop paths, via nodes[1] and via
// nodes[2].
create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 100_000, 0, InitFeatures::known(), InitFeatures::known());
create_announced_chan_between_nodes_with_value(&nodes, 0, 2, 100_000, 0, InitFeatures::known(), InitFeatures::known());
let chan_id_persisted = create_announced_chan_between_nodes_with_value(&nodes, 1, 3, 100_000, 0, InitFeatures::known(), InitFeatures::known()).2;
let chan_id_not_persisted = create_announced_chan_between_nodes_with_value(&nodes, 2, 3, 100_000, 0, InitFeatures::known(), InitFeatures::known()).2;
// Create an MPP route for 15k sats, more than the default htlc-max of 10%
let (mut route, payment_hash, payment_preimage, payment_secret) = get_route_and_payment_hash!(nodes[0], nodes[3], 15_000_000);
assert_eq!(route.paths.len(), 2);
// The comparator below ignores its second argument; that is only adequate because exactly
// two paths exist (asserted above).
route.paths.sort_by(|path_a, _| {
// Sort the path so that the path through nodes[1] comes first
if path_a[0].pubkey == nodes[1].node.get_our_node_id() {
core::cmp::Ordering::Less } else { core::cmp::Ordering::Greater }
});
nodes[0].node.send_payment(&route, payment_hash, &Some(payment_secret)).unwrap();
check_added_monitors!(nodes[0], 2);
// Send the payment through to nodes[3] *without* clearing the PaymentReceived event
// (`payment_received_expected` = true, `clear_recipient_events` = false).
let mut send_events = nodes[0].node.get_and_clear_pending_msg_events();
assert_eq!(send_events.len(), 2);
do_pass_along_path(&nodes[0], &[&nodes[1], &nodes[3]], 15_000_000, payment_hash, Some(payment_secret), send_events[0].clone(), true, false, None);
do_pass_along_path(&nodes[0], &[&nodes[2], &nodes[3]], 15_000_000, payment_hash, Some(payment_secret), send_events[1].clone(), true, false, None);
// Now that we have an MPP payment pending, get the latest encoded copies of nodes[3]'s
// monitors and ChannelManager, for use later, if we don't want to persist both monitors.
let mut original_monitor = test_utils::TestVecWriter(Vec::new());
if !persist_both_monitors {
// Capture the pre-claim state of the monitor which will "miss" the claim.
for outpoint in nodes[3].chain_monitor.chain_monitor.list_monitors() {
if outpoint.to_channel_id() == chan_id_not_persisted {
assert!(original_monitor.0.is_empty());
nodes[3].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap().write(&mut original_monitor).unwrap();
}
}
}
// The ChannelManager is always serialized before `claim_funds` is called below.
let mut original_manager = test_utils::TestVecWriter(Vec::new());
nodes[3].node.write(&mut original_manager).unwrap();
expect_payment_received!(nodes[3], payment_hash, payment_secret, 15_000_000);
nodes[3].node.claim_funds(payment_preimage);
check_added_monitors!(nodes[3], 2);
// Now fetch one of the two updated ChannelMonitors from nodes[3], and restart pretending we
// crashed in between the two persistence calls - using one old ChannelMonitor and one new one,
// with the old ChannelManager.
let mut updated_monitor = test_utils::TestVecWriter(Vec::new());
for outpoint in nodes[3].chain_monitor.chain_monitor.list_monitors() {
if outpoint.to_channel_id() == chan_id_persisted {
assert!(updated_monitor.0.is_empty());
nodes[3].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap().write(&mut updated_monitor).unwrap();
}
}
// If `persist_both_monitors` is set, get the second monitor here as well
// (in that case `original_monitor` holds post-claim data despite its name).
if persist_both_monitors {
for outpoint in nodes[3].chain_monitor.chain_monitor.list_monitors() {
if outpoint.to_channel_id() == chan_id_not_persisted {
assert!(original_monitor.0.is_empty());
nodes[3].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap().write(&mut original_monitor).unwrap();
}
}
}
// Now restart nodes[3].
persister = test_utils::TestPersister::new();
let keys_manager = &chanmon_cfgs[3].keys_manager;
new_chain_monitor = test_utils::TestChainMonitor::new(Some(nodes[3].chain_source), nodes[3].tx_broadcaster.clone(), nodes[3].logger, node_cfgs[3].fee_estimator, &persister, keys_manager);
nodes[3].chain_monitor = &new_chain_monitor;
// Deserialize both saved monitors; each serialized blob decodes to a (BlockHash, monitor) pair.
let mut monitors = Vec::new();
for mut monitor_data in [original_monitor, updated_monitor].iter() {
let (_, mut deserialized_monitor) = <(BlockHash, ChannelMonitor<EnforcingSigner>)>::read(&mut &monitor_data.0[..], keys_manager).unwrap();
monitors.push(deserialized_monitor);
}
let config = UserConfig::default();
// Rebuild the ChannelManager from the pre-claim serialization, handing it the monitors keyed
// by funding outpoint.
nodes_3_deserialized = {
let mut channel_monitors = HashMap::new();
for monitor in monitors.iter_mut() {
channel_monitors.insert(monitor.get_funding_txo().0, monitor);
}
<(BlockHash, ChannelManager<EnforcingSigner, &test_utils::TestChainMonitor, &test_utils::TestBroadcaster, &test_utils::TestKeysInterface, &test_utils::TestFeeEstimator, &test_utils::TestLogger>)>::read(&mut &original_manager.0[..], ChannelManagerReadArgs {
default_config: config,
keys_manager,
fee_estimator: node_cfgs[3].fee_estimator,
chain_monitor: nodes[3].chain_monitor,
tx_broadcaster: nodes[3].tx_broadcaster.clone(),
logger: nodes[3].logger,
channel_monitors,
}).unwrap().1
};
nodes[3].node = &nodes_3_deserialized;
for monitor in monitors {
// On startup the preimage should have been copied into the non-persisted monitor:
assert!(monitor.get_stored_preimages().contains_key(&payment_hash));
nodes[3].chain_monitor.watch_channel(monitor.get_funding_txo().0.clone(), monitor).unwrap();
}
check_added_monitors!(nodes[3], 2);
// Tell nodes[3]'s two channel counterparties that it has gone away.
nodes[1].node.peer_disconnected(&nodes[3].node.get_our_node_id(), false);
nodes[2].node.peer_disconnected(&nodes[3].node.get_our_node_id(), false);
// During deserialization, we should have closed one channel and broadcast its latest
// commitment transaction. We should also still have the original PaymentReceived event we
// never finished processing.
// (With both monitors persisted, two ChannelClosed events are expected instead of one.)
let events = nodes[3].node.get_and_clear_pending_events();
assert_eq!(events.len(), if persist_both_monitors { 3 } else { 2 });
if let Event::PaymentReceived { amt: 15_000_000, .. } = events[0] { } else { panic!(); }
if let Event::ChannelClosed { reason: ClosureReason::OutdatedChannelManager, .. } = events[1] { } else { panic!(); }
if persist_both_monitors {
if let Event::ChannelClosed { reason: ClosureReason::OutdatedChannelManager, .. } = events[2] { } else { panic!(); }
}
assert_eq!(nodes[3].node.list_channels().len(), if persist_both_monitors { 0 } else { 1 });
if !persist_both_monitors {
// If one of the two channels is still live, reveal the payment preimage over it.
nodes[3].node.peer_connected(&nodes[2].node.get_our_node_id(), &msgs::Init { features: InitFeatures::empty(), remote_network_address: None });
let reestablish_1 = get_chan_reestablish_msgs!(nodes[3], nodes[2]);
nodes[2].node.peer_connected(&nodes[3].node.get_our_node_id(), &msgs::Init { features: InitFeatures::empty(), remote_network_address: None });
let reestablish_2 = get_chan_reestablish_msgs!(nodes[2], nodes[3]);
nodes[2].node.handle_channel_reestablish(&nodes[3].node.get_our_node_id(), &reestablish_1[0]);
get_event_msg!(nodes[2], MessageSendEvent::SendChannelUpdate, nodes[3].node.get_our_node_id());
assert!(nodes[2].node.get_and_clear_pending_msg_events().is_empty());
nodes[3].node.handle_channel_reestablish(&nodes[2].node.get_our_node_id(), &reestablish_2[0]);
// Once we call `get_and_clear_pending_msg_events` the holding cell is cleared and the HTLC
// claim should fly.
let ds_msgs = nodes[3].node.get_and_clear_pending_msg_events();
check_added_monitors!(nodes[3], 1);
assert_eq!(ds_msgs.len(), 2);
if let MessageSendEvent::SendChannelUpdate { .. } = ds_msgs[1] {} else { panic!(); }
// Relay the fulfill from nodes[3] to nodes[2], and collect the updates nodes[2] then
// generates for nodes[0].
let cs_updates = match ds_msgs[0] {
MessageSendEvent::UpdateHTLCs { ref updates, .. } => {
nodes[2].node.handle_update_fulfill_htlc(&nodes[3].node.get_our_node_id(), &updates.update_fulfill_htlcs[0]);
check_added_monitors!(nodes[2], 1);
let cs_updates = get_htlc_update_msgs!(nodes[2], nodes[0].node.get_our_node_id());
expect_payment_forwarded!(nodes[2], nodes[0], nodes[3], Some(1000), false, false);
commitment_signed_dance!(nodes[2], nodes[3], updates.commitment_signed, false, true);
cs_updates
}
_ => panic!(),
};
// Finally deliver the fulfill to the original sender, which should see the payment as sent.
nodes[0].node.handle_update_fulfill_htlc(&nodes[2].node.get_our_node_id(), &cs_updates.update_fulfill_htlcs[0]);
commitment_signed_dance!(nodes[0], nodes[2], cs_updates.commitment_signed, false, true);
expect_payment_sent!(nodes[0], payment_preimage);
}
}
// Runs the partial-claim restart scenario first with only one ChannelMonitor persisted after
// the claim, then with both persisted.
#[test]
fn test_partial_claim_before_restart() {
    for &persist_both_monitors in [false, true].iter() {
        do_test_partial_claim_before_restart(persist_both_monitors);
    }
}
/// The possible events which may trigger a `max_dust_htlc_exposure` breach /// The possible events which may trigger a `max_dust_htlc_exposure` breach
#[derive(Clone, Copy, PartialEq)] #[derive(Clone, Copy, PartialEq)]
enum ExposureEvent { enum ExposureEvent {