From 10580f55aa64a94dac47a0a90756b3499a742e39 Mon Sep 17 00:00:00 2001
From: Matt Corallo
Date: Mon, 25 Oct 2021 21:51:08 +0000
Subject: [PATCH 1/7] Correct send-bounding logic in `TestRoutingMessageHandler`

The `cmp::min` appeared to confuse `end` for a count.
---
 lightning/src/util/test_utils.rs | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/lightning/src/util/test_utils.rs b/lightning/src/util/test_utils.rs
index 15eaa7d46..501afe8b7 100644
--- a/lightning/src/util/test_utils.rs
+++ b/lightning/src/util/test_utils.rs
@@ -311,7 +311,6 @@ fn get_dummy_channel_update(short_chan_id: u64) -> msgs::ChannelUpdate {
 pub struct TestRoutingMessageHandler {
 	pub chan_upds_recvd: AtomicUsize,
 	pub chan_anns_recvd: AtomicUsize,
-	pub chan_anns_sent: AtomicUsize,
 	pub request_full_sync: AtomicBool,
 }
 
@@ -320,7 +319,6 @@ impl TestRoutingMessageHandler {
 		TestRoutingMessageHandler {
 			chan_upds_recvd: AtomicUsize::new(0),
 			chan_anns_recvd: AtomicUsize::new(0),
-			chan_anns_sent: AtomicUsize::new(0),
 			request_full_sync: AtomicBool::new(false),
 		}
 	}
@@ -339,8 +337,8 @@ impl msgs::RoutingMessageHandler for TestRoutingMessageHandler {
 	}
 	fn get_next_channel_announcements(&self, starting_point: u64, batch_amount: u8) -> Vec<(msgs::ChannelAnnouncement, Option<msgs::ChannelUpdate>, Option<msgs::ChannelUpdate>)> {
 		let mut chan_anns = Vec::new();
-		const TOTAL_UPDS: u64 = 100;
-		let end: u64 = cmp::min(starting_point + batch_amount as u64, TOTAL_UPDS - self.chan_anns_sent.load(Ordering::Acquire) as u64);
+		const TOTAL_UPDS: u64 = 50;
+		let end: u64 = cmp::min(starting_point + batch_amount as u64, TOTAL_UPDS);
 		for i in starting_point..end {
 			let chan_upd_1 = get_dummy_channel_update(i);
 			let chan_upd_2 = get_dummy_channel_update(i);
@@ -349,7 +347,6 @@ impl msgs::RoutingMessageHandler for TestRoutingMessageHandler {
 			chan_anns.push((chan_ann, Some(chan_upd_1), Some(chan_upd_2)));
 		}
 
-		self.chan_anns_sent.fetch_add(chan_anns.len(), Ordering::AcqRel);
 		chan_anns
 	}
 

From 4a58e9ad835d93b9efac106ba56b6b9052a63135 Mon Sep 17 00:00:00 2001
From: Matt Corallo
Date: Tue, 26 Oct 2021 02:03:02 +0000
Subject: [PATCH 2/7] Add `PeerManager::disconnect_all_peers` to avoid
 complexity in BP

In the coming commits simply calling `timer_tick_occurred` will no longer
disconnect all peers, so it's helpful to have a utility method.
---
 lightning-background-processor/src/lib.rs |  6 ++----
 lightning/src/ln/peer_handler.rs          | 17 +++++++++++++++++
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/lightning-background-processor/src/lib.rs b/lightning-background-processor/src/lib.rs
index e38a4a975..902bef6af 100644
--- a/lightning-background-processor/src/lib.rs
+++ b/lightning-background-processor/src/lib.rs
@@ -14,8 +14,7 @@ use lightning::chain::chainmonitor::{ChainMonitor, Persist};
 use lightning::chain::keysinterface::{Sign, KeysInterface};
 use lightning::ln::channelmanager::ChannelManager;
 use lightning::ln::msgs::{ChannelMessageHandler, RoutingMessageHandler};
-use lightning::ln::peer_handler::{PeerManager, SocketDescriptor};
-use lightning::ln::peer_handler::CustomMessageHandler;
+use lightning::ln::peer_handler::{CustomMessageHandler, PeerManager, SocketDescriptor};
 use lightning::routing::network_graph::NetGraphMsgHandler;
 use lightning::util::events::{Event, EventHandler, EventsProvider};
 use lightning::util::logger::Logger;
@@ -236,8 +235,7 @@ impl BackgroundProcessor {
 				// timer, we should have disconnected all sockets by now (and they're probably
 				// dead anyway), so disconnect them by calling `timer_tick_occurred()` twice.
log_trace!(logger, "Awoke after more than double our ping timer, disconnecting peers."); - peer_manager.timer_tick_occurred(); - peer_manager.timer_tick_occurred(); + peer_manager.disconnect_all_peers(); last_ping_call = Instant::now(); } else if last_ping_call.elapsed().as_secs() > PING_TIMER { log_trace!(logger, "Calling PeerManager's timer_tick_occurred"); diff --git a/lightning/src/ln/peer_handler.rs b/lightning/src/ln/peer_handler.rs index 1815d4a35..74a6676d3 100644 --- a/lightning/src/ln/peer_handler.rs +++ b/lightning/src/ln/peer_handler.rs @@ -1431,6 +1431,23 @@ impl P } } + /// Disconnects all currently-connected peers. This is useful on platforms where there may be + /// an indication that TCP sockets have stalled even if we weren't around to time them out + /// using regular ping/pongs. + pub fn disconnect_all_peers(&self) { + let mut peers_lock = self.peers.lock().unwrap(); + let peers = &mut *peers_lock; + for (mut descriptor, peer) in peers.peers.drain() { + if let Some(node_id) = peer.their_node_id { + log_trace!(self.logger, "Disconnecting peer with id {} due to client request to disconnect all peers", node_id); + peers.node_id_to_descriptor.remove(&node_id); + self.message_handler.chan_handler.peer_disconnected(&node_id, false); + } + descriptor.disconnect_socket(); + } + debug_assert!(peers.node_id_to_descriptor.is_empty()); + } + /// Send pings to each peer and disconnect those which did not respond to the last round of /// pings. /// From e496c9beb69739386229767024fa8b3062690a31 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Mon, 25 Oct 2021 17:31:34 +0000 Subject: [PATCH 3/7] Constify the ratio in buf limits between forward and init sync msgs --- lightning/src/ln/peer_handler.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lightning/src/ln/peer_handler.rs b/lightning/src/ln/peer_handler.rs index 74a6676d3..38308bb8c 100644 --- a/lightning/src/ln/peer_handler.rs +++ b/lightning/src/ln/peer_handler.rs @@ -285,6 +285,10 @@ enum InitSyncTracker{ NodesSyncing(PublicKey), } +/// The ratio between buffer sizes at which we stop sending initial sync messages vs when we stop +/// forwarding gossip messages to peers altogether. +const FORWARD_INIT_SYNC_BUFFER_LIMIT_RATIO: usize = 2; + /// When the outbound buffer has this many messages, we'll stop reading bytes from the peer until /// we have fewer than this many messages in the outbound buffer again. /// We also use this as the target number of outbound gossip messages to keep in the write buffer, @@ -292,7 +296,7 @@ enum InitSyncTracker{ const OUTBOUND_BUFFER_LIMIT_READ_PAUSE: usize = 10; /// When the outbound buffer has this many messages, we'll simply skip relaying gossip messages to /// the peer. -const OUTBOUND_BUFFER_LIMIT_DROP_GOSSIP: usize = 20; +const OUTBOUND_BUFFER_LIMIT_DROP_GOSSIP: usize = OUTBOUND_BUFFER_LIMIT_READ_PAUSE * FORWARD_INIT_SYNC_BUFFER_LIMIT_RATIO; struct Peer { channel_encryptor: PeerChannelEncryptor, From 3f9a7de188a301acc222671176d152c4d3c46248 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Thu, 28 Oct 2021 17:43:58 +0000 Subject: [PATCH 4/7] Util-ify enqueueing an encoded message in peer_handler This marginally simplifies coming commits. 
---
 lightning/src/ln/peer_handler.rs | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/lightning/src/ln/peer_handler.rs b/lightning/src/ln/peer_handler.rs
index 38308bb8c..758e16b5a 100644
--- a/lightning/src/ln/peer_handler.rs
+++ b/lightning/src/ln/peer_handler.rs
@@ -722,14 +722,17 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 		}
 	}
 
-	/// Append a message to a peer's pending outbound/write buffer, and update the map of peers needing sends accordingly.
+	/// Append a message to a peer's pending outbound/write buffer
+	fn enqueue_encoded_message(&self, peer: &mut Peer, encoded_message: &Vec<u8>) {
+		peer.pending_outbound_buffer.push_back(peer.channel_encryptor.encrypt_message(&encoded_message[..]));
+	}
+
+	/// Append a message to a peer's pending outbound/write buffer
 	fn enqueue_message<M: wire::Type>(&self, peer: &mut Peer, message: &M) {
 		let mut buffer = VecWriter(Vec::with_capacity(2048));
 		wire::write(message, &mut buffer).unwrap(); // crash if the write failed
-		let encoded_message = buffer.0;
-
 		log_trace!(self.logger, "Enqueueing message {:?} to {}", message, log_pubkey!(peer.their_node_id.unwrap()));
-		peer.pending_outbound_buffer.push_back(peer.channel_encryptor.encrypt_message(&encoded_message[..]));
+		self.enqueue_encoded_message(peer, &buffer.0);
 	}
 
 	fn do_read_event(&self, peer_descriptor: &mut Descriptor, data: &[u8]) -> Result<bool, PeerHandleError> {
@@ -1118,7 +1121,7 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 					if except_node.is_some() && peer.their_node_id.as_ref() == except_node {
 						continue;
 					}
-					peer.pending_outbound_buffer.push_back(peer.channel_encryptor.encrypt_message(&encoded_msg[..]));
+					self.enqueue_encoded_message(peer, &encoded_msg);
 				}
 			},
 			wire::Message::NodeAnnouncement(ref msg) => {
@@ -1140,7 +1143,7 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 					if except_node.is_some() && peer.their_node_id.as_ref() == except_node {
 						continue;
 					}
-					peer.pending_outbound_buffer.push_back(peer.channel_encryptor.encrypt_message(&encoded_msg[..]));
+					self.enqueue_encoded_message(peer, &encoded_msg);
 				}
 			},
 			wire::Message::ChannelUpdate(ref msg) => {
@@ -1159,7 +1162,7 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 					if except_node.is_some() && peer.their_node_id.as_ref() == except_node {
 						continue;
 					}
-					peer.pending_outbound_buffer.push_back(peer.channel_encryptor.encrypt_message(&encoded_msg[..]));
+					self.enqueue_encoded_message(peer, &encoded_msg);
 				}
 			},
 			_ => debug_assert!(false, "We shouldn't attempt to forward anything but gossip messages"),

From ed4a39fe1e7381d4eb8ac9e5b3a178cf047888dc Mon Sep 17 00:00:00 2001
From: Matt Corallo
Date: Mon, 11 Oct 2021 04:24:08 +0000
Subject: [PATCH 5/7] Give peers which are sending us messages longer to
 respond to ping

See comment for rationale.
---
 lightning/src/ln/peer_handler.rs | 120 +++++++++++++++++++++++++------
 1 file changed, 100 insertions(+), 20 deletions(-)

diff --git a/lightning/src/ln/peer_handler.rs b/lightning/src/ln/peer_handler.rs
index 758e16b5a..0dbad0a31 100644
--- a/lightning/src/ln/peer_handler.rs
+++ b/lightning/src/ln/peer_handler.rs
@@ -298,6 +298,28 @@ const OUTBOUND_BUFFER_LIMIT_READ_PAUSE: usize = 10;
 /// the peer.
 const OUTBOUND_BUFFER_LIMIT_DROP_GOSSIP: usize = OUTBOUND_BUFFER_LIMIT_READ_PAUSE * FORWARD_INIT_SYNC_BUFFER_LIMIT_RATIO;
 
+/// If we've sent a ping, and are still awaiting a response, we may need to churn our way through
+/// the socket receive buffer before receiving the pong.
+///
+/// On a fairly old Arm64 board, with Linux defaults, this can take as long as 20 seconds, not
+/// including any network delays, outbound traffic, or the same for messages from other peers.
+///
+/// Thus, to avoid needlessly disconnecting a peer, we allow a peer to take this many timer ticks
+/// per connected peer to respond to a ping, as long as they send us at least one message during
+/// each tick, ensuring we aren't actually just disconnected.
+/// With a timer tick interval of five seconds, this translates to about 30 seconds per connected
+/// peer.
+///
+/// When we improve parallelism somewhat we should reduce this to e.g. this many timer ticks per
+/// two connected peers, assuming most LDK-running systems have at least two cores.
+const MAX_BUFFER_DRAIN_TICK_INTERVALS_PER_PEER: i8 = 6;
+
+/// This is the minimum number of messages we expect a peer to be able to handle within one timer
+/// tick. Once we have sent this many messages since the last ping, we send a ping right away to
+/// ensure we don't just fill up our send buffer and leave the peer with too many messages to
+/// process before the next ping.
+const BUFFER_DRAIN_MSGS_PER_TICK: usize = 32;
+
 struct Peer {
 	channel_encryptor: PeerChannelEncryptor,
 	their_node_id: Option<PublicKey>,
@@ -313,7 +335,9 @@ struct Peer {
 
 	sync_status: InitSyncTracker,
 
-	awaiting_pong: bool,
+	msgs_sent_since_pong: usize,
+	awaiting_pong_timer_tick_intervals: i8,
+	received_message_since_timer_tick: bool,
 }
 
 impl Peer {
@@ -555,7 +579,9 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 
 			sync_status: InitSyncTracker::NoSyncRequested,
 
-			awaiting_pong: false,
+			msgs_sent_since_pong: 0,
+			awaiting_pong_timer_tick_intervals: 0,
+			received_message_since_timer_tick: false,
 		}).is_some() {
 			panic!("PeerManager driver duplicated descriptors!");
 		};
@@ -593,7 +619,9 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 
 			sync_status: InitSyncTracker::NoSyncRequested,
 
-			awaiting_pong: false,
+			msgs_sent_since_pong: 0,
+			awaiting_pong_timer_tick_intervals: 0,
+			received_message_since_timer_tick: false,
 		}).is_some() {
 			panic!("PeerManager driver duplicated descriptors!");
 		};
@@ -602,7 +630,7 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 
 	fn do_attempt_write_data(&self, descriptor: &mut Descriptor, peer: &mut Peer) {
 		while !peer.awaiting_write_event {
-			if peer.pending_outbound_buffer.len() < OUTBOUND_BUFFER_LIMIT_READ_PAUSE {
+			if peer.pending_outbound_buffer.len() < OUTBOUND_BUFFER_LIMIT_READ_PAUSE && peer.msgs_sent_since_pong < BUFFER_DRAIN_MSGS_PER_TICK {
 				match peer.sync_status {
 					InitSyncTracker::NoSyncRequested => {},
 					InitSyncTracker::ChannelsSyncing(c) if c < 0xffff_ffff_ffff_ffff => {
@@ -647,6 +675,9 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 				},
 			}
 		}
+		if peer.msgs_sent_since_pong >= BUFFER_DRAIN_MSGS_PER_TICK {
+			self.maybe_send_extra_ping(peer);
+		}
 
 		if {
 			let next_buff = match peer.pending_outbound_buffer.front() {
@@ -724,6 +755,7 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 
 	/// Append a message to a peer's pending outbound/write buffer
 	fn enqueue_encoded_message(&self, peer: &mut Peer, encoded_message: &Vec<u8>) {
+		peer.msgs_sent_since_pong += 1;
 		peer.pending_outbound_buffer.push_back(peer.channel_encryptor.encrypt_message(&encoded_message[..]));
 	}
 
@@ -926,6 +958,7 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 		message: wire::Message<<<CMH as Deref>::Target as wire::CustomMessageReader>::CustomMessage>
 	) -> Result<Option<wire::Message<<<CMH as Deref>::Target as wire::CustomMessageReader>::CustomMessage>>, MessageHandlingError> {
 		log_trace!(self.logger, "Received message {:?} from {}", message, log_pubkey!(peer.their_node_id.unwrap()));
+		peer.received_message_since_timer_tick = true;
 
 		// Need an Init as first message
 		if let wire::Message::Init(_) = message {
@@ -989,7 +1022,8 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 				}
 			},
 			wire::Message::Pong(_msg) => {
-				peer.awaiting_pong = false;
+				peer.awaiting_pong_timer_tick_intervals = 0;
+				peer.msgs_sent_since_pong = 0;
 			},
 
 			// Channel messages:
@@ -1110,7 +1144,9 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 						!peer.should_forward_channel_announcement(msg.contents.short_channel_id) {
 					continue
 				}
-				if peer.pending_outbound_buffer.len() > OUTBOUND_BUFFER_LIMIT_DROP_GOSSIP {
+				if peer.pending_outbound_buffer.len() > OUTBOUND_BUFFER_LIMIT_DROP_GOSSIP
+					|| peer.msgs_sent_since_pong > BUFFER_DRAIN_MSGS_PER_TICK * FORWARD_INIT_SYNC_BUFFER_LIMIT_RATIO
+				{
 					log_trace!(self.logger, "Skipping broadcast message to {:?} as its outbound buffer is full", peer.their_node_id);
 					continue;
 				}
@@ -1133,7 +1169,9 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 						!peer.should_forward_node_announcement(msg.contents.node_id) {
 					continue
 				}
-				if peer.pending_outbound_buffer.len() > OUTBOUND_BUFFER_LIMIT_DROP_GOSSIP {
+				if peer.pending_outbound_buffer.len() > OUTBOUND_BUFFER_LIMIT_DROP_GOSSIP
+					|| peer.msgs_sent_since_pong > BUFFER_DRAIN_MSGS_PER_TICK * FORWARD_INIT_SYNC_BUFFER_LIMIT_RATIO
+				{
 					log_trace!(self.logger, "Skipping broadcast message to {:?} as its outbound buffer is full", peer.their_node_id);
 					continue;
 				}
@@ -1155,7 +1193,9 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 						!peer.should_forward_channel_announcement(msg.contents.short_channel_id) {
 					continue
 				}
-				if peer.pending_outbound_buffer.len() > OUTBOUND_BUFFER_LIMIT_DROP_GOSSIP {
+				if peer.pending_outbound_buffer.len() > OUTBOUND_BUFFER_LIMIT_DROP_GOSSIP
+					|| peer.msgs_sent_since_pong > BUFFER_DRAIN_MSGS_PER_TICK * FORWARD_INIT_SYNC_BUFFER_LIMIT_RATIO
+				{
 					log_trace!(self.logger, "Skipping broadcast message to {:?} as its outbound buffer is full", peer.their_node_id);
 					continue;
 				}
@@ -1455,6 +1495,20 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 		debug_assert!(peers.node_id_to_descriptor.is_empty());
 	}
 
+	/// This is called when we're blocked on sending additional gossip messages until we receive a
+	/// pong. If we aren't waiting on a pong, we take this opportunity to send a ping (setting
+	/// `awaiting_pong_timer_tick_intervals` to a special flag value to indicate this).
+	fn maybe_send_extra_ping(&self, peer: &mut Peer) {
+		if peer.awaiting_pong_timer_tick_intervals == 0 {
+			peer.awaiting_pong_timer_tick_intervals = -1;
+			let ping = msgs::Ping {
+				ponglen: 0,
+				byteslen: 64,
+			};
+			self.enqueue_message(peer, &ping);
+		}
+	}
+
 	/// Send pings to each peer and disconnect those which did not respond to the last round of
 	/// pings.
 	///
@@ -1473,9 +1527,18 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 			let node_id_to_descriptor = &mut peers.node_id_to_descriptor;
 			let peers = &mut peers.peers;
 			let mut descriptors_needing_disconnect = Vec::new();
+			let peer_count = peers.len();
 
 			peers.retain(|descriptor, peer| {
-				if peer.awaiting_pong {
+				if !peer.channel_encryptor.is_ready_for_encryption() {
+					// The peer needs to complete its handshake before we can exchange messages
+					return true;
+				}
+
+				if (peer.awaiting_pong_timer_tick_intervals > 0 && !peer.received_message_since_timer_tick)
+					|| peer.awaiting_pong_timer_tick_intervals as u64 >
+						MAX_BUFFER_DRAIN_TICK_INTERVALS_PER_PEER as u64 * peer_count as u64
+				{
 					descriptors_needing_disconnect.push(descriptor.clone());
 					match peer.their_node_id {
 						Some(node_id) => {
@@ -1492,21 +1555,26 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 					return false;
 				}
 
-				if !peer.channel_encryptor.is_ready_for_encryption() {
-					// The peer needs to complete its handshake before we can exchange messages
+				peer.received_message_since_timer_tick = false;
+				if peer.awaiting_pong_timer_tick_intervals == -1 {
+					// Magic value set in `maybe_send_extra_ping`.
+					peer.awaiting_pong_timer_tick_intervals = 1;
 					return true;
 				}
 
+				if peer.awaiting_pong_timer_tick_intervals > 0 {
+					peer.awaiting_pong_timer_tick_intervals += 1;
+					return true;
+				}
+
+				peer.awaiting_pong_timer_tick_intervals = 1;
 				let ping = msgs::Ping {
 					ponglen: 0,
 					byteslen: 64,
 				};
 				self.enqueue_message(peer, &ping);
+				self.do_attempt_write_data(&mut (descriptor.clone()), &mut *peer);
 
-				let mut descriptor_clone = descriptor.clone();
-				self.do_attempt_write_data(&mut descriptor_clone, peer);
-
-				peer.awaiting_pong = true;
 				true
 			});
 
@@ -1665,11 +1733,23 @@ mod tests {
 		// than can fit into a peer's buffer).
 		let (mut fd_a, mut fd_b) = establish_connection(&peers[0], &peers[1]);
 
-		// Make each peer to read the messages that the other peer just wrote to them.
-		peers[0].process_events();
-		peers[1].read_event(&mut fd_b, &fd_a.outbound_data.lock().unwrap().split_off(0)).unwrap();
-		peers[1].process_events();
-		peers[0].read_event(&mut fd_a, &fd_b.outbound_data.lock().unwrap().split_off(0)).unwrap();
+		// Make each peer read the messages that the other peer just wrote to them. Note that
+		// due to the max-messages-before-ping limits this may take a few iterations to complete.
+		for _ in 0..150/super::BUFFER_DRAIN_MSGS_PER_TICK + 1 {
+			peers[0].process_events();
+			let b_read_data = fd_a.outbound_data.lock().unwrap().split_off(0);
+			assert!(!b_read_data.is_empty());
+
+			peers[1].read_event(&mut fd_b, &b_read_data).unwrap();
+			peers[1].process_events();
+
+			let a_read_data = fd_b.outbound_data.lock().unwrap().split_off(0);
+			assert!(!a_read_data.is_empty());
+			peers[0].read_event(&mut fd_a, &a_read_data).unwrap();
+
+			peers[1].process_events();
+			assert_eq!(fd_b.outbound_data.lock().unwrap().len(), 0, "Until B receives data, it shouldn't send more messages");
+		}
 
 		// Check that each peer has received the expected number of channel updates and channel
 		// announcements.

From be123f7d228197a0a9f9e48c4e4e31fa2ecbeca9 Mon Sep 17 00:00:00 2001
From: Matt Corallo
Date: Thu, 21 Oct 2021 22:33:42 +0000
Subject: [PATCH 6/7] Give peers one timer tick to finish handshake before
 disconnecting

This ensures we don't let a hung connection stick around forever if the
peer never completes the initial handshake.

This also resolves a race where, on receiving a second connection from a
peer, we may reset their_node_id to None to prevent sending messages even
though the `channel_encryptor` reports `is_ready_for_encryption()`. Sending
pings only checks the `channel_encryptor` status, not `their_node_id`,
resulting in an `unwrap` on `None` in `enqueue_message`.
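A rough standalone sketch of the tick accounting this commit introduces
(field names mirror the diff below, but the types are simplified stand-ins;
in the real code the counter is also reset to 0 when the handshake finishes):

    // Standalone sketch: `Peer` reduced to the two fields that matter here.
    struct Peer {
        handshake_complete: bool,
        // Post-handshake: ticks spent awaiting a pong. Pre-handshake (this
        // commit): ticks spent awaiting handshake completion.
        awaiting_pong_timer_tick_intervals: i8,
    }

    // Returns `false` if the peer should be dropped, mirroring the closure
    // passed to `peers.retain(..)` in the diff below.
    fn retain_on_tick(peer: &mut Peer) -> bool {
        if !peer.handshake_complete {
            if peer.awaiting_pong_timer_tick_intervals != 0 {
                // A full tick already elapsed with no handshake: disconnect.
                return false;
            }
            peer.awaiting_pong_timer_tick_intervals = 1;
            return true;
        }
        // ...regular ping/pong accounting runs here once the handshake is
        // complete...
        true
    }

    fn main() {
        let mut peer = Peer { handshake_complete: false, awaiting_pong_timer_tick_intervals: 0 };
        assert!(retain_on_tick(&mut peer));  // first tick: start counting
        assert!(!retain_on_tick(&mut peer)); // second tick: still no handshake
    }

The first tick after connect arms the counter; a second tick with the
handshake still incomplete drops the peer, which is exactly what the new
`test_handshake_timeout` below exercises.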
---
 lightning/src/ln/peer_handler.rs | 70 +++++++++++++++++++++++++-------
 1 file changed, 56 insertions(+), 14 deletions(-)

diff --git a/lightning/src/ln/peer_handler.rs b/lightning/src/ln/peer_handler.rs
index 0dbad0a31..e1da618ec 100644
--- a/lightning/src/ln/peer_handler.rs
+++ b/lightning/src/ln/peer_handler.rs
@@ -860,6 +860,7 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 							let features = InitFeatures::known();
 							let resp = msgs::Init { features };
 							self.enqueue_message(peer, &resp);
+							peer.awaiting_pong_timer_tick_intervals = 0;
 						},
 						NextNoiseStep::ActThree => {
 							let their_node_id = try_potential_handleerror!(peer.channel_encryptor.process_act_three(&peer.pending_read_buffer[..]));
@@ -870,6 +871,7 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 							let features = InitFeatures::known();
 							let resp = msgs::Init { features };
 							self.enqueue_message(peer, &resp);
+							peer.awaiting_pong_timer_tick_intervals = 0;
 						},
 						NextNoiseStep::NoiseComplete => {
 							if peer.pending_read_is_header {
@@ -1530,12 +1532,29 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 			let peer_count = peers.len();
 
 			peers.retain(|descriptor, peer| {
-				if !peer.channel_encryptor.is_ready_for_encryption() {
-					// The peer needs to complete its handshake before we can exchange messages
+				let mut do_disconnect_peer = false;
+				if !peer.channel_encryptor.is_ready_for_encryption() || peer.their_node_id.is_none() {
+					// The peer needs to complete its handshake before we can exchange messages. We
+					// give peers one timer tick to complete handshake, reusing
+					// `awaiting_pong_timer_tick_intervals` to track number of timer ticks taken
+					// for handshake completion.
+					if peer.awaiting_pong_timer_tick_intervals != 0 {
+						do_disconnect_peer = true;
+					} else {
+						peer.awaiting_pong_timer_tick_intervals = 1;
+						return true;
+					}
+				}
+
+				if peer.awaiting_pong_timer_tick_intervals == -1 {
+					// Magic value set in `maybe_send_extra_ping`.
+					peer.awaiting_pong_timer_tick_intervals = 1;
+					peer.received_message_since_timer_tick = false;
 					return true;
 				}
 
-				if (peer.awaiting_pong_timer_tick_intervals > 0 && !peer.received_message_since_timer_tick)
+				if do_disconnect_peer
+					|| (peer.awaiting_pong_timer_tick_intervals > 0 && !peer.received_message_since_timer_tick)
 					|| peer.awaiting_pong_timer_tick_intervals as u64 >
 						MAX_BUFFER_DRAIN_TICK_INTERVALS_PER_PEER as u64 * peer_count as u64
 				{
 					descriptors_needing_disconnect.push(descriptor.clone());
 					match peer.their_node_id {
 						Some(node_id) => {
 							node_id_to_descriptor.remove(&node_id);
 							self.message_handler.chan_handler.peer_disconnected(&node_id, false);
 						}
-						None => {
-							// This can't actually happen as we should have hit
-							// is_ready_for_encryption() previously on this same peer.
-							unreachable!();
-						},
+						None => {},
 					}
 					return false;
 				}
-
 				if peer.awaiting_pong_timer_tick_intervals > 0 {
 					peer.awaiting_pong_timer_tick_intervals += 1;
 					return true;
 				}
@@ -1758,4 +1767,37 @@ mod tests {
 		assert_eq!(cfgs[1].routing_handler.chan_upds_recvd.load(Ordering::Acquire), 100);
 		assert_eq!(cfgs[1].routing_handler.chan_anns_recvd.load(Ordering::Acquire), 50);
 	}
+
+	#[test]
+	fn test_handshake_timeout() {
+		// Tests that we time out a peer still waiting on handshake completion after a full timer
+		// tick.
+		let cfgs = create_peermgr_cfgs(2);
+		cfgs[0].routing_handler.request_full_sync.store(true, Ordering::Release);
+		cfgs[1].routing_handler.request_full_sync.store(true, Ordering::Release);
+		let peers = create_network(2, &cfgs);
+
+		let secp_ctx = Secp256k1::new();
+		let a_id = PublicKey::from_secret_key(&secp_ctx, &peers[0].our_node_secret);
+		let mut fd_a = FileDescriptor { fd: 1, outbound_data: Arc::new(Mutex::new(Vec::new())) };
+		let mut fd_b = FileDescriptor { fd: 1, outbound_data: Arc::new(Mutex::new(Vec::new())) };
+		let initial_data = peers[1].new_outbound_connection(a_id, fd_b.clone()).unwrap();
+		peers[0].new_inbound_connection(fd_a.clone()).unwrap();
+
+		// If we get a single timer tick before completion, that's fine
+		assert_eq!(peers[0].peers.lock().unwrap().peers.len(), 1);
+		peers[0].timer_tick_occurred();
+		assert_eq!(peers[0].peers.lock().unwrap().peers.len(), 1);
+
+		assert_eq!(peers[0].read_event(&mut fd_a, &initial_data).unwrap(), false);
+		peers[0].process_events();
+		assert_eq!(peers[1].read_event(&mut fd_b, &fd_a.outbound_data.lock().unwrap().split_off(0)).unwrap(), false);
+		peers[1].process_events();
+
+		// ...but if we get a second timer tick, we should disconnect the peer
+		peers[0].timer_tick_occurred();
+		assert_eq!(peers[0].peers.lock().unwrap().peers.len(), 0);
+
+		assert!(peers[0].read_event(&mut fd_a, &fd_b.outbound_data.lock().unwrap().split_off(0)).is_err());
+	}
 }

From 0caa8bb5d5c5f4d271587ed9b185ae71f98411b5 Mon Sep 17 00:00:00 2001
From: Matt Corallo
Date: Wed, 27 Oct 2021 20:06:13 +0000
Subject: [PATCH 7/7] Log peer public key more thoroughly when logging in
 peer_handler

---
 lightning/src/ln/peer_handler.rs | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/lightning/src/ln/peer_handler.rs b/lightning/src/ln/peer_handler.rs
index e1da618ec..0d8bc1c98 100644
--- a/lightning/src/ln/peer_handler.rs
+++ b/lightning/src/ln/peer_handler.rs
@@ -493,6 +493,17 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
+/// A simple wrapper that optionally prints " from <pubkey>" for an optional pubkey.
+/// This works around `format!()` taking a reference to each argument, preventing
+/// `if let Some(node_id) = peer.their_node_id { format!(.., node_id) } else { .. }` from compiling
+/// due to lifetime errors.
+struct OptionalFromDebugger<'a>(&'a Option<PublicKey>);
+impl core::fmt::Display for OptionalFromDebugger<'_> {
+	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> {
+		if let Some(node_id) = self.0 { write!(f, " from {}", log_pubkey!(node_id)) } else { Ok(()) }
+	}
+}
+
 impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 		CM::Target: ChannelMessageHandler,
 		RM::Target: RoutingMessageHandler,
@@ -804,19 +815,19 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 					match e.action {
 						msgs::ErrorAction::DisconnectPeer { msg: _ } => {
 							//TODO: Try to push msg
-							log_debug!(self.logger, "Error handling message; disconnecting peer with: {}", e.err);
+							log_debug!(self.logger, "Error handling message{}; disconnecting peer with: {}", OptionalFromDebugger(&peer.their_node_id), e.err);
 							return Err(PeerHandleError{ no_connection_possible: false });
 						},
 						msgs::ErrorAction::IgnoreAndLog(level) => {
-							log_given_level!(self.logger, level, "Error handling message; ignoring: {}", e.err);
+							log_given_level!(self.logger, level, "Error handling message{}; ignoring: {}", OptionalFromDebugger(&peer.their_node_id), e.err);
 							continue
 						},
 						msgs::ErrorAction::IgnoreError => {
-							log_debug!(self.logger, "Error handling message; ignoring: {}", e.err);
+							log_debug!(self.logger, "Error handling message{}; ignoring: {}", OptionalFromDebugger(&peer.their_node_id), e.err);
 							continue;
 						},
 						msgs::ErrorAction::SendErrorMessage { msg } => {
-							log_debug!(self.logger, "Error handling message; sending error message with: {}", e.err);
+							log_debug!(self.logger, "Error handling message{}; sending error message with: {}", OptionalFromDebugger(&peer.their_node_id), e.err);
 							self.enqueue_message(peer, &msg);
 							continue;
 						},
@@ -982,7 +993,7 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> PeerManager<Descriptor, CM, RM, L, CMH> where
 				return Err(PeerHandleError{ no_connection_possible: false }.into());
 			}
 
-			log_info!(self.logger, "Received peer Init message: {}", msg.features);
+			log_info!(self.logger, "Received peer Init message from {}: {}", log_pubkey!(peer.their_node_id.unwrap()), msg.features);
 
 			if msg.features.initial_routing_sync() {
 				peer.sync_status = InitSyncTracker::ChannelsSyncing(0);
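
For illustration, a standalone sketch of why the wrapper works where an inline
`if let` would not (a plain `String` stands in for `PublicKey`, and `format!`
for the `log_debug!` macro, so this runs without the lightning crates):

    // Standalone sketch of the `OptionalFromDebugger` trick above.
    struct OptionalFrom<'a>(&'a Option<String>);

    impl core::fmt::Display for OptionalFrom<'_> {
        fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
            // Print " from <pubkey>" only when we actually know the pubkey.
            if let Some(node_id) = self.0 { write!(f, " from {}", node_id) } else { Ok(()) }
        }
    }

    fn main() {
        let known = Some("02ab..ef".to_string());
        let unknown: Option<String> = None;
        // Mirrors `log_debug!(self.logger, "Error handling message{}; ...",
        // OptionalFromDebugger(&peer.their_node_id), e.err)` in the patch.
        assert_eq!(
            format!("Error handling message{}; ignoring", OptionalFrom(&known)),
            "Error handling message from 02ab..ef; ignoring"
        );
        assert_eq!(
            format!("Error handling message{}; ignoring", OptionalFrom(&unknown)),
            "Error handling message; ignoring"
        );
    }

Because the wrapper implements `Display`, the formatting machinery borrows it
only for the duration of the call, sidestepping the lifetime error the doc
comment above describes.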