peer: send reestablish, shutdown messages before starting writeHandler

This is to avoid a potential race on WriteMessage and Flush internals. Because there is no locking on WriteMessage and Flush, if we allow writeMessage calls in Start after the writeHandler has started, the writeMessage calls may call WriteMessage/Flush at the same time that writeMessage calls from the writeHandler does. Since there is no locking, internals like b.nextHeaderSend can race and cause panics.
2025-03-26 08:55:59 +01:00 · 2023-11-16 11:02:23 -05:00 · 2023-11-16 11:02:23 -05:00 · 7556402ea4
commit 7556402ea4
parent ed179e3e7d
1 changed files with 23 additions and 17 deletions
--- a/peer/brontide.go
+++ b/peer/brontide.go
@ -703,6 +703,23 @@ func (p *Brontide) Start() error {

 	p.startTime = time.Now()

+	// Before launching the writeHandler goroutine, we send any channel
+	// sync messages that must be resent for borked channels. We do this to
+	// avoid data races with WriteMessage & Flush calls.
+	if len(msgs) > 0 {
+		p.log.Infof("Sending %d channel sync messages to peer after "+
+			"loading active channels", len(msgs))
+
+		// Send the messages directly via writeMessage and bypass the
+		// writeHandler goroutine.
+		for _, msg := range msgs {
+			if err := p.writeMessage(msg); err != nil {
+				return fmt.Errorf("unable to send "+
+					"reestablish msg: %v", err)
+			}
+		}
+	}
+
 	err = p.pingManager.Start()
 	if err != nil {
 		return fmt.Errorf("could not start ping manager %w", err)
@ -717,23 +734,6 @@ func (p *Brontide) Start() error {
 	// Signal to any external processes that the peer is now active.
 	close(p.activeSignal)

-	// Now that the peer has started up, we send any channel sync messages
-	// that must be resent for borked channels.
-	if len(msgs) > 0 {
-		p.log.Infof("Sending %d channel sync messages to peer after "+
-			"loading active channels", len(msgs))
-
-		// Send the messages directly via writeMessage and bypass the
-		// writeHandler goroutine to avoid cases where writeHandler
-		// may exit and cause a deadlock.
-		for _, msg := range msgs {
-			if err := p.writeMessage(msg); err != nil {
-				return fmt.Errorf("unable to send reestablish"+
-					"msg: %v", err)
-			}
-		}
-	}
-
 	// Node announcements don't propagate very well throughout the network
 	// as there isn't a way to efficiently query for them through their
 	// timestamp, mostly affecting nodes that were offline during the time
@ -2093,6 +2093,12 @@ func (p *Brontide) logWireMessage(msg lnwire.Message, read bool) {
 // message buffered on the connection. It is safe to call this method again
 // with a nil message iff a timeout error is returned. This will continue to
 // flush the pending message to the wire.
+//
+// NOTE:
+// Besides its usage in Start, this function should not be used elsewhere
+// except in writeHandler. If multiple goroutines call writeMessage at the same
+// time, panics can occur because WriteMessage and Flush don't use any locking
+// internally.
 func (p *Brontide) writeMessage(msg lnwire.Message) error {
 	// Only log the message on the first attempt.
 	if msg != nil {