peer: ensure the peer has been started before Disconnect can be called

In this commit, we make sure that all the `wg.Add(1)` calls succeed
before we attempt to wait on the shutdown of all the goroutines. Under
rare scheduling scenarios, if both `Start` and `Disconnect` are called
concurrently, then this internal race error can be hit, causing the
panic to occur.

Fixes https://github.com/lightningnetwork/lnd/issues/7853
This commit is contained in:
Olaoluwa Osuntokun 2023-07-25 11:14:41 -07:00
parent 798821db17
commit 325e844a96
No known key found for this signature in database
GPG Key ID: 3BBD59E99B280306
2 changed files with 30 additions and 2 deletions

View File

@ -194,6 +194,10 @@ unlock or create.
Tor](https://github.com/lightningnetwork/lnd/pull/7783), even when
`tor.skip-proxy-for-clearnet-targets=true` was set.
* Fix a [concurrency issue related to rapid peer teardown and
creation](https://github.com/lightningnetwork/lnd/pull/7856) that can arise
under rare scenarios.
### Tooling and documentation
* Add support for [custom `RPCHOST` and
@ -226,6 +230,7 @@ unlock or create.
* Maxwell Sayles
* Michael Street
* MG-ng
* Olaoluwa Osuntokun
* Oliver Gugger
* Pierre Beugnet
* Satarupa Deb

View File

@ -469,8 +469,9 @@ type Brontide struct {
// potentially holding lots of un-consumed events.
channelEventClient *subscribe.Client
quit chan struct{}
wg sync.WaitGroup
startReady chan struct{}
quit chan struct{}
wg sync.WaitGroup
// log is a peer-specific logging instance.
log btclog.Logger
@ -500,6 +501,7 @@ func NewBrontide(cfg Config) *Brontide {
linkFailures: make(chan linkFailureReport),
chanCloseMsgs: make(chan *closeMsg),
resentChanSyncMsg: make(map[lnwire.ChannelID]struct{}),
startReady: make(chan struct{}),
quit: make(chan struct{}),
log: build.NewPrefixLog(logPrefix, peerLog),
}
@ -514,6 +516,11 @@ func (p *Brontide) Start() error {
return nil
}
// Once we've finished starting up the peer, we'll signal to other
// goroutines that the they can move forward to tear down the peer, or
// carry out other relevant changes.
defer close(p.startReady)
p.log.Tracef("starting with conn[%v->%v]",
p.cfg.Conn.LocalAddr(), p.cfg.Conn.RemoteAddr())
@ -1050,6 +1057,14 @@ func (p *Brontide) maybeSendNodeAnn(channels []*channeldb.OpenChannel) {
// calling Disconnect will signal the quit channel and the method will not
// block, since no goroutines were spawned.
func (p *Brontide) WaitForDisconnect(ready chan struct{}) {
// Before we try to call the `Wait` goroutine, we'll make sure the main
// set of goroutines are already active.
select {
case <-p.startReady:
case <-p.quit:
return
}
select {
case <-ready:
case <-p.quit:
@ -1066,6 +1081,14 @@ func (p *Brontide) Disconnect(reason error) {
return
}
// Make sure initialization has completed before we try to tear things
// down.
select {
case <-p.startReady:
case <-p.quit:
return
}
err := fmt.Errorf("disconnecting %s, reason: %v", p, reason)
p.storeError(err)