mirror of
https://github.com/ACINQ/eclair.git
synced 2025-02-23 22:46:44 +01:00
Better randomization of reconnection delay (#1250)
Randomization is necessary, otherwise if two peers attempt to reconnect to each other in a synchronized fashion, they will enter in a disconnect-reconnect loop. We already had randomization for the initial reconnection attempt, but further reconnection attempts were using a deterministic schedule following an exponential backoff curve. Fixes #1238.
This commit is contained in:
parent
e2d472a08e
commit
e1c48ebda1
2 changed files with 11 additions and 5 deletions
|
@ -534,7 +534,7 @@ class Peer(val nodeParams: NodeParams, remoteNodeId: PublicKey, authenticator: A
|
|||
*/
|
||||
onTransition {
|
||||
case INSTANTIATING -> DISCONNECTED => ()
|
||||
case _ -> DISCONNECTED if nodeParams.autoReconnect => setTimer(RECONNECT_TIMER, Reconnect, Random.nextInt(nodeParams.initialRandomReconnectDelay.toMillis.toInt).millis, repeat = false) // we add some randomization to not have peers reconnect to each other exactly at the same time
|
||||
case _ -> DISCONNECTED if nodeParams.autoReconnect => setTimer(RECONNECT_TIMER, Reconnect, randomizeDelay(nodeParams.initialRandomReconnectDelay), repeat = false) // we add some randomization to not have peers reconnect to each other exactly at the same time
|
||||
case DISCONNECTED -> _ if nodeParams.autoReconnect => cancelTimer(RECONNECT_TIMER)
|
||||
}
|
||||
|
||||
|
@ -615,7 +615,7 @@ object Peer {
|
|||
def channels: Map[_ <: ChannelId, ActorRef] // will be overridden by Map[FinalChannelId, ActorRef] or Map[ChannelId, ActorRef]
|
||||
}
|
||||
case class Nothing() extends Data { override def address_opt = None; override def channels = Map.empty }
|
||||
case class DisconnectedData(address_opt: Option[InetSocketAddress], channels: Map[FinalChannelId, ActorRef], nextReconnectionDelay: FiniteDuration = 10 seconds) extends Data
|
||||
case class DisconnectedData(address_opt: Option[InetSocketAddress], channels: Map[FinalChannelId, ActorRef], nextReconnectionDelay: FiniteDuration = randomizeDelay(10 seconds)) extends Data
|
||||
case class InitializingData(address_opt: Option[InetSocketAddress], transport: ActorRef, channels: Map[FinalChannelId, ActorRef], origin_opt: Option[ActorRef], localInit: wire.Init) extends Data
|
||||
case class ConnectedData(address_opt: Option[InetSocketAddress], transport: ActorRef, localInit: wire.Init, remoteInit: wire.Init, channels: Map[ChannelId, ActorRef], rebroadcastDelay: FiniteDuration, gossipTimestampFilter: Option[GossipTimestampFilter] = None, behavior: Behavior = Behavior(), expectedPong_opt: Option[ExpectedPong] = None) extends Data
|
||||
case class ExpectedPong(ping: Ping, timestamp: Long = Platform.currentTime)
|
||||
|
@ -708,6 +708,11 @@ object Peer {
|
|||
|
||||
def hostAndPort2InetSocketAddress(hostAndPort: HostAndPort): InetSocketAddress = new InetSocketAddress(hostAndPort.getHost, hostAndPort.getPort)
|
||||
|
||||
/**
|
||||
* This helps preventing peers reconnection loops due to synchronization of reconnection attempts.
|
||||
*/
|
||||
def randomizeDelay(initialRandomReconnectDelay: FiniteDuration): FiniteDuration = Random.nextInt(initialRandomReconnectDelay.toMillis.toInt).millis
|
||||
|
||||
/**
|
||||
* Exponential backoff retry with a finite max
|
||||
*/
|
||||
|
|
|
@ -198,11 +198,12 @@ class PeerSpec extends TestkitBaseClass with StateTestsHelperMethods {
|
|||
connect(remoteNodeId, authenticator, watcher, router, relayer, connection, transport, peer, channels = Set(ChannelCodecsSpec.normal))
|
||||
probe.send(transport.ref, PoisonPill)
|
||||
awaitCond(peer.stateName === DISCONNECTED)
|
||||
assert(peer.stateData.asInstanceOf[DisconnectedData].nextReconnectionDelay === (10 seconds))
|
||||
val initialReconnectDelay = peer.stateData.asInstanceOf[DisconnectedData].nextReconnectionDelay
|
||||
assert(initialReconnectDelay <= (10 seconds))
|
||||
probe.send(peer, Reconnect)
|
||||
assert(peer.stateData.asInstanceOf[DisconnectedData].nextReconnectionDelay === (20 seconds))
|
||||
assert(peer.stateData.asInstanceOf[DisconnectedData].nextReconnectionDelay === (initialReconnectDelay * 2))
|
||||
probe.send(peer, Reconnect)
|
||||
assert(peer.stateData.asInstanceOf[DisconnectedData].nextReconnectionDelay === (40 seconds))
|
||||
assert(peer.stateData.asInstanceOf[DisconnectedData].nextReconnectionDelay === (initialReconnectDelay * 4))
|
||||
}
|
||||
|
||||
test("disconnect if incompatible features") { f =>
|
||||
|
|
Loading…
Add table
Reference in a new issue