1
0
Fork 0
mirror of https://github.com/ACINQ/eclair.git synced 2025-02-23 22:46:44 +01:00

Better randomization of reconnection delay (#1250)

Randomization is necessary, otherwise if two peers attempt to reconnect
to each other in a synchronized fashion, they will enter in a
disconnect-reconnect loop.

We already had randomization for the initial reconnection attempt, but
further reconnection attempts were using a deterministic schedule
following an exponential backoff curve.

Fixes #1238.
This commit is contained in:
Pierre-Marie Padiou 2019-12-11 14:16:48 +01:00 committed by GitHub
parent e2d472a08e
commit e1c48ebda1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 11 additions and 5 deletions

View file

@ -534,7 +534,7 @@ class Peer(val nodeParams: NodeParams, remoteNodeId: PublicKey, authenticator: A
*/
onTransition {
case INSTANTIATING -> DISCONNECTED => ()
case _ -> DISCONNECTED if nodeParams.autoReconnect => setTimer(RECONNECT_TIMER, Reconnect, Random.nextInt(nodeParams.initialRandomReconnectDelay.toMillis.toInt).millis, repeat = false) // we add some randomization to not have peers reconnect to each other exactly at the same time
case _ -> DISCONNECTED if nodeParams.autoReconnect => setTimer(RECONNECT_TIMER, Reconnect, randomizeDelay(nodeParams.initialRandomReconnectDelay), repeat = false) // we add some randomization to not have peers reconnect to each other exactly at the same time
case DISCONNECTED -> _ if nodeParams.autoReconnect => cancelTimer(RECONNECT_TIMER)
}
@ -615,7 +615,7 @@ object Peer {
def channels: Map[_ <: ChannelId, ActorRef] // will be overridden by Map[FinalChannelId, ActorRef] or Map[ChannelId, ActorRef]
}
case class Nothing() extends Data { override def address_opt = None; override def channels = Map.empty }
case class DisconnectedData(address_opt: Option[InetSocketAddress], channels: Map[FinalChannelId, ActorRef], nextReconnectionDelay: FiniteDuration = 10 seconds) extends Data
case class DisconnectedData(address_opt: Option[InetSocketAddress], channels: Map[FinalChannelId, ActorRef], nextReconnectionDelay: FiniteDuration = randomizeDelay(10 seconds)) extends Data
case class InitializingData(address_opt: Option[InetSocketAddress], transport: ActorRef, channels: Map[FinalChannelId, ActorRef], origin_opt: Option[ActorRef], localInit: wire.Init) extends Data
case class ConnectedData(address_opt: Option[InetSocketAddress], transport: ActorRef, localInit: wire.Init, remoteInit: wire.Init, channels: Map[ChannelId, ActorRef], rebroadcastDelay: FiniteDuration, gossipTimestampFilter: Option[GossipTimestampFilter] = None, behavior: Behavior = Behavior(), expectedPong_opt: Option[ExpectedPong] = None) extends Data
case class ExpectedPong(ping: Ping, timestamp: Long = Platform.currentTime)
@ -708,6 +708,11 @@ object Peer {
def hostAndPort2InetSocketAddress(hostAndPort: HostAndPort): InetSocketAddress = new InetSocketAddress(hostAndPort.getHost, hostAndPort.getPort)
/**
* This helps preventing peers reconnection loops due to synchronization of reconnection attempts.
*/
def randomizeDelay(initialRandomReconnectDelay: FiniteDuration): FiniteDuration = Random.nextInt(initialRandomReconnectDelay.toMillis.toInt).millis
/**
* Exponential backoff retry with a finite max
*/

View file

@ -198,11 +198,12 @@ class PeerSpec extends TestkitBaseClass with StateTestsHelperMethods {
connect(remoteNodeId, authenticator, watcher, router, relayer, connection, transport, peer, channels = Set(ChannelCodecsSpec.normal))
probe.send(transport.ref, PoisonPill)
awaitCond(peer.stateName === DISCONNECTED)
assert(peer.stateData.asInstanceOf[DisconnectedData].nextReconnectionDelay === (10 seconds))
val initialReconnectDelay = peer.stateData.asInstanceOf[DisconnectedData].nextReconnectionDelay
assert(initialReconnectDelay <= (10 seconds))
probe.send(peer, Reconnect)
assert(peer.stateData.asInstanceOf[DisconnectedData].nextReconnectionDelay === (20 seconds))
assert(peer.stateData.asInstanceOf[DisconnectedData].nextReconnectionDelay === (initialReconnectDelay * 2))
probe.send(peer, Reconnect)
assert(peer.stateData.asInstanceOf[DisconnectedData].nextReconnectionDelay === (40 seconds))
assert(peer.stateData.asInstanceOf[DisconnectedData].nextReconnectionDelay === (initialReconnectDelay * 4))
}
test("disconnect if incompatible features") { f =>