1
0
Fork 0
mirror of https://github.com/ACINQ/eclair.git synced 2025-02-22 22:25:26 +01:00

Postgres: add safety checks at startup (#2140)

When using postgres, at startup we optionally run a few basic safety
checks, e.g. the number of local channels, how long since the last local
channel update, etc. The goal is to make sure that we are connected to
the correct database instance.
This commit is contained in:
Pierre-Marie Padiou 2022-01-21 14:45:16 +01:00 committed by GitHub
parent f8d507bbdd
commit c180ca2ef1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 222 additions and 5 deletions

View file

@ -75,6 +75,28 @@ This is particularly useful for payment hubs that generate a lot of invoices (e.
Eclair includes a small `payment_metadata` field in all invoices it generates.
This lets node operators verify that payers actually support that feature.
### Optional safety checks when using Postgres
When using postgres, at startup we optionally run a few basic safety checks, e.g. the number of local channels, how long since the last local channel update, etc. The goal is to make sure that we are connected to the correct database instance.
Those checks are disabled by default because they wouldn't pass on a fresh new node with zero channels. You should enable them when you already have channels, so that there is something to compare to, and the values should be specific to your setup, particularly for local channels. Configuration is done by overriding `max-age` and `min-count` values in your `eclair.conf`:
```
eclair.db.postgres.safety-checks
{
enabled = true
max-age {
local-channels = 3 minutes
network-nodes = 30 minutes
audit-relayed = 10 minutes
}
min-count {
local-channels = 10
network-nodes = 3000
network-channels = 20000
}
}
```
### API changes
#### Timestamps

View file

@ -327,6 +327,25 @@ eclair {
lock-timeout = 5 seconds // timeout for the lock statement on the lease table
auto-release-at-shutdown = true // automatically release the lock when eclair is stopping
}
safety-checks {
// A set of basic checks on data to make sure we use the correct database
// Those checks are disabled by default because they wouldn't pass on a fresh new node with
// zero channels. You should enable them when you already have channels, so that there is
// something to compare to, and the values should be specific to your setup, especially
// for local channels. If you operate a busy node, you can reduce max-age.local-channels
// and max-age.audit-relayed to just a few minutes, this will significantly improve the safety.
enabled = false
max-age {
local-channels = 15 minutes // last time a local channel was updated
network-nodes = 30 minutes // most recent public node announcement
audit-relayed = 1 hour // last time a payment was relayed
}
min-count {
local-channels = 10 // minimum number of local channels, this entirely depends on your setup
network-nodes = 3000 // minimum number of public nodes in the routing table
network-channels = 20000 // minimum number of public channels in the routing table
}
}
}
}

View file

@ -20,6 +20,7 @@ import akka.Done
import akka.actor.{ActorSystem, CoordinatedShutdown}
import com.typesafe.config.Config
import com.zaxxer.hikari.{HikariConfig, HikariDataSource}
import fr.acinq.eclair.TimestampMilli
import fr.acinq.eclair.db.pg.PgUtils.PgLock.LockFailureHandler
import fr.acinq.eclair.db.pg.PgUtils._
import fr.acinq.eclair.db.pg._
@ -30,6 +31,7 @@ import java.io.File
import java.nio.file._
import java.sql.Connection
import java.util.UUID
import java.util.concurrent.TimeUnit
import scala.concurrent.Future
import scala.concurrent.duration._
@ -94,12 +96,22 @@ object Databases extends Logging {
}
object PostgresDatabases {
/**
 * Thresholds for the optional startup checks run against the postgres database.
 *
 * @param localChannelsMaxAge     maximum accepted age of the most recent local channel timestamp
 * @param networkNodesMaxAge      maximum accepted age of the most recent node announcement in the network db
 * @param auditRelayedMaxAge      maximum accepted age of the most recent relayed payment in the audit db
 * @param localChannelsMinCount   minimum number of rows expected in the local channels table
 * @param networkNodesMinCount    minimum number of rows expected in the network nodes table
 * @param networkChannelsMinCount minimum number of rows expected in the public channels table
 */
case class SafetyChecks(localChannelsMaxAge: FiniteDuration,
networkNodesMaxAge: FiniteDuration,
auditRelayedMaxAge: FiniteDuration,
localChannelsMinCount: Int,
networkNodesMinCount: Int,
networkChannelsMinCount: Int
)
def apply(hikariConfig: HikariConfig,
instanceId: UUID,
lock: PgLock = PgLock.NoLock,
jdbcUrlFile_opt: Option[File],
readOnlyUser_opt: Option[String],
resetJsonColumns: Boolean)(implicit system: ActorSystem): PostgresDatabases = {
resetJsonColumns: Boolean,
safetyChecks_opt: Option[SafetyChecks])(implicit system: ActorSystem): PostgresDatabases = {
jdbcUrlFile_opt.foreach(jdbcUrlFile => checkIfDatabaseUrlIsUnchanged(hikariConfig.getJdbcUrl, jdbcUrlFile))
@ -162,6 +174,71 @@ object Databases extends Logging {
}
}
safetyChecks_opt foreach { initChecks =>
PgUtils.inTransaction { connection =>
using(connection.createStatement()) { statement =>
// Runs `sqlQuery` (expected to expose a single `max` timestamp column) and requires that the
// time elapsed since that timestamp does not exceed `maxAge`.
// Throws IllegalArgumentException (via `require`) when there is no timestamp or when it is too old.
def checkMaxAge(name: String, maxAge: FiniteDuration, sqlQuery: String): Unit = {
  import ExtendedResultSet._
  // an aggregate MAX() still yields a row when no record matches, but its value is then NULL
  val age_opt = for {
    row <- statement.executeQuery(sqlQuery).headOption
    mostRecent <- row.getTimestampNullable("max")
  } yield TimestampMilli.now() - TimestampMilli.fromSqlTimestamp(mostRecent)
  require(age_opt.isDefined, s"db check failed: no $name found")
  age_opt.foreach { age =>
    require(age <= maxAge, s"db check failed: most recent $name is too old (${age.toMinutes} minutes > ${maxAge.toMinutes} minutes)")
    logger.info(s"db check ok: max age ${age.toMinutes} minutes <= ${maxAge.toMinutes} minutes for $name")
  }
}
// Freshness checks, run in the order listed; the first one that fails throws.
Seq(
  // latest timestamp recorded on a local channel that is not closed
  ("local channel", initChecks.localChannelsMaxAge,
    """
      |SELECT MAX(GREATEST(created_timestamp, last_payment_sent_timestamp, last_payment_received_timestamp, last_connected_timestamp, closed_timestamp))
      |FROM local.channels
      |WHERE NOT is_closed""".stripMargin),
  // latest timestamp found in the json of the stored nodes
  ("network node", initChecks.networkNodesMaxAge,
    """
      |SELECT MAX((json->'timestamp'->>'iso')::timestamptz)
      |FROM network.nodes""".stripMargin),
  // latest timestamp in the relayed audit table
  ("audit relayed", initChecks.auditRelayedMaxAge,
    """
      |SELECT MAX(timestamp)
      |FROM audit.relayed""".stripMargin)
).foreach { case (checkName, limit, query) =>
  checkMaxAge(name = checkName, maxAge = limit, sqlQuery = query)
}
// Runs `sqlQuery` (expected to expose a single `count` column) and requires that the returned
// value is at least `minCount`.
// Throws IllegalArgumentException (via `require`) when the count is below the minimum.
def checkMinCount(name: String, minCount: Int, sqlQuery: String): Unit = {
  import ExtendedResultSet._
  val count = statement
    .executeQuery(sqlQuery)
    .map(_.getInt("count"))
    .head // NB: COUNT(*) always returns exactly one row
  require(count >= minCount, s"db check failed: min count not reached for $name ($count < $minCount)")
  // the check passes when count == minCount, so the success message must say ">=" and not ">"
  logger.info(s"db check ok: min count $count >= $minCount for $name")
}
// Row-count checks, run in the order listed; the first one that fails throws.
Seq(
  ("local channels", initChecks.localChannelsMinCount, "SELECT COUNT(*) FROM local.channels"),
  ("network node", initChecks.networkNodesMinCount, "SELECT COUNT(*) FROM network.nodes"),
  ("network channels", initChecks.networkChannelsMinCount, "SELECT COUNT(*) FROM network.public_channels")
).foreach { case (checkName, threshold, query) =>
  checkMinCount(name = checkName, minCount = threshold, sqlQuery = query)
}
}
}
}
databases
}
@ -245,13 +322,25 @@ object Databases extends Logging {
val jdbcUrlFile = new File(dbdir, "last_jdbcurl")
// Safety checks are only built when they are explicitly enabled in the configuration.
val safetyChecks_opt = if (!dbConfig.getBoolean("postgres.safety-checks.enabled")) None else {
  // reads a duration from the config, keeping whole seconds only
  def seconds(path: String): FiniteDuration = FiniteDuration(dbConfig.getDuration(path).getSeconds, TimeUnit.SECONDS)

  Some(PostgresDatabases.SafetyChecks(
    localChannelsMaxAge = seconds("postgres.safety-checks.max-age.local-channels"),
    networkNodesMaxAge = seconds("postgres.safety-checks.max-age.network-nodes"),
    auditRelayedMaxAge = seconds("postgres.safety-checks.max-age.audit-relayed"),
    localChannelsMinCount = dbConfig.getInt("postgres.safety-checks.min-count.local-channels"),
    networkNodesMinCount = dbConfig.getInt("postgres.safety-checks.min-count.network-nodes"),
    networkChannelsMinCount = dbConfig.getInt("postgres.safety-checks.min-count.network-channels")
  ))
}
Databases.PostgresDatabases(
hikariConfig = hikariConfig,
instanceId = instanceId,
lock = lock,
jdbcUrlFile_opt = Some(jdbcUrlFile),
readOnlyUser_opt = readOnlyUser_opt,
resetJsonColumns = resetJsonColumns
resetJsonColumns = resetJsonColumns,
safetyChecks_opt = safetyChecks_opt
)
}

View file

@ -111,7 +111,7 @@ object TestDatabases {
// @formatter:off
override val connection: PgConnection = pg.getPostgresDatabase.getConnection.asInstanceOf[PgConnection]
// NB: we use a lazy val here: databases won't be initialized until we reference that variable
override lazy val db: Databases = Databases.PostgresDatabases(hikariConfig, UUID.randomUUID(), lock, jdbcUrlFile_opt = Some(jdbcUrlFile), readOnlyUser_opt = None, resetJsonColumns = false)
override lazy val db: Databases = Databases.PostgresDatabases(hikariConfig, UUID.randomUUID(), lock, jdbcUrlFile_opt = Some(jdbcUrlFile), readOnlyUser_opt = None, resetJsonColumns = false, safetyChecks_opt = None)
override def close(): Unit = pg.close()
// @formatter:on
}

View file

@ -1,11 +1,16 @@
package fr.acinq.eclair.db
import com.opentable.db.postgres.embedded.EmbeddedPostgres
import com.typesafe.config.{Config, ConfigFactory}
import com.typesafe.config.{Config, ConfigFactory, ConfigValue}
import fr.acinq.eclair.db.DbEventHandler.ChannelEvent
import fr.acinq.eclair.db.pg.PgUtils.ExtendedResultSet._
import fr.acinq.eclair.db.pg.PgUtils.PgLock.{LeaseLock, LockFailure, LockFailureHandler}
import fr.acinq.eclair.db.pg.PgUtils.{JdbcUrlChanged, migrateTable, using}
import fr.acinq.eclair.{TestKitBaseClass, TestUtils}
import fr.acinq.eclair.payment.ChannelPaymentRelayed
import fr.acinq.eclair.router.Announcements
import fr.acinq.eclair.wire.internal.channel.ChannelCodecsSpec
import fr.acinq.eclair.wire.protocol.Color
import fr.acinq.eclair.{Features, MilliSatoshiLong, TestKitBaseClass, TestUtils, TimestampMilli, TimestampSecond, randomBytes32, randomKey}
import grizzled.slf4j.{Logger, Logging}
import org.postgresql.jdbc.PgConnection
import org.postgresql.util.PGInterval
@ -153,6 +158,74 @@ class PgUtilsSpec extends TestKitBaseClass with AnyFunSuiteLike with Eventually
Databases.postgres(config, UUID.randomUUID(), datadir, LockFailureHandler.logAndThrow)
}
test("safety checks") {
  val pg = EmbeddedPostgres.start()
  // make sure the embedded postgres instance is stopped, even when an assertion fails
  try {
    val baseConfig = ConfigFactory.parseString("postgres.lock-type=none").withFallback(PgUtilsSpec.testConfig(pg.getPort))
    val datadir = new File(TestUtils.BUILD_DIRECTORY, s"pg_test_${UUID.randomUUID()}")
    datadir.mkdirs()

    // enables the safety checks on top of the base config; only the minimum number of local channels varies
    def safetyConfig(localChannelsMinCount: Int): Config = ConfigFactory.parseString(
      s"""
         |postgres {
         |  safety-checks {
         |    // a set of basic checks on data to make sure we use the correct database
         |    enabled = true
         |    max-age {
         |      local-channels = 3 minutes
         |      network-nodes = 30 minutes
         |      audit-relayed = 10 minutes
         |    }
         |    min-count {
         |      local-channels = $localChannelsMinCount
         |      network-nodes = 2
         |      network-channels = 0
         |    }
         |  }
         |}""".stripMargin).withFallback(baseConfig)

    // first start without safety checks, to populate the database: one local channel, three nodes
    // (the most recent one announced 7 minutes ago) and one payment relayed 3 seconds ago
    val seedDb = Databases.postgres(baseConfig, UUID.randomUUID(), datadir, LockFailureHandler.logAndThrow)
    seedDb.channels.addOrUpdateChannel(ChannelCodecsSpec.normal)
    seedDb.channels.updateChannelMeta(ChannelCodecsSpec.normal.channelId, ChannelEvent.EventType.Created)
    seedDb.network.addNode(Announcements.makeNodeAnnouncement(randomKey(), "node-A", Color(50, 99, -80), Nil, Features.empty, TimestampSecond.now() - 45.days))
    seedDb.network.addNode(Announcements.makeNodeAnnouncement(randomKey(), "node-B", Color(50, 99, -80), Nil, Features.empty, TimestampSecond.now() - 3.days))
    seedDb.network.addNode(Announcements.makeNodeAnnouncement(randomKey(), "node-C", Color(50, 99, -80), Nil, Features.empty, TimestampSecond.now() - 7.minutes))
    seedDb.audit.add(ChannelPaymentRelayed(421 msat, 400 msat, randomBytes32(), randomBytes32(), randomBytes32(), TimestampMilli.now() - 3.seconds))
    seedDb.dataSource.close()

    // every threshold is satisfied by the data above: startup must succeed
    val checkedDb = Databases.postgres(safetyConfig(localChannelsMinCount = 1), UUID.randomUUID(), datadir, LockFailureHandler.logAndThrow)
    checkedDb.dataSource.close()

    // there is a single local channel but ten are required: startup must fail
    intercept[IllegalArgumentException] {
      Databases.postgres(safetyConfig(localChannelsMinCount = 10), UUID.randomUUID(), datadir, LockFailureHandler.logAndThrow)
    }
  } finally {
    pg.close()
  }
}
test("migration test") {
val pg = EmbeddedPostgres.start()
using(pg.getPostgresDatabase.getConnection.createStatement()) { statement =>
@ -218,6 +291,20 @@ object PgUtilsSpec extends Logging {
| lock-timeout = 5 seconds // timeout for the lock statement on the lease table
| auto-release-at-shutdown = false // automatically release the lock when eclair is stopping
| }
| safety-checks {
| // a set of basic checks on data to make sure we use the correct database
| enabled = false
| max-age {
| local-channels = 3 minutes
| network-nodes = 30 minutes
| audit-relayed = 10 minutes
| }
| min-count {
| local-channels = 10
| network-nodes = 3000
| network-channels = 20000
| }
| }
|}
|""".stripMargin
)