lnd+lncfg: make etcd fully remote

This commit gets rid of the concept of separate local and remote
databases when etcd is used. Instead, the same backend is now used for
both the graph and the channel state databases (which were previously
renamed from the local and the remote DB, respectively).
This will make path finding extremely slow on etcd and will require
further optimizations, possibly a write-through cache for the graph
DB. But it is a requirement for making lnd itself fully stateless.
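For context, this is roughly how a node operator selects the now fully
remote etcd backend. A minimal lnd.conf sketch: the option names follow
lnd's sample-lnd.conf of this era, while the endpoint and credentials
are placeholders, so verify both against your lnd version:

    [db]
    ; Use etcd instead of the default bolt backend.
    db.backend=etcd

    [etcd]
    ; Placeholder endpoint and credentials for the etcd cluster.
    db.etcd.host=127.0.0.1:2379
    db.etcd.user=lnd
    db.etcd.pass=lnd-password
    ; Only skip TLS verification in local test setups.
    db.etcd.insecure_skip_verify=true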
Oliver Gugger 2021-08-03 09:57:28 +02:00
parent c4917ae7fc
commit 0d3647d715
2 changed files with 100 additions and 116 deletions

lncfg/db.go

@@ -10,10 +10,15 @@ import (
)

const (
    dbName = "channel.db"
    BoltBackend = "bolt"
    EtcdBackend = "etcd"
    DefaultBatchCommitInterval = 500 * time.Millisecond
+
+   // NSChannelDB is the namespace name that we use for the combined graph
+   // and channel state DB.
+   NSChannelDB = "channeldb"
)

// DB holds database configuration for LND.
@@ -81,7 +86,6 @@ func (db *DB) Init(ctx context.Context, dbPath string) error {
// DatabaseBackends is a two-tuple that holds the set of active database
// backends for the daemon. The two backends we expose are the graph database
// backend, and the channel state backend.
-// TODO(guggero): Actually make fully remote.
type DatabaseBackends struct {
    // GraphDB points to the database backend that contains the less
    // critical data that is accessed often, such as the channel graph and
@@ -95,27 +99,45 @@ type DatabaseBackends struct {
    // HeightHintDB points to a possibly networked replicated backend that
    // contains the chain height hint related data.
    HeightHintDB kvdb.Backend
+
+   // Remote indicates whether the database backends are remote, possibly
+   // replicated instances or local bbolt backed databases.
+   Remote bool
+
+   // CloseFuncs is a map of close functions for each of the initialized
+   // DB backends keyed by their namespace name.
+   CloseFuncs map[string]func() error
}

// GetBackends returns a set of kvdb.Backends as set in the DB config.
func (db *DB) GetBackends(ctx context.Context, dbPath string) (
    *DatabaseBackends, error) {

-   var (
-       localDB, remoteDB kvdb.Backend
-       err error
-   )
+   // We keep track of all the kvdb backends we actually open and return a
+   // reference to their close function so they can be cleaned up properly
+   // on error or shutdown.
+   closeFuncs := make(map[string]func() error)

    if db.Backend == EtcdBackend {
-       remoteDB, err = kvdb.Open(
+       etcdBackend, err := kvdb.Open(
            kvdb.EtcdBackendName, ctx, db.Etcd,
        )
        if err != nil {
-           return nil, err
+           return nil, fmt.Errorf("error opening etcd DB: %v", err)
        }
+       closeFuncs[NSChannelDB] = etcdBackend.Close
+
+       return &DatabaseBackends{
+           GraphDB: etcdBackend,
+           ChanStateDB: etcdBackend,
+           HeightHintDB: etcdBackend,
+           Remote: true,
+           CloseFuncs: closeFuncs,
+       }, nil
    }

-   localDB, err = kvdb.GetBoltBackend(&kvdb.BoltBackendConfig{
+   // We're using all bbolt based databases by default.
+   boltBackend, err := kvdb.GetBoltBackend(&kvdb.BoltBackendConfig{
        DBPath: dbPath,
        DBFileName: dbName,
        DBTimeout: db.Bolt.DBTimeout,
@@ -124,13 +146,15 @@ func (db *DB) GetBackends(ctx context.Context, dbPath string) (
        AutoCompactMinAge: db.Bolt.AutoCompactMinAge,
    })
    if err != nil {
-       return nil, err
+       return nil, fmt.Errorf("error opening bolt DB: %v", err)
    }
+   closeFuncs[NSChannelDB] = boltBackend.Close

    return &DatabaseBackends{
-       GraphDB: localDB,
-       ChanStateDB: remoteDB,
-       HeightHintDB: localDB,
+       GraphDB: boltBackend,
+       ChanStateDB: boltBackend,
+       HeightHintDB: boltBackend,
+       CloseFuncs: closeFuncs,
    }, nil
}
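To illustrate the new contract, here is a minimal, self-contained
sketch of how a caller would consume GetBackends and release the
opened backends through CloseFuncs. It mirrors the lnd.go changes
below, but openBackends and the main wiring are hypothetical; only
GetBackends, DatabaseBackends and CloseFuncs come from this diff, and
lncfg.DefaultDB is assumed from the surrounding codebase:

    package main

    import (
        "context"
        "fmt"
        "log"

        "github.com/lightningnetwork/lnd/lncfg"
    )

    // openBackends wraps the reworked lncfg API: a single backend now
    // serves both the graph and the channel state DB, and CloseFuncs
    // releases exactly the backends that were opened.
    func openBackends(ctx context.Context, dbCfg *lncfg.DB,
        dbPath string) (*lncfg.DatabaseBackends, func(), error) {

        backends, err := dbCfg.GetBackends(ctx, dbPath)
        if err != nil {
            return nil, nil, fmt.Errorf("unable to obtain database "+
                "backends: %v", err)
        }

        // Close every backend that was actually opened, keyed by its
        // namespace name (NSChannelDB in both code paths above).
        cleanUp := func() {
            for name, closeFunc := range backends.CloseFuncs {
                if err := closeFunc(); err != nil {
                    log.Printf("error closing %s database: %v",
                        name, err)
                }
            }
        }

        return backends, cleanUp, nil
    }

    func main() {
        // The default config uses the local bolt backend; "." is a
        // placeholder database directory.
        backends, cleanUp, err := openBackends(
            context.Background(), lncfg.DefaultDB(), ".",
        )
        if err != nil {
            log.Fatal(err)
        }
        defer cleanUp()

        // In full remote (etcd) mode all three fields point to the
        // very same backend instance.
        fmt.Println(backends.GraphDB == backends.ChanStateDB)
    }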

lnd.go

@@ -474,16 +474,8 @@ func Main(cfg *Config, lisCfg ListenerCfg, interceptor signal.Interceptor) error
        // When "running locally", LND will use the bbolt wallet.db to
        // store the wallet located in the chain data dir, parametrized
        // by the active network.
-       chainConfig := cfg.Bitcoin
-       if cfg.registeredChains.PrimaryChain() == chainreg.LitecoinChain {
-           chainConfig = cfg.Litecoin
-       }
-
-       dbDirPath := btcwallet.NetworkDir(
-           chainConfig.ChainDir, cfg.ActiveNetParams.Params,
-       )
        loaderOpt = btcwallet.LoaderWithLocalWalletDB(
-           dbDirPath, !cfg.SyncFreelist, cfg.DB.Bolt.DBTimeout,
+           cfg.networkDir, !cfg.SyncFreelist, cfg.DB.Bolt.DBTimeout,
        )
    }
@@ -1633,7 +1625,6 @@ type databaseInstances struct {
// initializeDatabases extracts the current databases that we'll use for normal
// operation in the daemon. A function closure that closes all opened databases
// is also returned.
-// TODO(guggero): Actually make fully remote.
func initializeDatabases(ctx context.Context,
    cfg *Config) (*databaseInstances, func(), error) {
@@ -1654,102 +1645,71 @@ func initializeDatabases(ctx context.Context,
            "backends: %v", err)
    }

-   // If the remoteDB is nil, then we'll just open a local DB as normal,
-   // having the remote and local pointer be the exact same instance.
-   var (
-       dbs = &databaseInstances{
-           heightHintDB: databaseBackends.HeightHintDB,
-       }
-       closeFuncs []func()
-   )
-   if databaseBackends.ChanStateDB == nil {
-       // Open the channeldb, which is dedicated to storing channel,
-       // and network related metadata.
-       dbs.graphDB, err = channeldb.CreateWithBackend(
-           databaseBackends.GraphDB,
-           channeldb.OptionSetRejectCacheSize(cfg.Caches.RejectCacheSize),
-           channeldb.OptionSetChannelCacheSize(cfg.Caches.ChannelCacheSize),
-           channeldb.OptionSetBatchCommitInterval(cfg.DB.BatchCommitInterval),
-           channeldb.OptionDryRunMigration(cfg.DryRunMigration),
-       )
-       switch {
-       case err == channeldb.ErrDryRunMigrationOK:
-           return nil, nil, err
-
-       case err != nil:
-           err := fmt.Errorf("unable to open local channeldb: %v", err)
-           ltndLog.Error(err)
-           return nil, nil, err
-       }
-
-       closeFuncs = append(closeFuncs, func() {
-           dbs.graphDB.Close()
-       })
-
-       dbs.chanStateDB = dbs.graphDB
-   } else {
-       ltndLog.Infof("Database replication is available! Creating " +
-           "local and remote channeldb instances")
-
-       // Otherwise, we'll open two instances, one for the state we
-       // only need locally, and the other for things we want to
-       // ensure are replicated.
-       dbs.graphDB, err = channeldb.CreateWithBackend(
-           databaseBackends.GraphDB,
-           channeldb.OptionSetRejectCacheSize(cfg.Caches.RejectCacheSize),
-           channeldb.OptionSetChannelCacheSize(cfg.Caches.ChannelCacheSize),
-           channeldb.OptionSetBatchCommitInterval(cfg.DB.BatchCommitInterval),
-           channeldb.OptionDryRunMigration(cfg.DryRunMigration),
-       )
-       switch {
-       // As we want to allow both versions to get thru the dry run
-       // migration, we'll only exit the second time here once the
-       // remote instance has had a time to migrate as well.
-       case err == channeldb.ErrDryRunMigrationOK:
-           ltndLog.Infof("Local DB dry run migration successful")
-
-       case err != nil:
-           err := fmt.Errorf("unable to open local channeldb: %v", err)
-           ltndLog.Error(err)
-           return nil, nil, err
-       }
-
-       closeFuncs = append(closeFuncs, func() {
-           dbs.graphDB.Close()
-       })
-
-       ltndLog.Infof("Opening replicated database instance...")
-       dbs.chanStateDB, err = channeldb.CreateWithBackend(
-           databaseBackends.ChanStateDB,
-           channeldb.OptionDryRunMigration(cfg.DryRunMigration),
-           channeldb.OptionSetBatchCommitInterval(cfg.DB.BatchCommitInterval),
-       )
-       switch {
-       case err == channeldb.ErrDryRunMigrationOK:
-           return nil, nil, err
-
-       case err != nil:
-           dbs.graphDB.Close()
-
-           err := fmt.Errorf("unable to open remote channeldb: %v", err)
-           ltndLog.Error(err)
-           return nil, nil, err
-       }
-
-       closeFuncs = append(closeFuncs, func() {
-           dbs.chanStateDB.Close()
-       })
-   }
+   // With the full remote mode we made sure both the graph and channel
+   // state DB point to the same local or remote DB and the same namespace
+   // within that DB.
+   dbs := &databaseInstances{
+       heightHintDB: databaseBackends.HeightHintDB,
+   }
+   cleanUp := func() {
+       // We can just close the returned close functions directly. Even
+       // if we decorate the channel DB with an additional struct, its
+       // close function still just points to the kvdb backend.
+       for name, closeFunc := range databaseBackends.CloseFuncs {
+           if err := closeFunc(); err != nil {
+               ltndLog.Errorf("Error closing %s "+
+                   "database: %v", name, err)
+           }
+       }
+   }
+
+   if databaseBackends.Remote {
+       ltndLog.Infof("Using remote %v database! Creating "+
+           "graph and channel state DB instances", cfg.DB.Backend)
+   } else {
+       ltndLog.Infof("Creating local graph and channel state DB " +
+           "instances")
+   }
+
+   // Otherwise, we'll open two instances, one for the state we only need
+   // locally, and the other for things we want to ensure are replicated.
+   dbs.graphDB, err = channeldb.CreateWithBackend(
+       databaseBackends.GraphDB,
+       channeldb.OptionSetRejectCacheSize(cfg.Caches.RejectCacheSize),
+       channeldb.OptionSetChannelCacheSize(cfg.Caches.ChannelCacheSize),
+       channeldb.OptionSetBatchCommitInterval(cfg.DB.BatchCommitInterval),
+       channeldb.OptionDryRunMigration(cfg.DryRunMigration),
+   )
+   switch {
+   // Give the DB a chance to dry run the migration. Since we know that
+   // both the channel state and graph DBs are still always behind the same
+   // backend, we know this would be applied to both of those DBs.
+   case err == channeldb.ErrDryRunMigrationOK:
+       ltndLog.Infof("Graph DB dry run migration successful")
+       return nil, nil, err
+
+   case err != nil:
+       cleanUp()
+
+       err := fmt.Errorf("unable to open graph DB: %v", err)
+       ltndLog.Error(err)
+       return nil, nil, err
+   }
+
+   // For now, we don't _actually_ split the graph and channel state DBs on
+   // the code level. Since they both are based upon the *channeldb.DB
+   // struct it will require more refactoring to fully separate them. With
+   // the full remote mode we at least know for now that they both point to
+   // the same DB backend (and also namespace within that) so we only need
+   // to apply any migration once.
+   //
+   // TODO(guggero): Once the full separation of anything graph related
+   // from the channeldb.DB is complete, the decorated instance of the
+   // channel state DB should be created here individually instead of just
+   // using the same struct (and DB backend) instance.
+   dbs.chanStateDB = dbs.graphDB

    openTime := time.Since(startOpenTime)
-   ltndLog.Infof("Database now open (time_to_open=%v)!", openTime)
-
-   cleanUp := func() {
-       for _, closeFunc := range closeFuncs {
-           closeFunc()
-       }
-   }
+   ltndLog.Infof("Database(s) now open (time_to_open=%v)!", openTime)

    return dbs, cleanUp, nil
}
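For completeness, a sketch of how Main would consume this function
after the change. The call site itself is not part of this diff, so
treat the surrounding lines (ctx, cfg in scope inside package lnd) as
assumed context rather than the actual lnd code:

    // Inside Main (assumed context): one cleanUp closure now releases
    // every namespaced backend, local or remote.
    dbs, cleanUp, err := initializeDatabases(ctx, cfg)
    if err != nil {
        return fmt.Errorf("unable to initialize databases: %v", err)
    }
    defer cleanUp()

    // Until the graph code is fully split out of channeldb.DB, both
    // handles point at the same instance.
    _ = dbs.graphDB == dbs.chanStateDB // always true for now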