lntemp+itest: refactor testEtcdFailover

yyforyongyu 2022-08-05 03:34:42 +08:00
parent e953034b83
commit c1b3481354
9 changed files with 202 additions and 132 deletions


@@ -14,6 +14,7 @@ import (
"github.com/btcsuite/btcd/txscript"
"github.com/btcsuite/btcd/wire"
"github.com/go-errors/errors"
"github.com/lightningnetwork/lnd/kvdb/etcd"
"github.com/lightningnetwork/lnd/lnrpc"
"github.com/lightningnetwork/lnd/lnrpc/routerrpc"
"github.com/lightningnetwork/lnd/lnrpc/walletrpc"
@@ -155,6 +156,16 @@ func (h *HarnessTest) ChainBackendName() string {
return h.manager.chainBackend.Name()
}
// Context returns the run context used in this test. Usually it should be
// managed by the test itself, otherwise undefined behaviors will occur. It can
// be used, however, when a test needs to have its own context being managed
// differently. In that case, instead of using a background context, the run
// context should be used such that the test context scope can be fully
// controlled.
func (h *HarnessTest) Context() context.Context {
return h.runCtx
}
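As a usage illustration (not part of this commit), an assertion that needs its own deadline could derive it from the run context instead of context.Background(). A minimal sketch, assuming the lntemp package scope where DefaultTimeout, the cluster package, and require are available:

func assertLeaderWithDeadline(h *HarnessTest,
	observer cluster.LeaderElector, expected string) {

	// Derive the deadline from the harness's run context so the
	// query is cancelled together with the test run.
	ctxt, cancel := context.WithTimeout(h.Context(), DefaultTimeout)
	defer cancel()

	leader, err := observer.Leader(ctxt)
	require.NoError(h, err, "unable to query leader")
	require.Equal(h, expected, leader, "unexpected leader")
}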
// SetupStandbyNodes starts the initial seeder nodes within the test harness.
// The initial nodes' wallets will each be funded with 10 outputs of 10 BTC.
func (h *HarnessTest) SetupStandbyNodes() {
@@ -587,6 +598,61 @@ func (h *HarnessTest) RestoreNodeWithSeed(name string, extraArgs []string,
return node
}
// NewNodeEtcd starts a new node with seed that'll use an external etcd
// database as its storage. The passed cluster flag indicates that we'd like
// the node to join the cluster leader election. We won't wait until RPC is
// available (this is useful when the node is not expected to become the leader
// right away).
func (h *HarnessTest) NewNodeEtcd(name string, etcdCfg *etcd.Config,
password []byte, cluster bool,
leaderSessionTTL int) *node.HarnessNode {
// We don't want to use the embedded etcd instance.
h.manager.dbBackend = lntest.BackendBbolt
extraArgs := node.ExtraArgsEtcd(
etcdCfg, name, cluster, leaderSessionTTL,
)
node, err := h.manager.newNode(h.T, name, extraArgs, password, true)
require.NoError(h, err, "failed to create new node with etcd")
// Start the node daemon only.
err = node.StartLndCmd(h.runCtx)
require.NoError(h, err, "failed to start node %s", node.Name())
return node
}
// NewNodeWithSeedEtcd starts a new node with seed that'll use an external etcd
// database as its storage. The passed cluster flag indicates that we'd like
// the node to join the cluster leader election.
func (h *HarnessTest) NewNodeWithSeedEtcd(name string, etcdCfg *etcd.Config,
password []byte, entropy []byte, statelessInit, cluster bool,
leaderSessionTTL int) (*node.HarnessNode, []string, []byte) {
// We don't want to use the embedded etcd instance.
h.manager.dbBackend = lntest.BackendBbolt
// Create a request to generate a new aezeed. The new seed will have
// the same password as the internal wallet.
req := &lnrpc.GenSeedRequest{
AezeedPassphrase: password,
SeedEntropy: nil,
}
extraArgs := node.ExtraArgsEtcd(
etcdCfg, name, cluster, leaderSessionTTL,
)
return h.newNodeWithSeed(name, extraArgs, req, statelessInit)
}
// KillNode kills the node (but won't wait for the node process to stop).
func (h *HarnessTest) KillNode(hn *node.HarnessNode) {
require.NoErrorf(h, hn.Kill(), "%s: kill got error", hn.Name())
delete(h.manager.activeNodes, hn.Cfg.NodeID)
}
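Taken together, these helpers compose into a leader/follower pair. A hedged sketch of the intended flow (etcdCfg, password, and entropy are assumed test fixtures; the TTL of 5 is arbitrary); the full sequence appears in the refactored test further below:

// Carol-1 initializes a wallet from seed and becomes the initial
// leader of the cluster.
carol1, _, _ := h.NewNodeWithSeedEtcd(
	"Carol-1", etcdCfg, password, entropy, false, true, 5,
)

// Carol-2 shares the same identity and waits in the election.
carol2 := h.NewNodeEtcd("Carol-2", etcdCfg, password, true, 5)

// Killing Carol-1 forces a failover; Carol-2 then finishes its
// startup via WaitUntilLeader (see the node changes further below).
h.KillNode(carol1)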
// SetFeeEstimate sets a fee rate to be returned from fee estimator.
//
// NOTE: this method will set the fee rate for a conf target of 1, which is the


@@ -884,7 +884,11 @@ func (h *HarnessTest) assertPaymentStatusWithTimeout(stream rpc.PaymentClient,
err := wait.NoError(func() error {
// Consume one message. This will raise an error if the message
// is not received within DefaultTimeout.
payment := h.ReceivePaymentUpdate(stream)
payment, err := h.ReceivePaymentUpdate(stream)
if err != nil {
return fmt.Errorf("received error from payment "+
"stream: %s", err)
}
// Return if the desired payment state is reached.
if payment.Status == status {
@@ -895,8 +899,8 @@ func (h *HarnessTest) assertPaymentStatusWithTimeout(stream rpc.PaymentClient,
// Return the err so that it can be used for debugging when
// timeout is reached.
return fmt.Errorf("payment status, got %v, want %v",
payment.Status, status)
return fmt.Errorf("payment %v status, got %v, want %v",
payment.PaymentHash, payment.Status, status)
}, timeout)
require.NoError(h, err, "timeout while waiting for payment")
@@ -907,7 +911,7 @@ func (h *HarnessTest) assertPaymentStatusWithTimeout(stream rpc.PaymentClient,
// ReceivePaymentUpdate waits until a message is received on the payment client
// stream or the timeout is reached.
func (h *HarnessTest) ReceivePaymentUpdate(
stream rpc.PaymentClient) *lnrpc.Payment {
stream rpc.PaymentClient) (*lnrpc.Payment, error) {
chanMsg := make(chan *lnrpc.Payment, 1)
errChan := make(chan error, 1)
@@ -926,16 +930,14 @@ func (h *HarnessTest) ReceivePaymentUpdate(
select {
case <-time.After(DefaultTimeout):
require.Fail(h, "timeout", "timeout waiting for payment update")
return nil, nil
case err := <-errChan:
require.Failf(h, "payment stream",
"received err from payment stream: %v", err)
return nil, err
case updateMsg := <-chanMsg:
return updateMsg
return updateMsg, nil
}
return nil
}
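Since errors are now returned rather than failing the test in place, callers can build their own drain loops. A hypothetical helper (not part of this commit) that reads updates until a terminal payment state:

func waitForTerminalPayment(h *HarnessTest,
	stream rpc.PaymentClient) (*lnrpc.Payment, error) {

	for {
		payment, err := h.ReceivePaymentUpdate(stream)
		if err != nil {
			return nil, fmt.Errorf("payment stream: %w", err)
		}
		if payment == nil {
			// The timeout branch above fails the test and
			// returns nil, nil; guard against it anyway.
			return nil, fmt.Errorf("no payment update")
		}

		// Stop once the payment reaches a terminal state.
		switch payment.Status {
		case lnrpc.Payment_SUCCEEDED, lnrpc.Payment_FAILED:
			return payment, nil
		}
	}
}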
// AssertInvoiceSettled asserts a given invoice specified by its payment


@@ -71,8 +71,7 @@ func (nm *nodeManager) nextNodeID() uint32 {
// node can be used immediately. Otherwise, the node will require an additional
// initialization phase where the wallet is either created or restored.
func (nm *nodeManager) newNode(t *testing.T, name string, extraArgs []string,
password []byte, useSeed bool,
opts ...node.Option) (*node.HarnessNode, error) {
password []byte, useSeed bool) (*node.HarnessNode, error) {
cfg := &node.BaseNodeConfig{
Name: name,
@@ -87,9 +86,6 @@ func (nm *nodeManager) newNode(t *testing.T, name string, extraArgs []string,
NetParams: harnessNetParams,
HasSeed: useSeed,
}
for _, opt := range opts {
opt(cfg)
}
node, err := node.NewHarnessNode(t, cfg)
if err != nil {


@@ -7,6 +7,7 @@ import (
"github.com/btcsuite/btcd/chaincfg"
"github.com/lightningnetwork/lnd/chanbackup"
"github.com/lightningnetwork/lnd/kvdb/etcd"
"github.com/lightningnetwork/lnd/lntest"
)
@@ -232,3 +233,33 @@ func (cfg *BaseNodeConfig) GenArgs() []string {
return args
}
// ExtraArgsEtcd returns extra args for configuring LND to use an external etcd
// database (for remote channel DB and wallet DB).
func ExtraArgsEtcd(etcdCfg *etcd.Config, name string, cluster bool,
leaderSessionTTL int) []string {
extraArgs := []string{
"--db.backend=etcd",
fmt.Sprintf("--db.etcd.host=%v", etcdCfg.Host),
fmt.Sprintf("--db.etcd.user=%v", etcdCfg.User),
fmt.Sprintf("--db.etcd.pass=%v", etcdCfg.Pass),
fmt.Sprintf("--db.etcd.namespace=%v", etcdCfg.Namespace),
}
if etcdCfg.InsecureSkipVerify {
extraArgs = append(extraArgs, "--db.etcd.insecure_skip_verify")
}
if cluster {
clusterArgs := []string{
"--cluster.enable-leader-election",
fmt.Sprintf("--cluster.id=%v", name),
fmt.Sprintf("--cluster.leader-session-ttl=%v",
leaderSessionTTL),
}
extraArgs = append(extraArgs, clusterArgs...)
}
return extraArgs
}
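For a concrete picture of the flags this produces, consider a hypothetical config (all values invented for illustration):

etcdCfg := &etcd.Config{
	Host:      "127.0.0.1:2379",
	User:      "testuser",
	Pass:      "testpass",
	Namespace: "lnd",
}
args := ExtraArgsEtcd(etcdCfg, "Carol-1", true, 60)

// args now holds:
//   --db.backend=etcd
//   --db.etcd.host=127.0.0.1:2379
//   --db.etcd.user=testuser
//   --db.etcd.pass=testpass
//   --db.etcd.namespace=lnd
//   --cluster.enable-leader-election
//   --cluster.id=Carol-1
//   --cluster.leader-session-ttl=60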


@@ -215,6 +215,33 @@ func (hn *HarnessNode) WaitUntilServerActive() error {
})
}
// WaitUntilLeader attempts to finish the start procedure by initiating an RPC
// connection and setting up the wallet unlocker client. This is needed when
// a node that has recently been started was waiting to become the leader and
// we're at the point when we expect that it is the leader now (awaiting
// unlock).
func (hn *HarnessNode) WaitUntilLeader(timeout time.Duration) error {
var (
conn *grpc.ClientConn
connErr error
)
if err := wait.NoError(func() error {
conn, connErr = hn.ConnectRPCWithMacaroon(nil)
return connErr
}, timeout); err != nil {
return err
}
// Since the conn is not authed, only the `WalletUnlocker` and `State`
// clients can be inited from this conn.
hn.conn = conn
hn.RPC = rpc.NewHarnessRPC(hn.runCtx, hn.T, conn, hn.Name())
// Wait till the server is starting.
return hn.WaitUntilStarted()
}
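The intended call pattern after the current leader disappears mirrors the refactored test below; a sketch, assuming carol2, leaderSessionTTL, and password from the surrounding test, with the timeout set to twice the leader-session TTL:

// Wait for the standby node to win the election and expose its
// WalletUnlocker service.
failoverTimeout := time.Duration(2*leaderSessionTTL) * time.Second
if err := carol2.WaitUntilLeader(failoverTimeout); err != nil {
	return err
}

// The new leader starts with a locked wallet and must be unlocked
// before it can serve requests.
req := &lnrpc.UnlockWalletRequest{WalletPassword: password}
if err := carol2.Unlock(req); err != nil {
	return err
}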
// Unlock attempts to unlock the wallet of the target HarnessNode. This method
// should be called after the restart of a HarnessNode that was created with a
// seed+password. Once this method returns, the HarnessNode will be ready to
@@ -399,7 +426,7 @@ func (hn *HarnessNode) Start(ctxt context.Context) error {
conn, err := hn.ConnectRPC()
if err != nil {
err = fmt.Errorf("ConnectRPC err: %w", err)
cmdErr := hn.kill()
cmdErr := hn.Kill()
if cmdErr != nil {
err = fmt.Errorf("kill process got err: %w: %v",
cmdErr, err)
@@ -453,6 +480,11 @@ func (hn *HarnessNode) InitNode(macBytes []byte) error {
// Init all the RPC clients.
hn.InitRPCClients(conn)
// Wait till the server is starting.
if err := hn.WaitUntilStarted(); err != nil {
return fmt.Errorf("waiting for start got: %w", err)
}
return hn.initLightningClient()
}
@@ -670,7 +702,7 @@ func (hn *HarnessNode) Stop() error {
// If the rpc clients are not initiated, we'd kill the process
// manually.
hn.printErrf("found nil RPC clients")
if err := hn.kill(); err != nil {
if err := hn.Kill(); err != nil {
return fmt.Errorf("killing process got: %v", err)
}
}
@@ -719,8 +751,8 @@ func (hn *HarnessNode) Shutdown() error {
return nil
}
// kill kills the lnd process.
func (hn *HarnessNode) kill() error {
// Kill kills the lnd process.
func (hn *HarnessNode) Kill() error {
return hn.cmd.Process.Kill()
}


@@ -231,4 +231,8 @@ var allTestCasesTemp = []*lntemp.TestCase{
Name: "neutrino kit",
TestFunc: testNeutrino,
},
{
Name: "etcd failover",
TestFunc: testEtcdFailover,
},
}


@@ -4,7 +4,6 @@
package itest
import (
"context"
"testing"
"time"
@@ -13,27 +12,24 @@ import (
"github.com/lightningnetwork/lnd/kvdb"
"github.com/lightningnetwork/lnd/lncfg"
"github.com/lightningnetwork/lnd/lnrpc"
"github.com/lightningnetwork/lnd/lnrpc/routerrpc"
"github.com/lightningnetwork/lnd/lntemp"
"github.com/lightningnetwork/lnd/lntest"
"github.com/stretchr/testify/require"
)
func assertLeader(ht *harnessTest, observer cluster.LeaderElector,
func assertLeader(ht *lntemp.HarnessTest, observer cluster.LeaderElector,
expected string) {
leader, err := observer.Leader(context.Background())
if err != nil {
ht.Fatalf("Unable to query leader: %v", err)
}
if leader != expected {
ht.Fatalf("Leader should be '%v', got: '%v'", expected, leader)
}
leader, err := observer.Leader(ht.Context())
require.NoError(ht, err, "Unable to query leader")
require.Equalf(ht, expected, leader,
"Leader should be '%v', got: '%v'", expected, leader)
}
// testEtcdFailover tests that in a cluster setup where two LND nodes form a
// single cluster (sharing the same identity) one can hand over the leader role
// to the other (failing over after graceful shutdown or forceful abort).
func testEtcdFailover(net *lntest.NetworkHarness, ht *harnessTest) {
func testEtcdFailover(ht *lntemp.HarnessTest) {
testCases := []struct {
name string
kill bool
@@ -48,148 +44,97 @@ func testEtcdFailover(net *lntest.NetworkHarness, ht *harnessTest) {
for _, test := range testCases {
test := test
ht.t.Run(test.name, func(t1 *testing.T) {
ht1 := newHarnessTest(t1, ht.lndHarness)
ht1.RunTestCase(&testCase{
name: test.name,
test: func(_ *lntest.NetworkHarness,
tt *harnessTest) {
testEtcdFailoverCase(net, tt, test.kill)
},
})
success := ht.Run(test.name, func(t1 *testing.T) {
st := ht.Subtest(t1)
testEtcdFailoverCase(st, test.kill)
})
if !success {
return
}
}
}
func testEtcdFailoverCase(net *lntest.NetworkHarness, ht *harnessTest,
kill bool) {
ctxb := context.Background()
func testEtcdFailoverCase(ht *lntemp.HarnessTest, kill bool) {
etcdCfg, cleanup, err := kvdb.StartEtcdTestBackend(
ht.t.TempDir(), uint16(lntest.NextAvailablePort()),
ht.T.TempDir(), uint16(lntest.NextAvailablePort()),
uint16(lntest.NextAvailablePort()), "",
)
if err != nil {
ht.Fatalf("Failed to start etcd instance: %v", err)
}
require.NoError(ht, err, "Failed to start etcd instance")
defer cleanup()
alice := ht.NewNode("Alice", nil)
// Give Alice some coins to fund the channel.
ht.FundCoins(btcutil.SatoshiPerBitcoin, alice)
// Make leader election session TTL 5 sec to make the test run fast.
const leaderSessionTTL = 5
observer, err := cluster.MakeLeaderElector(
ctxb, cluster.EtcdLeaderElector, "observer",
ht.Context(), cluster.EtcdLeaderElector, "observer",
lncfg.DefaultEtcdElectionPrefix, leaderSessionTTL, etcdCfg,
)
if err != nil {
ht.Fatalf("Cannot start election observer: %v", err)
}
require.NoError(ht, err, "Cannot start election observer")
password := []byte("the quick brown fox jumps the lazy dog")
entropy := [16]byte{1, 2, 3}
stateless := false
cluster := true
carol1, _, _, err := net.NewNodeWithSeedEtcd(
carol1, _, _ := ht.NewNodeWithSeedEtcd(
"Carol-1", etcdCfg, password, entropy[:], stateless, cluster,
leaderSessionTTL,
)
if err != nil {
ht.Fatalf("unable to start Carol-1: %v", err)
}
ctxt, _ := context.WithTimeout(ctxb, defaultTimeout)
info1, err := carol1.GetInfo(ctxt, &lnrpc.GetInfoRequest{})
if err != nil {
ht.Fatalf("unable to get info: %v", err)
}
net.ConnectNodes(ht.t, carol1, net.Alice)
info1 := carol1.RPC.GetInfo()
ht.ConnectNodes(carol1, alice)
// Open a channel with 100k satoshis between Carol and Alice with Alice
// being the sole funder of the channel.
chanAmt := btcutil.Amount(100000)
_ = openChannelAndAssert(
ht, net, net.Alice, carol1,
lntest.OpenChannelParams{
Amt: chanAmt,
},
)
chanAmt := btcutil.Amount(100_000)
ht.OpenChannel(alice, carol1, lntemp.OpenChannelParams{Amt: chanAmt})
// At this point Carol-1 is the elected leader, while Carol-2 will wait
// to become the leader when Carol-1 stops.
carol2, err := net.NewNodeEtcd(
"Carol-2", etcdCfg, password, cluster, false, leaderSessionTTL,
carol2 := ht.NewNodeEtcd(
"Carol-2", etcdCfg, password, cluster, leaderSessionTTL,
)
if err != nil {
ht.Fatalf("Unable to start Carol-2: %v", err)
}
assertLeader(ht, observer, "Carol-1")
amt := btcutil.Amount(1000)
payReqs, _, _, err := createPayReqs(carol1, amt, 2)
if err != nil {
ht.Fatalf("Carol-2 is unable to create payment requests: %v",
err)
}
sendAndAssertSuccess(
ht, net.Alice, &routerrpc.SendPaymentRequest{
PaymentRequest: payReqs[0],
TimeoutSeconds: 60,
FeeLimitSat: noFeeLimitMsat,
},
)
payReqs, _, _ := ht.CreatePayReqs(carol1, amt, 2)
ht.CompletePaymentRequests(alice, []string{payReqs[0]})
// Shut down or kill Carol-1 and wait for Carol-2 to become the leader.
failoverTimeout := time.Duration(2*leaderSessionTTL) * time.Second
if kill {
err = net.KillNode(carol1)
if err != nil {
ht.Fatalf("Can't kill Carol-1: %v", err)
}
ht.KillNode(carol1)
} else {
shutdownAndAssert(net, ht, carol1)
ht.Shutdown(carol1)
}
err = carol2.WaitUntilLeader(failoverTimeout)
if err != nil {
ht.Fatalf("Waiting for Carol-2 to become the leader failed: %v",
err)
}
require.NoError(ht, err, "Waiting for Carol-2 to become the leader "+
"failed")
assertLeader(ht, observer, "Carol-2")
err = carol2.Unlock(&lnrpc.UnlockWalletRequest{
WalletPassword: password,
})
if err != nil {
ht.Fatalf("Unlocking Carol-2 was not successful: %v", err)
}
ctxt, _ = context.WithTimeout(ctxb, defaultTimeout)
req := &lnrpc.UnlockWalletRequest{WalletPassword: password}
err = carol2.Unlock(req)
require.NoError(ht, err, "Unlocking Carol-2 failed")
// Make sure Carol-1 and Carol-2 have the same identity.
info2, err := carol2.GetInfo(ctxt, &lnrpc.GetInfoRequest{})
if err != nil {
ht.Fatalf("unable to get info: %v", err)
}
if info1.IdentityPubkey != info2.IdentityPubkey {
ht.Fatalf("Carol-1 and Carol-2 must have the same identity: "+
"%v vs %v", info1.IdentityPubkey, info2.IdentityPubkey)
}
info2 := carol2.RPC.GetInfo()
require.Equal(ht, info1.IdentityPubkey, info2.IdentityPubkey,
"Carol-1 and Carol-2 must have the same identity")
// Make sure the nodes are connected before moving forward. Otherwise
// we may get a link not found error.
ht.AssertConnected(alice, carol2)
// Now let Alice pay the second invoice but this time we expect Carol-2
// to receive the payment.
sendAndAssertSuccess(
ht, net.Alice, &routerrpc.SendPaymentRequest{
PaymentRequest: payReqs[1],
TimeoutSeconds: 60,
FeeLimitSat: noFeeLimitMsat,
},
)
ht.CompletePaymentRequests(alice, []string{payReqs[1]})
shutdownAndAssert(net, ht, carol2)
// Manually shut down the node, as otherwise it would interfere with
// our cleanup process.
ht.Shutdown(carol2)
}


@@ -3,10 +3,8 @@
package itest
import (
"github.com/lightningnetwork/lnd/lntest"
)
import "github.com/lightningnetwork/lnd/lntemp"
// testEtcdFailover is an empty itest when LND is not compiled with etcd
// support.
func testEtcdFailover(net *lntest.NetworkHarness, ht *harnessTest) {}
func testEtcdFailover(ht *lntemp.HarnessTest) {}


@@ -210,10 +210,6 @@ var allTestCases = []*testCase{
name: "wallet import pubkey",
test: testWalletImportPubKey,
},
{
name: "etcd_failover",
test: testEtcdFailover,
},
{
name: "max htlc pathfind",
test: testMaxHtlcPathfind,