From c1b348135410f5cd017334ed8a783158128d752c Mon Sep 17 00:00:00 2001
From: yyforyongyu
Date: Fri, 5 Aug 2022 03:34:42 +0800
Subject: [PATCH] lntemp+itest: refactor `testEtcdFailover`

---
 lntemp/harness.go                             |  66 ++++++++
 lntemp/harness_assertion.go                   |  20 ++-
 lntemp/harness_node_manager.go                |   6 +-
 lntemp/node/config.go                         |  31 ++++
 lntemp/node/harness_node.go                   |  40 ++++-
 lntest/itest/list_on_test.go                  |   4 +
 lntest/itest/lnd_etcd_failover_test.go        | 157 ++++------
 .../itest/lnd_no_etcd_dummy_failover_test.go  |   6 +-
 lntest/itest/lnd_test_list_on_test.go         |   4 -
 9 files changed, 202 insertions(+), 132 deletions(-)

diff --git a/lntemp/harness.go b/lntemp/harness.go
index d69cc330a..d014aec07 100644
--- a/lntemp/harness.go
+++ b/lntemp/harness.go
@@ -14,6 +14,7 @@ import (
 	"github.com/btcsuite/btcd/txscript"
 	"github.com/btcsuite/btcd/wire"
 	"github.com/go-errors/errors"
+	"github.com/lightningnetwork/lnd/kvdb/etcd"
 	"github.com/lightningnetwork/lnd/lnrpc"
 	"github.com/lightningnetwork/lnd/lnrpc/routerrpc"
 	"github.com/lightningnetwork/lnd/lnrpc/walletrpc"
@@ -155,6 +156,16 @@ func (h *HarnessTest) ChainBackendName() string {
 	return h.manager.chainBackend.Name()
 }
 
+// Context returns the run context used in this test. Usually it should be
+// managed by the test itself; otherwise undefined behaviors will occur. It can
+// be used, however, when a test needs to have its own context being managed
+// differently. In that case, instead of using a background context, the run
+// context should be used such that the test context scope can be fully
+// controlled.
+func (h *HarnessTest) Context() context.Context {
+	return h.runCtx
+}
+
 // SetUp starts the initial seeder nodes within the test harness. The initial
 // node's wallets will be funded wallets with 10x10 BTC outputs each.
 func (h *HarnessTest) SetupStandbyNodes() {
@@ -587,6 +598,61 @@ func (h *HarnessTest) RestoreNodeWithSeed(name string, extraArgs []string,
 	return node
 }
 
+// NewNodeEtcd starts a new node with seed that'll use an external etcd
+// database as its storage. The passed cluster flag indicates that we'd like
+// the node to join the cluster leader election. We won't wait until RPC is
+// available (this is useful when the node is not expected to become the leader
+// right away).
+func (h *HarnessTest) NewNodeEtcd(name string, etcdCfg *etcd.Config,
+	password []byte, cluster bool,
+	leaderSessionTTL int) *node.HarnessNode {
+
+	// We don't want to use the embedded etcd instance.
+	h.manager.dbBackend = lntest.BackendBbolt
+
+	extraArgs := node.ExtraArgsEtcd(
+		etcdCfg, name, cluster, leaderSessionTTL,
+	)
+	node, err := h.manager.newNode(h.T, name, extraArgs, password, true)
+	require.NoError(h, err, "failed to create new node with etcd")
+
+	// Start the node daemon only.
+	err = node.StartLndCmd(h.runCtx)
+	require.NoError(h, err, "failed to start node %s", node.Name())
+
+	return node
+}
+
+// NewNodeWithSeedEtcd starts a new node with seed that'll use an external etcd
+// database as its storage. The passed cluster flag indicates that we'd like
+// the node to join the cluster leader election.
+func (h *HarnessTest) NewNodeWithSeedEtcd(name string, etcdCfg *etcd.Config,
+	password []byte, entropy []byte, statelessInit, cluster bool,
+	leaderSessionTTL int) (*node.HarnessNode, []string, []byte) {
+
+	// We don't want to use the embedded etcd instance.
+	h.manager.dbBackend = lntest.BackendBbolt
+
+	// Create a request to generate a new aezeed. The new seed will have
+	// the same password as the internal wallet.
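+	// Note: with a nil SeedEntropy, lnd draws fresh randomness for the
+	// aezeed itself.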
+	req := &lnrpc.GenSeedRequest{
+		AezeedPassphrase: password,
+		SeedEntropy:      nil,
+	}
+
+	extraArgs := node.ExtraArgsEtcd(
+		etcdCfg, name, cluster, leaderSessionTTL,
+	)
+
+	return h.newNodeWithSeed(name, extraArgs, req, statelessInit)
+}
+
+// KillNode kills the node (but won't wait for the node process to stop).
+func (h *HarnessTest) KillNode(hn *node.HarnessNode) {
+	require.NoErrorf(h, hn.Kill(), "%s: kill got error", hn.Name())
+	delete(h.manager.activeNodes, hn.Cfg.NodeID)
+}
+
 // SetFeeEstimate sets a fee rate to be returned from fee estimator.
 //
 // NOTE: this method will set the fee rate for a conf target of 1, which is the
diff --git a/lntemp/harness_assertion.go b/lntemp/harness_assertion.go
index 690fbff44..61e79613a 100644
--- a/lntemp/harness_assertion.go
+++ b/lntemp/harness_assertion.go
@@ -884,7 +884,11 @@ func (h *HarnessTest) assertPaymentStatusWithTimeout(stream rpc.PaymentClient,
 	err := wait.NoError(func() error {
 		// Consume one message. This will raise an error if the message
 		// is not received within DefaultTimeout.
-		payment := h.ReceivePaymentUpdate(stream)
+		payment, err := h.ReceivePaymentUpdate(stream)
+		if err != nil {
+			return fmt.Errorf("received error from payment "+
+				"stream: %s", err)
+		}
 
 		// Return if the desired payment state is reached.
 		if payment.Status == status {
@@ -895,8 +899,8 @@ func (h *HarnessTest) assertPaymentStatusWithTimeout(stream rpc.PaymentClient,
 
 		// Return the err so that it can be used for debugging when
 		// timeout is reached.
-		return fmt.Errorf("payment status, got %v, want %v",
-			payment.Status, status)
+		return fmt.Errorf("payment %v status, got %v, want %v",
+			payment.PaymentHash, payment.Status, status)
 	}, timeout)
 
 	require.NoError(h, err, "timeout while waiting payment")
@@ -907,7 +911,7 @@ func (h *HarnessTest) assertPaymentStatusWithTimeout(stream rpc.PaymentClient,
 // ReceivePaymentUpdate waits until a message is received on the payment client
 // stream or the timeout is reached.
 func (h *HarnessTest) ReceivePaymentUpdate(
-	stream rpc.PaymentClient) *lnrpc.Payment {
+	stream rpc.PaymentClient) (*lnrpc.Payment, error) {
 
 	chanMsg := make(chan *lnrpc.Payment, 1)
 	errChan := make(chan error, 1)
@@ -926,16 +930,14 @@ func (h *HarnessTest) ReceivePaymentUpdate(
 	select {
 	case <-time.After(DefaultTimeout):
 		require.Fail(h, "timeout", "timeout waiting for payment update")
+		return nil, nil
 
 	case err := <-errChan:
-		require.Failf(h, "payment stream",
-			"received err from payment stream: %v", err)
+		return nil, err
 
 	case updateMsg := <-chanMsg:
-		return updateMsg
+		return updateMsg, nil
 	}
-
-	return nil
 }
 
 // AssertInvoiceSettled asserts a given invoice specified by its payment
diff --git a/lntemp/harness_node_manager.go b/lntemp/harness_node_manager.go
index 2b5f87e08..0b6aa038c 100644
--- a/lntemp/harness_node_manager.go
+++ b/lntemp/harness_node_manager.go
@@ -71,8 +71,7 @@ func (nm *nodeManager) nextNodeID() uint32 {
 // node can be used immediately. Otherwise, the node will require an additional
 // initialization phase where the wallet is either created or restored.
 func (nm *nodeManager) newNode(t *testing.T, name string, extraArgs []string,
-	password []byte, useSeed bool,
-	opts ...node.Option) (*node.HarnessNode, error) {
+	password []byte, useSeed bool) (*node.HarnessNode, error) {
 
 	cfg := &node.BaseNodeConfig{
 		Name: name,
@@ -87,9 +86,6 @@ func (nm *nodeManager) newNode(t *testing.T, name string, extraArgs []string,
 		NetParams: harnessNetParams,
 		HasSeed: useSeed,
 	}
-	for _, opt := range opts {
-		opt(cfg)
-	}
 
 	node, err := node.NewHarnessNode(t, cfg)
 	if err != nil {
diff --git a/lntemp/node/config.go b/lntemp/node/config.go
index f9798ede4..84e1d9a71 100644
--- a/lntemp/node/config.go
+++ b/lntemp/node/config.go
@@ -7,6 +7,7 @@ import (
 
 	"github.com/btcsuite/btcd/chaincfg"
 	"github.com/lightningnetwork/lnd/chanbackup"
+	"github.com/lightningnetwork/lnd/kvdb/etcd"
 	"github.com/lightningnetwork/lnd/lntest"
 )
 
@@ -232,3 +233,33 @@ func (cfg *BaseNodeConfig) GenArgs() []string {
 
 	return args
 }
+
+// ExtraArgsEtcd returns extra args for configuring LND to use an external etcd
+// database (for remote channel DB and wallet DB).
+func ExtraArgsEtcd(etcdCfg *etcd.Config, name string, cluster bool,
+	leaderSessionTTL int) []string {
+
+	extraArgs := []string{
+		"--db.backend=etcd",
+		fmt.Sprintf("--db.etcd.host=%v", etcdCfg.Host),
+		fmt.Sprintf("--db.etcd.user=%v", etcdCfg.User),
+		fmt.Sprintf("--db.etcd.pass=%v", etcdCfg.Pass),
+		fmt.Sprintf("--db.etcd.namespace=%v", etcdCfg.Namespace),
+	}
+
+	if etcdCfg.InsecureSkipVerify {
+		extraArgs = append(extraArgs, "--db.etcd.insecure_skip_verify")
+	}
+
+	if cluster {
+		clusterArgs := []string{
+			"--cluster.enable-leader-election",
+			fmt.Sprintf("--cluster.id=%v", name),
+			fmt.Sprintf("--cluster.leader-session-ttl=%v",
+				leaderSessionTTL),
+		}
+		extraArgs = append(extraArgs, clusterArgs...)
+	}
+
+	return extraArgs
+}
diff --git a/lntemp/node/harness_node.go b/lntemp/node/harness_node.go
index 0cb8f2341..f8be79dfc 100644
--- a/lntemp/node/harness_node.go
+++ b/lntemp/node/harness_node.go
@@ -215,6 +215,33 @@ func (hn *HarnessNode) WaitUntilServerActive() error {
 	})
 }
 
+// WaitUntilLeader attempts to finish the start procedure by initiating an RPC
+// connection and setting up the wallet unlocker client. This is needed when
+// a node that has recently been started was waiting to become the leader and
+// we're at the point when we expect that it is the leader now (awaiting
+// unlock).
+func (hn *HarnessNode) WaitUntilLeader(timeout time.Duration) error {
+	var (
+		conn    *grpc.ClientConn
+		connErr error
+	)
+
+	if err := wait.NoError(func() error {
+		conn, connErr = hn.ConnectRPCWithMacaroon(nil)
+		return connErr
+	}, timeout); err != nil {
+		return err
+	}
+
+	// Since the conn is not authed, only the `WalletUnlocker` and `State`
+	// clients can be inited from this conn.
+	hn.conn = conn
+	hn.RPC = rpc.NewHarnessRPC(hn.runCtx, hn.T, conn, hn.Name())
+
+	// Wait till the server is starting.
+	return hn.WaitUntilStarted()
+}
+
 // Unlock attempts to unlock the wallet of the target HarnessNode. This method
 // should be called after the restart of a HarnessNode that was created with a
 // seed+password. Once this method returns, the HarnessNode will be ready to
@@ -399,7 +426,7 @@ func (hn *HarnessNode) Start(ctxt context.Context) error {
 	conn, err := hn.ConnectRPC()
 	if err != nil {
 		err = fmt.Errorf("ConnectRPC err: %w", err)
-		cmdErr := hn.kill()
+		cmdErr := hn.Kill()
 		if cmdErr != nil {
 			err = fmt.Errorf("kill process got err: %w: %v",
 				cmdErr, err)
@@ -453,6 +480,11 @@ func (hn *HarnessNode) InitNode(macBytes []byte) error {
 	// Init all the RPC clients.
 	hn.InitRPCClients(conn)
 
+	// Wait till the server is starting.
+	if err := hn.WaitUntilStarted(); err != nil {
+		return fmt.Errorf("waiting for start got: %w", err)
+	}
+
 	return hn.initLightningClient()
 }
 
@@ -670,7 +702,7 @@ func (hn *HarnessNode) Stop() error {
 		// If the rpc clients are not initiated, we'd kill the process
 		// manually.
 		hn.printErrf("found nil RPC clients")
-		if err := hn.kill(); err != nil {
+		if err := hn.Kill(); err != nil {
 			return fmt.Errorf("killing process got: %v", err)
 		}
 	}
@@ -719,8 +751,8 @@ func (hn *HarnessNode) Shutdown() error {
 	return nil
 }
 
-// kill kills the lnd process.
-func (hn *HarnessNode) kill() error {
+// Kill kills the lnd process.
+func (hn *HarnessNode) Kill() error {
 	return hn.cmd.Process.Kill()
 }
 
diff --git a/lntest/itest/list_on_test.go b/lntest/itest/list_on_test.go
index 91e808e5e..55fee1f16 100644
--- a/lntest/itest/list_on_test.go
+++ b/lntest/itest/list_on_test.go
@@ -231,4 +231,8 @@ var allTestCasesTemp = []*lntemp.TestCase{
 		Name: "neutrino kit",
 		TestFunc: testNeutrino,
 	},
+	{
+		Name: "etcd failover",
+		TestFunc: testEtcdFailover,
+	},
 }
diff --git a/lntest/itest/lnd_etcd_failover_test.go b/lntest/itest/lnd_etcd_failover_test.go
index 4de2d4490..90bd5fbbe 100644
--- a/lntest/itest/lnd_etcd_failover_test.go
+++ b/lntest/itest/lnd_etcd_failover_test.go
@@ -4,7 +4,6 @@
 package itest
 
 import (
-	"context"
 	"testing"
 	"time"
 
@@ -13,27 +12,24 @@ import (
 	"github.com/lightningnetwork/lnd/kvdb"
 	"github.com/lightningnetwork/lnd/lncfg"
 	"github.com/lightningnetwork/lnd/lnrpc"
-	"github.com/lightningnetwork/lnd/lnrpc/routerrpc"
+	"github.com/lightningnetwork/lnd/lntemp"
 	"github.com/lightningnetwork/lnd/lntest"
+	"github.com/stretchr/testify/require"
 )
 
-func assertLeader(ht *harnessTest, observer cluster.LeaderElector,
+func assertLeader(ht *lntemp.HarnessTest, observer cluster.LeaderElector,
 	expected string) {
 
-	leader, err := observer.Leader(context.Background())
-	if err != nil {
-		ht.Fatalf("Unable to query leader: %v", err)
-	}
-
-	if leader != expected {
-		ht.Fatalf("Leader should be '%v', got: '%v'", expected, leader)
-	}
+	leader, err := observer.Leader(ht.Context())
+	require.NoError(ht, err, "Unable to query leader")
+	require.Equalf(ht, expected, leader,
+		"Leader should be '%v', got: '%v'", expected, leader)
 }
 
 // testEtcdFailover tests that in a cluster setup where two LND nodes form a
 // single cluster (sharing the same identity) one can hand over the leader role
 // to the other (failing over after graceful shutdown or forceful abort).
-func testEtcdFailover(net *lntest.NetworkHarness, ht *harnessTest) {
+func testEtcdFailover(ht *lntemp.HarnessTest) {
 	testCases := []struct {
 		name string
 		kill bool
@@ -48,148 +44,97 @@ func testEtcdFailover(net *lntest.NetworkHarness, ht *harnessTest) {
 	for _, test := range testCases {
 		test := test
 
-		ht.t.Run(test.name, func(t1 *testing.T) {
-			ht1 := newHarnessTest(t1, ht.lndHarness)
-			ht1.RunTestCase(&testCase{
-				name: test.name,
-				test: func(_ *lntest.NetworkHarness,
-					tt *harnessTest) {
-
-					testEtcdFailoverCase(net, tt, test.kill)
-				},
-			})
+		success := ht.Run(test.name, func(t1 *testing.T) {
+			st := ht.Subtest(t1)
+			testEtcdFailoverCase(st, test.kill)
 		})
+		if !success {
+			return
+		}
 	}
 }
 
-func testEtcdFailoverCase(net *lntest.NetworkHarness, ht *harnessTest,
-	kill bool) {
-
-	ctxb := context.Background()
-
+func testEtcdFailoverCase(ht *lntemp.HarnessTest, kill bool) {
 	etcdCfg, cleanup, err := kvdb.StartEtcdTestBackend(
-		ht.t.TempDir(), uint16(lntest.NextAvailablePort()),
+		ht.T.TempDir(), uint16(lntest.NextAvailablePort()),
 		uint16(lntest.NextAvailablePort()), "",
 	)
-	if err != nil {
-		ht.Fatalf("Failed to start etcd instance: %v", err)
-	}
+	require.NoError(ht, err, "Failed to start etcd instance")
 	defer cleanup()
 
+	alice := ht.NewNode("Alice", nil)
+
+	// Give Alice some coins to fund the channel.
+	ht.FundCoins(btcutil.SatoshiPerBitcoin, alice)
+
 	// Make leader election session TTL 5 sec to make the test run fast.
 	const leaderSessionTTL = 5
 
 	observer, err := cluster.MakeLeaderElector(
-		ctxb, cluster.EtcdLeaderElector, "observer",
+		ht.Context(), cluster.EtcdLeaderElector, "observer",
 		lncfg.DefaultEtcdElectionPrefix, leaderSessionTTL,
 		etcdCfg,
 	)
-	if err != nil {
-		ht.Fatalf("Cannot start election observer: %v", err)
-	}
+	require.NoError(ht, err, "Cannot start election observer")
 
 	password := []byte("the quick brown fox jumps the lazy dog")
 	entropy := [16]byte{1, 2, 3}
 	stateless := false
 	cluster := true
 
-	carol1, _, _, err := net.NewNodeWithSeedEtcd(
+	carol1, _, _ := ht.NewNodeWithSeedEtcd(
 		"Carol-1", etcdCfg, password, entropy[:], stateless,
 		cluster, leaderSessionTTL,
 	)
-	if err != nil {
-		ht.Fatalf("unable to start Carol-1: %v", err)
-	}
-
-	ctxt, _ := context.WithTimeout(ctxb, defaultTimeout)
-	info1, err := carol1.GetInfo(ctxt, &lnrpc.GetInfoRequest{})
-	if err != nil {
-		ht.Fatalf("unable to get info: %v", err)
-	}
-
-	net.ConnectNodes(ht.t, carol1, net.Alice)
+	info1 := carol1.RPC.GetInfo()
+	ht.ConnectNodes(carol1, alice)
 
 	// Open a channel with 100k satoshis between Carol and Alice with Alice
 	// being the sole funder of the channel.
-	chanAmt := btcutil.Amount(100000)
-	_ = openChannelAndAssert(
-		ht, net, net.Alice, carol1,
-		lntest.OpenChannelParams{
-			Amt: chanAmt,
-		},
-	)
+	chanAmt := btcutil.Amount(100_000)
+	ht.OpenChannel(alice, carol1, lntemp.OpenChannelParams{Amt: chanAmt})
 
 	// At this point Carol-1 is the elected leader, while Carol-2 will wait
 	// to become the leader when Carol-1 stops.
-	carol2, err := net.NewNodeEtcd(
-		"Carol-2", etcdCfg, password, cluster, false, leaderSessionTTL,
+	carol2 := ht.NewNodeEtcd(
+		"Carol-2", etcdCfg, password, cluster, leaderSessionTTL,
 	)
-	if err != nil {
-		ht.Fatalf("Unable to start Carol-2: %v", err)
-	}
-
 	assertLeader(ht, observer, "Carol-1")
 
 	amt := btcutil.Amount(1000)
-	payReqs, _, _, err := createPayReqs(carol1, amt, 2)
-	if err != nil {
-		ht.Fatalf("Carol-2 is unable to create payment requests: %v",
-			err)
-	}
-
-	sendAndAssertSuccess(
-		ht, net.Alice, &routerrpc.SendPaymentRequest{
-			PaymentRequest: payReqs[0],
-			TimeoutSeconds: 60,
-			FeeLimitSat:    noFeeLimitMsat,
-		},
-	)
+	payReqs, _, _ := ht.CreatePayReqs(carol1, amt, 2)
+	ht.CompletePaymentRequests(alice, []string{payReqs[0]})
 
 	// Shut down or kill Carol-1 and wait for Carol-2 to become the leader.
 	failoverTimeout := time.Duration(2*leaderSessionTTL) * time.Second
 	if kill {
-		err = net.KillNode(carol1)
-		if err != nil {
-			ht.Fatalf("Can't kill Carol-1: %v", err)
-		}
+		ht.KillNode(carol1)
 	} else {
-		shutdownAndAssert(net, ht, carol1)
+		ht.Shutdown(carol1)
 	}
 
 	err = carol2.WaitUntilLeader(failoverTimeout)
-	if err != nil {
-		ht.Fatalf("Waiting for Carol-2 to become the leader failed: %v",
-			err)
-	}
-
+	require.NoError(ht, err, "Waiting for Carol-2 to become the leader "+
+		"failed")
 	assertLeader(ht, observer, "Carol-2")
 
-	err = carol2.Unlock(&lnrpc.UnlockWalletRequest{
-		WalletPassword: password,
-	})
-	if err != nil {
-		ht.Fatalf("Unlocking Carol-2 was not successful: %v", err)
-	}
-
-	ctxt, _ = context.WithTimeout(ctxb, defaultTimeout)
+	req := &lnrpc.UnlockWalletRequest{WalletPassword: password}
+	err = carol2.Unlock(req)
+	require.NoError(ht, err, "Unlocking Carol-2 failed")
 
 	// Make sure Carol-1 and Carol-2 have the same identity.
-	info2, err := carol2.GetInfo(ctxt, &lnrpc.GetInfoRequest{})
-	if err != nil {
-		ht.Fatalf("unable to get info: %v", err)
-	}
-	if info1.IdentityPubkey != info2.IdentityPubkey {
-		ht.Fatalf("Carol-1 and Carol-2 must have the same identity: "+
-			"%v vs %v", info1.IdentityPubkey, info2.IdentityPubkey)
-	}
+	info2 := carol2.RPC.GetInfo()
+	require.Equal(ht, info1.IdentityPubkey, info2.IdentityPubkey,
+		"Carol-1 and Carol-2 must have the same identity")
+
+	// Make sure the nodes are connected before moving forward. Otherwise
+	// we may get a link not found error.
+	ht.AssertConnected(alice, carol2)
 
 	// Now let Alice pay the second invoice but this time we expect Carol-2
 	// to receive the payment.
-	sendAndAssertSuccess(
-		ht, net.Alice, &routerrpc.SendPaymentRequest{
-			PaymentRequest: payReqs[1],
-			TimeoutSeconds: 60,
-			FeeLimitSat:    noFeeLimitMsat,
-		},
-	)
+	ht.CompletePaymentRequests(alice, []string{payReqs[1]})
 
-	shutdownAndAssert(net, ht, carol2)
+	// Manually shut down the node as it will otherwise interfere with our
+	// cleanup process.
+	ht.Shutdown(carol2)
 }
diff --git a/lntest/itest/lnd_no_etcd_dummy_failover_test.go b/lntest/itest/lnd_no_etcd_dummy_failover_test.go
index c2a9a188b..721b60cf6 100644
--- a/lntest/itest/lnd_no_etcd_dummy_failover_test.go
+++ b/lntest/itest/lnd_no_etcd_dummy_failover_test.go
@@ -3,10 +3,8 @@
 
 package itest
 
-import (
-	"github.com/lightningnetwork/lnd/lntest"
-)
+import "github.com/lightningnetwork/lnd/lntemp"
 
 // testEtcdFailover is an empty itest when LND is not compiled with etcd
 // support.
-func testEtcdFailover(net *lntest.NetworkHarness, ht *harnessTest) {}
+func testEtcdFailover(ht *lntemp.HarnessTest) {}
diff --git a/lntest/itest/lnd_test_list_on_test.go b/lntest/itest/lnd_test_list_on_test.go
index fe56fcf0d..b7aaf9c69 100644
--- a/lntest/itest/lnd_test_list_on_test.go
+++ b/lntest/itest/lnd_test_list_on_test.go
@@ -210,10 +210,6 @@ var allTestCases = []*testCase{
 		name: "wallet import pubkey",
 		test: testWalletImportPubKey,
 	},
-	{
-		name: "etcd_failover",
-		test: testEtcdFailover,
-	},
 	{
 		name: "max htlc pathfind",
 		test: testMaxHtlcPathfind,