lntemp+itest: refactor testEtcdFailover

yyforyongyu 2022-08-05 03:34:42 +08:00
parent e953034b83
commit c1b3481354
9 changed files with 202 additions and 132 deletions


@@ -14,6 +14,7 @@ import (
"github.com/btcsuite/btcd/txscript"
"github.com/btcsuite/btcd/wire"
"github.com/go-errors/errors"
"github.com/lightningnetwork/lnd/kvdb/etcd"
"github.com/lightningnetwork/lnd/lnrpc"
"github.com/lightningnetwork/lnd/lnrpc/routerrpc"
"github.com/lightningnetwork/lnd/lnrpc/walletrpc"
@@ -155,6 +156,16 @@ func (h *HarnessTest) ChainBackendName() string {
return h.manager.chainBackend.Name()
}
// Context returns the run context used in this test. Usually it should be
// managed by the test itself, otherwise undefined behaviors will occur. It can
// be used, however, when a test needs to have its own context being managed
// differently. In that case, instead of using a background context, the run
// context should be used such that the test context scope can be fully
// controlled.
func (h *HarnessTest) Context() context.Context {
return h.runCtx
}
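As a usage illustration (not part of this commit), an assertion that needs its own deadline could derive it from the run context instead of context.Background(). A minimal sketch, assuming the lntemp package scope where DefaultTimeout, the cluster package, and require are available:

func assertLeaderWithDeadline(h *HarnessTest,
	observer cluster.LeaderElector, expected string) {

	// Derive the deadline from the harness's run context so the
	// query is cancelled together with the test run.
	ctxt, cancel := context.WithTimeout(h.Context(), DefaultTimeout)
	defer cancel()

	leader, err := observer.Leader(ctxt)
	require.NoError(h, err, "unable to query leader")
	require.Equal(h, expected, leader, "unexpected leader")
}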
// SetupStandbyNodes starts the initial seeder nodes within the test harness.
// The initial nodes' wallets will each be funded with 10 outputs of 10 BTC.
func (h *HarnessTest) SetupStandbyNodes() {
@@ -587,6 +598,61 @@ func (h *HarnessTest) RestoreNodeWithSeed(name string, extraArgs []string,
return node
}
// NewNodeEtcd starts a new node with seed that'll use an external etcd
// database as its storage. The passed cluster flag indicates that we'd like
// the node to join the cluster leader election. We won't wait until RPC is
// available (this is useful when the node is not expected to become the leader
// right away).
func (h *HarnessTest) NewNodeEtcd(name string, etcdCfg *etcd.Config,
password []byte, cluster bool,
leaderSessionTTL int) *node.HarnessNode {
// We don't want to use the embedded etcd instance.
h.manager.dbBackend = lntest.BackendBbolt
extraArgs := node.ExtraArgsEtcd(
etcdCfg, name, cluster, leaderSessionTTL,
)
node, err := h.manager.newNode(h.T, name, extraArgs, password, true)
require.NoError(h, err, "failed to create new node with etcd")
// Start the node daemon only.
err = node.StartLndCmd(h.runCtx)
require.NoError(h, err, "failed to start node %s", node.Name())
return node
}
// NewNodeWithSeedEtcd starts a new node with seed that'll use an external etcd
// database as its storage. The passed cluster flag indicates that we'd like
// the node to join the cluster leader election.
func (h *HarnessTest) NewNodeWithSeedEtcd(name string, etcdCfg *etcd.Config,
password []byte, entropy []byte, statelessInit, cluster bool,
leaderSessionTTL int) (*node.HarnessNode, []string, []byte) {
// We don't want to use the embedded etcd instance.
h.manager.dbBackend = lntest.BackendBbolt
// Create a request to generate a new aezeed. The new seed will have
// the same password as the internal wallet.
req := &lnrpc.GenSeedRequest{
AezeedPassphrase: password,
SeedEntropy: nil,
}
extraArgs := node.ExtraArgsEtcd(
etcdCfg, name, cluster, leaderSessionTTL,
)
return h.newNodeWithSeed(name, extraArgs, req, statelessInit)
}
// KillNode kills the node (but won't wait for the node process to stop).
func (h *HarnessTest) KillNode(hn *node.HarnessNode) {
require.NoErrorf(h, hn.Kill(), "%s: kill got error", hn.Name())
delete(h.manager.activeNodes, hn.Cfg.NodeID)
}
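Taken together, these helpers compose into a leader/follower pair. A hedged sketch of the intended flow (etcdCfg, password, and entropy are assumed test fixtures; the TTL of 5 is arbitrary); the full sequence appears in the refactored test further below:

// Carol-1 initializes a wallet from seed and becomes the initial
// leader of the cluster.
carol1, _, _ := h.NewNodeWithSeedEtcd(
	"Carol-1", etcdCfg, password, entropy, false, true, 5,
)

// Carol-2 shares the same identity and waits in the election.
carol2 := h.NewNodeEtcd("Carol-2", etcdCfg, password, true, 5)

// Killing Carol-1 forces a failover; Carol-2 then finishes its
// startup via WaitUntilLeader (see the node changes further below).
h.KillNode(carol1)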
// SetFeeEstimate sets a fee rate to be returned from fee estimator.
//
// NOTE: this method will set the fee rate for a conf target of 1, which is the


@@ -884,7 +884,11 @@ func (h *HarnessTest) assertPaymentStatusWithTimeout(stream rpc.PaymentClient,
err := wait.NoError(func() error {
// Consume one message. This will raise an error if the message
// is not received within DefaultTimeout.
payment := h.ReceivePaymentUpdate(stream)
payment, err := h.ReceivePaymentUpdate(stream)
if err != nil {
return fmt.Errorf("received error from payment "+
"stream: %s", err)
}
// Return if the desired payment state is reached.
if payment.Status == status {
@@ -895,8 +899,8 @@ func (h *HarnessTest) assertPaymentStatusWithTimeout(stream rpc.PaymentClient,
// Return the err so that it can be used for debugging when
// timeout is reached.
return fmt.Errorf("payment status, got %v, want %v",
payment.Status, status)
return fmt.Errorf("payment %v status, got %v, want %v",
payment.PaymentHash, payment.Status, status)
}, timeout)
require.NoError(h, err, "timeout while waiting for payment")
@@ -907,7 +911,7 @@ func (h *HarnessTest) assertPaymentStatusWithTimeout(stream rpc.PaymentClient,
// ReceivePaymentUpdate waits until a message is received on the payment client
// stream or the timeout is reached.
func (h *HarnessTest) ReceivePaymentUpdate(
stream rpc.PaymentClient) *lnrpc.Payment {
stream rpc.PaymentClient) (*lnrpc.Payment, error) {
chanMsg := make(chan *lnrpc.Payment, 1)
errChan := make(chan error, 1)
@@ -926,16 +930,14 @@ func (h *HarnessTest) ReceivePaymentUpdate(
select {
case <-time.After(DefaultTimeout):
require.Fail(h, "timeout", "timeout waiting for payment update")
return nil, nil
case err := <-errChan:
require.Failf(h, "payment stream",
"received err from payment stream: %v", err)
return nil, err
case updateMsg := <-chanMsg:
return updateMsg
return updateMsg, nil
}
return nil
}
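Since errors are now returned rather than failing the test in place, callers can build their own drain loops. A hypothetical helper (not part of this commit) that reads updates until a terminal payment state:

func waitForTerminalPayment(h *HarnessTest,
	stream rpc.PaymentClient) (*lnrpc.Payment, error) {

	for {
		payment, err := h.ReceivePaymentUpdate(stream)
		if err != nil {
			return nil, fmt.Errorf("payment stream: %w", err)
		}
		if payment == nil {
			// The timeout branch above fails the test and
			// returns nil, nil; guard against it anyway.
			return nil, fmt.Errorf("no payment update")
		}

		// Stop once the payment reaches a terminal state.
		switch payment.Status {
		case lnrpc.Payment_SUCCEEDED, lnrpc.Payment_FAILED:
			return payment, nil
		}
	}
}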
// AssertInvoiceSettled asserts a given invoice specified by its payment


@@ -71,8 +71,7 @@ func (nm *nodeManager) nextNodeID() uint32 {
// node can be used immediately. Otherwise, the node will require an additional
// initialization phase where the wallet is either created or restored.
func (nm *nodeManager) newNode(t *testing.T, name string, extraArgs []string,
password []byte, useSeed bool,
opts ...node.Option) (*node.HarnessNode, error) {
password []byte, useSeed bool) (*node.HarnessNode, error) {
cfg := &node.BaseNodeConfig{
Name: name,
@@ -87,9 +86,6 @@ func (nm *nodeManager) newNode(t *testing.T, name string, extraArgs []string,
NetParams: harnessNetParams,
HasSeed: useSeed,
}
for _, opt := range opts {
opt(cfg)
}
node, err := node.NewHarnessNode(t, cfg)
if err != nil {


@@ -7,6 +7,7 @@ import (
"github.com/btcsuite/btcd/chaincfg"
"github.com/lightningnetwork/lnd/chanbackup"
"github.com/lightningnetwork/lnd/kvdb/etcd"
"github.com/lightningnetwork/lnd/lntest"
)
@@ -232,3 +233,33 @@ func (cfg *BaseNodeConfig) GenArgs() []string {
return args
}
// ExtraArgsEtcd returns extra args for configuring LND to use an external etcd
// database (for remote channel DB and wallet DB).
func ExtraArgsEtcd(etcdCfg *etcd.Config, name string, cluster bool,
leaderSessionTTL int) []string {
extraArgs := []string{
"--db.backend=etcd",
fmt.Sprintf("--db.etcd.host=%v", etcdCfg.Host),
fmt.Sprintf("--db.etcd.user=%v", etcdCfg.User),
fmt.Sprintf("--db.etcd.pass=%v", etcdCfg.Pass),
fmt.Sprintf("--db.etcd.namespace=%v", etcdCfg.Namespace),
}
if etcdCfg.InsecureSkipVerify {
extraArgs = append(extraArgs, "--db.etcd.insecure_skip_verify")
}
if cluster {
clusterArgs := []string{
"--cluster.enable-leader-election",
fmt.Sprintf("--cluster.id=%v", name),
fmt.Sprintf("--cluster.leader-session-ttl=%v",
leaderSessionTTL),
}
extraArgs = append(extraArgs, clusterArgs...)
}
return extraArgs
}
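For a concrete picture of the flags this produces, consider a hypothetical config (all values invented for illustration):

etcdCfg := &etcd.Config{
	Host:      "127.0.0.1:2379",
	User:      "testuser",
	Pass:      "testpass",
	Namespace: "lnd",
}
args := ExtraArgsEtcd(etcdCfg, "Carol-1", true, 60)

// args now holds:
//   --db.backend=etcd
//   --db.etcd.host=127.0.0.1:2379
//   --db.etcd.user=testuser
//   --db.etcd.pass=testpass
//   --db.etcd.namespace=lnd
//   --cluster.enable-leader-election
//   --cluster.id=Carol-1
//   --cluster.leader-session-ttl=60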


@@ -215,6 +215,33 @@ func (hn *HarnessNode) WaitUntilServerActive() error {
})
}
// WaitUntilLeader attempts to finish the start procedure by initiating an RPC
// connection and setting up the wallet unlocker client. This is needed when
// a node that has recently been started was waiting to become the leader and
// we're at the point when we expect that it is the leader now (awaiting
// unlock).
func (hn *HarnessNode) WaitUntilLeader(timeout time.Duration) error {
var (
conn *grpc.ClientConn
connErr error
)
if err := wait.NoError(func() error {
conn, connErr = hn.ConnectRPCWithMacaroon(nil)
return connErr
}, timeout); err != nil {
return err
}
// Since the conn is not authed, only the `WalletUnlocker` and `State`
// clients can be inited from this conn.
hn.conn = conn
hn.RPC = rpc.NewHarnessRPC(hn.runCtx, hn.T, conn, hn.Name())
// Wait till the server is starting.
return hn.WaitUntilStarted()
}
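The intended call pattern after the current leader disappears mirrors the refactored test below; a sketch, assuming carol2, leaderSessionTTL, and password from the surrounding test, with the timeout set to twice the leader-session TTL:

// Wait for the standby node to win the election and expose its
// WalletUnlocker service.
failoverTimeout := time.Duration(2*leaderSessionTTL) * time.Second
if err := carol2.WaitUntilLeader(failoverTimeout); err != nil {
	return err
}

// The new leader starts with a locked wallet and must be unlocked
// before it can serve requests.
req := &lnrpc.UnlockWalletRequest{WalletPassword: password}
if err := carol2.Unlock(req); err != nil {
	return err
}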
// Unlock attempts to unlock the wallet of the target HarnessNode. This method
// should be called after the restart of a HarnessNode that was created with a
// seed+password. Once this method returns, the HarnessNode will be ready to
@@ -399,7 +426,7 @@ func (hn *HarnessNode) Start(ctxt context.Context) error {
conn, err := hn.ConnectRPC()
if err != nil {
err = fmt.Errorf("ConnectRPC err: %w", err)
cmdErr := hn.kill()
cmdErr := hn.Kill()
if cmdErr != nil {
err = fmt.Errorf("kill process got err: %w: %v",
cmdErr, err)
@@ -453,6 +480,11 @@ func (hn *HarnessNode) InitNode(macBytes []byte) error {
// Init all the RPC clients.
hn.InitRPCClients(conn)
// Wait till the server is starting.
if err := hn.WaitUntilStarted(); err != nil {
return fmt.Errorf("waiting for start got: %w", err)
}
return hn.initLightningClient()
}
@@ -670,7 +702,7 @@ func (hn *HarnessNode) Stop() error {
// If the rpc clients are not initiated, we'd kill the process
// manually.
hn.printErrf("found nil RPC clients")
if err := hn.kill(); err != nil {
if err := hn.Kill(); err != nil {
return fmt.Errorf("killing process got: %v", err)
}
}
@@ -719,8 +751,8 @@ func (hn *HarnessNode) Shutdown() error {
return nil
}
// kill kills the lnd process.
func (hn *HarnessNode) kill() error {
// Kill kills the lnd process.
func (hn *HarnessNode) Kill() error {
return hn.cmd.Process.Kill()
}


@@ -231,4 +231,8 @@ var allTestCasesTemp = []*lntemp.TestCase{
Name: "neutrino kit",
TestFunc: testNeutrino,
},
{
Name: "etcd failover",
TestFunc: testEtcdFailover,
},
}


@@ -4,7 +4,6 @@
package itest
import (
"context"
"testing"
"time"
@@ -13,27 +12,24 @@ import (
"github.com/lightningnetwork/lnd/kvdb"
"github.com/lightningnetwork/lnd/lncfg"
"github.com/lightningnetwork/lnd/lnrpc"
"github.com/lightningnetwork/lnd/lnrpc/routerrpc"
"github.com/lightningnetwork/lnd/lntemp"
"github.com/lightningnetwork/lnd/lntest"
"github.com/stretchr/testify/require"
)
func assertLeader(ht *harnessTest, observer cluster.LeaderElector,
func assertLeader(ht *lntemp.HarnessTest, observer cluster.LeaderElector,
expected string) {
leader, err := observer.Leader(context.Background())
if err != nil {
ht.Fatalf("Unable to query leader: %v", err)
}
if leader != expected {
ht.Fatalf("Leader should be '%v', got: '%v'", expected, leader)
}
leader, err := observer.Leader(ht.Context())
require.NoError(ht, err, "Unable to query leader")
require.Equalf(ht, expected, leader,
"Leader should be '%v', got: '%v'", expected, leader)
}
// testEtcdFailover tests that in a cluster setup where two LND nodes form a
// single cluster (sharing the same identity) one can hand over the leader role
// to the other (failing over after graceful shutdown or forceful abort).
func testEtcdFailover(net *lntest.NetworkHarness, ht *harnessTest) {
func testEtcdFailover(ht *lntemp.HarnessTest) {
testCases := []struct {
name string
kill bool
@@ -48,148 +44,97 @@ func testEtcdFailover(net *lntest.NetworkHarness, ht *harnessTest) {
for _, test := range testCases {
test := test
ht.t.Run(test.name, func(t1 *testing.T) {
ht1 := newHarnessTest(t1, ht.lndHarness)
ht1.RunTestCase(&testCase{
name: test.name,
test: func(_ *lntest.NetworkHarness,
tt *harnessTest) {
testEtcdFailoverCase(net, tt, test.kill)
},
})
success := ht.Run(test.name, func(t1 *testing.T) {
st := ht.Subtest(t1)
testEtcdFailoverCase(st, test.kill)
})
if !success {
return
}
}
}
func testEtcdFailoverCase(net *lntest.NetworkHarness, ht *harnessTest,
kill bool) {
ctxb := context.Background()
func testEtcdFailoverCase(ht *lntemp.HarnessTest, kill bool) {
etcdCfg, cleanup, err := kvdb.StartEtcdTestBackend(
ht.t.TempDir(), uint16(lntest.NextAvailablePort()),
ht.T.TempDir(), uint16(lntest.NextAvailablePort()),
uint16(lntest.NextAvailablePort()), "",
)
if err != nil {
ht.Fatalf("Failed to start etcd instance: %v", err)
}
require.NoError(ht, err, "Failed to start etcd instance")
defer cleanup()
alice := ht.NewNode("Alice", nil)
// Give Alice some coins to fund the channel.
ht.FundCoins(btcutil.SatoshiPerBitcoin, alice)
// Make leader election session TTL 5 sec to make the test run fast.
const leaderSessionTTL = 5
observer, err := cluster.MakeLeaderElector(
ctxb, cluster.EtcdLeaderElector, "observer",
ht.Context(), cluster.EtcdLeaderElector, "observer",
lncfg.DefaultEtcdElectionPrefix, leaderSessionTTL, etcdCfg,
)
if err != nil {
ht.Fatalf("Cannot start election observer: %v", err)
}
require.NoError(ht, err, "Cannot start election observer")
password := []byte("the quick brown fox jumps the lazy dog")
entropy := [16]byte{1, 2, 3}
stateless := false
cluster := true
carol1, _, _, err := net.NewNodeWithSeedEtcd(
carol1, _, _ := ht.NewNodeWithSeedEtcd(
"Carol-1", etcdCfg, password, entropy[:], stateless, cluster,
leaderSessionTTL,
)
if err != nil {
ht.Fatalf("unable to start Carol-1: %v", err)
}
ctxt, _ := context.WithTimeout(ctxb, defaultTimeout)
info1, err := carol1.GetInfo(ctxt, &lnrpc.GetInfoRequest{})
if err != nil {
ht.Fatalf("unable to get info: %v", err)
}
net.ConnectNodes(ht.t, carol1, net.Alice)
info1 := carol1.RPC.GetInfo()
ht.ConnectNodes(carol1, alice)
// Open a channel with 100k satoshis between Carol and Alice with Alice
// being the sole funder of the channel.
chanAmt := btcutil.Amount(100000)
_ = openChannelAndAssert(
ht, net, net.Alice, carol1,
lntest.OpenChannelParams{
Amt: chanAmt,
},
)
chanAmt := btcutil.Amount(100_000)
ht.OpenChannel(alice, carol1, lntemp.OpenChannelParams{Amt: chanAmt})
// At this point Carol-1 is the elected leader, while Carol-2 will wait
// to become the leader when Carol-1 stops.
carol2, err := net.NewNodeEtcd(
"Carol-2", etcdCfg, password, cluster, false, leaderSessionTTL,
carol2 := ht.NewNodeEtcd(
"Carol-2", etcdCfg, password, cluster, leaderSessionTTL,
)
if err != nil {
ht.Fatalf("Unable to start Carol-2: %v", err)
}
assertLeader(ht, observer, "Carol-1")
amt := btcutil.Amount(1000)
payReqs, _, _, err := createPayReqs(carol1, amt, 2)
if err != nil {
ht.Fatalf("Carol-2 is unable to create payment requests: %v",
err)
}
sendAndAssertSuccess(
ht, net.Alice, &routerrpc.SendPaymentRequest{
PaymentRequest: payReqs[0],
TimeoutSeconds: 60,
FeeLimitSat: noFeeLimitMsat,
},
)
payReqs, _, _ := ht.CreatePayReqs(carol1, amt, 2)
ht.CompletePaymentRequests(alice, []string{payReqs[0]})
// Shut down or kill Carol-1 and wait for Carol-2 to become the leader.
failoverTimeout := time.Duration(2*leaderSessionTTL) * time.Second
if kill {
err = net.KillNode(carol1)
if err != nil {
ht.Fatalf("Can't kill Carol-1: %v", err)
}
ht.KillNode(carol1)
} else {
shutdownAndAssert(net, ht, carol1)
ht.Shutdown(carol1)
}
err = carol2.WaitUntilLeader(failoverTimeout)
if err != nil {
ht.Fatalf("Waiting for Carol-2 to become the leader failed: %v",
err)
}
require.NoError(ht, err, "Waiting for Carol-2 to become the leader "+
"failed")
assertLeader(ht, observer, "Carol-2")
err = carol2.Unlock(&lnrpc.UnlockWalletRequest{
WalletPassword: password,
})
if err != nil {
ht.Fatalf("Unlocking Carol-2 was not successful: %v", err)
}
ctxt, _ = context.WithTimeout(ctxb, defaultTimeout)
req := &lnrpc.UnlockWalletRequest{WalletPassword: password}
err = carol2.Unlock(req)
require.NoError(ht, err, "Unlocking Carol-2 failed")
// Make sure Carol-1 and Carol-2 have the same identity.
info2, err := carol2.GetInfo(ctxt, &lnrpc.GetInfoRequest{})
if err != nil {
ht.Fatalf("unable to get info: %v", err)
}
if info1.IdentityPubkey != info2.IdentityPubkey {
ht.Fatalf("Carol-1 and Carol-2 must have the same identity: "+
"%v vs %v", info1.IdentityPubkey, info2.IdentityPubkey)
}
info2 := carol2.RPC.GetInfo()
require.Equal(ht, info1.IdentityPubkey, info2.IdentityPubkey,
"Carol-1 and Carol-2 must have the same identity")
// Make sure the nodes are connected before moving forward. Otherwise
// we may get a link not found error.
ht.AssertConnected(alice, carol2)
// Now let Alice pay the second invoice but this time we expect Carol-2
// to receive the payment.
sendAndAssertSuccess(
ht, net.Alice, &routerrpc.SendPaymentRequest{
PaymentRequest: payReqs[1],
TimeoutSeconds: 60,
FeeLimitSat: noFeeLimitMsat,
},
)
ht.CompletePaymentRequests(alice, []string{payReqs[1]})
shutdownAndAssert(net, ht, carol2)
// Manually shut down the node, as otherwise it would interfere with
// our cleanup process.
ht.Shutdown(carol2)
}


@@ -3,10 +3,8 @@
package itest
import (
"github.com/lightningnetwork/lnd/lntest"
)
import "github.com/lightningnetwork/lnd/lntemp"
// testEtcdFailover is an empty itest when LND is not compiled with etcd
// support.
func testEtcdFailover(net *lntest.NetworkHarness, ht *harnessTest) {}
func testEtcdFailover(ht *lntemp.HarnessTest) {}


@@ -210,10 +210,6 @@ var allTestCases = []*testCase{
name: "wallet import pubkey",
test: testWalletImportPubKey,
},
{
name: "etcd_failover",
test: testEtcdFailover,
},
{
name: "max htlc pathfind",
test: testMaxHtlcPathfind,