routing/notifs: order invariant testing of ntfn delivery (#238)

Modifies the test cases in `TestEdgeUpdateNotification` and
`TestNodeUpdateNotification` to check for the possibility of notifications
being delivered out of order.  This addresses some sporadic failures that
were observed when running the test suite. 

I looked through some of the open issues but didn't see any addressing this
issue in particular, but if someone could point me to any relevant issues
that would be much appreciated!

Issue
-----
Currently the test suite validates notifications received in the order they
are submitted. The check fails because the verification of each
notification is statically linked to the order in which they are delivered,
seen
[here](1be4d67ce4/routing/notifications_test.go (L403))
and
[here](1be4d67ce4/routing/notifications_test.go (L499))
in `routing/notifications_test.go`.  The notifications are typically
delivered in this order, but causes the test to fail otherwise.

Proposed Changes
-------------------
Construct an index that maps a public key to its corresponding edges and/or
nodes.  When a notification is received, use its identifying public key and
the index to look up the edge/node to use for validation. Entries are
removed from the index after they are verified to ensure that the same
entry is validated twice. The logic to dynamically handle the verification
of incoming notifications rests can be found here
[here](https://github.com/cfromknecht/lnd/blob/order-invariant-ntfns/routing/notifications_test.go#L420)
and
[here](https://github.com/cfromknecht/lnd/blob/order-invariant-ntfns/routing/notifications_test.go#L539).

Encountered Errors
--------------------
 * `TestEdgeUpdateNotification`: notifications_test.go:379: min HTLC of
   edge doesn't match: expected 16.7401473 BTC, got 19.4852751 BTC
 * `TestNodeUpdateNotification`: notifications_test.go:485: node identity
   keys don't match: expected
   027b139b2153ac5f3c83c2022e58b3219297d0fb3170739ee6391cddf2e06fe3e7, got
   03921deafb61ee13d18e9d96c3ecd9e572e59c8dbd0bb922b5b6ac609d10fe4ee4


Recreating Failing Behavior
---------------------------
The failures can be somewhat difficult to recreate, I was able to reproduce
them by running the unit tests repeatedly until they showed up.  I used the
following commands to bring them out of hiding:

```
./gotest.sh -i
go test -test.v ./routing && while [ $? -eq 0 ]; do go test -test.v ./routing; done
```

I was unable to recreate these errors, or any others in this package, after
making the proposed changes and leaving the script running continuously for
~30 minutes. Previously, I could consistently generate an error after ~20
seconds had elapsed on the latest commit in master at the time of writing:
78f6caf5d2. Moar stability ftw!
This commit is contained in:
Conner Fromknecht 2017-07-31 21:38:03 -07:00 committed by Olaoluwa Osuntokun
parent 0377a4f99d
commit 14a06526b8

View File

@ -395,32 +395,63 @@ func TestEdgeUpdateNotification(t *testing.T) {
}
}
// Create lookup map for notifications we are intending to receive. Entries
// are removed from the map when the anticipated notification is received.
var waitingFor = map[vertex]int{
newVertex(node1.PubKey): 1,
newVertex(node2.PubKey): 2,
}
const numEdgePolicies = 2
for i := 0; i < numEdgePolicies; i++ {
select {
case ntfn := <-ntfnClient.TopologyChanges:
// For each processed announcement we should only receive a
// single announcement in a batch.
if len(ntfn.ChannelEdgeUpdates) != 1 {
t.Fatalf("expected 1 notification, instead have %v",
len(ntfn.ChannelEdgeUpdates))
}
edgeUpdate := ntfn.ChannelEdgeUpdates[0]
if i == 0 {
assertEdgeCorrect(t, edgeUpdate, edge1)
if !edgeUpdate.AdvertisingNode.IsEqual(node1.PubKey) {
t.Fatal("advertising node mismatch")
}
if !edgeUpdate.ConnectingNode.IsEqual(node2.PubKey) {
t.Fatal("connecting node mismatch")
nodeVertex := newVertex(edgeUpdate.AdvertisingNode)
if idx, ok := waitingFor[nodeVertex]; ok {
switch idx {
case 1:
// Received notification corresponding to edge1.
assertEdgeCorrect(t, edgeUpdate, edge1)
if !edgeUpdate.AdvertisingNode.IsEqual(node1.PubKey) {
t.Fatal("advertising node mismatch")
}
if !edgeUpdate.ConnectingNode.IsEqual(node2.PubKey) {
t.Fatal("connecting node mismatch")
}
case 2:
// Received notification corresponding to edge2.
assertEdgeCorrect(t, edgeUpdate, edge2)
if !edgeUpdate.AdvertisingNode.IsEqual(node2.PubKey) {
t.Fatal("advertising node mismatch")
}
if !edgeUpdate.ConnectingNode.IsEqual(node1.PubKey) {
t.Fatal("connecting node mismatch")
}
default:
t.Fatal("invalid edge index")
}
continue
// Remove entry from waitingFor map to ensure we don't double count a
// repeat notification.
delete(waitingFor, nodeVertex)
} else {
t.Fatal("unexpected edge update received")
}
assertEdgeCorrect(t, edgeUpdate, edge2)
if !edgeUpdate.ConnectingNode.IsEqual(node1.PubKey) {
t.Fatal("connecting node mismatch")
}
if !edgeUpdate.AdvertisingNode.IsEqual(node2.PubKey) {
t.Fatal("advertising node mismatch")
}
case <-time.After(time.Second * 5):
t.Fatal("update not received")
t.Fatal("edge update not received")
}
}
}
@ -463,46 +494,71 @@ func TestNodeUpdateNotification(t *testing.T) {
}
assertNodeNtfnCorrect := func(t *testing.T, ann *channeldb.LightningNode,
ntfns []*NetworkNodeUpdate) {
// For each processed announcement we should only receive a
// single announcement in a batch.
if len(ntfns) != 1 {
t.Fatalf("expected 1 notification, instead have %v",
len(ntfns))
}
nodeUpdate *NetworkNodeUpdate) {
// The notification received should directly map the
// announcement originally sent.
nodeNtfn := ntfns[0]
if nodeNtfn.Addresses[0] != ann.Addresses[0] {
if nodeUpdate.Addresses[0] != ann.Addresses[0] {
t.Fatalf("node address doesn't match: expected %v, got %v",
nodeNtfn.Addresses[0], ann.Addresses[0])
nodeUpdate.Addresses[0], ann.Addresses[0])
}
if !nodeNtfn.IdentityKey.IsEqual(ann.PubKey) {
if !nodeUpdate.IdentityKey.IsEqual(ann.PubKey) {
t.Fatalf("node identity keys don't match: expected %x, "+
"got %x", ann.PubKey.SerializeCompressed(),
nodeNtfn.IdentityKey.SerializeCompressed())
nodeUpdate.IdentityKey.SerializeCompressed())
}
if nodeNtfn.Alias != ann.Alias {
if nodeUpdate.Alias != ann.Alias {
t.Fatalf("node alias doesn't match: expected %v, got %v",
ann.Alias, nodeNtfn.Alias)
ann.Alias, nodeUpdate.Alias)
}
}
// Create lookup map for notifications we are intending to receive. Entries
// are removed from the map when the anticipated notification is received.
var waitingFor = map[vertex]int{
newVertex(node1.PubKey): 1,
newVertex(node2.PubKey): 2,
}
// Exactly two notifications should be sent, each corresponding to the
// node announcement messages sent above.
const numAnns = 2
for i := 0; i < numAnns; i++ {
select {
case ntfn := <-ntfnClient.TopologyChanges:
if i == 0 {
assertNodeNtfnCorrect(t, node1, ntfn.NodeUpdates)
continue
// For each processed announcement we should only receive a
// single announcement in a batch.
if len(ntfn.NodeUpdates) != 1 {
t.Fatalf("expected 1 notification, instead have %v",
len(ntfn.NodeUpdates))
}
nodeUpdate := ntfn.NodeUpdates[0]
nodeVertex := newVertex(nodeUpdate.IdentityKey)
if idx, ok := waitingFor[nodeVertex]; ok {
switch idx {
case 1:
// Received notification corresponding to node1.
assertNodeNtfnCorrect(t, node1, nodeUpdate)
case 2:
// Received notification corresponding to node2.
assertNodeNtfnCorrect(t, node2, nodeUpdate)
default:
t.Fatal("invalid node index")
}
// Remove entry from waitingFor map to ensure we don't double count a
// repeat notification.
delete(waitingFor, nodeVertex)
} else {
t.Fatal("unexpected node update received")
}
assertNodeNtfnCorrect(t, node2, ntfn.NodeUpdates)
case <-time.After(time.Second * 5):
t.Fatal("node update not received")
}
}
@ -521,8 +577,18 @@ func TestNodeUpdateNotification(t *testing.T) {
// date node announcement.
select {
case ntfn := <-ntfnClient.TopologyChanges:
assertNodeNtfnCorrect(t, &nodeUpdateAnn, ntfn.NodeUpdates)
// For each processed announcement we should only receive a
// single announcement in a batch.
if len(ntfn.NodeUpdates) != 1 {
t.Fatalf("expected 1 notification, instead have %v",
len(ntfn.NodeUpdates))
}
nodeUpdate := ntfn.NodeUpdates[0]
assertNodeNtfnCorrect(t, &nodeUpdateAnn, nodeUpdate)
case <-time.After(time.Second * 5):
t.Fatal("update not received")
}
}