diff --git a/routing/result_interpretation.go b/routing/result_interpretation.go index 44841096d..00703c289 100644 --- a/routing/result_interpretation.go +++ b/routing/result_interpretation.go @@ -2,14 +2,14 @@ package routing import ( "github.com/lightningnetwork/lnd/channeldb" - "github.com/lightningnetwork/lnd/lnwire" "github.com/lightningnetwork/lnd/routing/route" ) // Instantiate variables to allow taking a reference from the failure reason. var ( - reasonError = channeldb.FailureReasonError + reasonError = channeldb.FailureReasonError + reasonIncorrectDetails = channeldb.FailureReasonIncorrectPaymentDetails ) // interpretedResult contains the result of the interpretation of a payment @@ -44,10 +44,7 @@ func interpretResult(rt *route.Route, failureSrcIdx *int, pairResults: make(map[DirectedNodePair]lnwire.MilliSatoshi), } - final, reason := i.processFail(rt, failureSrcIdx, failure) - if final { - i.finalFailureReason = &reason - } + i.processFail(rt, failureSrcIdx, failure) return i } @@ -55,187 +52,240 @@ func interpretResult(rt *route.Route, failureSrcIdx *int, // processFail processes a failed payment attempt. func (i *interpretedResult) processFail( rt *route.Route, errSourceIdx *int, - failure lnwire.FailureMessage) (bool, channeldb.FailureReason) { + failure lnwire.FailureMessage) { if errSourceIdx == nil { i.processPaymentOutcomeUnknown(rt) - return false, 0 + return } - var failureVertex route.Vertex + switch *errSourceIdx { - failureSourceIdxInt := *errSourceIdx - if failureSourceIdxInt > 0 { - failureVertex = rt.Hops[failureSourceIdxInt-1].PubKeyBytes - } else { - failureVertex = rt.SourcePubKey + // We are the source of the failure. + case 0: + i.processPaymentOutcomeSelf(rt, failure) + + // A failure from the final hop was received. + case len(rt.Hops): + i.processPaymentOutcomeFinal( + rt, failure, + ) + + // An intermediate hop failed. Interpret the outcome, update reputation + // and try again. + default: + i.processPaymentOutcomeIntermediate( + rt, *errSourceIdx, failure, + ) } - log.Tracef("Node %x (index %v) reported failure when sending htlc", - failureVertex, errSourceIdx) +} - // Always determine chan id ourselves, because a channel update with id - // may not be available. - failedPair, failedAmt := getFailedPair( - rt, failureSourceIdxInt, - ) +// processPaymentOutcomeSelf handles failures sent by ourselves. +func (i *interpretedResult) processPaymentOutcomeSelf( + rt *route.Route, failure lnwire.FailureMessage) { switch failure.(type) { - // If the end destination didn't know the payment hash or we sent the - // wrong payment amount to the destination, then we'll terminate - // immediately. - case *lnwire.FailIncorrectDetails: - // TODO(joostjager): Check onionErr.Amount() whether it matches - // what we expect. (Will it ever not match, because if not - // final_incorrect_htlc_amount would be returned?) + // We receive a malformed htlc failure from our peer. We trust ourselves + // to send the correct htlc, so our peer must be at fault. + case *lnwire.FailInvalidOnionVersion, + *lnwire.FailInvalidOnionHmac, + *lnwire.FailInvalidOnionKey: - return true, channeldb.FailureReasonIncorrectPaymentDetails + i.failNode(rt, 1) - // If we sent the wrong amount to the destination, then we'll exit - // early. - case *lnwire.FailIncorrectPaymentAmount: - return true, channeldb.FailureReasonIncorrectPaymentDetails + // If this was a payment to a direct peer, we can stop trying. 
+ if len(rt.Hops) == 1 { + i.finalFailureReason = &reasonError + } - // If the time-lock that was extended to the final node was incorrect, - // then we can't proceed. - case *lnwire.FailFinalIncorrectCltvExpiry: - // TODO(joostjager): Take into account that second last hop may - // have deliberately handed out an htlc that expires too soon. - // In that case we should continue routing. - return true, channeldb.FailureReasonError - - // If we crafted an invalid onion payload for the final node, then we'll - // exit early. - case *lnwire.FailFinalIncorrectHtlcAmount: - // TODO(joostjager): Take into account that second last hop may - // have deliberately handed out an htlc with a too low value. In - // that case we should continue routing. - return true, channeldb.FailureReasonError - - // Similarly, if the HTLC expiry that we extended to the final hop - // expires too soon, then will fail the payment. - // - // TODO(roasbeef): can happen to to race condition, try again with - // recent block height - case *lnwire.FailFinalExpiryTooSoon: - // TODO(joostjager): Take into account that any hop may have - // delayed. Ideally we should continue routing. Knowing the - // delaying node at this point would help. - return true, channeldb.FailureReasonIncorrectPaymentDetails - - // If we erroneously attempted to cross a chain border, then we'll - // cancel the payment. - case *lnwire.FailInvalidRealm: - return true, channeldb.FailureReasonError - - // If we get a notice that the expiry was too soon for an intermediate - // node, then we'll prune out the node that sent us this error, as it - // doesn't now what the correct block height is. - case *lnwire.FailExpiryTooSoon: - i.nodeFailure = &failureVertex - return false, 0 - - // If we hit an instance of onion payload corruption or an invalid - // version, then we'll exit early as this shouldn't happen in the - // typical case. - // - // TODO(joostjager): Take into account that the previous hop may have - // tampered with the onion. Routing should continue using other paths. - case *lnwire.FailInvalidOnionVersion: - return true, channeldb.FailureReasonError - case *lnwire.FailInvalidOnionHmac: - return true, channeldb.FailureReasonError - case *lnwire.FailInvalidOnionKey: - return true, channeldb.FailureReasonError - - // If we get a failure due to violating the minimum amount, we'll apply - // the new minimum amount and retry routing. - case *lnwire.FailAmountBelowMinimum: - i.policyFailure = &failedPair - i.pairResults[failedPair] = 0 - return false, 0 - - // If we get a failure due to a fee, we'll apply the new fee update, and - // retry our attempt using the newly updated fees. - case *lnwire.FailFeeInsufficient: - i.policyFailure = &failedPair - i.pairResults[failedPair] = 0 - return false, 0 - - // If we get the failure for an intermediate node that disagrees with - // our time lock values, then we'll apply the new delta value and try it - // once more. - case *lnwire.FailIncorrectCltvExpiry: - i.policyFailure = &failedPair - i.pairResults[failedPair] = 0 - return false, 0 - - // The outgoing channel that this node was meant to forward one is - // currently disabled, so we'll apply the update and continue. - case *lnwire.FailChannelDisabled: - i.pairResults[failedPair] = 0 - return false, 0 - - // It's likely that the outgoing channel didn't have sufficient - // capacity, so we'll prune this edge for now, and continue onwards with - // our path finding. 
- case *lnwire.FailTemporaryChannelFailure: - i.pairResults[failedPair] = failedAmt - return false, 0 - - // If the send fail due to a node not having the required features, then - // we'll note this error and continue. - case *lnwire.FailRequiredNodeFeatureMissing: - i.nodeFailure = &failureVertex - return false, 0 - - // If the send fail due to a node not having the required features, then - // we'll note this error and continue. - case *lnwire.FailRequiredChannelFeatureMissing: - i.nodeFailure = &failureVertex - return false, 0 - - // If the next hop in the route wasn't known or offline, we'll only the - // channel which we attempted to route over. This is conservative, and - // it can handle faulty channels between nodes properly. Additionally, - // this guards against routing nodes returning errors in order to - // attempt to black list another node. - case *lnwire.FailUnknownNextPeer: - i.pairResults[failedPair] = 0 - return false, 0 - - // If the node wasn't able to forward for which ever reason, then we'll - // note this and continue with the routes. - case *lnwire.FailTemporaryNodeFailure: - i.nodeFailure = &failureVertex - return false, 0 - - case *lnwire.FailPermanentNodeFailure: - i.nodeFailure = &failureVertex - return false, 0 - - // If we crafted a route that contains a too long time lock for an - // intermediate node, we'll prune the node. As there currently is no way - // of knowing that node's maximum acceptable cltv, we cannot take this - // constraint into account during routing. - // - // TODO(joostjager): Record the rejected cltv and use that as a hint - // during future path finding through that node. - case *lnwire.FailExpiryTooFar: - i.nodeFailure = &failureVertex - return false, 0 - - // If we get a permanent channel or node failure, then we'll prune the - // channel in both directions and continue with the rest of the routes. - case *lnwire.FailPermanentChannelFailure: - i.pairResults[failedPair] = 0 - i.pairResults[failedPair.Reverse()] = 0 - return false, 0 - - // Any other failure or an empty failure will get the node pruned. + // Any other failure originating from ourselves should be temporary and + // caused by changing conditions between path finding and execution of + // the payment. We just retry and trust that the information locally + // available in the link has been updated. default: - i.nodeFailure = &failureVertex - return false, 0 + log.Warnf("Routing failure for local channel %v occurred", + rt.Hops[0].ChannelID) + } +} + +// processPaymentOutcomeFinal handles failures sent by the final hop. +func (i *interpretedResult) processPaymentOutcomeFinal( + route *route.Route, failure lnwire.FailureMessage) { + + n := len(route.Hops) + + // If a failure from the final node is received, we will fail the + // payment in almost all cases. Only when the penultimate node sends an + // incorrect htlc, we want to retry via another route. Invalid onion + // failures are not expected, because the final node wouldn't be able to + // encrypt that failure. + switch failure.(type) { + + // Expiry or amount of the HTLC doesn't match the onion, try another + // route. + case *lnwire.FailFinalIncorrectCltvExpiry, + *lnwire.FailFinalIncorrectHtlcAmount: + + // We trust ourselves. If this is a direct payment, we penalize + // the final node and fail the payment. + if n == 1 { + i.failNode(route, n) + i.finalFailureReason = &reasonError + + return + } + + // Otherwise penalize the last pair of the route and retry. 
+ // Either the final node is at fault, or it gets sent a bad htlc + // from its predecessor. + i.failPair(route, n-1) + + // We are using wrong payment hash or amount, fail the payment. + case *lnwire.FailIncorrectPaymentAmount, + *lnwire.FailIncorrectDetails: + + i.finalFailureReason = &reasonIncorrectDetails + + // The HTLC that was extended to the final hop expires too soon. Fail + // the payment, because we may be using the wrong final cltv delta. + case *lnwire.FailFinalExpiryTooSoon: + // TODO(roasbeef): can happen to to race condition, try again + // with recent block height + + // TODO(joostjager): can also happen because a node delayed + // deliberately. What to penalize? + i.finalFailureReason = &reasonIncorrectDetails + + default: + // All other errors are considered terminal if coming from the + // final hop. They indicate that something is wrong at the + // recipient, so we do apply a penalty. + i.failNode(route, n) + i.finalFailureReason = &reasonError + } +} + +// processPaymentOutcomeIntermediate handles failures sent by an intermediate +// hop. +func (i *interpretedResult) processPaymentOutcomeIntermediate( + route *route.Route, errorSourceIdx int, + failure lnwire.FailureMessage) { + + reportOutgoing := func() { + i.failPair( + route, errorSourceIdx, + ) + } + + reportOutgoingBalance := func() { + i.failPairBalance( + route, errorSourceIdx, + ) + } + + reportIncoming := func() { + // We trust ourselves. If the error comes from the first hop, we + // can penalize the whole node. In that case there is no + // uncertainty as to which node to blame. + if errorSourceIdx == 1 { + i.failNode(route, errorSourceIdx) + return + } + + // Otherwise report the incoming pair. + i.failPair( + route, errorSourceIdx-1, + ) + } + + reportAll := func() { + // We trust ourselves. If the error comes from the first hop, we + // can penalize the whole node. In that case there is no + // uncertainty as to which node to blame. + if errorSourceIdx == 1 { + i.failNode(route, errorSourceIdx) + return + } + + // Otherwise penalize all pairs up to the error source. This + // includes our own outgoing connection. + i.failPairRange( + route, 0, errorSourceIdx-1, + ) + } + + switch failure.(type) { + + // If a node reports onion payload corruption or an invalid version, + // that node may be responsible, but it could also be that it is just + // relaying a malformed htlc failure from it successor. By reporting the + // outgoing channel set, we will surely hit the responsible node. At + // this point, it is not possible that the node's predecessor corrupted + // the onion blob. If the predecessor would have corrupted the payload, + // the error source wouldn't have been able to encrypt this failure + // message for us. + case *lnwire.FailInvalidOnionVersion, + *lnwire.FailInvalidOnionHmac, + *lnwire.FailInvalidOnionKey: + + reportOutgoing() + + // If the next hop in the route wasn't known or offline, we'll only + // penalize the channel set which we attempted to route over. This is + // conservative, and it can handle faulty channels between nodes + // properly. Additionally, this guards against routing nodes returning + // errors in order to attempt to black list another node. + case *lnwire.FailUnknownNextPeer: + reportOutgoing() + + // If we get a permanent channel, we'll prune the channel set in both + // directions and continue with the rest of the routes. 
+ case *lnwire.FailPermanentChannelFailure: + reportOutgoing() + + // When an HTLC parameter is incorrect, the node sending the error may + // be doing something wrong. But it could also be that its predecessor + // is intentionally modifying the htlc parameters that we instructed it + // via the hop payload. Therefore we penalize the incoming node pair. A + // third cause of this error may be that we have an out of date channel + // update. This is handled by the second chance logic up in mission + // control. + case *lnwire.FailAmountBelowMinimum, + *lnwire.FailFeeInsufficient, + *lnwire.FailIncorrectCltvExpiry, + *lnwire.FailChannelDisabled: + + // Set the node pair for which a channel update may be out of + // date. The second chance logic uses the policyFailure field. + i.policyFailure = &DirectedNodePair{ + From: route.Hops[errorSourceIdx-1].PubKeyBytes, + To: route.Hops[errorSourceIdx].PubKeyBytes, + } + + // We report incoming channel. If a second pair is granted in + // mission control, this report is ignored. + reportIncoming() + + // If the outgoing channel doesn't have enough capacity, we penalize. + // But we penalize only in a single direction and only for amounts + // greater than the attempted amount. + case *lnwire.FailTemporaryChannelFailure: + reportOutgoingBalance() + + // If FailExpiryTooSoon is received, there must have been some delay + // along the path. We can't know which node is causing the delay, so we + // penalize all of them up to the error source. + // + // Alternatively it could also be that we ourselves have fallen behind + // somehow. We ignore that case for now. + case *lnwire.FailExpiryTooSoon: + reportAll() + + // In all other cases, we penalize the reporting node. These are all + // failures that should not happen. + default: + i.failNode(route, errorSourceIdx) } } @@ -263,7 +313,8 @@ func (i *interpretedResult) failNode(rt *route.Route, idx int) { i.nodeFailure = &rt.Hops[idx-1].PubKeyBytes } -// failPairRange marks the node pairs from node fromIdx to node toIdx as failed. +// failPairRange marks the node pairs from node fromIdx to node toIdx as failed +// in both direction. func (i *interpretedResult) failPairRange( rt *route.Route, fromIdx, toIdx int) { @@ -283,6 +334,15 @@ func (i *interpretedResult) failPair( i.pairResults[pair.Reverse()] = 0 } +// failPairBalance marks a pair as failed with a minimum penalization amount. +func (i *interpretedResult) failPairBalance( + rt *route.Route, channelIdx int) { + + pair, amt := getPair(rt, channelIdx) + + i.pairResults[pair] = amt +} + // getPair returns a node pair from the route and the amount passed between that // pair. func getPair(rt *route.Route, channelIdx int) (DirectedNodePair, @@ -306,36 +366,3 @@ func getPair(rt *route.Route, channelIdx int) (DirectedNodePair, return pair, amt } - -// getFailedPair tries to locate the failing pair given a route and the pubkey -// of the node that sent the failure. It will assume that the failure is -// associated with the outgoing channel set of the failing node. As a second -// result, it returns the amount sent between the pair. -func getFailedPair(route *route.Route, failureSource int) (DirectedNodePair, - lnwire.MilliSatoshi) { - - // Determine if we have a failure from the final hop. If it is, we - // assume that the failing channel is the incoming channel. - // - // TODO(joostjager): In this case, certain types of failures are not - // expected. For example FailUnknownNextPeer. This could be a reason to - // prune the node? 
- if failureSource == len(route.Hops) { - failureSource-- - } - - // As this failure indicates that the target channel was unable to carry - // this HTLC (for w/e reason), we'll return the _outgoing_ channel that - // the source of the failure was meant to pass the HTLC along to. - if failureSource == 0 { - return NewDirectedNodePair( - route.SourcePubKey, - route.Hops[0].PubKeyBytes, - ), route.TotalAmount - } - - return NewDirectedNodePair( - route.Hops[failureSource-1].PubKeyBytes, - route.Hops[failureSource].PubKeyBytes, - ), route.Hops[failureSource-1].AmtToForward -} diff --git a/routing/result_interpretation_test.go b/routing/result_interpretation_test.go index b7b295771..86b65aa4c 100644 --- a/routing/result_interpretation_test.go +++ b/routing/result_interpretation_test.go @@ -14,6 +14,14 @@ var ( {1, 0}, {1, 1}, {1, 2}, {1, 3}, {1, 4}, } + routeOneHop = route.Route{ + SourcePubKey: hops[0], + TotalAmount: 100, + Hops: []*route.Hop{ + {PubKeyBytes: hops[1], AmtToForward: 99}, + }, + } + routeTwoHop = route.Route{ SourcePubKey: hops[0], TotalAmount: 100, @@ -35,6 +43,10 @@ var ( } ) +func getTestPair(from, to int) DirectedNodePair { + return NewDirectedNodePair(hops[from], hops[to]) +} + type resultTestCase struct { name string route *route.Route @@ -55,7 +67,7 @@ var resultTestCases = []resultTestCase{ expectedResult: &interpretedResult{ pairResults: map[DirectedNodePair]lnwire.MilliSatoshi{ - NewDirectedNodePair(hops[1], hops[2]): 99, + getTestPair(1, 2): 99, }, }, }, @@ -68,7 +80,40 @@ var resultTestCases = []resultTestCase{ failure: lnwire.NewExpiryTooSoon(lnwire.ChannelUpdate{}), expectedResult: &interpretedResult{ - nodeFailure: &hops[3], + pairResults: map[DirectedNodePair]lnwire.MilliSatoshi{ + getTestPair(0, 1): 0, + getTestPair(1, 0): 0, + getTestPair(1, 2): 0, + getTestPair(2, 1): 0, + getTestPair(2, 3): 0, + getTestPair(3, 2): 0, + }, + }, + }, + + // Tests a malformed htlc from a direct peer. + { + name: "fail malformed htlc from direct peer", + route: &routeTwoHop, + failureSrcIdx: 0, + failure: lnwire.NewInvalidOnionKey(nil), + + expectedResult: &interpretedResult{ + nodeFailure: &hops[1], + }, + }, + + // Tests a malformed htlc from a direct peer that is also the final + // destination. + { + name: "fail malformed htlc from direct final peer", + route: &routeOneHop, + failureSrcIdx: 0, + failure: lnwire.NewInvalidOnionKey(nil), + + expectedResult: &interpretedResult{ + finalFailureReason: &reasonError, + nodeFailure: &hops[1], }, }, }
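
The hunks above add the new test vectors but the harness that evaluates them is outside the shown diff. Below is a minimal sketch of how such a table-driven test could drive `interpretResult`; the function name `TestResultInterpretation`, the `emptyResults` normalization and the `reflect.DeepEqual` comparison are assumptions about the surrounding test file, not part of this change.

```go
package routing

import (
	"reflect"
	"testing"

	"github.com/lightningnetwork/lnd/lnwire"
)

// TestResultInterpretation (sketch) runs every case in resultTestCases
// through interpretResult and compares the outcome with the expected
// interpretedResult.
func TestResultInterpretation(t *testing.T) {
	emptyResults := make(map[DirectedNodePair]lnwire.MilliSatoshi)

	for _, testCase := range resultTestCases {
		i := interpretResult(
			testCase.route, &testCase.failureSrcIdx,
			testCase.failure,
		)

		expected := testCase.expectedResult

		// Cases that expect no pair results should compare equal to
		// the empty (but non-nil) map that interpretResult allocates.
		if expected.pairResults == nil {
			expected.pairResults = emptyResults
		}

		if !reflect.DeepEqual(i, expected) {
			t.Fatalf("case %v: unexpected result %v",
				testCase.name, i)
		}
	}
}
```

Comparing the two `*interpretedResult` values with `reflect.DeepEqual` works because it follows pointers and compares the pointed-to structs field by field, including the `pairResults` map and the `nodeFailure` / `finalFailureReason` pointers.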
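From the caller's perspective, the refactored interpreter no longer returns a `(final, reason)` pair from `processFail`; instead a non-nil `finalFailureReason` on the result terminates the payment, while `nodeFailure`, `pairResults` and `policyFailure` only feed penalties for the next attempt. The sketch below illustrates that contract; `handleFailure` is a hypothetical name and not a function introduced by this diff, and a real caller would hand the penalties to mission control rather than just log them.

```go
package routing

import (
	"github.com/lightningnetwork/lnd/channeldb"
	"github.com/lightningnetwork/lnd/lnwire"
	"github.com/lightningnetwork/lnd/routing/route"
)

// handleFailure (hypothetical) shows how a payment loop might consume an
// interpretedResult: a terminal reason aborts the payment, everything else
// only updates penalties before another route is attempted.
func handleFailure(rt *route.Route, srcIdx *int,
	failure lnwire.FailureMessage) (*channeldb.FailureReason, bool) {

	result := interpretResult(rt, srcIdx, failure)

	// A final failure reason means no alternative route can succeed,
	// for example incorrect payment details reported by the recipient.
	if result.finalFailureReason != nil {
		return result.finalFailureReason, false
	}

	// Non-terminal outcomes: a penalized node, penalized node pairs
	// with a minimum amount, and possibly an outdated channel policy.
	if result.nodeFailure != nil {
		log.Debugf("Penalizing node %x", *result.nodeFailure)
	}
	for pair, minAmt := range result.pairResults {
		log.Debugf("Penalizing pair %v for amounts >= %v",
			pair, minAmt)
	}
	if result.policyFailure != nil {
		log.Debugf("Channel policy for pair %v may be stale",
			*result.policyFailure)
	}

	// Signal that retrying over a different route is worthwhile.
	return nil, true
}
```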