lnd/routing/result_interpretation.go
Carla Kirk-Cohen 4071db77b5
routing: add handling for blinded errors from final node
We do not expect blinding errors from the final node:
1. If the introduction is the recipient, they should use regular errors.
2. Otherwise, nodes have no business sending this error when they are
   not part of a blinded route.
2023-12-18 12:07:56 -05:00

650 lines
21 KiB
Go

package routing
import (
"fmt"
"github.com/lightningnetwork/lnd/channeldb"
"github.com/lightningnetwork/lnd/lnwire"
"github.com/lightningnetwork/lnd/routing/route"
)
// Instantiate variables to allow taking a reference from the failure reason.
var (
reasonError = channeldb.FailureReasonError
reasonIncorrectDetails = channeldb.FailureReasonPaymentDetails
)
// pairResult contains the result of the interpretation of a payment attempt for
// a specific node pair.
type pairResult struct {
// amt is the amount that was forwarded for this pair. Can be set to
// zero for failures that are amount independent.
amt lnwire.MilliSatoshi
// success indicates whether the payment attempt was successful through
// this pair.
success bool
}
// failPairResult creates a new result struct for a failure.
func failPairResult(minPenalizeAmt lnwire.MilliSatoshi) pairResult {
return pairResult{
amt: minPenalizeAmt,
}
}
// newSuccessPairResult creates a new result struct for a success.
func successPairResult(successAmt lnwire.MilliSatoshi) pairResult {
return pairResult{
success: true,
amt: successAmt,
}
}
// String returns the human-readable representation of a pair result.
func (p pairResult) String() string {
var resultType string
if p.success {
resultType = "success"
} else {
resultType = "failed"
}
return fmt.Sprintf("%v (amt=%v)", resultType, p.amt)
}
// interpretedResult contains the result of the interpretation of a payment
// attempt.
type interpretedResult struct {
// nodeFailure points to a node pubkey if all channels of that node are
// responsible for the result.
nodeFailure *route.Vertex
// pairResults contains a map of node pairs for which we have a result.
pairResults map[DirectedNodePair]pairResult
// finalFailureReason is set to a non-nil value if it makes no more
// sense to start another payment attempt. It will contain the reason
// why.
finalFailureReason *channeldb.FailureReason
// policyFailure is set to a node pair if there is a policy failure on
// that connection. This is used to control the second chance logic for
// policy failures.
policyFailure *DirectedNodePair
}
// interpretResult interprets a payment outcome and returns an object that
// contains information required to update mission control.
func interpretResult(rt *route.Route, success bool, failureSrcIdx *int,
failure lnwire.FailureMessage) *interpretedResult {
i := &interpretedResult{
pairResults: make(map[DirectedNodePair]pairResult),
}
if success {
i.processSuccess(rt)
} else {
i.processFail(rt, failureSrcIdx, failure)
}
return i
}
// processSuccess processes a successful payment attempt.
func (i *interpretedResult) processSuccess(route *route.Route) {
// For successes, all nodes must have acted in the right way. Therefore
// we mark all of them with a success result.
i.successPairRange(route, 0, len(route.Hops)-1)
}
// processFail processes a failed payment attempt.
func (i *interpretedResult) processFail(
rt *route.Route, errSourceIdx *int,
failure lnwire.FailureMessage) {
if errSourceIdx == nil {
i.processPaymentOutcomeUnknown(rt)
return
}
// If the payment was to a blinded route and we received an error from
// after the introduction point, handle this error separately - there
// has been a protocol violation from the introduction node. This
// penalty applies regardless of the error code that is returned.
introIdx, isBlinded := introductionPointIndex(rt)
if isBlinded && introIdx < *errSourceIdx {
i.processPaymentOutcomeBadIntro(rt, introIdx, *errSourceIdx)
return
}
switch *errSourceIdx {
// We are the source of the failure.
case 0:
i.processPaymentOutcomeSelf(rt, failure)
// A failure from the final hop was received.
case len(rt.Hops):
i.processPaymentOutcomeFinal(
rt, failure,
)
// An intermediate hop failed. Interpret the outcome, update reputation
// and try again.
default:
i.processPaymentOutcomeIntermediate(
rt, *errSourceIdx, failure,
)
}
}
// processPaymentOutcomeBadIntro handles the case where we have made payment
// to a blinded route, but received an error from a node after the introduction
// node. This indicates that the introduction node is not obeying the route
// blinding specification, as we expect all errors from the introduction node
// to be source from it.
func (i *interpretedResult) processPaymentOutcomeBadIntro(route *route.Route,
introIdx, errSourceIdx int) {
// We fail the introduction node for not obeying the specification.
i.failNode(route, introIdx)
// Other preceding channels in the route forwarded correctly. Note
// that we do not assign success to the incoming link to the
// introduction node because it has not handled the error correctly.
if introIdx > 1 {
i.successPairRange(route, 0, introIdx-2)
}
// If the source of the failure was from the final node, we also set
// a final failure reason because the recipient can't process the
// payment (independent of the introduction failing to convert the
// error, we can't complete the payment if the last hop fails).
if errSourceIdx == len(route.Hops) {
i.finalFailureReason = &reasonError
}
}
// processPaymentOutcomeSelf handles failures sent by ourselves.
func (i *interpretedResult) processPaymentOutcomeSelf(
rt *route.Route, failure lnwire.FailureMessage) {
switch failure.(type) {
// We receive a malformed htlc failure from our peer. We trust ourselves
// to send the correct htlc, so our peer must be at fault.
case *lnwire.FailInvalidOnionVersion,
*lnwire.FailInvalidOnionHmac,
*lnwire.FailInvalidOnionKey:
i.failNode(rt, 1)
// If this was a payment to a direct peer, we can stop trying.
if len(rt.Hops) == 1 {
i.finalFailureReason = &reasonError
}
// Any other failure originating from ourselves should be temporary and
// caused by changing conditions between path finding and execution of
// the payment. We just retry and trust that the information locally
// available in the link has been updated.
default:
log.Warnf("Routing failure for local channel %v occurred",
rt.Hops[0].ChannelID)
}
}
// processPaymentOutcomeFinal handles failures sent by the final hop.
func (i *interpretedResult) processPaymentOutcomeFinal(
route *route.Route, failure lnwire.FailureMessage) {
n := len(route.Hops)
failNode := func() {
i.failNode(route, n)
// Other channels in the route forwarded correctly.
if n > 1 {
i.successPairRange(route, 0, n-2)
}
i.finalFailureReason = &reasonError
}
// If a failure from the final node is received, we will fail the
// payment in almost all cases. Only when the penultimate node sends an
// incorrect htlc, we want to retry via another route. Invalid onion
// failures are not expected, because the final node wouldn't be able to
// encrypt that failure.
switch failure.(type) {
// Expiry or amount of the HTLC doesn't match the onion, try another
// route.
case *lnwire.FailFinalIncorrectCltvExpiry,
*lnwire.FailFinalIncorrectHtlcAmount:
// We trust ourselves. If this is a direct payment, we penalize
// the final node and fail the payment.
if n == 1 {
i.failNode(route, n)
i.finalFailureReason = &reasonError
return
}
// Otherwise penalize the last pair of the route and retry.
// Either the final node is at fault, or it gets sent a bad htlc
// from its predecessor.
i.failPair(route, n-1)
// The other hops relayed correctly, so assign those pairs a
// success result. At this point, n >= 2.
i.successPairRange(route, 0, n-2)
// We are using wrong payment hash or amount, fail the payment.
case *lnwire.FailIncorrectPaymentAmount,
*lnwire.FailIncorrectDetails:
// Assign all pairs a success result, as the payment reached the
// destination correctly.
i.successPairRange(route, 0, n-1)
i.finalFailureReason = &reasonIncorrectDetails
// The HTLC that was extended to the final hop expires too soon. Fail
// the payment, because we may be using the wrong final cltv delta.
case *lnwire.FailFinalExpiryTooSoon:
// TODO(roasbeef): can happen to to race condition, try again
// with recent block height
// TODO(joostjager): can also happen because a node delayed
// deliberately. What to penalize?
i.finalFailureReason = &reasonIncorrectDetails
case *lnwire.FailMPPTimeout:
// Assign all pairs a success result, as the payment reached the
// destination correctly. Continue the payment process.
i.successPairRange(route, 0, n-1)
// We do not expect to receive an invalid blinding error from the final
// node in the route. This could erroneously happen in the following
// cases:
// 1. Unblinded node: misuses the error code.
// 2. A receiving introduction node: erroneously sends the error code,
// as the spec indicates that receiving introduction nodes should
// use regular errors.
//
// Note that we expect the case where this error is sent from a node
// after the introduction node to be handled elsewhere as this is part
// of a more general class of errors where the introduction node has
// failed to convert errors for the blinded route.
case *lnwire.FailInvalidBlinding:
failNode()
// All other errors are considered terminal if coming from the
// final hop. They indicate that something is wrong at the
// recipient, so we do apply a penalty.
default:
failNode()
}
}
// processPaymentOutcomeIntermediate handles failures sent by an intermediate
// hop.
func (i *interpretedResult) processPaymentOutcomeIntermediate(
route *route.Route, errorSourceIdx int,
failure lnwire.FailureMessage) {
reportOutgoing := func() {
i.failPair(
route, errorSourceIdx,
)
}
reportOutgoingBalance := func() {
i.failPairBalance(
route, errorSourceIdx,
)
// All nodes up to the failing pair must have forwarded
// successfully.
i.successPairRange(route, 0, errorSourceIdx-1)
}
reportIncoming := func() {
// We trust ourselves. If the error comes from the first hop, we
// can penalize the whole node. In that case there is no
// uncertainty as to which node to blame.
if errorSourceIdx == 1 {
i.failNode(route, errorSourceIdx)
return
}
// Otherwise report the incoming pair.
i.failPair(
route, errorSourceIdx-1,
)
// All nodes up to the failing pair must have forwarded
// successfully.
if errorSourceIdx > 1 {
i.successPairRange(route, 0, errorSourceIdx-2)
}
}
reportNode := func() {
// Fail only the node that reported the failure.
i.failNode(route, errorSourceIdx)
// Other preceding channels in the route forwarded correctly.
if errorSourceIdx > 1 {
i.successPairRange(route, 0, errorSourceIdx-2)
}
}
reportAll := func() {
// We trust ourselves. If the error comes from the first hop, we
// can penalize the whole node. In that case there is no
// uncertainty as to which node to blame.
if errorSourceIdx == 1 {
i.failNode(route, errorSourceIdx)
return
}
// Otherwise penalize all pairs up to the error source. This
// includes our own outgoing connection.
i.failPairRange(
route, 0, errorSourceIdx-1,
)
}
switch failure.(type) {
// If a node reports onion payload corruption or an invalid version,
// that node may be responsible, but it could also be that it is just
// relaying a malformed htlc failure from it successor. By reporting the
// outgoing channel set, we will surely hit the responsible node. At
// this point, it is not possible that the node's predecessor corrupted
// the onion blob. If the predecessor would have corrupted the payload,
// the error source wouldn't have been able to encrypt this failure
// message for us.
case *lnwire.FailInvalidOnionVersion,
*lnwire.FailInvalidOnionHmac,
*lnwire.FailInvalidOnionKey:
reportOutgoing()
// If InvalidOnionPayload is received, we penalize only the reporting
// node. We know the preceding hop didn't corrupt the onion, since the
// reporting node is able to send the failure. We assume that we
// constructed a valid onion payload and that the failure is most likely
// an unknown required type or a bug in their implementation.
case *lnwire.InvalidOnionPayload:
reportNode()
// If the next hop in the route wasn't known or offline, we'll only
// penalize the channel set which we attempted to route over. This is
// conservative, and it can handle faulty channels between nodes
// properly. Additionally, this guards against routing nodes returning
// errors in order to attempt to black list another node.
case *lnwire.FailUnknownNextPeer:
reportOutgoing()
// Some implementations use this error when the next hop is offline, so we
// do the same as FailUnknownNextPeer and also process the channel update.
case *lnwire.FailChannelDisabled:
// Set the node pair for which a channel update may be out of
// date. The second chance logic uses the policyFailure field.
i.policyFailure = &DirectedNodePair{
From: route.Hops[errorSourceIdx-1].PubKeyBytes,
To: route.Hops[errorSourceIdx].PubKeyBytes,
}
reportOutgoing()
// All nodes up to the failing pair must have forwarded
// successfully.
i.successPairRange(route, 0, errorSourceIdx-1)
// If we get a permanent channel, we'll prune the channel set in both
// directions and continue with the rest of the routes.
case *lnwire.FailPermanentChannelFailure:
reportOutgoing()
// When an HTLC parameter is incorrect, the node sending the error may
// be doing something wrong. But it could also be that its predecessor
// is intentionally modifying the htlc parameters that we instructed it
// via the hop payload. Therefore we penalize the incoming node pair. A
// third cause of this error may be that we have an out of date channel
// update. This is handled by the second chance logic up in mission
// control.
case *lnwire.FailAmountBelowMinimum,
*lnwire.FailFeeInsufficient,
*lnwire.FailIncorrectCltvExpiry:
// Set the node pair for which a channel update may be out of
// date. The second chance logic uses the policyFailure field.
i.policyFailure = &DirectedNodePair{
From: route.Hops[errorSourceIdx-1].PubKeyBytes,
To: route.Hops[errorSourceIdx].PubKeyBytes,
}
// We report incoming channel. If a second pair is granted in
// mission control, this report is ignored.
reportIncoming()
// If the outgoing channel doesn't have enough capacity, we penalize.
// But we penalize only in a single direction and only for amounts
// greater than the attempted amount.
case *lnwire.FailTemporaryChannelFailure:
reportOutgoingBalance()
// If FailExpiryTooSoon is received, there must have been some delay
// along the path. We can't know which node is causing the delay, so we
// penalize all of them up to the error source.
//
// Alternatively it could also be that we ourselves have fallen behind
// somehow. We ignore that case for now.
case *lnwire.FailExpiryTooSoon:
reportAll()
// We only expect to get FailInvalidBlinding from an introduction node
// in a blinded route. The introduction node in a blinded route is
// always responsible for reporting errors for the blinded portion of
// the route (to protect the privacy of the members of the route), so
// we need to be careful not to unfairly "shoot the messenger".
//
// The introduction node has no incentive to falsely report errors to
// sabotage the blinded route because:
// 1. Its ability to route this payment is strictly tied to the
// blinded route.
// 2. The pubkeys in the blinded route are ephemeral, so doing so
// will have no impact on the nodes beyond the individual payment.
//
// Here we handle a few cases where we could unexpectedly receive this
// error:
// 1. Outside of a blinded route: erring node is not spec compliant.
// 2. Before the introduction point: erring node is not spec compliant.
//
// Note that we expect the case where this error is sent from a node
// after the introduction node to be handled elsewhere as this is part
// of a more general class of errors where the introduction node has
// failed to convert errors for the blinded route.
case *lnwire.FailInvalidBlinding:
introIdx, isBlinded := introductionPointIndex(route)
// Deal with cases where a node has incorrectly returned a
// blinding error:
// 1. A node before the introduction point returned it.
// 2. A node in a non-blinded route returned it.
if errorSourceIdx < introIdx || !isBlinded {
reportNode()
return
}
// Otherwise, the error was at the introduction node. All
// nodes up until the introduction node forwarded correctly,
// so we award them as successful.
if introIdx >= 1 {
i.successPairRange(route, 0, introIdx-1)
}
// If the hop after the introduction node that sent us an
// error is the final recipient, then we finally fail the
// payment because the receiver has generated a blinded route
// that they're unable to use. We have this special case so
// that we don't penalize the introduction node, and there is
// no point in retrying the payment while LND only supports
// one blinded route per payment.
//
// Note that if LND is extended to support multiple blinded
// routes, this will terminate the payment without re-trying
// the other routes.
if introIdx == len(route.Hops)-1 {
i.finalFailureReason = &reasonError
} else {
// If there are other hops between the recipient and
// introduction node, then we just penalize the last
// hop in the blinded route to minimize the storage of
// results for ephemeral keys.
i.failPairBalance(
route, len(route.Hops)-1,
)
}
// In all other cases, we penalize the reporting node. These are all
// failures that should not happen.
default:
i.failNode(route, errorSourceIdx)
}
}
// introductionPointIndex returns the index of an introduction point in a
// route, using the same indexing in the route that we use for errorSourceIdx
// (i.e., that we consider our own node to be at index zero). A boolean is
// returned to indicate whether the route contains a blinded portion at all.
func introductionPointIndex(route *route.Route) (int, bool) {
for i, hop := range route.Hops {
if hop.BlindingPoint != nil {
return i + 1, true
}
}
return 0, false
}
// processPaymentOutcomeUnknown processes a payment outcome for which no failure
// message or source is available.
func (i *interpretedResult) processPaymentOutcomeUnknown(route *route.Route) {
n := len(route.Hops)
// If this is a direct payment, the destination must be at fault.
if n == 1 {
i.failNode(route, n)
i.finalFailureReason = &reasonError
return
}
// Otherwise penalize all channels in the route to make sure the
// responsible node is at least hit too. We even penalize the connection
// to our own peer, because that peer could also be responsible.
i.failPairRange(route, 0, n-1)
}
// failNode marks the node indicated by idx in the route as failed. It also
// marks the incoming and outgoing channels of the node as failed. This function
// intentionally panics when the self node is failed.
func (i *interpretedResult) failNode(rt *route.Route, idx int) {
// Mark the node as failing.
i.nodeFailure = &rt.Hops[idx-1].PubKeyBytes
// Mark the incoming connection as failed for the node. We intent to
// penalize as much as we can for a node level failure, including future
// outgoing traffic for this connection. The pair as it is returned by
// getPair is penalized in the original and the reversed direction. Note
// that this will also affect the score of the failing node's peers.
// This is necessary to prevent future routes from keep going into the
// same node again.
incomingChannelIdx := idx - 1
inPair, _ := getPair(rt, incomingChannelIdx)
i.pairResults[inPair] = failPairResult(0)
i.pairResults[inPair.Reverse()] = failPairResult(0)
// If not the ultimate node, mark the outgoing connection as failed for
// the node.
if idx < len(rt.Hops) {
outgoingChannelIdx := idx
outPair, _ := getPair(rt, outgoingChannelIdx)
i.pairResults[outPair] = failPairResult(0)
i.pairResults[outPair.Reverse()] = failPairResult(0)
}
}
// failPairRange marks the node pairs from node fromIdx to node toIdx as failed
// in both direction.
func (i *interpretedResult) failPairRange(
rt *route.Route, fromIdx, toIdx int) {
for idx := fromIdx; idx <= toIdx; idx++ {
i.failPair(rt, idx)
}
}
// failPair marks a pair as failed in both directions.
func (i *interpretedResult) failPair(
rt *route.Route, idx int) {
pair, _ := getPair(rt, idx)
// Report pair in both directions without a minimum penalization amount.
i.pairResults[pair] = failPairResult(0)
i.pairResults[pair.Reverse()] = failPairResult(0)
}
// failPairBalance marks a pair as failed with a minimum penalization amount.
func (i *interpretedResult) failPairBalance(
rt *route.Route, channelIdx int) {
pair, amt := getPair(rt, channelIdx)
i.pairResults[pair] = failPairResult(amt)
}
// successPairRange marks the node pairs from node fromIdx to node toIdx as
// succeeded.
func (i *interpretedResult) successPairRange(
rt *route.Route, fromIdx, toIdx int) {
for idx := fromIdx; idx <= toIdx; idx++ {
pair, amt := getPair(rt, idx)
i.pairResults[pair] = successPairResult(amt)
}
}
// getPair returns a node pair from the route and the amount passed between that
// pair.
func getPair(rt *route.Route, channelIdx int) (DirectedNodePair,
lnwire.MilliSatoshi) {
nodeTo := rt.Hops[channelIdx].PubKeyBytes
var (
nodeFrom route.Vertex
amt lnwire.MilliSatoshi
)
if channelIdx == 0 {
nodeFrom = rt.SourcePubKey
amt = rt.TotalAmount
} else {
nodeFrom = rt.Hops[channelIdx-1].PubKeyBytes
amt = rt.Hops[channelIdx-1].AmtToForward
}
pair := NewDirectedNodePair(nodeFrom, nodeTo)
return pair, amt
}