2017-10-17 03:57:30 +02:00
|
|
|
package routing
|
|
|
|
|
|
|
|
import (
|
2019-03-19 17:09:27 +01:00
|
|
|
"math"
|
2017-10-17 03:57:30 +02:00
|
|
|
"sync"
|
|
|
|
"time"
|
2017-10-18 04:41:46 +02:00
|
|
|
|
2019-06-26 13:00:35 +02:00
|
|
|
"github.com/coreos/bbolt"
|
2019-06-26 09:49:16 +02:00
|
|
|
"github.com/lightningnetwork/lnd/channeldb"
|
2018-03-27 05:53:46 +02:00
|
|
|
"github.com/lightningnetwork/lnd/lnwire"
|
2019-04-05 17:36:11 +02:00
|
|
|
"github.com/lightningnetwork/lnd/routing/route"
|
2017-10-17 03:57:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
const (
	// DefaultPenaltyHalfLife is the half-life duration that is used by
	// default. The half-life is the time it takes for a penalized node or
	// channel to recover to 50% probability.
	DefaultPenaltyHalfLife = time.Hour

	// minSecondChanceInterval is the minimum amount of time that has to
	// pass between two second chances granted to the same node pair.
	//
	// A node that returns a channel policy related failure may be given a
	// second chance to forward the payment, because the channel policy
	// known to us could be stale. This matters in particular for mobile
	// apps that are offline most of the time.
	//
	// Granting second chances without limit would allow a node to keep us
	// busy by returning new channel updates over and over again, until the
	// payment loop times out.
	//
	// For that reason a second chance is only granted when the previous
	// one is at least minSecondChanceInterval in the past. A policy
	// failure that arrives within the interval results in a penalty.
	//
	// Second chances are tracked per node pair. A node with several
	// channels to the same peer therefore gets just one second chance to
	// route to that peer. Because nodes forward non-strict, a less
	// restrictive channel level tracking scheme isn't needed here.
	minSecondChanceInterval = time.Minute

	// DefaultMaxMcHistory is the maximum history size that is used by
	// default.
	DefaultMaxMcHistory = 1000
)
|
|
|
|
|
2019-05-23 20:05:30 +02:00
|
|
|
// MissionControl contains state which summarizes the past attempts of HTLC
|
2019-03-19 17:09:27 +01:00
|
|
|
// routing by external callers when sending payments throughout the network. It
|
|
|
|
// acts as a shared memory during routing attempts with the goal to optimize the
|
|
|
|
// payment attempt success rate.
|
|
|
|
//
|
|
|
|
// Failed payment attempts are reported to mission control. These reports are
|
|
|
|
// used to track the time of the last node or channel level failure. The time
|
|
|
|
// since the last failure is used to estimate a success probability that is fed
|
|
|
|
// into the path finding process for subsequent payment attempts.
|
2019-05-23 20:05:30 +02:00
|
|
|
type MissionControl struct {
|
2019-07-29 15:10:58 +02:00
|
|
|
// lastPairFailure tracks the last payment failure per node pair.
|
|
|
|
lastPairFailure map[DirectedNodePair]pairFailure
|
|
|
|
|
|
|
|
// lastNodeFailure tracks the last node level failure per node.
|
|
|
|
lastNodeFailure map[route.Vertex]time.Time
|
2017-10-17 03:57:30 +02:00
|
|
|
|
2019-06-26 08:39:34 +02:00
|
|
|
// lastSecondChance tracks the last time a second chance was granted for
|
|
|
|
// a directed node pair.
|
|
|
|
lastSecondChance map[DirectedNodePair]time.Time
|
|
|
|
|
2019-03-19 17:09:27 +01:00
|
|
|
// now is expected to return the current time. It is supplied as an
|
|
|
|
// external function to enable deterministic unit tests.
|
|
|
|
now func() time.Time
|
|
|
|
|
2019-05-22 11:56:04 +02:00
|
|
|
cfg *MissionControlConfig
|
2019-03-19 17:09:27 +01:00
|
|
|
|
2019-06-26 13:00:35 +02:00
|
|
|
store *missionControlStore
|
|
|
|
|
2017-10-17 03:57:30 +02:00
|
|
|
sync.Mutex
|
|
|
|
|
|
|
|
// TODO(roasbeef): further counters, if vertex continually unavailable,
|
|
|
|
// add to another generation
|
|
|
|
|
|
|
|
// TODO(roasbeef): also add favorable metrics for nodes
|
|
|
|
}
|
|
|
|
|
2019-05-22 11:56:04 +02:00
|
|
|
// MissionControlConfig groups the parameters that control the behaviour of
// mission control.
type MissionControlConfig struct {
	// PenaltyHalfLife is the time after which a penalized node or channel
	// is back at 50% probability.
	PenaltyHalfLife time.Duration

	// AprioriHopProbability is the success probability that is assumed for
	// a hop in a route when no other information is available.
	AprioriHopProbability float64

	// MaxMcHistory is the maximum number of payment results that are held
	// on disk.
	MaxMcHistory int
}
|
|
|
|
|
2019-07-29 15:10:58 +02:00
|
|
|
// pairFailure describes a payment failure for a node pair.
|
|
|
|
type pairFailure struct {
|
|
|
|
// timestamp is the time when this failure result was obtained.
|
|
|
|
timestamp time.Time
|
2017-10-18 04:41:46 +02:00
|
|
|
|
2019-03-19 17:09:27 +01:00
|
|
|
// minPenalizeAmt is the minimum amount for which to take this failure
|
|
|
|
// into account.
|
|
|
|
minPenalizeAmt lnwire.MilliSatoshi
|
2017-10-18 04:41:46 +02:00
|
|
|
}
|
|
|
|
|
2019-05-10 10:38:31 +02:00
|
|
|
// MissionControlSnapshot contains a snapshot of the current state of mission
|
|
|
|
// control.
|
|
|
|
type MissionControlSnapshot struct {
|
|
|
|
// Nodes contains the per node information of this snapshot.
|
|
|
|
Nodes []MissionControlNodeSnapshot
|
2019-07-29 15:10:58 +02:00
|
|
|
|
|
|
|
// Pairs is a list of channels for which specific information is
|
|
|
|
// logged.
|
|
|
|
Pairs []MissionControlPairSnapshot
|
2019-05-10 10:38:31 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// MissionControlNodeSnapshot contains a snapshot of the current node state in
|
|
|
|
// mission control.
|
|
|
|
type MissionControlNodeSnapshot struct {
|
|
|
|
// Node pubkey.
|
|
|
|
Node route.Vertex
|
|
|
|
|
2019-07-29 15:10:58 +02:00
|
|
|
// LastFail is the time of last failure.
|
|
|
|
LastFail time.Time
|
2019-05-10 10:38:31 +02:00
|
|
|
|
2019-07-29 15:10:58 +02:00
|
|
|
// OtherSuccessProb is the success probability for pairs not in
|
|
|
|
// the Pairs slice.
|
|
|
|
OtherSuccessProb float64
|
2019-05-10 10:38:31 +02:00
|
|
|
}
|
|
|
|
|
2019-07-29 15:10:58 +02:00
|
|
|
// MissionControlPairSnapshot contains a snapshot of the current node pair
|
2019-05-10 10:38:31 +02:00
|
|
|
// state in mission control.
|
2019-07-29 15:10:58 +02:00
|
|
|
type MissionControlPairSnapshot struct {
|
|
|
|
// Pair is the node pair of which the state is described.
|
|
|
|
Pair DirectedNodePair
|
2019-05-10 10:38:31 +02:00
|
|
|
|
|
|
|
// LastFail is the time of last failure.
|
|
|
|
LastFail time.Time
|
|
|
|
|
|
|
|
// MinPenalizeAmt is the minimum amount for which the channel will be
|
|
|
|
// penalized.
|
|
|
|
MinPenalizeAmt lnwire.MilliSatoshi
|
|
|
|
|
|
|
|
// SuccessProb is the success probability estimation for this channel.
|
|
|
|
SuccessProb float64
|
|
|
|
}
|
|
|
|
|
2019-06-26 12:25:23 +02:00
|
|
|
// paymentResult is the information that becomes available when a payment
|
|
|
|
// attempt completes.
|
|
|
|
type paymentResult struct {
|
|
|
|
id uint64
|
|
|
|
timeFwd, timeReply time.Time
|
|
|
|
route *route.Route
|
|
|
|
success bool
|
|
|
|
failureSourceIdx *int
|
|
|
|
failure lnwire.FailureMessage
|
|
|
|
}
|
|
|
|
|
2019-03-19 17:09:27 +01:00
|
|
|
// NewMissionControl returns a new instance of missionControl.
|
2019-06-26 13:00:35 +02:00
|
|
|
func NewMissionControl(db *bbolt.DB, cfg *MissionControlConfig) (
|
|
|
|
*MissionControl, error) {
|
|
|
|
|
2019-05-22 11:56:04 +02:00
|
|
|
log.Debugf("Instantiating mission control with config: "+
|
2019-06-18 18:30:56 +02:00
|
|
|
"PenaltyHalfLife=%v, AprioriHopProbability=%v",
|
|
|
|
cfg.PenaltyHalfLife, cfg.AprioriHopProbability)
|
2017-10-17 03:57:30 +02:00
|
|
|
|
2019-06-26 13:00:35 +02:00
|
|
|
store, err := newMissionControlStore(db, cfg.MaxMcHistory)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
mc := &MissionControl{
|
2019-07-29 15:10:58 +02:00
|
|
|
lastPairFailure: make(map[DirectedNodePair]pairFailure),
|
|
|
|
lastNodeFailure: make(map[route.Vertex]time.Time),
|
2019-06-26 08:39:34 +02:00
|
|
|
lastSecondChance: make(map[DirectedNodePair]time.Time),
|
|
|
|
now: time.Now,
|
|
|
|
cfg: cfg,
|
2019-06-26 13:00:35 +02:00
|
|
|
store: store,
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := mc.init(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return mc, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// init initializes mission control with historical data.
|
|
|
|
func (m *MissionControl) init() error {
|
|
|
|
log.Debugf("Mission control state reconstruction started")
|
|
|
|
|
|
|
|
start := time.Now()
|
|
|
|
|
|
|
|
results, err := m.store.fetchAll()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, result := range results {
|
|
|
|
m.applyPaymentResult(result)
|
2019-05-23 20:05:29 +02:00
|
|
|
}
|
2019-06-26 13:00:35 +02:00
|
|
|
|
|
|
|
log.Debugf("Mission control state reconstruction finished: "+
|
|
|
|
"n=%v, time=%v", len(results), time.Now().Sub(start))
|
|
|
|
|
|
|
|
return nil
|
2019-05-23 20:05:29 +02:00
|
|
|
}
|
|
|
|
|
2019-05-23 20:05:30 +02:00
|
|
|
// ResetHistory resets the history of MissionControl returning it to a state as
|
2017-10-17 03:57:30 +02:00
|
|
|
// if no payment attempts have been made.
|
2019-06-26 13:00:35 +02:00
|
|
|
func (m *MissionControl) ResetHistory() error {
|
2017-10-17 03:57:30 +02:00
|
|
|
m.Lock()
|
2019-03-19 17:09:27 +01:00
|
|
|
defer m.Unlock()
|
|
|
|
|
2019-06-26 13:00:35 +02:00
|
|
|
if err := m.store.clear(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2019-07-29 15:10:58 +02:00
|
|
|
m.lastPairFailure = make(map[DirectedNodePair]pairFailure)
|
|
|
|
m.lastNodeFailure = make(map[route.Vertex]time.Time)
|
2019-06-26 08:39:34 +02:00
|
|
|
m.lastSecondChance = make(map[DirectedNodePair]time.Time)
|
2019-03-19 17:09:27 +01:00
|
|
|
|
|
|
|
log.Debugf("Mission control history cleared")
|
2019-06-26 13:00:35 +02:00
|
|
|
|
|
|
|
return nil
|
2019-03-19 17:09:27 +01:00
|
|
|
}
|
|
|
|
|
2019-07-29 15:10:58 +02:00
|
|
|
// GetProbability is expected to return the success probability of a payment
|
2019-03-19 17:09:27 +01:00
|
|
|
// from fromNode along edge.
|
2019-07-29 15:10:58 +02:00
|
|
|
func (m *MissionControl) GetProbability(fromNode, toNode route.Vertex,
|
|
|
|
amt lnwire.MilliSatoshi) float64 {
|
2019-03-19 17:09:27 +01:00
|
|
|
|
|
|
|
m.Lock()
|
|
|
|
defer m.Unlock()
|
|
|
|
|
2019-07-29 15:10:58 +02:00
|
|
|
return m.getPairProbability(fromNode, toNode, amt)
|
2019-03-19 17:09:27 +01:00
|
|
|
}
|
|
|
|
|
2019-07-29 15:10:58 +02:00
|
|
|
// getProbAfterFail returns a probability estimate based on a last failure time.
|
|
|
|
func (m *MissionControl) getProbAfterFail(lastFailure time.Time) float64 {
|
|
|
|
if lastFailure.IsZero() {
|
2019-05-22 11:56:04 +02:00
|
|
|
return m.cfg.AprioriHopProbability
|
2019-03-19 17:09:27 +01:00
|
|
|
}
|
|
|
|
|
2019-07-29 15:10:58 +02:00
|
|
|
timeSinceLastFailure := m.now().Sub(lastFailure)
|
2019-03-19 17:09:27 +01:00
|
|
|
|
|
|
|
// Calculate success probability. It is an exponential curve that brings
|
|
|
|
// the probability down to zero when a failure occurs. From there it
|
|
|
|
// recovers asymptotically back to the a priori probability. The rate at
|
|
|
|
// which this happens is controlled by the penaltyHalfLife parameter.
|
2019-05-22 11:56:04 +02:00
|
|
|
exp := -timeSinceLastFailure.Hours() / m.cfg.PenaltyHalfLife.Hours()
|
|
|
|
probability := m.cfg.AprioriHopProbability * (1 - math.Pow(2, exp))
|
2019-03-19 17:09:27 +01:00
|
|
|
|
|
|
|
return probability
|
|
|
|
}
|
|
|
|
|
2019-07-29 15:10:58 +02:00
|
|
|
// getPairProbability estimates the probability of successfully
|
|
|
|
// traversing from fromNode to toNode based on historical payment outcomes.
|
|
|
|
func (m *MissionControl) getPairProbability(fromNode,
|
|
|
|
toNode route.Vertex, amt lnwire.MilliSatoshi) float64 {
|
|
|
|
|
|
|
|
// Start by getting the last node level failure. A node failure is
|
|
|
|
// considered a failure that would have affected every edge. Therefore
|
|
|
|
// we insert a node level failure into the history of every channel. If
|
|
|
|
// there is none, lastFail will be zero.
|
|
|
|
lastFail := m.lastNodeFailure[fromNode]
|
|
|
|
|
|
|
|
// Retrieve the last pair outcome.
|
|
|
|
pair := NewDirectedNodePair(fromNode, toNode)
|
|
|
|
lastPairResult, ok := m.lastPairFailure[pair]
|
|
|
|
|
|
|
|
// Only look at the last pair outcome if it happened after the last node
|
|
|
|
// level failure. Otherwise the node level failure is the most recent
|
|
|
|
// and used as the basis for calculation of the probability.
|
|
|
|
if ok && lastPairResult.timestamp.After(lastFail) {
|
|
|
|
// Take into account a minimum penalize amount. For balance
|
|
|
|
// errors, a failure may be reported with such a minimum to
|
|
|
|
// prevent too aggresive penalization. We only take into account
|
|
|
|
// a previous failure if the amount that we currently get the
|
|
|
|
// probability for is greater or equal than the minPenalizeAmt
|
|
|
|
// of the previous failure.
|
|
|
|
if amt >= lastPairResult.minPenalizeAmt {
|
|
|
|
lastFail = lastPairResult.timestamp
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return m.getProbAfterFail(lastFail)
|
|
|
|
}
|
|
|
|
|
2019-06-26 08:39:34 +02:00
|
|
|
// requestSecondChance checks whether the node fromNode can have a second chance
|
|
|
|
// at providing a channel update for its channel with toNode.
|
|
|
|
func (m *MissionControl) requestSecondChance(timestamp time.Time,
|
|
|
|
fromNode, toNode route.Vertex) bool {
|
|
|
|
|
|
|
|
// Look up previous second chance time.
|
|
|
|
pair := DirectedNodePair{
|
|
|
|
From: fromNode,
|
|
|
|
To: toNode,
|
|
|
|
}
|
|
|
|
lastSecondChance, ok := m.lastSecondChance[pair]
|
|
|
|
|
|
|
|
// If the channel hasn't already be given a second chance or its last
|
|
|
|
// second chance was long ago, we give it another chance.
|
|
|
|
if !ok || timestamp.Sub(lastSecondChance) > minSecondChanceInterval {
|
|
|
|
m.lastSecondChance[pair] = timestamp
|
|
|
|
|
|
|
|
log.Debugf("Second chance granted for %v->%v", fromNode, toNode)
|
|
|
|
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
// Otherwise penalize the channel, because we don't allow channel
|
|
|
|
// updates that are that frequent. This is to prevent nodes from keeping
|
|
|
|
// us busy by continuously sending new channel updates.
|
|
|
|
log.Debugf("Second chance denied for %v->%v, remaining interval: %v",
|
|
|
|
fromNode, toNode, timestamp.Sub(lastSecondChance))
|
|
|
|
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2019-06-26 09:49:16 +02:00
|
|
|
// reportVertexFailure reports a node level failure.
|
2019-07-02 11:29:44 +02:00
|
|
|
func (m *MissionControl) reportVertexFailure(timestamp time.Time,
|
|
|
|
v route.Vertex) {
|
2019-03-19 17:09:27 +01:00
|
|
|
|
2019-07-02 11:29:44 +02:00
|
|
|
log.Debugf("Reporting vertex %v failure to Mission Control", v)
|
2019-03-19 17:09:27 +01:00
|
|
|
|
|
|
|
m.Lock()
|
|
|
|
defer m.Unlock()
|
|
|
|
|
2019-07-29 15:10:58 +02:00
|
|
|
m.lastNodeFailure[v] = timestamp
|
2019-03-19 17:09:27 +01:00
|
|
|
}
|
|
|
|
|
2019-07-29 15:10:58 +02:00
|
|
|
// reportPairPolicyFailure reports a policy related failure.
|
|
|
|
func (m *MissionControl) reportPairPolicyFailure(timestamp time.Time,
|
|
|
|
failedPair DirectedNodePair) {
|
2019-06-26 08:39:34 +02:00
|
|
|
|
|
|
|
m.Lock()
|
|
|
|
defer m.Unlock()
|
|
|
|
|
|
|
|
// We may have an out of date graph. Therefore we don't always penalize
|
|
|
|
// immediately. If some time has passed since the last policy failure,
|
|
|
|
// we grant the node a second chance at forwarding the payment.
|
|
|
|
if m.requestSecondChance(
|
2019-07-29 15:10:58 +02:00
|
|
|
timestamp, failedPair.From, failedPair.To,
|
2019-06-26 08:39:34 +02:00
|
|
|
) {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2019-07-29 15:10:58 +02:00
|
|
|
m.lastNodeFailure[failedPair.From] = timestamp
|
2019-06-26 08:39:34 +02:00
|
|
|
}
|
|
|
|
|
2019-07-29 15:10:58 +02:00
|
|
|
// reportPairFailure reports a pair level failure.
|
2019-03-19 17:09:27 +01:00
|
|
|
//
|
|
|
|
// TODO(roasbeef): also add value attempted to send and capacity of channel
|
2019-07-29 15:10:58 +02:00
|
|
|
func (m *MissionControl) reportPairFailure(timestamp time.Time,
|
|
|
|
failedPair DirectedNodePair, minPenalizeAmt lnwire.MilliSatoshi) {
|
2019-03-19 17:09:27 +01:00
|
|
|
|
2019-07-29 15:10:58 +02:00
|
|
|
log.Debugf("Reporting pair %v failure to Mission Control", failedPair)
|
2019-03-19 17:09:27 +01:00
|
|
|
|
|
|
|
m.Lock()
|
|
|
|
defer m.Unlock()
|
|
|
|
|
2019-07-29 15:10:58 +02:00
|
|
|
pair := NewDirectedNodePair(failedPair.From, failedPair.To)
|
|
|
|
m.lastPairFailure[pair] = pairFailure{
|
2019-03-19 17:09:27 +01:00
|
|
|
minPenalizeAmt: minPenalizeAmt,
|
2019-07-29 15:10:58 +02:00
|
|
|
timestamp: timestamp,
|
2019-03-19 17:09:27 +01:00
|
|
|
}
|
2017-10-17 03:57:30 +02:00
|
|
|
}
|
2019-05-10 10:38:31 +02:00
|
|
|
|
|
|
|
// GetHistorySnapshot takes a snapshot from the current mission control state
|
|
|
|
// and actual probability estimates.
|
|
|
|
func (m *MissionControl) GetHistorySnapshot() *MissionControlSnapshot {
|
|
|
|
m.Lock()
|
|
|
|
defer m.Unlock()
|
|
|
|
|
|
|
|
log.Debugf("Requesting history snapshot from mission control: "+
|
2019-07-29 15:10:58 +02:00
|
|
|
"node_failure_count=%v, pair_result_count=%v",
|
|
|
|
len(m.lastNodeFailure), len(m.lastPairFailure))
|
|
|
|
|
|
|
|
nodes := make([]MissionControlNodeSnapshot, 0, len(m.lastNodeFailure))
|
|
|
|
for v, h := range m.lastNodeFailure {
|
|
|
|
otherProb := m.getPairProbability(v, route.Vertex{}, 0)
|
|
|
|
|
|
|
|
nodes = append(nodes, MissionControlNodeSnapshot{
|
|
|
|
Node: v,
|
|
|
|
LastFail: h,
|
|
|
|
OtherSuccessProb: otherProb,
|
|
|
|
})
|
|
|
|
}
|
2019-05-10 10:38:31 +02:00
|
|
|
|
2019-07-29 15:10:58 +02:00
|
|
|
pairs := make([]MissionControlPairSnapshot, 0, len(m.lastPairFailure))
|
2019-05-10 10:38:31 +02:00
|
|
|
|
2019-07-29 15:10:58 +02:00
|
|
|
for v, h := range m.lastPairFailure {
|
|
|
|
// Show probability assuming amount meets min
|
|
|
|
// penalization amount.
|
|
|
|
prob := m.getPairProbability(v.From, v.To, h.minPenalizeAmt)
|
2019-05-10 10:38:31 +02:00
|
|
|
|
2019-07-29 15:10:58 +02:00
|
|
|
pair := MissionControlPairSnapshot{
|
|
|
|
Pair: v,
|
|
|
|
MinPenalizeAmt: h.minPenalizeAmt,
|
|
|
|
LastFail: h.timestamp,
|
|
|
|
SuccessProb: prob,
|
2019-05-10 10:38:31 +02:00
|
|
|
}
|
|
|
|
|
2019-07-29 15:10:58 +02:00
|
|
|
pairs = append(pairs, pair)
|
2019-05-10 10:38:31 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
snapshot := MissionControlSnapshot{
|
|
|
|
Nodes: nodes,
|
2019-07-29 15:10:58 +02:00
|
|
|
Pairs: pairs,
|
2019-05-10 10:38:31 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return &snapshot
|
|
|
|
}
|
2019-06-26 09:49:16 +02:00
|
|
|
|
|
|
|
// ReportPaymentFail reports a failed payment to mission control as input for
|
2019-06-26 11:34:25 +02:00
|
|
|
// future probability estimates. The failureSourceIdx argument indicates the
|
|
|
|
// failure source. If it is nil, the failure source is unknown. This function
|
2019-08-05 12:13:58 +02:00
|
|
|
// returns a reason if this failure is a final failure. In that case no further
|
|
|
|
// payment attempts need to be made.
|
2019-06-26 11:48:59 +02:00
|
|
|
func (m *MissionControl) ReportPaymentFail(paymentID uint64, rt *route.Route,
|
2019-08-05 12:13:58 +02:00
|
|
|
failureSourceIdx *int, failure lnwire.FailureMessage) (
|
|
|
|
*channeldb.FailureReason, error) {
|
2019-06-26 09:49:16 +02:00
|
|
|
|
2019-07-02 11:29:44 +02:00
|
|
|
timestamp := m.now()
|
|
|
|
|
2019-06-26 12:25:23 +02:00
|
|
|
// TODO(joostjager): Use actual payment initiation time for timeFwd.
|
|
|
|
result := &paymentResult{
|
|
|
|
success: false,
|
|
|
|
timeFwd: timestamp,
|
|
|
|
timeReply: timestamp,
|
|
|
|
id: paymentID,
|
|
|
|
failureSourceIdx: failureSourceIdx,
|
|
|
|
failure: failure,
|
|
|
|
route: rt,
|
|
|
|
}
|
|
|
|
|
2019-06-26 13:00:35 +02:00
|
|
|
// Store complete result in database.
|
|
|
|
if err := m.store.AddResult(result); err != nil {
|
2019-08-05 12:13:58 +02:00
|
|
|
return nil, err
|
2019-06-26 13:00:35 +02:00
|
|
|
}
|
|
|
|
|
2019-07-02 11:29:44 +02:00
|
|
|
// Apply result to update mission control state.
|
2019-06-26 13:00:35 +02:00
|
|
|
final, reason := m.applyPaymentResult(result)
|
|
|
|
|
2019-08-05 12:13:58 +02:00
|
|
|
// Convert final bool and reason to nillable reason.
|
|
|
|
if final {
|
|
|
|
return &reason, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil, nil
|
2019-07-02 11:29:44 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// applyPaymentResult applies a payment result as input for future probability
|
|
|
|
// estimates. It returns a bool indicating whether this error is a final error
|
|
|
|
// and no further payment attempts need to be made.
|
2019-06-26 12:25:23 +02:00
|
|
|
func (m *MissionControl) applyPaymentResult(result *paymentResult) (
|
|
|
|
bool, channeldb.FailureReason) {
|
2019-07-02 11:29:44 +02:00
|
|
|
|
|
|
|
var (
|
|
|
|
failureSourceIdxInt int
|
|
|
|
failure lnwire.FailureMessage
|
|
|
|
)
|
|
|
|
|
2019-06-26 12:25:23 +02:00
|
|
|
if result.failureSourceIdx == nil {
|
2019-06-26 11:34:25 +02:00
|
|
|
// If the failure message could not be decrypted, attribute the
|
|
|
|
// failure to our own outgoing channel.
|
|
|
|
//
|
|
|
|
// TODO(joostager): Penalize all channels in the route.
|
|
|
|
failureSourceIdxInt = 0
|
|
|
|
failure = lnwire.NewTemporaryChannelFailure(nil)
|
|
|
|
} else {
|
2019-06-26 12:25:23 +02:00
|
|
|
failureSourceIdxInt = *result.failureSourceIdx
|
|
|
|
failure = result.failure
|
2019-06-26 11:34:25 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
var failureVertex route.Vertex
|
2019-06-26 09:49:16 +02:00
|
|
|
|
2019-06-26 11:34:25 +02:00
|
|
|
if failureSourceIdxInt > 0 {
|
2019-06-26 12:25:23 +02:00
|
|
|
failureVertex = result.route.Hops[failureSourceIdxInt-1].PubKeyBytes
|
2019-06-26 09:49:16 +02:00
|
|
|
} else {
|
2019-06-26 12:25:23 +02:00
|
|
|
failureVertex = result.route.SourcePubKey
|
2019-06-26 09:49:16 +02:00
|
|
|
}
|
|
|
|
log.Tracef("Node %x (index %v) reported failure when sending htlc",
|
2019-06-26 12:25:23 +02:00
|
|
|
failureVertex, result.failureSourceIdx)
|
2019-06-26 09:49:16 +02:00
|
|
|
|
2019-06-26 11:34:25 +02:00
|
|
|
// Always determine chan id ourselves, because a channel update with id
|
|
|
|
// may not be available.
|
2019-07-29 15:10:58 +02:00
|
|
|
failedPair, failedAmt := getFailedPair(
|
2019-06-26 12:25:23 +02:00
|
|
|
result.route, failureSourceIdxInt,
|
2019-07-02 11:29:44 +02:00
|
|
|
)
|
2019-06-26 09:49:16 +02:00
|
|
|
|
|
|
|
switch failure.(type) {
|
|
|
|
|
|
|
|
// If the end destination didn't know the payment
|
|
|
|
// hash or we sent the wrong payment amount to the
|
|
|
|
// destination, then we'll terminate immediately.
|
2019-06-12 11:35:54 +02:00
|
|
|
case *lnwire.FailIncorrectDetails:
|
2019-06-26 09:49:16 +02:00
|
|
|
// TODO(joostjager): Check onionErr.Amount() whether it matches
|
|
|
|
// what we expect. (Will it ever not match, because if not
|
|
|
|
// final_incorrect_htlc_amount would be returned?)
|
|
|
|
|
|
|
|
return true, channeldb.FailureReasonIncorrectPaymentDetails
|
|
|
|
|
|
|
|
// If we sent the wrong amount to the destination, then
|
|
|
|
// we'll exit early.
|
|
|
|
case *lnwire.FailIncorrectPaymentAmount:
|
|
|
|
return true, channeldb.FailureReasonIncorrectPaymentDetails
|
|
|
|
|
|
|
|
// If the time-lock that was extended to the final node
|
|
|
|
// was incorrect, then we can't proceed.
|
|
|
|
case *lnwire.FailFinalIncorrectCltvExpiry:
|
|
|
|
// TODO(joostjager): Take into account that second last hop may
|
|
|
|
// have deliberately handed out an htlc that expires too soon.
|
|
|
|
// In that case we should continue routing.
|
|
|
|
return true, channeldb.FailureReasonError
|
|
|
|
|
|
|
|
// If we crafted an invalid onion payload for the final
|
|
|
|
// node, then we'll exit early.
|
|
|
|
case *lnwire.FailFinalIncorrectHtlcAmount:
|
|
|
|
// TODO(joostjager): Take into account that second last hop may
|
|
|
|
// have deliberately handed out an htlc with a too low value. In
|
|
|
|
// that case we should continue routing.
|
|
|
|
|
|
|
|
return true, channeldb.FailureReasonError
|
|
|
|
|
|
|
|
// Similarly, if the HTLC expiry that we extended to
|
|
|
|
// the final hop expires too soon, then will fail the
|
|
|
|
// payment.
|
|
|
|
//
|
|
|
|
// TODO(roasbeef): can happen to to race condition, try
|
|
|
|
// again with recent block height
|
|
|
|
case *lnwire.FailFinalExpiryTooSoon:
|
|
|
|
// TODO(joostjager): Take into account that any hop may have
|
|
|
|
// delayed. Ideally we should continue routing. Knowing the
|
|
|
|
// delaying node at this point would help.
|
|
|
|
return true, channeldb.FailureReasonIncorrectPaymentDetails
|
|
|
|
|
|
|
|
// If we erroneously attempted to cross a chain border,
|
|
|
|
// then we'll cancel the payment.
|
|
|
|
case *lnwire.FailInvalidRealm:
|
|
|
|
return true, channeldb.FailureReasonError
|
|
|
|
|
|
|
|
// If we get a notice that the expiry was too soon for
|
|
|
|
// an intermediate node, then we'll prune out the node
|
|
|
|
// that sent us this error, as it doesn't now what the
|
|
|
|
// correct block height is.
|
|
|
|
case *lnwire.FailExpiryTooSoon:
|
2019-06-26 12:25:23 +02:00
|
|
|
m.reportVertexFailure(result.timeReply, failureVertex)
|
2019-06-26 09:49:16 +02:00
|
|
|
return false, 0
|
|
|
|
|
|
|
|
// If we hit an instance of onion payload corruption or an invalid
|
|
|
|
// version, then we'll exit early as this shouldn't happen in the
|
|
|
|
// typical case.
|
|
|
|
//
|
|
|
|
// TODO(joostjager): Take into account that the previous hop may have
|
|
|
|
// tampered with the onion. Routing should continue using other paths.
|
|
|
|
case *lnwire.FailInvalidOnionVersion:
|
|
|
|
return true, channeldb.FailureReasonError
|
|
|
|
case *lnwire.FailInvalidOnionHmac:
|
|
|
|
return true, channeldb.FailureReasonError
|
|
|
|
case *lnwire.FailInvalidOnionKey:
|
|
|
|
return true, channeldb.FailureReasonError
|
|
|
|
|
|
|
|
// If we get a failure due to violating the minimum
|
|
|
|
// amount, we'll apply the new minimum amount and retry
|
|
|
|
// routing.
|
|
|
|
case *lnwire.FailAmountBelowMinimum:
|
2019-07-29 15:10:58 +02:00
|
|
|
m.reportPairPolicyFailure(result.timeReply, failedPair)
|
2019-06-26 09:49:16 +02:00
|
|
|
return false, 0
|
|
|
|
|
|
|
|
// If we get a failure due to a fee, we'll apply the
|
|
|
|
// new fee update, and retry our attempt using the
|
|
|
|
// newly updated fees.
|
|
|
|
case *lnwire.FailFeeInsufficient:
|
2019-07-29 15:10:58 +02:00
|
|
|
m.reportPairPolicyFailure(result.timeReply, failedPair)
|
2019-06-26 09:49:16 +02:00
|
|
|
return false, 0
|
|
|
|
|
|
|
|
// If we get the failure for an intermediate node that
|
|
|
|
// disagrees with our time lock values, then we'll
|
|
|
|
// apply the new delta value and try it once more.
|
|
|
|
case *lnwire.FailIncorrectCltvExpiry:
|
2019-07-29 15:10:58 +02:00
|
|
|
m.reportPairPolicyFailure(result.timeReply, failedPair)
|
2019-06-26 09:49:16 +02:00
|
|
|
return false, 0
|
|
|
|
|
|
|
|
// The outgoing channel that this node was meant to
|
|
|
|
// forward one is currently disabled, so we'll apply
|
|
|
|
// the update and continue.
|
|
|
|
case *lnwire.FailChannelDisabled:
|
2019-07-29 15:10:58 +02:00
|
|
|
m.reportPairFailure(result.timeReply, failedPair, 0)
|
2019-06-26 09:49:16 +02:00
|
|
|
return false, 0
|
|
|
|
|
|
|
|
// It's likely that the outgoing channel didn't have
|
2019-07-29 15:10:58 +02:00
|
|
|
// sufficient capacity, so we'll prune this pair for
|
2019-06-26 09:49:16 +02:00
|
|
|
// now, and continue onwards with our path finding.
|
|
|
|
case *lnwire.FailTemporaryChannelFailure:
|
2019-07-29 15:10:58 +02:00
|
|
|
m.reportPairFailure(result.timeReply, failedPair, failedAmt)
|
2019-06-26 09:49:16 +02:00
|
|
|
return false, 0
|
|
|
|
|
|
|
|
// If the send fail due to a node not having the
|
|
|
|
// required features, then we'll note this error and
|
|
|
|
// continue.
|
|
|
|
case *lnwire.FailRequiredNodeFeatureMissing:
|
2019-06-26 12:25:23 +02:00
|
|
|
m.reportVertexFailure(result.timeReply, failureVertex)
|
2019-06-26 09:49:16 +02:00
|
|
|
return false, 0
|
|
|
|
|
|
|
|
// If the send fail due to a node not having the
|
|
|
|
// required features, then we'll note this error and
|
|
|
|
// continue.
|
|
|
|
case *lnwire.FailRequiredChannelFeatureMissing:
|
2019-06-26 12:25:23 +02:00
|
|
|
m.reportVertexFailure(result.timeReply, failureVertex)
|
2019-06-26 09:49:16 +02:00
|
|
|
return false, 0
|
|
|
|
|
|
|
|
// If the next hop in the route wasn't known or
|
|
|
|
// offline, we'll only the channel which we attempted
|
|
|
|
// to route over. This is conservative, and it can
|
|
|
|
// handle faulty channels between nodes properly.
|
|
|
|
// Additionally, this guards against routing nodes
|
|
|
|
// returning errors in order to attempt to black list
|
|
|
|
// another node.
|
|
|
|
case *lnwire.FailUnknownNextPeer:
|
2019-07-29 15:10:58 +02:00
|
|
|
m.reportPairFailure(result.timeReply, failedPair, 0)
|
2019-06-26 09:49:16 +02:00
|
|
|
return false, 0
|
|
|
|
|
|
|
|
// If the node wasn't able to forward for which ever
|
|
|
|
// reason, then we'll note this and continue with the
|
|
|
|
// routes.
|
|
|
|
case *lnwire.FailTemporaryNodeFailure:
|
2019-06-26 12:25:23 +02:00
|
|
|
m.reportVertexFailure(result.timeReply, failureVertex)
|
2019-06-26 09:49:16 +02:00
|
|
|
return false, 0
|
|
|
|
|
|
|
|
case *lnwire.FailPermanentNodeFailure:
|
2019-06-26 12:25:23 +02:00
|
|
|
m.reportVertexFailure(result.timeReply, failureVertex)
|
2019-06-26 09:49:16 +02:00
|
|
|
return false, 0
|
|
|
|
|
|
|
|
// If we crafted a route that contains a too long time
|
|
|
|
// lock for an intermediate node, we'll prune the node.
|
|
|
|
// As there currently is no way of knowing that node's
|
|
|
|
// maximum acceptable cltv, we cannot take this
|
|
|
|
// constraint into account during routing.
|
|
|
|
//
|
|
|
|
// TODO(joostjager): Record the rejected cltv and use
|
|
|
|
// that as a hint during future path finding through
|
|
|
|
// that node.
|
|
|
|
case *lnwire.FailExpiryTooFar:
|
2019-06-26 12:25:23 +02:00
|
|
|
m.reportVertexFailure(result.timeReply, failureVertex)
|
2019-06-26 09:49:16 +02:00
|
|
|
return false, 0
|
|
|
|
|
|
|
|
// If we get a permanent channel or node failure, then
|
|
|
|
// we'll prune the channel in both directions and
|
|
|
|
// continue with the rest of the routes.
|
|
|
|
case *lnwire.FailPermanentChannelFailure:
|
2019-07-29 15:10:58 +02:00
|
|
|
m.reportPairFailure(result.timeReply, failedPair, 0)
|
|
|
|
m.reportPairFailure(
|
|
|
|
result.timeReply, failedPair.Reverse(), 0,
|
|
|
|
)
|
2019-06-26 09:49:16 +02:00
|
|
|
return false, 0
|
|
|
|
|
|
|
|
// Any other failure or an empty failure will get the node pruned.
|
|
|
|
default:
|
2019-06-26 12:25:23 +02:00
|
|
|
m.reportVertexFailure(result.timeReply, failureVertex)
|
2019-06-26 09:49:16 +02:00
|
|
|
return false, 0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-07-29 15:10:58 +02:00
|
|
|
// getFailedPair tries to locate the failing pair given a route and the pubkey
|
|
|
|
// of the node that sent the failure. It will assume that the failure is
|
|
|
|
// associated with the outgoing channel set of the failing node. As a second
|
|
|
|
// result, it returns the amount sent between the pair.
|
|
|
|
func getFailedPair(route *route.Route, failureSource int) (DirectedNodePair,
|
2019-06-26 09:49:16 +02:00
|
|
|
lnwire.MilliSatoshi) {
|
|
|
|
|
|
|
|
// Determine if we have a failure from the final hop. If it is, we
|
|
|
|
// assume that the failing channel is the incoming channel.
|
|
|
|
//
|
|
|
|
// TODO(joostjager): In this case, certain types of failures are not
|
|
|
|
// expected. For example FailUnknownNextPeer. This could be a reason to
|
|
|
|
// prune the node?
|
|
|
|
if failureSource == len(route.Hops) {
|
|
|
|
failureSource--
|
|
|
|
}
|
|
|
|
|
|
|
|
// As this failure indicates that the target channel was unable to carry
|
|
|
|
// this HTLC (for w/e reason), we'll return the _outgoing_ channel that
|
|
|
|
// the source of the failure was meant to pass the HTLC along to.
|
|
|
|
if failureSource == 0 {
|
2019-07-29 15:10:58 +02:00
|
|
|
return NewDirectedNodePair(
|
|
|
|
route.SourcePubKey,
|
|
|
|
route.Hops[0].PubKeyBytes,
|
|
|
|
), route.TotalAmount
|
2019-06-26 09:49:16 +02:00
|
|
|
}
|
|
|
|
|
2019-07-29 15:10:58 +02:00
|
|
|
return NewDirectedNodePair(
|
|
|
|
route.Hops[failureSource-1].PubKeyBytes,
|
|
|
|
route.Hops[failureSource].PubKeyBytes,
|
|
|
|
), route.Hops[failureSource-1].AmtToForward
|
2019-06-26 09:49:16 +02:00
|
|
|
}
|