lnd/routing/probability_bimodal.go

package routing

import (
	"fmt"
	"math"
	"time"

	"github.com/btcsuite/btcd/btcutil"
	"github.com/go-errors/errors"
	"github.com/lightningnetwork/lnd/lnwire"
	"github.com/lightningnetwork/lnd/routing/route"
)

const (
// DefaultBimodalScaleMsat is the default value for BimodalScaleMsat in
// BimodalConfig. It describes the distribution of funds in the LN based
// on empirical findings. We assume an unbalanced network by default.
DefaultBimodalScaleMsat = lnwire.MilliSatoshi(300_000_000)
// DefaultBimodalNodeWeight is the default value for the
// BimodalNodeWeight in BimodalConfig. It is chosen such that past
// forwardings on other channels of a router are only slightly taken
// into account.
DefaultBimodalNodeWeight = 0.2
// DefaultBimodalDecayTime is the default value for BimodalDecayTime.
// We will forget about previous learnings about channel liquidity on
// the timescale of about a week.
DefaultBimodalDecayTime = 7 * 24 * time.Hour
// BimodalScaleMsatMax is the maximum value for BimodalScaleMsat. We
// limit it here to the fakeHopHintCapacity to avoid issues with hop
// hint probability calculations.
BimodalScaleMsatMax = lnwire.MilliSatoshi(
1000 * fakeHopHintCapacity / 4,
)
// BimodalEstimatorName is used to identify the bimodal estimator.
BimodalEstimatorName = "bimodal"
)
var (
	// ErrInvalidScale is returned when we get a scale that is zero or
	// exceeds the allowed maximum.
	ErrInvalidScale = errors.New("scale must be larger than zero and sane")
// ErrInvalidNodeWeight is returned when we get a node weight that is
// out of range.
ErrInvalidNodeWeight = errors.New("node weight must be in [0, 1]")
	// ErrInvalidDecayTime is returned when we get a decay time of zero or
	// below.
	ErrInvalidDecayTime = errors.New("decay time must be larger than zero")
// ErrZeroCapacity is returned when we encounter a channel with zero
// capacity in probability estimation.
ErrZeroCapacity = errors.New("capacity must be larger than zero")
)
// BimodalConfig contains configuration for our probability estimator.
type BimodalConfig struct {
// BimodalNodeWeight defines how strongly other previous forwardings on
// channels of a router should be taken into account when computing a
// channel's probability to route. The allowed values are in the range
// [0, 1], where a value of 0 means that only direct information about a
// channel is taken into account.
BimodalNodeWeight float64
// BimodalScaleMsat describes the scale over which channels
// statistically have some liquidity left. The value determines how
// quickly the bimodal distribution drops off from the edges of a
// channel. A larger value (compared to typical channel capacities)
// means that the drop off is slow and that channel balances are
// distributed more uniformly. A small value leads to the assumption of
// very unbalanced channels.
BimodalScaleMsat lnwire.MilliSatoshi
// BimodalDecayTime is the scale for the exponential information decay
// over time for previous successes or failures.
BimodalDecayTime time.Duration
}
// validate checks the configuration of the estimator for allowed values.
func (p BimodalConfig) validate() error {
if p.BimodalDecayTime <= 0 {
return fmt.Errorf("%v: %w", BimodalEstimatorName,
ErrInvalidDecayTime)
}
if p.BimodalNodeWeight < 0 || p.BimodalNodeWeight > 1 {
return fmt.Errorf("%v: %w", BimodalEstimatorName,
ErrInvalidNodeWeight)
}
if p.BimodalScaleMsat == 0 || p.BimodalScaleMsat > BimodalScaleMsatMax {
return fmt.Errorf("%v: %w", BimodalEstimatorName,
ErrInvalidScale)
}
return nil
}
// DefaultBimodalConfig returns the default configuration for the estimator.
func DefaultBimodalConfig() BimodalConfig {
return BimodalConfig{
BimodalNodeWeight: DefaultBimodalNodeWeight,
BimodalScaleMsat: DefaultBimodalScaleMsat,
BimodalDecayTime: DefaultBimodalDecayTime,
}
}
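// An illustrative usage sketch (the override value is hypothetical): start
// from the defaults and adjust a single field before constructing the
// estimator, which validates the configuration.
//
//	cfg := DefaultBimodalConfig()
//	cfg.BimodalDecayTime = 14 * 24 * time.Hour
//	estimator, err := NewBimodalEstimator(cfg)
//	if err != nil {
//		// A zero scale, an out-of-range node weight or a
//		// non-positive decay time is rejected here.
//	}
//	_ = estimator
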
// BimodalEstimator returns node and pair probabilities from historical
// payment results, based on a liquidity distribution model of the LN. Its
// main task is to estimate the direct channel probability from a depleted
// liquidity distribution model, with additional information decay over
// time. A per-node probability can be mixed with the direct probability,
// taking into account successes/failures on other channels of the forwarder.
type BimodalEstimator struct {
// BimodalConfig contains configuration options for our estimator.
BimodalConfig
}
// NewBimodalEstimator creates a new BimodalEstimator.
func NewBimodalEstimator(cfg BimodalConfig) (*BimodalEstimator, error) {
if err := cfg.validate(); err != nil {
return nil, err
}
return &BimodalEstimator{
BimodalConfig: cfg,
}, nil
}
// Compile-time checks that interfaces are implemented.
var _ Estimator = (*BimodalEstimator)(nil)
var _ estimatorConfig = (*BimodalConfig)(nil)
// Config returns the current configuration of the estimator.
func (p *BimodalEstimator) Config() estimatorConfig {
return p.BimodalConfig
}
// String returns the estimator's configuration as a string representation.
func (p *BimodalEstimator) String() string {
return fmt.Sprintf("estimator type: %v, decay time: %v, liquidity "+
"scale: %v, node weight: %v", BimodalEstimatorName,
p.BimodalDecayTime, p.BimodalScaleMsat, p.BimodalNodeWeight)
}
// PairProbability estimates the probability of successfully traversing to
// toNode based on historical payment outcomes for the from node. Those outcomes
// are passed in via the results parameter.
func (p *BimodalEstimator) PairProbability(now time.Time,
results NodeResults, toNode route.Vertex, amt lnwire.MilliSatoshi,
capacity btcutil.Amount) float64 {
// We first compute the probability for the desired hop taking into
// account previous knowledge.
directProbability := p.directProbability(
now, results, toNode, amt, lnwire.NewMSatFromSatoshis(capacity),
)
// The final probability is computed by taking into account other
// channels of the from node.
return p.calculateProbability(directProbability, now, results, toNode)
}
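// A minimal sketch of a query against the estimator, assuming the
// TimedPairResult type backing NodeResults (all values are hypothetical):
//
//	est, _ := NewBimodalEstimator(DefaultBimodalConfig())
//	now := time.Now()
//	var toNode route.Vertex
//	results := NodeResults{
//		toNode: TimedPairResult{
//			FailTime: now.Add(-time.Hour),
//			FailAmt:  lnwire.MilliSatoshi(400_000_000),
//		},
//	}
//	p := est.PairProbability(
//		now, results, toNode, 100_000_000, btcutil.Amount(1_000_000),
//	)
//	_ = p
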
// LocalPairProbability computes the probability to reach toNode given a set of
// previous learnings.
func (p *BimodalEstimator) LocalPairProbability(now time.Time,
results NodeResults, toNode route.Vertex) float64 {
	// For direct local probabilities we assume exact knowledge of how
	// much we can send over a channel, provided that the channel is
	// active and has enough liquidity.
directProbability := 1.0
// If we had an unexpected failure for this node, we reduce the
// probability for some time to avoid infinite retries.
result, ok := results[toNode]
if ok && !result.FailTime.IsZero() {
timeAgo := now.Sub(result.FailTime)
		// We only expect results in the past; clamping keeps the
		// resulting probability within [0, 1].
if timeAgo < 0 {
timeAgo = 0
}
exponent := -float64(timeAgo) / float64(p.BimodalDecayTime)
directProbability -= math.Exp(exponent)
}
return directProbability
}
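// As a worked example with the default decay time of one week: a failure one
// hour ago yields 1 - e^(-1h/168h) ≈ 0.006, so the node is strongly avoided;
// after one day the probability recovers to 1 - e^(-24h/168h) ≈ 0.13, and
// after a full week to 1 - e^(-1) ≈ 0.63.
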
// directProbability computes the probability to reach a node based on the
// liquidity distribution in the LN.
func (p *BimodalEstimator) directProbability(now time.Time,
results NodeResults, toNode route.Vertex, amt lnwire.MilliSatoshi,
capacity lnwire.MilliSatoshi) float64 {
	// We first determine the time-adjusted success and failure amounts
	// and then compute a probability from them. We know that we can send
	// a zero amount.
successAmount := lnwire.MilliSatoshi(0)
// We know that we cannot send the full capacity.
failAmount := capacity
// If we have information about past successes or failures, we modify
// them with a time decay.
result, ok := results[toNode]
if ok {
// Apply a time decay for the amount we cannot send.
if !result.FailTime.IsZero() {
failAmount = cannotSend(
result.FailAmt, capacity, now, result.FailTime,
p.BimodalDecayTime,
)
}
// Apply a time decay for the amount we can send.
if !result.SuccessTime.IsZero() {
successAmount = canSend(
result.SuccessAmt, now, result.SuccessTime,
p.BimodalDecayTime,
)
}
}
// Compute the direct channel probability.
probability, err := p.probabilityFormula(
capacity, successAmount, failAmount, amt,
)
if err != nil {
log.Errorf("error computing probability to node: %v "+
"(node: %v, results: %v, amt: %v, capacity: %v)",
err, toNode, results, amt, capacity)
return 0.0
}
return probability
}
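// A sketch of this flow with hypothetical numbers: for a channel of capacity
// 1,000,000,000 msat, a failure of 500,000,000 msat recorded half a decay
// time ago relaxes to failAmount = 1e9 - e^(-0.5)*5e8 ≈ 696,700,000 msat via
// cannotSend, while a success of 100,000,000 msat from the same moment decays
// to successAmount = e^(-0.5)*1e8 ≈ 60,650,000 msat via canSend; both are
// then passed to probabilityFormula.
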
// calculateProbability computes the total hop probability combining the channel
// probability and historic forwarding data of other channels of the node we try
// to send from.
//
// Goals:
// * We want to incentivize good routing nodes: the more routable channels a
// node has, the more we want to incentivize using it (and vice versa for
// failures).
// -> We reduce/increase the direct probability depending on past
// failures/successes for other channels of the node.
//
// * We want to be forgiving/give other nodes a chance as well: we want to
// forget about (non-)routable channels over time.
// -> We weight the successes/failures with a time decay such that they will not
// influence the total probability if a long time went by.
//
// * If we don't have other info, we want to solely rely on the direct
// probability.
//
// * We want to be able to specify how important the other channels are compared
// to the direct channel.
// -> Introduce a node weight factor that weights the direct probability against
// the node-wide average. The larger the node weight, the more important other
// channels of the node are.
//
// How do failures on low fee nodes redirect routing to higher fee nodes?
// Assumptions:
// * attemptCostPPM of 1000 PPM
// * constant direct channel probability of P0 (usually 0.5 for large amounts)
// * node weight w of 0.2
//
// The question we want to answer is:
// How often would a zero-fee node be tried (even if there were failures for its
// other channels) over trying a high-fee node with 2000 PPM and no direct
// knowledge about the channel to send over?
//
// The probability of a route of length l is P(l) = P0^l, since every hop has
// to succeed independently.
//
// The total probability after n failures (with the implemented method here) is:
// P(l, n) = P(l-1) * P(n)
// = P(l-1) * (P0 + n*0) / (1 + n*w)
// = P(l) / (1 + n*w)
//
// Condition for a high-fee channel to overcome a low fee channel in the
// Dijkstra weight function (only looking at fee and probability PPM terms):
// highFeePPM + attemptCostPPM * 1/P(l) = 0PPM + attemptCostPPM * 1/P(l, n)
// highFeePPM/attemptCostPPM = 1/P(l, n) - 1/P(l) =
// = (1 + n*w)/P(l) - 1/P(l) =
// = n*w/P(l)
//
// Therefore:
// n = (highFeePPM/attemptCostPPM) * (P(l)/w) =
// = (2000/1000) * 0.5^l / w = 2 * 0.5^l / w
//
// For a one-hop route we get:
// n = 2 * 0.5 / 0.2 = 5 tolerated failures
//
// For a three-hop route we get:
// n = 2 * 0.125 / 0.2 = 1.25 tolerated failures, since the lower route
// probability lets the attempt cost penalty dominate earlier.
//
// For more details on the behavior see tests.
func (p *BimodalEstimator) calculateProbability(directProbability float64,
now time.Time, results NodeResults, toNode route.Vertex) float64 {
// If we don't take other channels into account, we can return early.
if p.BimodalNodeWeight == 0.0 {
return directProbability
}
	// If we have up-to-date information about the channel we want to
	// use, i.e., the info stems from results that are no older than the
	// decay time, we only use the direct probability. This is needed to
	// avoid having previous results (on all other channels of the same
	// routing node) distort and pin the calculated probability even
	// though we have accurate direct information. It also helps the
	// probability dip below the min probability in case of failures,
	// which starts the splitting process.
directResult, ok := results[toNode]
if ok {
latest := directResult.SuccessTime
if directResult.FailTime.After(latest) {
latest = directResult.FailTime
}
		// We use BimodalDecayTime to judge the recency of the data.
		// It is the time scale on which we assume to have lost
		// information.
if now.Sub(latest) < p.BimodalDecayTime {
log.Tracef("Using direct probability for node %v: %v",
toNode, directResult)
return directProbability
}
}
	// w is a parameter which determines how strongly the other channels
	// of a node should be incorporated; the higher the value, the
	// stronger their influence.
w := p.BimodalNodeWeight
	// dt is the time scale on which previous successes/failures lose
	// their influence.
dt := float64(p.BimodalDecayTime)
	// The direct channel probability is weighted fully, while all other
	// results are weighted according to how recent the information is.
totalProbabilities := directProbability
totalWeights := 1.0
for peer, result := range results {
// We don't include the direct hop probability here because it
// is already included in totalProbabilities.
if peer == toNode {
continue
}
// We add probabilities weighted by how recent the info is.
var weight float64
if result.SuccessAmt > 0 {
exponent := -float64(now.Sub(result.SuccessTime)) / dt
weight = math.Exp(exponent)
totalProbabilities += w * weight
totalWeights += w * weight
}
if result.FailAmt > 0 {
exponent := -float64(now.Sub(result.FailTime)) / dt
weight = math.Exp(exponent)
// Failures don't add to total success probability.
totalWeights += w * weight
}
}
return totalProbabilities / totalWeights
}
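// A worked example of the mixing, assuming w = 0.2 and fresh results
// (weight ≈ 1): with a direct probability of 0.5 and two recently failed
// other channels, the estimate drops to 0.5 / (1 + 2*0.2) ≈ 0.36, while a
// single recent success on another channel instead lifts it to
// (0.5 + 0.2) / (1 + 0.2) ≈ 0.58.
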
// canSend returns the sendable amount over the channel, respecting time
// decay. canSend approaches zero if we wait much longer than the decay time.
func canSend(successAmount lnwire.MilliSatoshi, now, successTime time.Time,
decayConstant time.Duration) lnwire.MilliSatoshi {
	// The factor approaches 0 when successTime is far in the past and is
	// 1 when successTime is now.
factor := math.Exp(
-float64(now.Sub(successTime)) / float64(decayConstant),
)
canSend := factor * float64(successAmount)
return lnwire.MilliSatoshi(canSend)
}
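// For example, with the default decay time of one week, a success of
// 1,000,000 msat observed exactly one week ago decays to
// e^(-1) * 1,000,000 ≈ 367,879 msat.
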
// cannotSend returns the unsendable amount over the channel, respecting time
// decay. cannotSend approaches the capacity if we wait much longer than the
// decay time.
func cannotSend(failAmount, capacity lnwire.MilliSatoshi, now,
failTime time.Time, decayConstant time.Duration) lnwire.MilliSatoshi {
if failAmount > capacity {
failAmount = capacity
}
// The factor approaches 0 for failTime a long time in the past and it
// is 1 when the failTime is now.
factor := math.Exp(
-float64(now.Sub(failTime)) / float64(decayConstant),
)
cannotSend := capacity - lnwire.MilliSatoshi(
factor*float64(capacity-failAmount),
)
return cannotSend
}
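// For example, with a capacity of 1,000,000 msat and a failure of
// 600,000 msat one decay time ago, the unsendable amount relaxes to
// 1,000,000 - e^(-1) * 400,000 ≈ 852,848 msat, drifting back toward the full
// capacity.
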
// primitive computes the indefinite integral of our assumed (normalized)
// liquidity probability distribution. The distribution of liquidity x here is
// the function P(x) ~ exp(-x/s) + exp((x-c)/s), i.e., two exponentials residing
// at the ends of channels. This means that we expect liquidity to be at either
// side of the channel with capacity c. The s parameter (scale) defines how far
// the liquidity leaks into the channel. A very low scale assumes completely
// unbalanced channels, a very high scale assumes a random distribution. More
// details can be found in
// https://github.com/lightningnetwork/lnd/issues/5988#issuecomment-1131234858.
func (p *BimodalEstimator) primitive(c, x float64) float64 {
s := float64(p.BimodalScaleMsat)
	// The indefinite integral of P(x) is given by
	// Int P(x) dx = H(x) = s * (-e(-x/s) + e((x-c)/s)),
	// and its norm from 0 to c can be computed from it,
	// norm = [H(x)]_0^c = s * (-e(-c/s) + 1 - (-1 + e(-c/s)))
	//      = s * (2 - 2*e(-c/s)).
	// The common factor s cancels in the ratio H(x)/norm, which is why it
	// is dropped in the code below.
ecs := math.Exp(-c / s)
exs := math.Exp(-x / s)
// It would be possible to split the next term and reuse the factors
// from before, but this can lead to numerical issues with large
// numbers.
excs := math.Exp((x - c) / s)
	// norm can only become zero if c is zero, which we ruled out before
	// calling this method.
norm := -2*ecs + 2
// We end up with the primitive function of the normalized P(x).
return (-exs + excs) / norm
}
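// A quick consistency check (assuming c > 0): the normalized distribution
// must integrate to one over the full range, and indeed
//
//	primitive(c, c) - primitive(c, 0)
//	  = ((-e(-c/s) + 1) - (-1 + e(-c/s))) / norm
//	  = (2 - 2*e(-c/s)) / (2 - 2*e(-c/s)) = 1.
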
// integral computes the integral of our liquidity distribution from the lower
// to the upper value.
func (p *BimodalEstimator) integral(capacity, lower, upper float64) float64 {
	if lower < 0 || lower > upper {
		log.Errorf("probability integral limits nonsensical: "+
			"capacity: %v, lower: %v, upper: %v",
			capacity, lower, upper)
		return 0.0
	}
return p.primitive(capacity, upper) - p.primitive(capacity, lower)
}
// probabilityFormula computes the expected probability for a payment of
// amountMsat given prior learnings for a channel of certain capacity.
// successAmountMsat and failAmountMsat stand for the unsettled success and
// failure amounts, respectively. The formula is derived using the formalism
// presented in Pickhardt et al., https://arxiv.org/abs/2103.08576.
func (p *BimodalEstimator) probabilityFormula(capacityMsat, successAmountMsat,
failAmountMsat, amountMsat lnwire.MilliSatoshi) (float64, error) {
// Convert to positive-valued floats.
capacity := float64(capacityMsat)
successAmount := float64(successAmountMsat)
failAmount := float64(failAmountMsat)
amount := float64(amountMsat)
// In order for this formula to give reasonable results, we need to have
// an estimate of the capacity of a channel (or edge between nodes).
if capacity == 0.0 {
return 0, ErrZeroCapacity
}
// We cannot send more than the capacity.
if amount > capacity {
return 0.0, nil
}
// Mission control may have some outdated values, we correct them here.
// TODO(bitromortac): there may be better decisions to make in these
// cases, e.g., resetting failAmount=cap and successAmount=0.
// failAmount should be capacity at max.
if failAmount > capacity {
log.Debugf("Correcting failAmount %v to capacity %v",
failAmount, capacity)
failAmount = capacity
}
// successAmount should be capacity at max.
if successAmount > capacity {
log.Debugf("Correcting successAmount %v to capacity %v",
successAmount, capacity)
successAmount = capacity
}
	// The next statement is a safety check against an illogical
	// condition; without it, the renormalization integral would become
	// zero. This may happen if a large channel gets closed and smaller
	// ones remain, but it should recover with the time decay.
if failAmount <= successAmount {
		log.Tracef("fail amount (%v) is smaller than or equal to "+
			"the success amount (%v) for capacity (%v)",
			failAmountMsat, successAmountMsat, capacityMsat)
return 0.0, nil
}
// We cannot send more than the fail amount.
if amount >= failAmount {
return 0.0, nil
}
// The success probability for payment amount a is the integral over the
// prior distribution P(x), the probability to find liquidity between
// the amount a and channel capacity c (or failAmount a_f):
// P(X >= a | X < a_f) = Integral_{a}^{a_f} P(x) dx
prob := p.integral(capacity, amount, failAmount)
if math.IsNaN(prob) {
return 0.0, fmt.Errorf("non-normalized probability is NaN, "+
"capacity: %v, amount: %v, fail amount: %v",
capacity, amount, failAmount)
}
// If we have payment information, we need to adjust the prior
// distribution P(x) and get the posterior distribution by renormalizing
// the prior distribution in such a way that the probability mass lies
// between a_s and a_f.
reNorm := p.integral(capacity, successAmount, failAmount)
if math.IsNaN(reNorm) {
return 0.0, fmt.Errorf("normalization factor is NaN, "+
"capacity: %v, success amount: %v, fail amount: %v",
capacity, successAmount, failAmount)
}
// The normalization factor can only be zero if the success amount is
// equal or larger than the fail amount. This should not happen as we
// have checked this scenario above.
if reNorm == 0.0 {
return 0.0, fmt.Errorf("normalization factor is zero, "+
"capacity: %v, success amount: %v, fail amount: %v",
capacity, successAmount, failAmount)
}
prob /= reNorm
// Note that for payment amounts smaller than successAmount, we can get
// a value larger than unity, which we cap here to get a proper
// probability.
if prob > 1.0 {
if amount > successAmount {
return 0.0, fmt.Errorf("unexpected large probability "+
"(%v) capacity: %v, amount: %v, success "+
"amount: %v, fail amount: %v", prob, capacity,
amount, successAmount, failAmount)
}
return 1.0, nil
} else if prob < 0.0 {
return 0.0, fmt.Errorf("negative probability "+
"(%v) capacity: %v, amount: %v, success "+
"amount: %v, fail amount: %v", prob, capacity,
amount, successAmount, failAmount)
}
return prob, nil
}
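// A worked example of the formula, assuming no prior knowledge
// (successAmount = 0, failAmount = capacity c): for an amount of exactly half
// the capacity, primitive(c, c/2) = 0 and primitive(c, c) = 1/2, so the
// success probability is 1/2 regardless of the scale. Smaller amounts yield
// probabilities above 1/2 and larger amounts below, with the scale
// controlling how quickly the curve falls off between these regimes.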