package routing

import (
	"fmt"
	"math"
	"time"

	"github.com/btcsuite/btcd/btcutil"
	"github.com/go-errors/errors"
	"github.com/lightningnetwork/lnd/lnwire"
	"github.com/lightningnetwork/lnd/routing/route"
)

const (
	// DefaultBimodalScaleMsat is the default value for BimodalScaleMsat
	// in BimodalConfig. It describes the distribution of funds in the LN
	// based on empirical findings. We assume an unbalanced network by
	// default.
	DefaultBimodalScaleMsat = lnwire.MilliSatoshi(300_000_000)

	// DefaultBimodalNodeWeight is the default value for the
	// BimodalNodeWeight in BimodalConfig. It is chosen such that past
	// forwardings on other channels of a router are only slightly taken
	// into account.
	DefaultBimodalNodeWeight = 0.2

	// DefaultBimodalDecayTime is the default value for BimodalDecayTime.
	// We will forget about previous learnings about channel liquidity on
	// the timescale of about a week.
	DefaultBimodalDecayTime = 7 * 24 * time.Hour

	// BimodalScaleMsatMax is the maximum value for BimodalScaleMsat. We
	// limit it here to the fakeHopHintCapacity to avoid issues with hop
	// hint probability calculations.
	BimodalScaleMsatMax = lnwire.MilliSatoshi(
		1000 * fakeHopHintCapacity / 4,
	)

	// BimodalEstimatorName is used to identify the bimodal estimator.
	BimodalEstimatorName = "bimodal"
)

var (
	// ErrInvalidScale is returned when we get a scale below or equal
	// zero.
	ErrInvalidScale = errors.New("scale must be >= 0 and sane")

	// ErrInvalidNodeWeight is returned when we get a node weight that is
	// out of range.
	ErrInvalidNodeWeight = errors.New("node weight must be in [0, 1]")

	// ErrInvalidDecayTime is returned when we get a decay time below
	// zero.
	ErrInvalidDecayTime = errors.New("decay time must be larger than zero")

	// ErrZeroCapacity is returned when we encounter a channel with zero
	// capacity in probability estimation.
	ErrZeroCapacity = errors.New("capacity must be larger than zero")
)

// BimodalConfig contains configuration for our probability estimator.
type BimodalConfig struct {
	// BimodalNodeWeight defines how strongly other previous forwardings
	// on channels of a router should be taken into account when computing
	// a channel's probability to route. The allowed values are in the
	// range [0, 1], where a value of 0 means that only direct information
	// about a channel is taken into account.
	BimodalNodeWeight float64

	// BimodalScaleMsat describes the scale over which channels
	// statistically have some liquidity left. The value determines how
	// quickly the bimodal distribution drops off from the edges of a
	// channel. A larger value (compared to typical channel capacities)
	// means that the drop off is slow and that channel balances are
	// distributed more uniformly. A small value leads to the assumption
	// of very unbalanced channels.
	BimodalScaleMsat lnwire.MilliSatoshi

	// BimodalDecayTime is the scale for the exponential information decay
	// over time for previous successes or failures.
	BimodalDecayTime time.Duration
}

// validate checks the configuration of the estimator for allowed values.
func (p BimodalConfig) validate() error {
	if p.BimodalDecayTime <= 0 {
		return fmt.Errorf("%v: %w", BimodalEstimatorName,
			ErrInvalidDecayTime)
	}

	if p.BimodalNodeWeight < 0 || p.BimodalNodeWeight > 1 {
		return fmt.Errorf("%v: %w", BimodalEstimatorName,
			ErrInvalidNodeWeight)
	}

	if p.BimodalScaleMsat == 0 || p.BimodalScaleMsat > BimodalScaleMsatMax {
		return fmt.Errorf("%v: %w", BimodalEstimatorName,
			ErrInvalidScale)
	}

	return nil
}
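// exampleValidateConfig is an illustrative sketch and not part of lnd's API.
// It shows how validate rejects a nonsensical configuration; the concrete
// values below are assumptions chosen for demonstration only.
func exampleValidateConfig() error {
	cfg := BimodalConfig{
		// A node weight of 1.5 lies outside the allowed range [0, 1]
		// and is expected to trigger ErrInvalidNodeWeight.
		BimodalNodeWeight: 1.5,
		BimodalScaleMsat:  DefaultBimodalScaleMsat,
		BimodalDecayTime:  DefaultBimodalDecayTime,
	}

	return cfg.validate()
}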
// DefaultBimodalConfig returns the default configuration for the estimator.
func DefaultBimodalConfig() BimodalConfig {
	return BimodalConfig{
		BimodalNodeWeight: DefaultBimodalNodeWeight,
		BimodalScaleMsat:  DefaultBimodalScaleMsat,
		BimodalDecayTime:  DefaultBimodalDecayTime,
	}
}

// BimodalEstimator returns node and pair probabilities based on historical
// payment results and a liquidity distribution model of the LN. The main
// function is to estimate the direct channel probability based on a depleted
// liquidity distribution model, with additional information decay over time.
// A per-node probability can be mixed with the direct probability, taking
// into account successes/failures on other channels of the forwarder.
type BimodalEstimator struct {
	// BimodalConfig contains configuration options for our estimator.
	BimodalConfig
}

// NewBimodalEstimator creates a new BimodalEstimator.
func NewBimodalEstimator(cfg BimodalConfig) (*BimodalEstimator, error) {
	if err := cfg.validate(); err != nil {
		return nil, err
	}

	return &BimodalEstimator{
		BimodalConfig: cfg,
	}, nil
}

// Compile-time checks that interfaces are implemented.
var _ Estimator = (*BimodalEstimator)(nil)
var _ estimatorConfig = (*BimodalConfig)(nil)

// Config returns the current configuration of the estimator.
func (p *BimodalEstimator) Config() estimatorConfig {
	return p.BimodalConfig
}

// String returns the estimator's configuration as a string representation.
func (p *BimodalEstimator) String() string {
	return fmt.Sprintf("estimator type: %v, decay time: %v, liquidity "+
		"scale: %v, node weight: %v", BimodalEstimatorName,
		p.BimodalDecayTime, p.BimodalScaleMsat, p.BimodalNodeWeight)
}

// PairProbability estimates the probability of successfully traversing to
// toNode based on historical payment outcomes for the from node. Those
// outcomes are passed in via the results parameter.
func (p *BimodalEstimator) PairProbability(now time.Time,
	results NodeResults, toNode route.Vertex, amt lnwire.MilliSatoshi,
	capacity btcutil.Amount) float64 {

	// We first compute the probability for the desired hop taking into
	// account previous knowledge.
	directProbability := p.directProbability(
		now, results, toNode, amt, lnwire.NewMSatFromSatoshis(capacity),
	)

	// The final probability is computed by taking into account other
	// channels of the from node.
	return p.calculateProbability(directProbability, now, results, toNode)
}

// LocalPairProbability computes the probability to reach toNode given a set
// of previous learnings.
func (p *BimodalEstimator) LocalPairProbability(now time.Time,
	results NodeResults, toNode route.Vertex) float64 {

	// For direct local probabilities we assume to know exactly how much
	// we can send over a channel, which assumes that channels are active
	// and have enough liquidity.
	directProbability := 1.0

	// If we had an unexpected failure for this node, we reduce the
	// probability for some time to avoid infinite retries.
	result, ok := results[toNode]
	if ok && !result.FailTime.IsZero() {
		timeAgo := now.Sub(result.FailTime)

		// We only expect results in the past to get a probability
		// between 0 and 1.
		if timeAgo < 0 {
			timeAgo = 0
		}

		exponent := -float64(timeAgo) / float64(p.BimodalDecayTime)
		directProbability -= math.Exp(exponent)
	}

	return directProbability
}
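// examplePairProbability is an illustrative sketch and not part of lnd's API.
// It shows how a pair probability could be queried from a freshly created
// estimator without prior payment results. The amount, capacity and the
// zero-valued toNode are assumptions for demonstration only.
func examplePairProbability() (float64, error) {
	estimator, err := NewBimodalEstimator(DefaultBimodalConfig())
	if err != nil {
		return 0, err
	}

	// Without any results, the estimate is purely based on the assumed
	// liquidity distribution of the channel.
	var toNode route.Vertex
	results := make(NodeResults)

	// Probability to send 100k sat over a channel of 1M sat capacity.
	probability := estimator.PairProbability(
		time.Now(), results, toNode,
		lnwire.NewMSatFromSatoshis(100_000),
		btcutil.Amount(1_000_000),
	)

	return probability, nil
}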
// directProbability computes the probability to reach a node based on the
// liquidity distribution in the LN.
func (p *BimodalEstimator) directProbability(now time.Time,
	results NodeResults, toNode route.Vertex, amt lnwire.MilliSatoshi,
	capacity lnwire.MilliSatoshi) float64 {

	// We first determine the time-adjusted success and failure amounts to
	// then compute a probability. We know that we can send a zero amount.
	successAmount := lnwire.MilliSatoshi(0)

	// We know that we cannot send the full capacity.
	failAmount := capacity

	// If we have information about past successes or failures, we modify
	// them with a time decay.
	result, ok := results[toNode]
	if ok {
		// Apply a time decay for the amount we cannot send.
		if !result.FailTime.IsZero() {
			failAmount = cannotSend(
				result.FailAmt, capacity, now,
				result.FailTime, p.BimodalDecayTime,
			)
		}

		// Apply a time decay for the amount we can send.
		if !result.SuccessTime.IsZero() {
			successAmount = canSend(
				result.SuccessAmt, now, result.SuccessTime,
				p.BimodalDecayTime,
			)
		}
	}

	// Compute the direct channel probability.
	probability, err := p.probabilityFormula(
		capacity, successAmount, failAmount, amt,
	)
	if err != nil {
		log.Errorf("error computing probability to node: %v "+
			"(node: %v, results: %v, amt: %v, capacity: %v)",
			err, toNode, results, amt, capacity)

		return 0.0
	}

	return probability
}
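// exampleDirectProbabilityWithFailure is an illustrative sketch and not part
// of lnd's API. It demonstrates how a recent failure lowers the direct
// probability via the time-decayed fail amount. Amounts and times are
// assumptions for demonstration only; NodeResults is used as the map type it
// is elsewhere in this package.
func exampleDirectProbabilityWithFailure(p *BimodalEstimator) float64 {
	var toNode route.Vertex
	now := time.Now()
	capacity := lnwire.NewMSatFromSatoshis(1_000_000)

	// Record a failure to send 200k sat one hour ago. With a decay time
	// on the order of days, this failure still weighs heavily.
	results := make(NodeResults)
	result := results[toNode]
	result.FailTime = now.Add(-time.Hour)
	result.FailAmt = lnwire.NewMSatFromSatoshis(200_000)
	results[toNode] = result

	// Probability to send 100k sat, which is below the failed amount but
	// still limited by the decayed fail amount.
	return p.directProbability(
		now, results, toNode, lnwire.NewMSatFromSatoshis(100_000),
		capacity,
	)
}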
// calculateProbability computes the total hop probability combining the
// channel probability and historic forwarding data of other channels of the
// node we try to send from.
//
// Goals:
// * We want to incentivize good routing nodes: the more routable channels a
// node has, the more we want to incentivize (vice versa for failures).
// -> We reduce/increase the direct probability depending on past
// failures/successes for other channels of the node.
//
// * We want to be forgiving/give other nodes a chance as well: we want to
// forget about (non-)routable channels over time.
// -> We weight the successes/failures with a time decay such that they will
// not influence the total probability if a long time went by.
//
// * If we don't have other info, we want to solely rely on the direct
// probability.
//
// * We want to be able to specify how important the other channels are
// compared to the direct channel.
// -> Introduce a node weight factor that weights the direct probability
// against the node-wide average. The larger the node weight, the more
// important other channels of the node are.
//
// How do failures on low fee nodes redirect routing to higher fee nodes?
// Assumptions:
// * attemptCostPPM of 1000 PPM
// * constant direct channel probability of P0 (usually 0.5 for large amounts)
// * node weight w of 0.2
//
// The question we want to answer is:
// How often would a zero-fee node be tried (even if there were failures for
// its other channels) over trying a high-fee node with 2000 PPM and no direct
// knowledge about the channel to send over?
//
// The probability of a route of length l is P(l) = l * P0.
//
// The total probability after n failures (with the implemented method here)
// is:
// P(l, n) = P(l-1) * P(n)
//         = P(l-1) * (P0 + n*0) / (1 + n*w)
//         = P(l) / (1 + n*w)
//
// Condition for a high-fee channel to overcome a low fee channel in the
// Dijkstra weight function (only looking at fee and probability PPM terms):
// highFeePPM + attemptCostPPM * 1/P(l) = 0PPM + attemptCostPPM * 1/P(l, n)
// highFeePPM/attemptCostPPM = 1/P(l, n) - 1/P(l) =
//                           = (1 + n*w)/P(l) - 1/P(l) =
//                           = n*w/P(l)
//
// Therefore:
// n = (highFeePPM/attemptCostPPM) * (P(l)/w) =
//   = (2000/1000) * 0.5 * l / w = l/w
//
// For a one-hop route we get:
// n = 1/0.2 = 5 tolerated failures
//
// For a three-hop route we get:
// n = 3/0.2 = 15 tolerated failures
//
// For more details on the behavior see tests.
func (p *BimodalEstimator) calculateProbability(directProbability float64,
	now time.Time, results NodeResults, toNode route.Vertex) float64 {

	// If we don't take other channels into account, we can return early.
	if p.BimodalNodeWeight == 0.0 {
		return directProbability
	}

	// If we have up-to-date information about the channel we want to use,
	// i.e., the info stems from results that are not older than the decay
	// time, we will only use the direct probability. This is needed in
	// order to avoid that other previous results (on all other channels
	// of the same routing node) will distort and pin the calculated
	// probability even if we have accurate direct information. This helps
	// to dip the probability below the min probability in case of
	// failures, to start the splitting process.
	directResult, ok := results[toNode]
	if ok {
		latest := directResult.SuccessTime
		if directResult.FailTime.After(latest) {
			latest = directResult.FailTime
		}

		// We use BimodalDecayTime to judge the currentness of the
		// data. It is the time scale on which we assume to have lost
		// information.
		if now.Sub(latest) < p.BimodalDecayTime {
			log.Tracef("Using direct probability for node %v: %v",
				toNode, directResult)

			return directProbability
		}
	}

	// w is a parameter which determines how strongly the other channels
	// of a node should be incorporated, the higher the stronger.
	w := p.BimodalNodeWeight

	// dt determines the timeliness of the previous successes/failures to
	// be taken into account.
	dt := float64(p.BimodalDecayTime)

	// The direct channel probability is weighted fully, all other results
	// are weighted according to how recent the information is.
	totalProbabilities := directProbability
	totalWeights := 1.0

	for peer, result := range results {
		// We don't include the direct hop probability here because it
		// is already included in totalProbabilities.
		if peer == toNode {
			continue
		}

		// We add probabilities weighted by how recent the info is.
		var weight float64
		if result.SuccessAmt > 0 {
			exponent := -float64(now.Sub(result.SuccessTime)) / dt
			weight = math.Exp(exponent)
			totalProbabilities += w * weight
			totalWeights += w * weight
		}
		if result.FailAmt > 0 {
			exponent := -float64(now.Sub(result.FailTime)) / dt
			weight = math.Exp(exponent)

			// Failures don't add to the total success
			// probability.
			totalWeights += w * weight
		}
	}

	return totalProbabilities / totalWeights
}
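// exampleNodeMixing is an illustrative sketch and not part of lnd's API. It
// shows how a recent success on another channel of the same node is mixed
// into the final probability, weighted by BimodalNodeWeight. All values are
// assumptions for demonstration only.
func exampleNodeMixing(p *BimodalEstimator, directProbability float64,
	toNode, otherPeer route.Vertex) float64 {

	now := time.Now()

	// A success on a different channel of the node half an hour ago
	// nudges the probability for toNode upwards.
	results := make(NodeResults)
	other := results[otherPeer]
	other.SuccessTime = now.Add(-30 * time.Minute)
	other.SuccessAmt = lnwire.NewMSatFromSatoshis(50_000)
	results[otherPeer] = other

	return p.calculateProbability(directProbability, now, results, toNode)
}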
// canSend returns the sendable amount over the channel, respecting time
// decay. canSend approaches zero if we wait for a time much longer than the
// decay time.
func canSend(successAmount lnwire.MilliSatoshi, now, successTime time.Time,
	decayConstant time.Duration) lnwire.MilliSatoshi {

	// The factor approaches 0 for successTime a long time in the past,
	// is 1 when the successTime is now.
	factor := math.Exp(
		-float64(now.Sub(successTime)) / float64(decayConstant),
	)

	canSend := factor * float64(successAmount)

	return lnwire.MilliSatoshi(canSend)
}

// cannotSend returns the amount that cannot be sent over the channel,
// respecting time decay. cannotSend approaches the capacity if we wait for a
// time much longer than the decay time.
func cannotSend(failAmount, capacity lnwire.MilliSatoshi, now,
	failTime time.Time, decayConstant time.Duration) lnwire.MilliSatoshi {

	if failAmount > capacity {
		failAmount = capacity
	}

	// The factor approaches 0 for failTime a long time in the past and it
	// is 1 when the failTime is now.
	factor := math.Exp(
		-float64(now.Sub(failTime)) / float64(decayConstant),
	)

	cannotSend := capacity - lnwire.MilliSatoshi(
		factor*float64(capacity-failAmount),
	)

	return cannotSend
}

// primitive computes the indefinite integral of our assumed (normalized)
// liquidity probability distribution. The distribution of liquidity x here is
// the function P(x) ~ exp(-x/s) + exp((x-c)/s), i.e., two exponentials
// residing at the ends of channels. This means that we expect liquidity to be
// at either side of the channel with capacity c. The s parameter (scale)
// defines how far the liquidity leaks into the channel. A very low scale
// assumes completely unbalanced channels, a very high scale assumes a random
// distribution. More details can be found in
// https://github.com/lightningnetwork/lnd/issues/5988#issuecomment-1131234858.
func (p *BimodalEstimator) primitive(c, x float64) float64 {
	s := float64(p.BimodalScaleMsat)

	// The indefinite integral of P(x) is given by
	// Int P(x) dx = H(x) = s * (-e(-x/s) + e((x-c)/s)),
	// and its norm from 0 to c can be computed from it,
	// norm = [H(x)]_0^c = s * (2 - 2*e(-c/s)).
	ecs := math.Exp(-c / s)
	exs := math.Exp(-x / s)

	// It would be possible to split the next term and reuse the factors
	// from before, but this can lead to numerical issues with large
	// numbers.
	excs := math.Exp((x - c) / s)

	// norm can only become zero, if c is zero, which we sorted out before
	// calling this method.
	norm := -2*ecs + 2

	// We end up with the primitive function of the normalized P(x), in
	// which the overall factor s cancels.
	return (-exs + excs) / norm
}

// integral computes the integral of our liquidity distribution from the lower
// to the upper value.
func (p *BimodalEstimator) integral(capacity, lower, upper float64) float64 {
	if lower < 0 || lower > upper {
		log.Errorf("probability integral limits nonsensical: "+
			"capacity: %v lower: %v upper: %v", capacity, lower,
			upper)

		return 0.0
	}

	return p.primitive(capacity, upper) - p.primitive(capacity, lower)
}
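// exampleFullIntegral is an illustrative sketch and not part of lnd's API. It
// checks that the assumed liquidity distribution is normalized: integrating
// P(x) over the full channel range [0, c] should give a value close to one.
// The capacity is an assumption for demonstration only.
func exampleFullIntegral(p *BimodalEstimator) float64 {
	capacity := float64(lnwire.NewMSatFromSatoshis(5_000_000))

	// Expected to be approximately 1, up to floating point error.
	return p.integral(capacity, 0, capacity)
}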
// probabilityFormula computes the expected probability for a payment of
// amountMsat given prior learnings for a channel of certain capacity.
// successAmountMsat and failAmountMsat stand for the unsettled success and
// failure amounts, respectively. The formula is derived using the formalism
// presented in Pickhardt et al., https://arxiv.org/abs/2103.08576.
func (p *BimodalEstimator) probabilityFormula(capacityMsat, successAmountMsat,
	failAmountMsat, amountMsat lnwire.MilliSatoshi) (float64, error) {

	// Convert to positive-valued floats.
	capacity := float64(capacityMsat)
	successAmount := float64(successAmountMsat)
	failAmount := float64(failAmountMsat)
	amount := float64(amountMsat)

	// In order for this formula to give reasonable results, we need to
	// have an estimate of the capacity of a channel (or edge between
	// nodes).
	if capacity == 0.0 {
		return 0, ErrZeroCapacity
	}

	// We cannot send more than the capacity.
	if amount > capacity {
		return 0.0, nil
	}

	// Mission control may have some outdated values, we correct them
	// here.
	// TODO(bitromortac): there may be better decisions to make in these
	// cases, e.g., resetting failAmount=cap and successAmount=0.

	// failAmount should be capacity at max.
	if failAmount > capacity {
		log.Debugf("Correcting failAmount %v to capacity %v",
			failAmount, capacity)

		failAmount = capacity
	}

	// successAmount should be capacity at max.
	if successAmount > capacity {
		log.Debugf("Correcting successAmount %v to capacity %v",
			successAmount, capacity)

		successAmount = capacity
	}

	// The next statement is a safety check against an illogical
	// condition, otherwise the renormalization integral would become
	// zero. This may happen if a large channel gets closed and smaller
	// ones remain, but it should recover with the time decay.
	if failAmount <= successAmount {
		log.Tracef("fail amount (%v) is smaller than or equal to the "+
			"success amount (%v) for capacity (%v)",
			failAmountMsat, successAmountMsat, capacityMsat)

		return 0.0, nil
	}

	// We cannot send more than the fail amount.
	if amount >= failAmount {
		return 0.0, nil
	}

	// The success probability for a payment amount a is the integral over
	// the prior distribution P(x), the probability to find liquidity
	// between the amount a and the channel capacity c (or failAmount
	// a_f):
	// P(X >= a, X < a_f) = Integral_{a}^{a_f} P(x) dx
	prob := p.integral(capacity, amount, failAmount)
	if math.IsNaN(prob) {
		return 0.0, fmt.Errorf("non-normalized probability is NaN, "+
			"capacity: %v, amount: %v, fail amount: %v",
			capacity, amount, failAmount)
	}

	// If we have payment information, we need to adjust the prior
	// distribution P(x) and get the posterior distribution by
	// renormalizing the prior distribution in such a way that the
	// probability mass lies between a_s and a_f.
	reNorm := p.integral(capacity, successAmount, failAmount)
	if math.IsNaN(reNorm) {
		return 0.0, fmt.Errorf("normalization factor is NaN, "+
			"capacity: %v, success amount: %v, fail amount: %v",
			capacity, successAmount, failAmount)
	}

	// The normalization factor can only be zero if the success amount is
	// equal or larger than the fail amount. This should not happen as we
	// have checked this scenario above.
	if reNorm == 0.0 {
		return 0.0, fmt.Errorf("normalization factor is zero, "+
			"capacity: %v, success amount: %v, fail amount: %v",
			capacity, successAmount, failAmount)
	}

	prob /= reNorm

	// Note that for payment amounts smaller than successAmount, we can
	// get a value larger than unity, which we cap here to get a proper
	// probability.
	if prob > 1.0 {
		if amount > successAmount {
			return 0.0, fmt.Errorf("unexpected large probability "+
				"(%v) capacity: %v, amount: %v, success "+
				"amount: %v, fail amount: %v", prob, capacity,
				amount, successAmount, failAmount)
		}

		return 1.0, nil
	} else if prob < 0.0 {
		return 0.0, fmt.Errorf("negative probability (%v) "+
			"capacity: %v, amount: %v, success amount: %v, "+
			"fail amount: %v", prob, capacity, amount,
			successAmount, failAmount)
	}

	return prob, nil
}
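// exampleProbabilityFormula is an illustrative sketch and not part of lnd's
// API. It evaluates the raw formula for a payment amount that lies between a
// previously successful amount and a previously failed amount, the case in
// which the posterior is renormalized to the range (a_s, a_f). All amounts
// are assumptions for demonstration only.
func exampleProbabilityFormula(p *BimodalEstimator) (float64, error) {
	capacity := lnwire.NewMSatFromSatoshis(1_000_000)
	successAmount := lnwire.NewMSatFromSatoshis(100_000)
	failAmount := lnwire.NewMSatFromSatoshis(500_000)
	amount := lnwire.NewMSatFromSatoshis(300_000)

	return p.probabilityFormula(
		capacity, successAmount, failAmount, amount,
	)
}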