lnd/chanfitness/chanevent.go
carla 744876003d chanfitness: Add channel event log structure
This commit adds a chanfitness package which will be used to track
channel health and performance metrics. It adds a channel event
structure which will be used to track channel opens/closes and peer
uptime.

The eventLog implements an uptime function which calculates uptime
over a given period and a lifespan function which returns the time
when the log began monitoring the channel and, if the channel is
closed, the time when it stopped monitoring it.
2019-10-25 09:51:07 +02:00

219 lines
6.1 KiB
Go

package chanfitness
import (
"fmt"
"time"
"github.com/lightningnetwork/lnd/routing/route"
)
// eventType identifies the kind of event that is recorded for a channel.
type eventType int

const (
	// peerOnlineEvent is recorded when the channel's peer comes online.
	peerOnlineEvent eventType = iota

	// peerOfflineEvent is recorded when the channel's peer goes offline.
	peerOfflineEvent
)

// String returns a human readable name for the event type. Values that do not
// correspond to a known event type are reported as "unknown".
func (e eventType) String() string {
	switch e {
	case peerOnlineEvent:
		return "peer_online"

	case peerOfflineEvent:
		return "peer_offline"

	default:
		return "unknown"
	}
}
// channelEvent is a timestamped event which is observed on a per channel
// basis.
type channelEvent struct {
// timestamp is the time at which the event was recorded.
timestamp time.Time
// eventType is the kind of event that was observed, for example the peer
// coming online or going offline.
eventType eventType
}
// chanEventLog stores all events that have occurred over a channel's lifetime.
type chanEventLog struct {
// id is the uint64 of the short channel ID.
id uint64
// peer is the compressed public key of the peer being monitored.
peer route.Vertex
// events is a log of timestamped events observed for the channel. Events
// are appended in the order that they are added, and getOnlinePeriods
// expects them to be ordered by ascending timestamp.
events []*channelEvent
// now is expected to return the current time. It is supplied as an
// external function to enable deterministic unit tests.
now func() time.Time
// openedAt tracks the first time this channel was seen; it is set to the
// timestamp of the first event added to the log. This is not necessarily
// the time that it confirmed on chain because channel events are not
// persisted at present.
openedAt time.Time
// closedAt is the time that the channel was closed. If the channel has not
// been closed yet, it is zero. Once it is set, add no longer records
// events.
closedAt time.Time
}
// newEventLog creates an empty event log for the channel with the given short
// channel ID and peer. The now function is used as the log's time source. The
// open and close times of the returned log are left unset.
func newEventLog(id uint64, peer route.Vertex, now func() time.Time) *chanEventLog {
	eventLog := new(chanEventLog)
	eventLog.id = id
	eventLog.peer = peer
	eventLog.now = now

	return eventLog
}
// close marks the event log as closed by recording the current time, as
// reported by the log's now function, as its closing time.
func (e *chanEventLog) close() {
	closeTime := e.now()
	e.closedAt = closeTime
}
// add records an event of the given type in the event log, stamped with the
// current time. Events are ignored once the log has been closed. The first
// event that is added also determines the open time of the log.
func (e *chanEventLog) add(eventType eventType) {
	// A non-zero close time means that the channel is closed, so there is
	// nothing left to record.
	if closed := !e.closedAt.IsZero(); closed {
		return
	}

	// Read the clock once so that the event and the open time share the
	// same timestamp.
	timestamp := e.now()

	e.events = append(e.events, &channelEvent{
		timestamp: timestamp,
		eventType: eventType,
	})

	// The open time is only unset before the first event is added, so this
	// pins it to the timestamp of the first event.
	if e.openedAt.IsZero() {
		e.openedAt = timestamp
	}

	log.Debugf("Channel %v recording event: %v", e.id, eventType)
}
// onlinePeriod represents a period of time over which a peer was online.
type onlinePeriod struct {
// start is the timestamp of the online event which began the period. end
// is the timestamp of the offline event which terminated it or, for a
// period that was not terminated by an offline event, the channel's close
// time or the current time.
start, end time.Time
}
// getOnlinePeriods returns a list of all the periods that the event log has
// recorded the remote peer as being online. An online period begins with a
// peer online event and is terminated by the next peer offline event. If the
// log ends while the peer is online, a final period is added which runs until
// the channel's close time or, if the channel is still open, the current time.
//
// Repeated events of the same type do not change the peer's state: an online
// event received while the peer is already online does not restart the current
// period, and an offline event received while the peer is already offline (or
// before any online event has been seen) does not produce a period.
//
// This function expects the event log to be ordered by ascending timestamp. It
// returns nil if no online periods were recorded.
func (e *chanEventLog) getOnlinePeriods() []*onlinePeriod {
	// Return early if there are no events, there are no online periods.
	if len(e.events) == 0 {
		return nil
	}

	var (
		// online reflects the peer's state after the events processed
		// so far. The peer is considered offline until an online event
		// is seen.
		online bool

		// onlineSince is the timestamp of the event which began the
		// current online period. It is only meaningful while online is
		// true.
		onlineSince time.Time

		onlinePeriods []*onlinePeriod
	)

	// Loop through all events to build a list of periods that the peer was
	// online. Online periods are added when they are terminated with a peer
	// offline event. A period which is still running when the log ends is
	// handled after the loop.
	for _, event := range e.events {
		switch event.eventType {
		case peerOnlineEvent:
			// If the peer is already online this is a duplicate
			// event; keep the earlier start time so that the full
			// online period is counted.
			if online {
				continue
			}

			online = true
			onlineSince = event.timestamp

		case peerOfflineEvent:
			// If the peer is not online, the log either started
			// with an offline event or this is a duplicate offline
			// event. In both cases there is no online period to
			// terminate, and adding one would count the same time
			// twice.
			if !online {
				continue
			}

			// The peer has gone offline having previously been
			// online, so we add an online period to the set of
			// online periods.
			online = false
			onlinePeriods = append(onlinePeriods, &onlinePeriod{
				start: onlineSince,
				end:   event.timestamp,
			})
		}
	}

	// If the peer was not online at the end of the log, there is no final
	// online period to calculate and we can return the periods as is.
	if !online {
		return onlinePeriods
	}

	// The log ended while the peer was online, so we need to add a final
	// online period. If the channel is closed, this period runs until
	// channel closure. If it is still open, it runs until the present.
	endTime := e.closedAt
	if endTime.IsZero() {
		endTime = e.now()
	}

	// Add the final online period to the set and return.
	return append(onlinePeriods, &onlinePeriod{
		start: onlineSince,
		end:   endTime,
	})
}
// uptime returns the total amount of time that the peer was recorded as being
// online within the inclusive range from start to end. An error is returned if
// end is before start, or if end is the zero time.
func (e *chanEventLog) uptime(start, end time.Time) (time.Duration, error) {
	// Reject ranges that we cannot calculate uptime for.
	switch {
	case end.Before(start):
		return 0, fmt.Errorf("end time: %v before start time: %v",
			end, start)

	case end.IsZero():
		return 0, fmt.Errorf("zero end time")
	}

	var total time.Duration

	for _, period := range e.getOnlinePeriods() {
		// Periods that finished before the range began do not
		// contribute to uptime.
		if period.end.Before(start) {
			continue
		}

		// Periods are in ascending order, so once one begins after the
		// range has ended there is nothing further to count.
		if period.start.After(end) {
			break
		}

		// Clamp the period to the requested range so that only the
		// overlapping portion is counted.
		from, to := period.start, period.end
		if from.Before(start) {
			from = start
		}
		if to.After(end) {
			to = end
		}

		total += to.Sub(from)
	}

	return total, nil
}