2023-07-27 07:13:53 +02:00
|
|
|
#include "config.h"
|
|
|
|
#include <assert.h>
|
|
|
|
#include <ccan/htable/htable.h>
|
|
|
|
#include <ccan/str/hex/hex.h>
|
|
|
|
#include <ccan/tal/str/str.h>
|
|
|
|
#include <ccan/tal/tal.h>
|
|
|
|
#include <ccan/time/time.h>
|
|
|
|
#include <common/json_stream.h>
|
|
|
|
#include <common/memleak.h>
|
|
|
|
#include <common/trace.h>
|
|
|
|
#include <sodium/randombytes.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
|
|
|
|
#if HAVE_USDT
|
2023-07-27 07:17:22 +02:00
|
|
|
#include <sys/sdt.h>
|
|
|
|
|
2023-07-27 07:13:53 +02:00
|
|
|
#define MAX_ACTIVE_SPANS 128
|
2023-12-01 17:58:55 +01:00
|
|
|
|
|
|
|
#define HEX_SPAN_ID_SIZE (2*SPAN_ID_SIZE+1)
|
|
|
|
#define HEX_TRACE_ID_SIZE (2 * TRACE_ID_SIZE + 1)
|
|
|
|
|
|
|
|
/* The traceperent format is defined in W3C Trace Context RFC[1].
|
|
|
|
* Its format is defined as
|
|
|
|
*
|
|
|
|
* ```
|
|
|
|
* version-format = trace-id "-" parent-id "-" trace-flags
|
|
|
|
* trace-id = 32HEXDIGLC ; 16 bytes array identifier. All zeroes forbidden
|
|
|
|
* parent-id = 16HEXDIGLC ; 8 bytes array identifier. All zeroes forbidden
|
|
|
|
* trace-flags = 2HEXDIGLC ; 8 bit flags. Currently, only one bit is used.
|
|
|
|
* ```
|
|
|
|
*
|
|
|
|
* [1] https://www.w3.org/TR/trace-context/
|
|
|
|
*/
|
|
|
|
#define TRACEPARENT_LEN (2 + 1 + 32 + 1 + 16 + 1 + 2)
|
2023-07-27 07:13:53 +02:00
|
|
|
|
|
|
|
const char *trace_service_name = "lightningd";
|
|
|
|
|
|
|
|
struct span_tag {
|
|
|
|
char *name, *value;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct span {
|
2023-12-01 17:58:55 +01:00
|
|
|
/* Our own id */
|
|
|
|
u8 id[SPAN_ID_SIZE];
|
|
|
|
|
|
|
|
/* 0 if we have no parent. */
|
|
|
|
u8 parent_id[SPAN_ID_SIZE];
|
|
|
|
|
|
|
|
/* The trace_id for this span and all its children. */
|
|
|
|
u8 trace_id[TRACE_ID_SIZE];
|
|
|
|
|
2023-07-27 07:13:53 +02:00
|
|
|
u64 start_time;
|
|
|
|
u64 end_time;
|
2023-12-01 17:58:55 +01:00
|
|
|
|
|
|
|
/* The unique key used to find ourselves in the active
|
|
|
|
* spans. */
|
|
|
|
size_t key;
|
|
|
|
struct span *parent;
|
2023-07-27 07:13:53 +02:00
|
|
|
struct span_tag *tags;
|
|
|
|
char *name;
|
2023-12-01 17:58:55 +01:00
|
|
|
|
|
|
|
/* Indicate whether this is a remote span, i.e., it was
|
|
|
|
inherited by some other process, which is in charge of
|
|
|
|
emitting the span. This just means that we don't emit this
|
|
|
|
span ourselves, but we want to add child spans to the remote
|
|
|
|
span. */
|
|
|
|
bool remote;
|
2023-07-27 07:13:53 +02:00
|
|
|
};
|
|
|
|
|
2023-12-01 17:58:55 +01:00
|
|
|
static struct span *active_spans = NULL;
|
2023-07-27 07:13:53 +02:00
|
|
|
static struct span *current;
|
|
|
|
|
2023-12-01 17:58:55 +01:00
|
|
|
/* If the `CLN_TRACEPARENT` envvar is set, we inject that as the
|
|
|
|
* parent for the startup. This allows us to integrate the startup
|
|
|
|
* tracing with whatever tooling we build around it. This only has an
|
|
|
|
* effect if the envvar is set, otherwise the startup will create its
|
|
|
|
* own parent. */
|
|
|
|
static void trace_inject_traceparent(void)
|
2023-07-27 07:13:53 +02:00
|
|
|
{
|
2023-12-01 17:58:55 +01:00
|
|
|
char *traceparent;
|
|
|
|
traceparent = getenv("CLN_TRACEPARENT");
|
|
|
|
if (!traceparent)
|
|
|
|
return;
|
|
|
|
|
|
|
|
assert(strlen(traceparent) == TRACEPARENT_LEN);
|
|
|
|
trace_span_start("", active_spans);
|
|
|
|
current->remote = true;
|
|
|
|
assert(current && !current->parent);
|
|
|
|
if (!hex_decode(traceparent + 3, 2*TRACE_ID_SIZE, current->trace_id,
|
|
|
|
TRACE_ID_SIZE) ||
|
|
|
|
!hex_decode(traceparent + 36, 2*SPAN_ID_SIZE, current->id,
|
|
|
|
SPAN_ID_SIZE)) {
|
|
|
|
/* We failed to parse the traceparent, abandon. */
|
|
|
|
fprintf(stderr, "Failed!");
|
|
|
|
trace_span_end(active_spans);
|
|
|
|
}
|
2023-07-27 07:13:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static void trace_init(void) {
|
2023-12-01 17:58:55 +01:00
|
|
|
if (active_spans)
|
|
|
|
return;
|
2024-04-26 23:33:24 +02:00
|
|
|
active_spans = calloc(MAX_ACTIVE_SPANS, sizeof(struct span));
|
2023-12-01 17:58:55 +01:00
|
|
|
|
2023-07-27 07:13:53 +02:00
|
|
|
current = NULL;
|
2023-12-01 17:58:55 +01:00
|
|
|
trace_inject_traceparent();
|
2023-07-27 07:13:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Convert the pointer to a context object to a numeric key.
|
|
|
|
*/
|
|
|
|
static size_t trace_key(const void *key)
|
|
|
|
{
|
|
|
|
return (size_t)key;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct span *trace_span_find(size_t key)
|
|
|
|
{
|
|
|
|
for (size_t i = 0; i < MAX_ACTIVE_SPANS; i++)
|
|
|
|
if (active_spans[i].key == key)
|
|
|
|
return &active_spans[i];
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Find an empty slot for a new span.
|
|
|
|
*/
|
|
|
|
static struct span *trace_span_slot(void)
|
|
|
|
{
|
|
|
|
struct span *s = trace_span_find(0);
|
|
|
|
|
|
|
|
/* Might end up here if we have more than MAX_ACTIVE_SPANS
|
|
|
|
* concurrent spans. */
|
|
|
|
assert(s);
|
|
|
|
assert(s->parent == NULL);
|
|
|
|
assert(s != current);
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
2023-12-01 17:58:55 +01:00
|
|
|
static const struct span *trace_span_root(const struct span *s)
|
|
|
|
{
|
|
|
|
if (s->parent)
|
|
|
|
return trace_span_root(s->parent);
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
2023-07-27 07:13:53 +02:00
|
|
|
static void trace_emit(struct span *s)
|
|
|
|
{
|
2023-12-01 17:58:55 +01:00
|
|
|
const struct span *root = trace_span_root(s);
|
|
|
|
char span_id[HEX_SPAN_ID_SIZE];
|
|
|
|
char trace_id[HEX_TRACE_ID_SIZE];
|
|
|
|
char parent_span_id[HEX_SPAN_ID_SIZE];
|
|
|
|
|
|
|
|
/* If this is a remote span it's not up to us to emit it. Make
|
|
|
|
* this a no-op. `trace_span_end` will take care of cleaning
|
|
|
|
* the in-memory span up. */
|
|
|
|
if (s->remote)
|
|
|
|
return;
|
|
|
|
|
|
|
|
hex_encode(s->id, SPAN_ID_SIZE, span_id, HEX_SPAN_ID_SIZE);
|
|
|
|
hex_encode(root->trace_id, TRACE_ID_SIZE, trace_id, HEX_TRACE_ID_SIZE);
|
|
|
|
|
|
|
|
if (s->parent)
|
|
|
|
hex_encode(s->parent_id, SPAN_ID_SIZE, parent_span_id, HEX_SPAN_ID_SIZE);
|
2023-07-27 07:13:53 +02:00
|
|
|
|
|
|
|
char *res = tal_fmt(
|
|
|
|
NULL,
|
|
|
|
"[{\"id\": \"%s\", \"name\": \"%s\", "
|
|
|
|
"\"timestamp\": %" PRIu64 ", \"duration\": %" PRIu64 ",",
|
2023-12-01 17:58:55 +01:00
|
|
|
span_id, s->name, s->start_time, s->end_time - s->start_time);
|
2023-07-27 07:13:53 +02:00
|
|
|
|
|
|
|
tal_append_fmt(&res, "\"localEndpoint\": { \"serviceName\": \"%s\"}, ",
|
|
|
|
trace_service_name);
|
|
|
|
|
|
|
|
if (s->parent != NULL) {
|
2023-12-01 17:58:55 +01:00
|
|
|
tal_append_fmt(&res, "\"parentId\": \"%s\",", parent_span_id);
|
2023-07-27 07:13:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
tal_append_fmt(&res, "\"tags\": {");
|
|
|
|
for (size_t i = 0; i < tal_count(s->tags); i++) {
|
|
|
|
tal_append_fmt(&res, "%s\"%s\": \"%s\"", i == 0 ? "" : ", ",
|
|
|
|
s->tags[i].name, s->tags[i].value);
|
|
|
|
}
|
|
|
|
|
2023-12-01 17:58:55 +01:00
|
|
|
tal_append_fmt(&res, "}, \"traceId\": \"%s\"}]", trace_id);
|
|
|
|
DTRACE_PROBE2(lightningd, span_emit, span_id, res);
|
2023-07-27 07:13:53 +02:00
|
|
|
tal_free(res);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Release the span back into the pool of available spans.
|
|
|
|
*/
|
|
|
|
static void trace_span_clear(struct span *s)
|
|
|
|
{
|
|
|
|
s->key = 0;
|
2023-12-01 17:58:55 +01:00
|
|
|
memset(s->id, 0, SPAN_ID_SIZE);
|
|
|
|
memset(s->trace_id, 0, TRACE_ID_SIZE);;
|
2023-07-27 07:13:53 +02:00
|
|
|
s->parent = NULL;
|
|
|
|
s->name = tal_free(s->name);
|
|
|
|
s->tags = tal_free(s->tags);
|
|
|
|
}
|
|
|
|
|
|
|
|
void trace_span_start(const char *name, const void *key)
|
|
|
|
{
|
|
|
|
size_t numkey = trace_key(key);
|
|
|
|
struct timeabs now = time_now();
|
|
|
|
|
2023-12-01 17:58:55 +01:00
|
|
|
trace_init();
|
|
|
|
|
2023-07-27 07:13:53 +02:00
|
|
|
assert(trace_span_find(numkey) == NULL);
|
|
|
|
struct span *s = trace_span_slot();
|
|
|
|
s->key = numkey;
|
2023-12-01 17:58:55 +01:00
|
|
|
randombytes_buf(s->id, SPAN_ID_SIZE);
|
2023-07-27 07:13:53 +02:00
|
|
|
s->start_time = (now.ts.tv_sec * 1000000) + now.ts.tv_nsec / 1000;
|
|
|
|
s->parent = current;
|
|
|
|
s->tags = notleak(tal_arr(NULL, struct span_tag, 0));
|
|
|
|
s->name = notleak(tal_strdup(NULL, name));
|
2023-12-01 17:58:55 +01:00
|
|
|
|
|
|
|
/* If this is a new root span we also need to associate a new
|
|
|
|
* trace_id with it. */
|
|
|
|
if (!current) {
|
|
|
|
randombytes_buf(s->trace_id, TRACE_ID_SIZE);
|
|
|
|
} else {
|
|
|
|
memcpy(s->parent_id, current->id, SPAN_ID_SIZE);
|
|
|
|
}
|
|
|
|
|
2023-07-27 07:13:53 +02:00
|
|
|
current = s;
|
2023-07-27 07:17:22 +02:00
|
|
|
DTRACE_PROBE1(lightningd, span_start, s->id);
|
2023-07-27 07:13:53 +02:00
|
|
|
}
|
|
|
|
|
2023-12-01 17:58:55 +01:00
|
|
|
void trace_span_remote(u8 trace_id[TRACE_ID_SIZE], u8 span_id[SPAN_ID_SIZE])
|
|
|
|
{
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
|
2023-07-27 07:13:53 +02:00
|
|
|
void trace_span_end(const void *key)
|
|
|
|
{
|
|
|
|
size_t numkey = trace_key(key);
|
|
|
|
struct span *s = trace_span_find(numkey);
|
|
|
|
assert(s && "Span to end not found");
|
|
|
|
assert(s == current && "Ending a span that isn't the current one");
|
|
|
|
|
|
|
|
struct timeabs now = time_now();
|
|
|
|
s->end_time = (now.ts.tv_sec * 1000000) + now.ts.tv_nsec / 1000;
|
2023-07-27 07:17:22 +02:00
|
|
|
DTRACE_PROBE1(lightningd, span_end, s->id);
|
2023-07-27 07:13:53 +02:00
|
|
|
trace_emit(s);
|
|
|
|
|
|
|
|
/* Reset the context span we are in. */
|
|
|
|
current = s->parent;
|
|
|
|
|
|
|
|
/* Now reset the span */
|
|
|
|
trace_span_clear(s);
|
2023-12-01 17:58:55 +01:00
|
|
|
|
|
|
|
/* One last special case: if the parent is remote, it must be
|
|
|
|
* the root. And we should terminate that trace along with
|
|
|
|
* this one. */
|
|
|
|
if (current && current->remote) {
|
|
|
|
assert(current->parent == NULL);
|
|
|
|
current = NULL;
|
|
|
|
}
|
2023-07-27 07:13:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void trace_span_tag(const void *key, const char *name, const char *value)
|
|
|
|
{
|
|
|
|
size_t numkey = trace_key(key);
|
|
|
|
struct span *span = trace_span_find(numkey);
|
|
|
|
assert(span);
|
|
|
|
|
|
|
|
size_t s = tal_count(span->tags);
|
|
|
|
tal_resize(&span->tags, s + 1);
|
|
|
|
span->tags[s].name = tal_strdup(span->tags, name);
|
|
|
|
span->tags[s].value = tal_strdup(span->tags, value);
|
|
|
|
}
|
|
|
|
|
|
|
|
void trace_span_suspend(const void *key)
|
|
|
|
{
|
2023-07-27 07:17:22 +02:00
|
|
|
size_t numkey = trace_key(key);
|
|
|
|
struct span *span = trace_span_find(numkey);
|
2023-07-27 07:13:53 +02:00
|
|
|
current = NULL;
|
2023-07-27 07:17:22 +02:00
|
|
|
DTRACE_PROBE1(lightningd, span_suspend, span->id);
|
2023-07-27 07:13:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void trace_span_resume(const void *key)
|
|
|
|
{
|
|
|
|
size_t numkey = trace_key(key);
|
|
|
|
current = trace_span_find(numkey);
|
2023-07-27 07:17:22 +02:00
|
|
|
DTRACE_PROBE1(lightningd, span_resume, current->id);
|
2023-07-27 07:13:53 +02:00
|
|
|
}
|
2023-12-15 01:21:56 +01:00
|
|
|
|
|
|
|
void trace_cleanup(void)
|
|
|
|
{
|
|
|
|
free(active_spans);
|
|
|
|
active_spans = NULL;
|
|
|
|
}
|
|
|
|
|
2023-07-27 07:13:53 +02:00
|
|
|
#else /* HAVE_USDT */
|
|
|
|
|
|
|
|
void trace_span_start(const char *name, const void *key) {}
|
|
|
|
void trace_span_end(const void *key) {}
|
|
|
|
void trace_span_suspend(const void *key) {}
|
|
|
|
void trace_span_resume(const void *key) {}
|
|
|
|
void trace_span_tag(const void *key, const char *name, const char *value) {}
|
2023-12-15 01:21:56 +01:00
|
|
|
void trace_cleanup(void) {}
|
2023-07-27 07:13:53 +02:00
|
|
|
|
|
|
|
#endif /* HAVE_USDT */
|