From 328863e6be475d2492e3f10ade554836952a870d Mon Sep 17 00:00:00 2001 From: Christian Decker Date: Fri, 15 Dec 2023 10:53:00 +1030 Subject: [PATCH] tracing: Add an example exporter to zipkin and document it --- contrib/cln-tracer/.gitignore | 1 + contrib/cln-tracer/README.md | 17 +++ contrib/cln-tracer/cln_tracer/trace.py | 107 ++++++++++++++++++ contrib/cln-tracer/pyproject.toml | 16 +++ .../tracing-cln-performance.md | 34 ++---- 5 files changed, 151 insertions(+), 24 deletions(-) create mode 100644 contrib/cln-tracer/.gitignore create mode 100644 contrib/cln-tracer/README.md create mode 100644 contrib/cln-tracer/cln_tracer/trace.py create mode 100644 contrib/cln-tracer/pyproject.toml diff --git a/contrib/cln-tracer/.gitignore b/contrib/cln-tracer/.gitignore new file mode 100644 index 000000000..c04bc49f7 --- /dev/null +++ b/contrib/cln-tracer/.gitignore @@ -0,0 +1 @@ +poetry.lock diff --git a/contrib/cln-tracer/README.md b/contrib/cln-tracer/README.md new file mode 100644 index 000000000..494e4acc9 --- /dev/null +++ b/contrib/cln-tracer/README.md @@ -0,0 +1,17 @@ +# cln-tracer + +Utilities to export USDT traces from CLN. + +This directory contains the following utilities: + + - `cln_tracer/trace.py` instruments and exports traces from a single binary. + +## Installation + +Since the `bcc` library depends heavily on its binary extension +matching the version of the kernel `systemtap-sdt-dev` version, it is +strongly suggested to install the `libpbfcc` dependency via your +package manager and _not_ use a virtualenv to run these tracers. + +The tracers also require kernel access, and will most likely have to +be run by `root` diff --git a/contrib/cln-tracer/cln_tracer/trace.py b/contrib/cln-tracer/cln_tracer/trace.py new file mode 100644 index 000000000..a6731890c --- /dev/null +++ b/contrib/cln-tracer/cln_tracer/trace.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 + +"""A simple way to receive spans from an instrumented binary. + +Currently only supports a single binary given as the first command +line argument. +""" + +import threading +from urllib import request +from bcc import BPF, USDT +import json +import sys +from pathlib import Path +import time +from queue import Queue + +if len(sys.argv) != 3: + print(f"Usage: {sys.argv[0]} [zipkin_url] [binary]") + sys.exit(1) + +zipkin_url = sys.argv[1] +binaries = [sys.argv[2]] + +# Path to the eBPF program +bpf_text = """ +#include + +int do_trace(struct pt_regs *ctx) { + uint64_t addr; + char path[495]; + bpf_usdt_readarg(2, ctx, &addr); + bpf_probe_read(&path, sizeof(path), (void *)addr); + bpf_trace_printk("%s\\n", path); + return 0; +}; +""" + + +def instrument(binary: Path) -> USDT: + """Given a binary isntrument it with the USDT context.""" + path = binary.resolve() + usdt_ctx = USDT(path=str(path)) + + usdt_ctx.enable_probe( + probe="span_emit", + fn_name="do_trace" + ) + return usdt_ctx + + +def submit_once(queue: Queue): + """Submit accumulated spans to an otelcol.""" + spans = [] + while not queue.empty(): + span = queue.get_nowait() + spans.append(span) + queue.task_done() + + print(f"Submitting a batch of {len(spans)} spans") + req = request.Request( + zipkin_url, + data=json.dumps(spans).encode('ASCII'), + headers={ + 'Content-Type': 'application/json', + }, + method='POST' + ) + request.urlopen(req).read() + + +def submit(queue: Queue): + """Repeatedly submit batches of spans to otelcol.""" + while True: + time.sleep(1) + try: + submit_once(queue) + except Exception as e: + print(f"Error submitting spans to otelcol: {e}") + + +# An queue to accumulate spans in. +queue = Queue() + + +# Run the submission to otelcol in the background. +thread = threading.Thread( + target=submit, + args=(queue,), + daemon=True +) +thread.start() + +binaries = [Path(i) for i in binaries] +ctxs = [instrument(b) for b in binaries] +b = BPF( + text=bpf_text, + usdt_contexts=ctxs, + debug=0, +) + +while True: + try: + (task, pid, cpu, flags, ts, msg) = b.trace_fields() + queue.put(json.loads(msg)[0]) + except Exception as e: + print(f"Failed to parse message {msg}: {e}") diff --git a/contrib/cln-tracer/pyproject.toml b/contrib/cln-tracer/pyproject.toml new file mode 100644 index 000000000..337adc47d --- /dev/null +++ b/contrib/cln-tracer/pyproject.toml @@ -0,0 +1,16 @@ +[tool.poetry] +name = "cln-tracer" +version = "0.1.0" +description = "" +authors = ["Christian Decker "] +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.8" +bcc = "^0.1.10" +opentelemetry-proto = "^1.21.0" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/doc/developers-guide/tracing-cln-performance.md b/doc/developers-guide/tracing-cln-performance.md index 8ec1ad563..e0d40e01e 100644 --- a/doc/developers-guide/tracing-cln-performance.md +++ b/doc/developers-guide/tracing-cln-performance.md @@ -53,34 +53,20 @@ usdt:lightningd/lightningd:lightningd:span_start usdt:lightningd/lightningd:lightningd:span_suspend ``` -We want to attach to the `span_emit` probe, as that's the one getting the opentracing-compatible JSON string passed as an argument, and we'd like to extract that. +There is a sample exporter that can be used to instrument a single +binary, batch the spans it receives and submit them as a batch to an +`otelcol` or `tempo` instance in [contrib/cln-tracer][cln-tracer] +using the zipkin format for spans and traces. -```bash -$ export BPFTRACE_STRLEN=200 -$ sudo -E bpftrace -e 'U:../lightning/lightningd/lightningd:span_emit {printf("%s\n", str(arg1, 10240));}' - -Attaching 1 probe... -[{"id": "5006000000000000", "name": "lightningd/jsonrpc", "timestamp": 1690202283751653, "duration": 154,"localEndpoint": { "serviceName": "lightningd"}, "tags": {"method": "sql"}, "traceId": "b7f9b1 -[{"id": "5106000000000000", "name": "lightningd/jsonrpc", "timestamp": 1690202283752515, "duration": 146,"localEndpoint": { "serviceName": "lightningd"}, "tags": {"method": "listnodes"}, "traceId": " -[{"id": "8206000000000000", "name": "lightningd/jsonrpc", "timestamp": 1690202283759889, "duration": 657,"localEndpoint": { "serviceName": "lightningd"}, "tags": {"method": "dev-memleak"}, "traceId": -[{"id": "8306000000000000", "name": "lightningd/jsonrpc", "timestamp": 1690202283784095, "duration": 135,"localEndpoint": { "serviceName": "lightningd"}, "tags": {"method": "dev-report-fds"}, "traceI -[{"id": "8406000000000000", "name": "lightningd/jsonrpc", "timestamp": 1690202283785116, "duration": 204,"localEndpoint": { "serviceName": "lightningd"}, "tags": {"method": "stop"}, "traceId": "f6d42 -[{"id": "3100000000000000", "name": "lightningd/jsonrpc", "timestamp": 1690202283860017, "duration": 770,"localEndpoint": { "serviceName": "lightningd"}, "tags": {"method": "dev-memleak"}, "traceId": -[{"id": "3200000000000000", "name": "lightningd/jsonrpc", "timestamp": 1690202283992271, "duration": 155,"localEndpoint": { "serviceName": "lightningd"}, "tags": {"method": "dev-report-fds"}, "traceI -[{"id": "3300000000000000", "name": "lightningd/jsonrpc", "timestamp": 1690202283993745, "duration": 370,"localEndpoint": { "serviceName": "lightningd"}, "tags": {"method": "stop"}, "traceId": "92576 -[{"id": "5206000000000000", "name": "lightningd/jsonrpc", "timestamp": 1690202284070125, "duration": 911,"localEndpoint": { "serviceName": "lightningd"}, "tags": {"method": "dev-memleak"}, "traceId": -[{"id": "5506000000000000", "name": "lightningd/jsonrpc", "timestamp": 1690202284313462, "duration": 62,"localEndpoint": { "serviceName": "lightningd"}, "tags": {"method": "dev-report-fds"}, "traceId -[{"id": "5606000000000000", "name": "lightningd/jsonrpc", "timestamp": 1690202284314014, "duration": 117,"localEndpoint": { "serviceName": "lightningd"}, "tags": {"method": "stop"}, "traceId": "b7f9b -[{"id": "5406000000000000", "name": "plugin/bitcoind", "timestamp": 1690202284312469, "duration": 4774,"localEndpoint": { "serviceName": "lightningd"}, "parentId": "5306000000000000","tags": {"method -[{"id": "5306000000000000", "name": "extend_tip", "timestamp": 1690202284312428, "duration": 4902,"localEndpoint": { "serviceName": "lightningd"}, "tags": {}, "traceId": "b7f9b1e8af12d252"}] - -``` +[cln-tracer]: https://github.com/ElementsProject/lightning/tree/master/contrib/cln-tracer +Notice that due to a [limitation][bpftracer305] in the way the eBPF +script is handled you'll at most get the first 495 bytes of the +payload. This is due to the 512 byte limitation for eBPF programs out +of the box. - - -Notice that due to a [limitation](https://github.com/iovisor/bpftrace/issues/305) in `bpftrace` you'll at most get the first 200 bytes of the payload. If you write your own exporter you'll be able to specify the size of the buffer that is being used, and can extract the entire span. +[bpftracer]: https://github.com/iovisor/bpftrace/issues/305 ## Tracing Overhead