[DTrace-devel] [PATCH] rawfbt: new provider
Kris Van Hees
kris.van.hees at oracle.com
Thu Dec 5 19:42:52 UTC 2024
This provider provides access to all kprobe-based probes that are
available on the system. This includes any compiler-generated
optimized variants of functions, named <func>.<suffix>.
Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
---
libdtrace/Build | 2 +
libdtrace/dt_prov_rawfbt.c | 330 +++++++++++++++++++++++++++++++++++++
libdtrace/dt_provider.c | 1 +
libdtrace/dt_provider.h | 1 +
test/utils/clean_probes.sh | 2 +-
5 files changed, 335 insertions(+), 1 deletion(-)
create mode 100644 libdtrace/dt_prov_rawfbt.c
diff --git a/libdtrace/Build b/libdtrace/Build
index 8d398221..72235159 100644
--- a/libdtrace/Build
+++ b/libdtrace/Build
@@ -55,6 +55,7 @@ libdtrace-build_SOURCES = dt_aggregate.c \
dt_prov_lockstat.c \
dt_prov_proc.c \
dt_prov_profile.c \
+ dt_prov_rawfbt.c \
dt_prov_rawtp.c \
dt_prov_sched.c \
dt_prov_sdt.c \
@@ -112,6 +113,7 @@ dt_prov_ip.c_CFLAGS := -Wno-pedantic
dt_prov_lockstat.c_CFLAGS := -Wno-pedantic
dt_prov_proc.c_CFLAGS := -Wno-pedantic
dt_prov_profile.c_CFLAGS := -Wno-pedantic
+dt_prov_rawfbt.c_CFLAGS := -Wno-pedantic
dt_prov_rawtp.c_CFLAGS := -Wno-pedantic
dt_prov_sched.c_CFLAGS := -Wno-pedantic
dt_prov_sdt.c_CFLAGS := -Wno-pedantic
diff --git a/libdtrace/dt_prov_rawfbt.c b/libdtrace/dt_prov_rawfbt.c
new file mode 100644
index 00000000..edfd36b4
--- /dev/null
+++ b/libdtrace/dt_prov_rawfbt.c
@@ -0,0 +1,330 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ *
+ * The Raw Function Boundary Tracing provider for DTrace.
+ *
+ * The kernel provides kprobes to trace specific symbols. They are listed in
+ * the TRACEFS/available_filter_functions file. Kprobes may be associated with
+ * a symbol in the core kernel or with a symbol in a specific kernel module.
+ * Whereas the fbt provider supports tracing regular symbols only, the rawfbt
+ * provider also provides access to synthetic symbols, i.e. symbols created by
+ * compiler optimizations.
+ *
+ * Mapping from event name to DTrace probe name:
+ *
+ * <name> rawfbt:vmlinux:<name>:entry
+ * rawfbt:vmlinux:<name>:return
+ * or
+ * <name> [<modname>] rawfbt:<modname>:<name>:entry
+ * rawfbt:<modname>:<name>:return
+ */
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <bpf_asm.h>
+
+#include "dt_btf.h"
+#include "dt_dctx.h"
+#include "dt_cg.h"
+#include "dt_module.h"
+#include "dt_provider_tp.h"
+#include "dt_probe.h"
+#include "dt_pt_regs.h"
+
+static const char prvname[] = "rawfbt";
+static const char modname[] = "vmlinux";
+
+#define KPROBE_EVENTS TRACEFS "kprobe_events"
+#define PROBE_LIST TRACEFS "available_filter_functions"
+
+#define FBT_GROUP_FMT GROUP_FMT "_%s"
+#define FBT_GROUP_DATA GROUP_DATA, prp->desc->prb
+
+static const dtrace_pattr_t pattr = {
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
+};
+
+/*
+ * Scan the PROBE_LIST file and add entry and return probes for every function
+ * that is listed.
+ */
+static int populate(dtrace_hdl_t *dtp)
+{
+ dt_provider_t *prv;
+ FILE *f;
+ char *buf = NULL;
+ size_t len = 0;
+ size_t n = 0;
+ dtrace_syminfo_t sip;
+ dtrace_probedesc_t pd;
+
+ prv = dt_provider_create(dtp, prvname, &dt_rawfbt, &pattr, NULL);
+ if (prv == NULL)
+ return -1; /* errno already set */
+
+ f = fopen(PROBE_LIST, "r");
+ if (f == NULL)
+ return 0;
+
+ while (getline(&buf, &len, f) >= 0) {
+ char *p, *q;
+ const char *mod = modname;
+ dt_probe_t *prp;
+
+ /*
+ * Here buf is either "funcname\n" or "funcname [modname]\n".
+ * The last line may not have a linefeed.
+ */
+ p = strchr(buf, '\n');
+ if (p) {
+ *p = '\0';
+ if (p > buf && *(--p) == ']')
+ *p = '\0';
+ }
+
+ /*
+ * Now buf is either "funcname" or "funcname [modname". If
+ * there is no module name provided, we will use the default.
+ */
+ p = strchr(buf, ' ');
+ if (p) {
+ *p++ = '\0';
+ if (*p == '[')
+ p++;
+ }
+
+#define strstarts(var, x) (strncmp(var, x, strlen (x)) == 0)
+ /* Weed out __ftrace_invalid_address___* entries. */
+ if (strstarts(buf, "__ftrace_invalid_address__") ||
+ strstarts(buf, "__probestub_") ||
+ strstarts(buf, "__traceiter_"))
+ continue;
+#undef strstarts
+
+ /*
+ * If we did not see a module name, perform a symbol lookup to
+ * try to determine the module name.
+ */
+ if (!p) {
+ /*
+ * For synthetic symbol names (those containing '.'),
+ * we need to use the base name (before the '.') for
+ * module name lookup, because the synthetic forms are
+ * not recorded in kallsyms information.
+ *
+ * We replace the first '.' with a 0 to terminate the
+ * string, and after the lookup, we put it back.
+ */
+ q = strchr(buf, '.');
+ if (q != NULL)
+ *q = '\0';
+
+ if (dtrace_lookup_by_name(dtp, DTRACE_OBJ_KMODS, buf,
+ NULL, &sip) == 0)
+ mod = sip.object;
+
+ if (q != NULL)
+ *q = '.';
+ } else
+ mod = p;
+
+ /*
+ * Due to the lack of module names in
+ * TRACEFS/available_filter_functions, there are some duplicate
+ * function names. The kernel does not let us trace functions
+ * that have duplicates, so we need to remove the existing one.
+ */
+ pd.id = DTRACE_IDNONE;
+ pd.prv = prvname;
+ pd.mod = mod;
+ pd.fun = buf;
+ pd.prb = "entry";
+ prp = dt_probe_lookup(dtp, &pd);
+ if (prp != NULL) {
+ dt_probe_destroy(prp);
+ continue;
+ }
+
+ if (dt_tp_probe_insert(dtp, prv, prvname, mod, buf, "entry"))
+ n++;
+ if (dt_tp_probe_insert(dtp, prv, prvname, mod, buf, "return"))
+ n++;
+ }
+
+ free(buf);
+ fclose(f);
+
+ return n;
+}
+
+/*
+ * Generate a BPF trampoline for a FBT probe.
+ *
+ * The trampoline function is called when a FBT probe triggers, and it must
+ * satisfy the following prototype:
+ *
+ * int dt_fbt(dt_pt_regs *regs)
+ *
+ * The trampoline will populate a dt_dctx_t struct and then call the function
+ * that implements the compiled D clause. It returns 0 to the caller.
+ */
+static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
+{
+ dt_cg_tramp_prologue(pcb);
+
+ /*
+ * After the dt_cg_tramp_prologue() call, we have:
+ * // (%r7 = dctx->mst)
+ * // (%r8 = dctx->ctx)
+ */
+ dt_cg_tramp_copy_regs(pcb);
+ if (strcmp(pcb->pcb_probe->desc->prb, "return") == 0) {
+ dt_irlist_t *dlp = &pcb->pcb_ir;
+
+ dt_cg_tramp_copy_rval_from_regs(pcb);
+
+ /*
+ * fbt:::return arg0 should be the function offset for
+ * return instruction. Since we use kretprobes, however,
+ * which do not fire until the function has returned to
+ * its caller, information about the returning instruction
+ * in the callee has been lost.
+ *
+ * Set arg0=-1 to indicate that we do not know the value.
+ */
+ dt_cg_xsetx(dlp, NULL, DT_LBL_NONE, BPF_REG_0, -1);
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
+ } else
+ dt_cg_tramp_copy_args_from_regs(pcb, 1);
+ dt_cg_tramp_epilogue(pcb);
+
+ return 0;
+}
+
+static int attach(dtrace_hdl_t *dtp, const dt_probe_t *prp, int bpf_fd)
+{
+ if (!dt_tp_probe_has_info(prp)) {
+ char *fn, *prb, *p;
+ FILE *f;
+ size_t len;
+ int fd, rc = -1;
+
+ /*
+ * The tracepoint event we will be creating needs to have a
+ * valid name. We use a copy of the probe name, with . -> _
+ * conversion.
+ */
+ prb = strdup(prp->desc->fun);
+ for (p = prb; *p; p++) {
+ if (*p == '.')
+ *p = '_';
+ }
+
+ /*
+ * Register the kprobe with the tracing subsystem. This will
+ * create a tracepoint event.
+ */
+ fd = open(KPROBE_EVENTS, O_WRONLY | O_APPEND);
+ if (fd == -1)
+ return -ENOENT;
+
+ rc = dprintf(fd, "%c:" FBT_GROUP_FMT "/%s %s\n",
+ prp->desc->prb[0] == 'e' ? 'p' : 'r',
+ FBT_GROUP_DATA, prb, prp->desc->fun);
+ close(fd);
+ if (rc == -1)
+ return -ENOENT;
+
+ /* create format file name */
+ len = snprintf(NULL, 0, "%s" FBT_GROUP_FMT "/%s/format",
+ EVENTSFS, FBT_GROUP_DATA, prb) + 1;
+ fn = dt_alloc(dtp, len);
+ if (fn == NULL)
+ return -ENOENT;
+
+ snprintf(fn, len, "%s" FBT_GROUP_FMT "/%s/format", EVENTSFS,
+ FBT_GROUP_DATA, prb);
+
+ /* open format file */
+ f = fopen(fn, "r");
+ dt_free(dtp, fn);
+ if (f == NULL)
+ return -ENOENT;
+
+ /* read event id from format file */
+ rc = dt_tp_probe_info(dtp, f, 0, prp, NULL, NULL);
+ fclose(f);
+
+ if (rc < 0)
+ return -ENOENT;
+ }
+
+ /* attach BPF program to the probe */
+ return dt_tp_probe_attach(dtp, prp, bpf_fd);
+}
+
+static int probe_info(dtrace_hdl_t *dtp, const dt_probe_t *prp, int *argcp,
+ dt_argdesc_t **argvp)
+{
+ *argcp = 0; /* no arguments by default */
+ *argvp = NULL;
+
+ return 0;
+}
+
+/*
+ * Try to clean up system resources that may have been allocated for this
+ * probe.
+ *
+ * If there is an event FD, we close it.
+ *
+ * We also try to remove any kprobe that may have been created for the probe.
+ * This is harmless for probes that didn't get created. If the removal fails
+ * for some reason we are out of luck - fortunately it is not harmful to the
+ * system as a whole.
+ */
+static void detach(dtrace_hdl_t *dtp, const dt_probe_t *prp)
+{
+ int fd;
+
+ if (!dt_tp_probe_has_info(prp))
+ return;
+
+ dt_tp_probe_detach(dtp, prp);
+
+ fd = open(KPROBE_EVENTS, O_WRONLY | O_APPEND);
+ if (fd == -1)
+ return;
+
+ dprintf(fd, "-:" FBT_GROUP_FMT "/%s\n", FBT_GROUP_DATA,
+ prp->desc->fun);
+ close(fd);
+}
+
+dt_provimpl_t dt_rawfbt = {
+ .name = "rawfbt",
+ .prog_type = BPF_PROG_TYPE_KPROBE,
+ .populate = &populate,
+ .load_prog = &dt_bpf_prog_load,
+ .trampoline = &trampoline,
+ .attach = &attach,
+ .probe_info = &probe_info,
+ .detach = &detach,
+ .probe_destroy = &dt_tp_probe_destroy,
+};
diff --git a/libdtrace/dt_provider.c b/libdtrace/dt_provider.c
index 1e2e844e..0c621197 100644
--- a/libdtrace/dt_provider.c
+++ b/libdtrace/dt_provider.c
@@ -36,6 +36,7 @@ const dt_provimpl_t *dt_providers[] = {
&dt_lockstat,
&dt_proc,
&dt_profile,
+ &dt_rawfbt,
&dt_rawtp,
&dt_sched,
&dt_sdt,
diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
index f62137de..59a8d62e 100644
--- a/libdtrace/dt_provider.h
+++ b/libdtrace/dt_provider.h
@@ -82,6 +82,7 @@ extern dt_provimpl_t dt_ip;
extern dt_provimpl_t dt_lockstat;
extern dt_provimpl_t dt_proc;
extern dt_provimpl_t dt_profile;
+extern dt_provimpl_t dt_rawfbt;
extern dt_provimpl_t dt_rawtp;
extern dt_provimpl_t dt_sched;
extern dt_provimpl_t dt_sdt;
diff --git a/test/utils/clean_probes.sh b/test/utils/clean_probes.sh
index 8292b309..16052cbc 100755
--- a/test/utils/clean_probes.sh
+++ b/test/utils/clean_probes.sh
@@ -70,7 +70,7 @@ fi
{ sub(/:/, "/"); }
{
- if (/_fbt_/)
+ if (/_(raw)?fbt_/)
kpv[kpc++] = $0;
else
upv[upc++] = $0;
--
2.45.2
More information about the DTrace-devel
mailing list