[DTrace-devel] [PATCH 3/4] rawfbt: new provider
Kris Van Hees
kris.van.hees at oracle.com
Thu Dec 5 19:32:53 UTC 2024
Hm, this is missing a change to clean_probes.sh... Updated patch coming...
On Thu, Dec 05, 2024 at 01:53:29PM -0500, Kris Van Hees via DTrace-devel wrote:
> This provider provides access to all kprobe-based probes that are
> available on the system. This includes any compiler-generated
> optimized variants of functions, named <func>.<suffix>.
>
> Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> ---
> libdtrace/Build | 2 +
> libdtrace/dt_prov_rawfbt.c | 330 +++++++++++++++++++++++++++++++++++++
> libdtrace/dt_provider.c | 1 +
> libdtrace/dt_provider.h | 1 +
> 4 files changed, 334 insertions(+)
> create mode 100644 libdtrace/dt_prov_rawfbt.c
>
> diff --git a/libdtrace/Build b/libdtrace/Build
> index 8d398221..72235159 100644
> --- a/libdtrace/Build
> +++ b/libdtrace/Build
> @@ -55,6 +55,7 @@ libdtrace-build_SOURCES = dt_aggregate.c \
> dt_prov_lockstat.c \
> dt_prov_proc.c \
> dt_prov_profile.c \
> + dt_prov_rawfbt.c \
> dt_prov_rawtp.c \
> dt_prov_sched.c \
> dt_prov_sdt.c \
> @@ -112,6 +113,7 @@ dt_prov_ip.c_CFLAGS := -Wno-pedantic
> dt_prov_lockstat.c_CFLAGS := -Wno-pedantic
> dt_prov_proc.c_CFLAGS := -Wno-pedantic
> dt_prov_profile.c_CFLAGS := -Wno-pedantic
> +dt_prov_rawfbt.c_CFLAGS := -Wno-pedantic
> dt_prov_rawtp.c_CFLAGS := -Wno-pedantic
> dt_prov_sched.c_CFLAGS := -Wno-pedantic
> dt_prov_sdt.c_CFLAGS := -Wno-pedantic
> diff --git a/libdtrace/dt_prov_rawfbt.c b/libdtrace/dt_prov_rawfbt.c
> new file mode 100644
> index 00000000..edfd36b4
> --- /dev/null
> +++ b/libdtrace/dt_prov_rawfbt.c
> @@ -0,0 +1,330 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + *
> + * The Raw Function Boundary Tracing provider for DTrace.
> + *
> + * The kernel provides kprobes to trace specific symbols. They are listed in
> + * the TRACEFS/available_filter_functions file. Kprobes may be associated with
> + * a symbol in the core kernel or with a symbol in a specific kernel module.
> + * Whereas the fbt provider supports tracing regular symbols only, the rawfbt
> + * provider also provides access to synthetic symbols, i.e. symbols created by
> + * compiler optimizations.
> + *
> + * Mapping from event name to DTrace probe name:
> + *
> + * <name> rawfbt:vmlinux:<name>:entry
> + * rawfbt:vmlinux:<name>:return
> + * or
> + * <name> [<modname>] rawfbt:<modname>:<name>:entry
> + * rawfbt:<modname>:<name>:return
> + */
> +#include <assert.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <linux/bpf.h>
> +#include <linux/btf.h>
> +#include <sys/stat.h>
> +#include <sys/types.h>
> +
> +#include <bpf_asm.h>
> +
> +#include "dt_btf.h"
> +#include "dt_dctx.h"
> +#include "dt_cg.h"
> +#include "dt_module.h"
> +#include "dt_provider_tp.h"
> +#include "dt_probe.h"
> +#include "dt_pt_regs.h"
> +
> +static const char prvname[] = "rawfbt";
> +static const char modname[] = "vmlinux";
> +
> +#define KPROBE_EVENTS TRACEFS "kprobe_events"
> +#define PROBE_LIST TRACEFS "available_filter_functions"
> +
> +#define FBT_GROUP_FMT GROUP_FMT "_%s"
> +#define FBT_GROUP_DATA GROUP_DATA, prp->desc->prb
> +
> +/*
> + * Stability attributes for the rawfbt provider.  This table is handed to
> + * dt_provider_create() when the provider is registered in populate().
> + *
> + * NOTE(review): the five rows presumably describe, in order, the provider,
> + * module, function, name, and arguments attributes - confirm against the
> + * dtrace_pattr_t definition.
> + */
> +static const dtrace_pattr_t pattr = {
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
> +};
> +
> +/*
> + * Scan the PROBE_LIST file and add entry and return probes for every function
> + * that is listed.
> + *
> + * Returns -1 if the provider could not be created.  Otherwise, returns the
> + * number of probes that were added; this is 0 if PROBE_LIST cannot be opened.
> + */
> +static int populate(dtrace_hdl_t *dtp)
> +{
> +	dt_provider_t		*prv;
> +	FILE			*f;
> +	char			*buf = NULL;
> +	size_t			len = 0;
> +	size_t			n = 0;
> +	dtrace_syminfo_t	sip;
> +	dtrace_probedesc_t	pd;
> +
> +	prv = dt_provider_create(dtp, prvname, &dt_rawfbt, &pattr, NULL);
> +	if (prv == NULL)
> +		return -1;			/* errno already set */
> +
> +	f = fopen(PROBE_LIST, "r");
> +	if (f == NULL)
> +		return 0;
> +
> +	while (getline(&buf, &len, f) >= 0) {
> +		char		*p, *q;
> +		const char	*mod = modname;
> +		dt_probe_t	*prp;
> +
> +		/*
> +		 * Here buf is either "funcname\n" or "funcname [modname]\n".
> +		 * The last line may not have a linefeed, so the linefeed and
> +		 * the closing bracket are stripped independently of each
> +		 * other: a final "funcname [modname]" line must lose its ']'
> +		 * as well.
> +		 */
> +		p = buf + strlen(buf);
> +		if (p > buf && p[-1] == '\n')
> +			*(--p) = '\0';
> +		if (p > buf && p[-1] == ']')
> +			*(--p) = '\0';
> +
> +		/*
> +		 * Now buf is either "funcname" or "funcname [modname".  If
> +		 * there is no module name provided, we will use the default.
> +		 */
> +		p = strchr(buf, ' ');
> +		if (p) {
> +			*p++ = '\0';
> +			if (*p == '[')
> +				p++;
> +		}
> +
> +#define strstarts(var, x) (strncmp(var, x, strlen (x)) == 0)
> +		/*
> +		 * Weed out __ftrace_invalid_address__*, __probestub_*, and
> +		 * __traceiter_* entries.
> +		 */
> +		if (strstarts(buf, "__ftrace_invalid_address__") ||
> +		    strstarts(buf, "__probestub_") ||
> +		    strstarts(buf, "__traceiter_"))
> +			continue;
> +#undef strstarts
> +
> +		/*
> +		 * If we did not see a module name, perform a symbol lookup to
> +		 * try to determine the module name.
> +		 */
> +		if (!p) {
> +			/*
> +			 * For synthetic symbol names (those containing '.'),
> +			 * we need to use the base name (before the '.') for
> +			 * module name lookup, because the synthetic forms are
> +			 * not recorded in kallsyms information.
> +			 *
> +			 * We replace the first '.' with a 0 to terminate the
> +			 * string, and after the lookup, we put it back.
> +			 */
> +			q = strchr(buf, '.');
> +			if (q != NULL)
> +				*q = '\0';
> +
> +			if (dtrace_lookup_by_name(dtp, DTRACE_OBJ_KMODS, buf,
> +						  NULL, &sip) == 0)
> +				mod = sip.object;
> +
> +			if (q != NULL)
> +				*q = '.';
> +		} else
> +			mod = p;
> +
> +		/*
> +		 * Due to the lack of module names in
> +		 * TRACEFS/available_filter_functions, there are some duplicate
> +		 * function names.  The kernel does not let us trace functions
> +		 * that have duplicates, so we need to remove the existing one.
> +		 */
> +		pd.id = DTRACE_IDNONE;
> +		pd.prv = prvname;
> +		pd.mod = mod;
> +		pd.fun = buf;
> +		pd.prb = "entry";
> +		prp = dt_probe_lookup(dtp, &pd);
> +		if (prp != NULL) {
> +			dt_probe_destroy(prp);
> +			continue;
> +		}
> +
> +		if (dt_tp_probe_insert(dtp, prv, prvname, mod, buf, "entry"))
> +			n++;
> +		if (dt_tp_probe_insert(dtp, prv, prvname, mod, buf, "return"))
> +			n++;
> +	}
> +
> +	free(buf);
> +	fclose(f);
> +
> +	return n;
> +}
> +
> +/*
> + * Generate a BPF trampoline for a rawfbt probe.
> + *
> + * The trampoline function is called when the probe triggers, and it must
> + * satisfy the following prototype:
> + *
> + *	int dt_fbt(dt_pt_regs *regs)
> + *
> + * The trampoline will populate a dt_dctx_t struct and then call the function
> + * that implements the compiled D clause.  It returns 0 to the caller.
> + */
> +static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> +{
> +	int	is_return;
> +
> +	dt_cg_tramp_prologue(pcb);
> +
> +	/*
> +	 * After the dt_cg_tramp_prologue() call, we have:
> +	 *				//     (%r7 = dctx->mst)
> +	 *				//     (%r8 = dctx->ctx)
> +	 */
> +	dt_cg_tramp_copy_regs(pcb);
> +
> +	is_return = strcmp(pcb->pcb_probe->desc->prb, "return") == 0;
> +	if (!is_return) {
> +		/* Entry probe: the arguments come from the registers. */
> +		dt_cg_tramp_copy_args_from_regs(pcb, 1);
> +	} else {
> +		dt_irlist_t	*dlp = &pcb->pcb_ir;
> +
> +		dt_cg_tramp_copy_rval_from_regs(pcb);
> +
> +		/*
> +		 * fbt:::return arg0 should be the function offset for
> +		 * return instruction.  Since we use kretprobes, however,
> +		 * which do not fire until the function has returned to
> +		 * its caller, information about the returning instruction
> +		 * in the callee has been lost.
> +		 *
> +		 * Set arg0=-1 to indicate that we do not know the value.
> +		 */
> +		dt_cg_xsetx(dlp, NULL, DT_LBL_NONE, BPF_REG_0, -1);
> +		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
> +	}
> +
> +	dt_cg_tramp_epilogue(pcb);
> +
> +	return 0;
> +}
> +
> +/*
> + * Attach the BPF program (bpf_fd) to the given probe.
> + *
> + * If the probe does not have tracepoint info yet, a kprobe (entry probe) or
> + * kretprobe (return probe) event is first registered through KPROBE_EVENTS,
> + * and the event id is read from the format file of the newly created event.
> + *
> + * Returns -ENOENT if the event could not be set up; otherwise returns the
> + * result of dt_tp_probe_attach().
> + */
> +static int attach(dtrace_hdl_t *dtp, const dt_probe_t *prp, int bpf_fd)
> +{
> +	if (!dt_tp_probe_has_info(prp)) {
> +		char	*fn, *prb, *p;
> +		FILE	*f;
> +		size_t	len;
> +		int	fd, rc;
> +
> +		/*
> +		 * The tracepoint event we will be creating needs to have a
> +		 * valid name.  We use a copy of the probe name, with . -> _
> +		 * conversion.  The copy is ours to free on every path below.
> +		 */
> +		prb = strdup(prp->desc->fun);
> +		if (prb == NULL)
> +			return -ENOENT;
> +		for (p = prb; *p; p++) {
> +			if (*p == '.')
> +				*p = '_';
> +		}
> +
> +		/*
> +		 * Register the kprobe with the tracing subsystem.  This will
> +		 * create a tracepoint event.
> +		 */
> +		fd = open(KPROBE_EVENTS, O_WRONLY | O_APPEND);
> +		if (fd == -1) {
> +			free(prb);
> +			return -ENOENT;
> +		}
> +
> +		rc = dprintf(fd, "%c:" FBT_GROUP_FMT "/%s %s\n",
> +			     prp->desc->prb[0] == 'e' ? 'p' : 'r',
> +			     FBT_GROUP_DATA, prb, prp->desc->fun);
> +		close(fd);
> +		if (rc == -1) {
> +			free(prb);
> +			return -ENOENT;
> +		}
> +
> +		/* create format file name */
> +		len = snprintf(NULL, 0, "%s" FBT_GROUP_FMT "/%s/format",
> +			       EVENTSFS, FBT_GROUP_DATA, prb) + 1;
> +		fn = dt_alloc(dtp, len);
> +		if (fn == NULL) {
> +			free(prb);
> +			return -ENOENT;
> +		}
> +
> +		snprintf(fn, len, "%s" FBT_GROUP_FMT "/%s/format", EVENTSFS,
> +			 FBT_GROUP_DATA, prb);
> +
> +		/* The event name is no longer needed once fn is built. */
> +		free(prb);
> +
> +		/* open format file */
> +		f = fopen(fn, "r");
> +		dt_free(dtp, fn);
> +		if (f == NULL)
> +			return -ENOENT;
> +
> +		/* read event id from format file */
> +		rc = dt_tp_probe_info(dtp, f, 0, prp, NULL, NULL);
> +		fclose(f);
> +
> +		if (rc < 0)
> +			return -ENOENT;
> +	}
> +
> +	/* attach BPF program to the probe */
> +	return dt_tp_probe_attach(dtp, prp, bpf_fd);
> +}
> +
> +/*
> + * Report the argument information for a probe.  No argument descriptions are
> + * provided: the argument count is set to 0 and the argument vector to NULL.
> + * Always returns 0.
> + */
> +static int probe_info(dtrace_hdl_t *dtp, const dt_probe_t *prp, int *argcp,
> +		      dt_argdesc_t **argvp)
> +{
> +	/* no arguments by default */
> +	*argvp = NULL;
> +	*argcp = 0;
> +
> +	return 0;
> +}
> +
> +/*
> + * Try to clean up system resources that may have been allocated for this
> + * probe.
> + *
> + * If there is an event FD, we close it.
> + *
> + * We also try to remove any kprobe that may have been created for the probe.
> + * This is harmless for probes that didn't get created.  If the removal fails
> + * for some reason we are out of luck - fortunately it is not harmful to the
> + * system as a whole.
> + *
> + * The kprobe event was registered under the function name with every '.'
> + * replaced by '_' (see attach()), so the same conversion must be applied to
> + * the name used for the removal request.
> + */
> +static void detach(dtrace_hdl_t *dtp, const dt_probe_t *prp)
> +{
> +	char	*prb, *p;
> +	int	fd;
> +
> +	if (!dt_tp_probe_has_info(prp))
> +		return;
> +
> +	dt_tp_probe_detach(dtp, prp);
> +
> +	/* Reconstruct the event name that was used to create the kprobe. */
> +	prb = strdup(prp->desc->fun);
> +	if (prb == NULL)
> +		return;
> +	for (p = prb; *p; p++) {
> +		if (*p == '.')
> +			*p = '_';
> +	}
> +
> +	fd = open(KPROBE_EVENTS, O_WRONLY | O_APPEND);
> +	if (fd != -1) {
> +		dprintf(fd, "-:" FBT_GROUP_FMT "/%s\n", FBT_GROUP_DATA, prb);
> +		close(fd);
> +	}
> +
> +	free(prb);
> +}
> +
> +/*
> + * Provider implementation for rawfbt: kprobe-type BPF programs, with the
> + * callbacks defined in this file plus the shared dt_bpf_prog_load() and
> + * dt_tp_probe_destroy() helpers.
> + */
> +dt_provimpl_t	dt_rawfbt = {
> +	.name		= "rawfbt",
> +	.prog_type	= BPF_PROG_TYPE_KPROBE,
> +	.populate	= populate,
> +	.load_prog	= dt_bpf_prog_load,
> +	.trampoline	= trampoline,
> +	.attach		= attach,
> +	.probe_info	= probe_info,
> +	.detach		= detach,
> +	.probe_destroy	= dt_tp_probe_destroy,
> +};
> diff --git a/libdtrace/dt_provider.c b/libdtrace/dt_provider.c
> index 1e2e844e..0c621197 100644
> --- a/libdtrace/dt_provider.c
> +++ b/libdtrace/dt_provider.c
> @@ -36,6 +36,7 @@ const dt_provimpl_t *dt_providers[] = {
> &dt_lockstat,
> &dt_proc,
> &dt_profile,
> + &dt_rawfbt,
> &dt_rawtp,
> &dt_sched,
> &dt_sdt,
> diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
> index f62137de..59a8d62e 100644
> --- a/libdtrace/dt_provider.h
> +++ b/libdtrace/dt_provider.h
> @@ -82,6 +82,7 @@ extern dt_provimpl_t dt_ip;
> extern dt_provimpl_t dt_lockstat;
> extern dt_provimpl_t dt_proc;
> extern dt_provimpl_t dt_profile;
> +extern dt_provimpl_t dt_rawfbt;
> extern dt_provimpl_t dt_rawtp;
> extern dt_provimpl_t dt_sched;
> extern dt_provimpl_t dt_sdt;
> --
> 2.45.2
>
>
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel
More information about the DTrace-devel
mailing list