[DTrace-devel] [PATCH] rawfbt: new provider
Eugene Loh
eugene.loh at oracle.com
Thu Dec 5 23:23:07 UTC 2024
There are a lot of comments below, but presumably they will be easy to
adapt. So:
Reviewed-by: Eugene Loh <eugene.loh at oracle.com>
Anyhow...
Add comment in the commit message that this is the old kprobes provider?
Should there be a test?
On 12/5/24 14:42, Kris Van Hees wrote:
> This provider provides access to all kprobe-based probes that are
> available on the system. This includes any compiler-generated
> optimized variants of functions, named <func>.<suffix>.
>
> Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
>
> diff --git a/libdtrace/dt_prov_rawfbt.c b/libdtrace/dt_prov_rawfbt.c
> new file mode 100644
> index 00000000..edfd36b4
> --- /dev/null
> +++ b/libdtrace/dt_prov_rawfbt.c
> @@ -0,0 +1,330 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + *
> + * The Raw Function Boundary Tracing provider for DTrace.
> + *
> + * The kernel provides kprobes to trace specific symbols. They are listed in
> + * the TRACEFS/available_filter_functions file. Kprobes may be associated with
> + * a symbol in the core kernel or with a symbol in a specific kernel module.
> + * Whereas the fbt provider supports tracing regular symbols only, the rawfbt
> + * provider also provides access to synthetic symbols, i.e. symbols created by
> + * compiler optimizations.
> + *
> + * Mapping from event name to DTrace probe name:
> + *
> + * <name> rawfbt:vmlinux:<name>:entry
> + * rawfbt:vmlinux:<name>:return
> + * or
> + * <name> [<modname>] rawfbt:<modname>:<name>:entry
> + * rawfbt:<modname>:<name>:return
> + */
> +#include <assert.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <linux/bpf.h>
> +#include <linux/btf.h>
> +#include <sys/stat.h>
> +#include <sys/types.h>
> +
> +#include <bpf_asm.h>
> +
> +#include "dt_btf.h"
> +#include "dt_dctx.h"
> +#include "dt_cg.h"
> +#include "dt_module.h"
> +#include "dt_provider_tp.h"
> +#include "dt_probe.h"
> +#include "dt_pt_regs.h"
> +
> +static const char prvname[] = "rawfbt";
> +static const char modname[] = "vmlinux";
> +
> +#define KPROBE_EVENTS TRACEFS "kprobe_events"
> +#define PROBE_LIST TRACEFS "available_filter_functions"
> +
> +#define FBT_GROUP_FMT GROUP_FMT "_%s"
> +#define FBT_GROUP_DATA GROUP_DATA, prp->desc->prb
> +
> +static const dtrace_pattr_t pattr = {
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
> +};
> +
> +/*
> + * Scan the PROBE_LIST file and add entry and return probes for every function
> + * that is listed.
> + */
> +static int populate(dtrace_hdl_t *dtp)
> +{
> + dt_provider_t *prv;
> + FILE *f;
> + char *buf = NULL;
> + size_t len = 0;
Extra space before the =.
Incidentally, nice catch: n was being used for two distinct purposes:
counting (for which it was uninitialized) and buflen. But then shouldn't
this problem be corrected in the file where it originally appeared...
not to mention the other files with this same problem? That is the
pre-existing problem exists in:
dt_prov_fbt.c
dt_prov_rawtp.c
dt_prov_sdt.c
dt_prov_syscall.c
> + size_t n = 0;
> + dtrace_syminfo_t sip;
> + dtrace_probedesc_t pd;
> +
> + prv = dt_provider_create(dtp, prvname, &dt_rawfbt, &pattr, NULL);
> + if (prv == NULL)
> + return -1; /* errno already set */
> +
> + f = fopen(PROBE_LIST, "r");
> + if (f == NULL)
> + return 0;
> +
> + while (getline(&buf, &len, f) >= 0) {
> + char *p, *q;
Okay, but q could also be declared in the innermost clause in which it
is used.
> + const char *mod = modname;
> + dt_probe_t *prp;
> +
> + /*
> + * Here buf is either "funcname\n" or "funcname [modname]\n".
> + * The last line may not have a linefeed.
> + */
> + p = strchr(buf, '\n');
> + if (p) {
> + *p = '\0';
> + if (p > buf && *(--p) == ']')
> + *p = '\0';
> + }
> +
> + /*
> + * Now buf is either "funcname" or "funcname [modname". If
> + * there is no module name provided, we will use the default.
> + */
> + p = strchr(buf, ' ');
> + if (p) {
> + *p++ = '\0';
> + if (*p == '[')
> + p++;
> + }
> +
> +#define strstarts(var, x) (strncmp(var, x, strlen (x)) == 0)
> + /* Weed out __ftrace_invalid_address___* entries. */
> + if (strstarts(buf, "__ftrace_invalid_address__") ||
> + strstarts(buf, "__probestub_") ||
> + strstarts(buf, "__traceiter_"))
> + continue;
> +#undef strstarts
> +
> + /*
> + * If we did not see a module name, perform a symbol lookup to
> + * try to determine the module name.
> + */
> + if (!p) {
> + /*
> + * For synthetic symbol names (those containing '.'),
> + * we need to use the base name (before the '.') for
> + * module name lookup, because the synthetic forms are
> + * not recorded in kallsyms information.
> + *
> + * We replace the first '.' with a 0 to terminate the
> + * string, and after the lookup, we put it back.
> + */
> + q = strchr(buf, '.');
> + if (q != NULL)
> + *q = '\0';
> +
> + if (dtrace_lookup_by_name(dtp, DTRACE_OBJ_KMODS, buf,
> + NULL, &sip) == 0)
> + mod = sip.object;
> +
> + if (q != NULL)
> + *q = '.';
> + } else
> + mod = p;
> +
> + /*
> + * Due to the lack of module names in
> + * TRACEFS/available_filter_functions, there are some duplicate
> + * function names. The kernel does not let us trace functions
> + * that have duplicates, so we need to remove the existing one.
> + */
This represents a change in behavior from fbt kprobe. We used not to
create the new one, but here we remove the old one. How about a comment
on why the change.
> + pd.id = DTRACE_IDNONE;
> + pd.prv = prvname;
> + pd.mod = mod;
> + pd.fun = buf;
> + pd.prb = "entry";
> + prp = dt_probe_lookup(dtp, &pd);
> + if (prp != NULL) {
> + dt_probe_destroy(prp);
> + continue;
> + }
> +
> + if (dt_tp_probe_insert(dtp, prv, prvname, mod, buf, "entry"))
> + n++;
> + if (dt_tp_probe_insert(dtp, prv, prvname, mod, buf, "return"))
> + n++;
> + }
> +
> + free(buf);
> + fclose(f);
> +
> + return n;
> +}
> +
> +/*
> + * Generate a BPF trampoline for a FBT probe.
> + *
> + * The trampoline function is called when a FBT probe triggers, and it must
> + * satisfy the following prototype:
> + *
> + * int dt_fbt(dt_pt_regs *regs)
Should that be dt_rawfbt?
> + *
> + * The trampoline will populate a dt_dctx_t struct and then call the function
> + * that implements the compiled D clause. It returns 0 to the caller.
> + */
> +static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> +{
> + dt_cg_tramp_prologue(pcb);
> +
> + /*
> + * After the dt_cg_tramp_prologue() call, we have:
> + * // (%r7 = dctx->mst)
> + * // (%r8 = dctx->ctx)
> + */
> + dt_cg_tramp_copy_regs(pcb);
> + if (strcmp(pcb->pcb_probe->desc->prb, "return") == 0) {
> + dt_irlist_t *dlp = &pcb->pcb_ir;
> +
> + dt_cg_tramp_copy_rval_from_regs(pcb);
> +
> + /*
> + * fbt:::return arg0 should be the function offset for
> + * return instruction. Since we use kretprobes, however,
> + * which do not fire until the function has returned to
> + * its caller, information about the returning instruction
> + * in the callee has been lost.
> + *
> + * Set arg0=-1 to indicate that we do not know the value.
> + */
> + dt_cg_xsetx(dlp, NULL, DT_LBL_NONE, BPF_REG_0, -1);
> + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
> + } else
> + dt_cg_tramp_copy_args_from_regs(pcb, 1);
> + dt_cg_tramp_epilogue(pcb);
> +
> + return 0;
> +}
> +
> +static int attach(dtrace_hdl_t *dtp, const dt_probe_t *prp, int bpf_fd)
> +{
> + if (!dt_tp_probe_has_info(prp)) {
> + char *fn, *prb, *p;
> + FILE *f;
> + size_t len;
> + int fd, rc = -1;
> +
> + /*
> + * The tracepoint event we will be creating needs to have a
> + * valid name. We use a copy of the probe name, with . -> _
> + * conversion.
> + */
> + prb = strdup(prp->desc->fun);
strdup()... memory leak?
> + for (p = prb; *p; p++) {
> + if (*p == '.')
> + *p = '_';
> + }
> +
> + /*
> + * Register the kprobe with the tracing subsystem. This will
> + * create a tracepoint event.
> + */
> + fd = open(KPROBE_EVENTS, O_WRONLY | O_APPEND);
> + if (fd == -1)
> + return -ENOENT;
> +
> + rc = dprintf(fd, "%c:" FBT_GROUP_FMT "/%s %s\n",
> + prp->desc->prb[0] == 'e' ? 'p' : 'r',
> + FBT_GROUP_DATA, prb, prp->desc->fun);
> + close(fd);
> + if (rc == -1)
> + return -ENOENT;
> +
> + /* create format file name */
> + len = snprintf(NULL, 0, "%s" FBT_GROUP_FMT "/%s/format",
> + EVENTSFS, FBT_GROUP_DATA, prb) + 1;
> + fn = dt_alloc(dtp, len);
Use asprintf() instead?
> + if (fn == NULL)
> + return -ENOENT;
> +
> + snprintf(fn, len, "%s" FBT_GROUP_FMT "/%s/format", EVENTSFS,
> + FBT_GROUP_DATA, prb);
> +
> + /* open format file */
> + f = fopen(fn, "r");
> + dt_free(dtp, fn);
> + if (f == NULL)
> + return -ENOENT;
> +
> + /* read event id from format file */
> + rc = dt_tp_probe_info(dtp, f, 0, prp, NULL, NULL);
> + fclose(f);
> +
> + if (rc < 0)
> + return -ENOENT;
> + }
> +
> + /* attach BPF program to the probe */
> + return dt_tp_probe_attach(dtp, prp, bpf_fd);
> +}
> +
> +static int probe_info(dtrace_hdl_t *dtp, const dt_probe_t *prp, int *argcp,
> + dt_argdesc_t **argvp)
> +{
> + *argcp = 0; /* no arguments by default */
> + *argvp = NULL;
> +
> + return 0;
> +}
With d44036087 ("probe: probes in providers without probe_info() have no
args"), such a probe_info() is no longer necessary.
> +
> +/*
> + * Try to clean up system resources that may have been allocated for this
> + * probe.
> + *
> + * If there is an event FD, we close it.
> + *
> + * We also try to remove any kprobe that may have been created for the probe.
kprobe, yes. But that fixes the "uprobe" typo that was in
kprobe_detach(). So, how about fixing that typo in dt_prov_fbt.c in
this patch while we're at it.
> + * This is harmless for probes that didn't get created. If the removal fails
> + * for some reason we are out of luck - fortunately it is not harmful to the
> + * system as a whole.
> + */
> +static void detach(dtrace_hdl_t *dtp, const dt_probe_t *prp)
> +{
> + int fd;
> +
> + if (!dt_tp_probe_has_info(prp))
> + return;
> +
> + dt_tp_probe_detach(dtp, prp);
> +
> + fd = open(KPROBE_EVENTS, O_WRONLY | O_APPEND);
> + if (fd == -1)
> + return;
> +
> + dprintf(fd, "-:" FBT_GROUP_FMT "/%s\n", FBT_GROUP_DATA,
> + prp->desc->fun);
> + close(fd);
> +}
> +
> +dt_provimpl_t dt_rawfbt = {
> + .name = "rawfbt",
Use prvname instead of "rawfbt"?
> + .prog_type = BPF_PROG_TYPE_KPROBE,
> + .populate = &populate,
> + .load_prog = &dt_bpf_prog_load,
> + .trampoline = &trampoline,
> + .attach = &attach,
> + .probe_info = &probe_info,
> + .detach = &detach,
> + .probe_destroy = &dt_tp_probe_destroy,
> +};
> diff --git a/libdtrace/dt_provider.c b/libdtrace/dt_provider.c
> index 1e2e844e..0c621197 100644
> --- a/libdtrace/dt_provider.c
> +++ b/libdtrace/dt_provider.c
> @@ -36,6 +36,7 @@ const dt_provimpl_t *dt_providers[] = {
> &dt_lockstat,
> &dt_proc,
> &dt_profile,
> + &dt_rawfbt,
> &dt_rawtp,
> &dt_sched,
> &dt_sdt,
> diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
> index f62137de..59a8d62e 100644
> --- a/libdtrace/dt_provider.h
> +++ b/libdtrace/dt_provider.h
> @@ -82,6 +82,7 @@ extern dt_provimpl_t dt_ip;
> extern dt_provimpl_t dt_lockstat;
> extern dt_provimpl_t dt_proc;
> extern dt_provimpl_t dt_profile;
> +extern dt_provimpl_t dt_rawfbt;
> extern dt_provimpl_t dt_rawtp;
> extern dt_provimpl_t dt_sched;
> extern dt_provimpl_t dt_sdt;
> diff --git a/test/utils/clean_probes.sh b/test/utils/clean_probes.sh
> index 8292b309..16052cbc 100755
> --- a/test/utils/clean_probes.sh
> +++ b/test/utils/clean_probes.sh
> @@ -70,7 +70,7 @@ fi
> { sub(/:/, "/"); }
>
> {
> - if (/_fbt_/)
> + if (/_(raw)?fbt_/)
> kpv[kpc++] = $0;
> else
> upv[upc++] = $0;
More information about the DTrace-devel
mailing list