[DTrace-devel] [RFC patch 1/4] stapsdt provider: support systemwide probing
Alan Maguire
alan.maguire at oracle.com
Thu Dec 18 17:23:51 UTC 2025
For stapsdt probes we can do systemwide probing by having
the kernel insert traps into the VMAs associated with a file
from the point that instrumentation is enabled. This means that
probes will fire for any _new_ process. Capturing existing
processes would mean iterating over /proc maps etc. However, having
this support is still valuable and, as long as the limitation is
documented clearly, seems worthwhile even in such a form.
The key problem for DTrace is how to specify a file path via the
module component of a provider:module:function:probe specification.
Here the approach (also used by libbpf) is to expand potential
paths using [LD_LIBRARY_]PATH from a binary/library name; so
specifying
myprov*:myprog::myprobe
causes us to search the PATH directories, /usr/bin and /usr/sbin
to find myprog and instrument it. If the module name contains ".so",
we search LD_LIBRARY_PATH, /usr/lib64 and /usr/lib instead.
The other part that was needed was a fixup of probe offsets: when the
optional .stapsdt.base section is present, the offset is adjusted by the
difference between its address and the note's base address (addrs[1]).
Signed-off-by: Alan Maguire <alan.maguire at oracle.com>
---
libdtrace/dt_pid.c | 158 ++++++++++++++++++++++++++++---------
libdtrace/dt_prov_uprobe.c | 12 ++-
2 files changed, 130 insertions(+), 40 deletions(-)
diff --git a/libdtrace/dt_pid.c b/libdtrace/dt_pid.c
index 36a5883b..3448397c 100644
--- a/libdtrace/dt_pid.c
+++ b/libdtrace/dt_pid.c
@@ -7,7 +7,9 @@
#include <sys/ioctl.h>
#include <sys/types.h>
+#include <sys/stat.h>
#include <sys/sysmacros.h>
+#include <unistd.h>
#include <stddef.h>
#include <assert.h>
#include <ctype.h>
@@ -34,11 +36,13 @@
#include <dt_impl.h>
#include <dt_program.h>
+#include <dt_probe.h>
#include <dt_provider.h>
#include <dt_pid.h>
#include <dt_string.h>
#define SEC_STAPSDT_NOTE ".note.stapsdt"
+#define SEC_STAPSDT_BASE ".stapsdt.base"
#define NAME_STAPSDT_NOTE "stapsdt"
/*
@@ -1267,9 +1271,10 @@ dt_stapsdt_parse(dtrace_hdl_t *dtp, dt_proc_t *dpr, dtrace_probedesc_t *pdp,
unsigned long addr_start)
{
size_t shstrndx, noff, doff, off, n;
+ Elf_Scn *scn = NULL, *nscn = NULL;
const prmap_t *pmp = NULL;
+ unsigned long base = 0;
char *mapfile = NULL;
- Elf_Scn *scn = NULL;
Elf *elf = NULL;
GElf_Shdr shdr;
GElf_Ehdr ehdr;
@@ -1323,12 +1328,14 @@ dt_stapsdt_parse(dtrace_hdl_t *dtp, dt_proc_t *dpr, dtrace_probedesc_t *pdp,
secname = elf_strptr(elf, shstrndx, shdr.sh_name);
if (strcmp(secname, SEC_STAPSDT_NOTE) == 0 &&
shdr.sh_type == SHT_NOTE)
- break;
+ nscn = scn;
+ if (strcmp(secname, SEC_STAPSDT_BASE) == 0)
+ base = shdr.sh_addr;
}
/* No ELF notes, just bail. */
- if (scn == NULL)
+ if (nscn == NULL)
goto out;
- data = elf_getdata(scn, 0);
+ data = elf_getdata(nscn, 0);
for (off = 0;
(off = gelf_getnote(data, off, &nhdr, &noff, &doff)) > 0;) {
char prvname[DTRACE_PROVNAMELEN];
@@ -1385,38 +1392,59 @@ dt_stapsdt_parse(dtrace_hdl_t *dtp, dt_proc_t *dpr, dtrace_probedesc_t *pdp,
psp.pps_refcntr_off = addrs[2] - phdr.p_vaddr + phdr.p_offset;
}
+ /* readjust based on optional .stapsdt.base, note base addr. */
+ if (base && addrs[1])
+ psp.pps_off += base - addrs[1];
+
if (!psp.pps_off)
continue;
psp.pps_nameoff = 0;
- if (!pmp)
- pmp = Paddr_to_map(dpr->dpr_proc, addr_start + addrs[0]);
- if (!pmp) {
- dt_dprintf("%i: cannot determine 0x%lx's mapping\n",
- Pgetpid(dpr->dpr_proc), psp.pps_off);
- continue;
- }
- if (!mapfile)
- mapfile = Pmap_mapfile_name(dpr->dpr_proc, pmp);
+ if (dpr) {
+ if (!pmp)
+ pmp = Paddr_to_map(dpr->dpr_proc, addr_start + addrs[0]);
+ if (!pmp) {
+ dt_dprintf("%i: cannot determine 0x%lx's mapping\n",
+ Pgetpid(dpr->dpr_proc), psp.pps_off);
+ continue;
+ }
+ if (!mapfile)
+ mapfile = Pmap_mapfile_name(dpr->dpr_proc, pmp);
- if (!mapfile) {
- dt_pid_error(dtp, pcb, dpr, D_PROC_USDT,
- "Cannot get name of mapping containing probe %s for pid %d\n",
- psp.pps_prb, dpr->dpr_pid);
- err = -1;
- break;
- }
- psp.pps_fn = mapfile;
- if (dt_Plookup_by_addr(dtp, dpr->dpr_pid, addr_start + addrs[0],
- &fun, &sym) == 0)
- psp.pps_fun = (char *)fun;
- else
- psp.pps_fun = no_fun;
- psp.pps_dev = pmp->pr_dev;
- psp.pps_inum = pmp->pr_inum;
- psp.pps_pid = dpr->dpr_pid;
- psp.pps_nameoff = 0;
+ if (!mapfile) {
+ dt_pid_error(dtp, pcb, dpr, D_PROC_USDT,
+ "Cannot get name of mapping containing probe %s for pid %d\n",
+ psp.pps_prb, dpr->dpr_pid);
+ err = -1;
+ break;
+ }
+ psp.pps_fn = mapfile;
+ if (dt_Plookup_by_addr(dtp, dpr->dpr_pid, addr_start + addrs[0],
+ &fun, &sym) == 0)
+ psp.pps_fun = (char *)fun;
+ else
+ psp.pps_fun = no_fun;
+ psp.pps_dev = pmp->pr_dev;
+ psp.pps_inum = pmp->pr_inum;
+ psp.pps_pid = dpr->dpr_pid;
+ psp.pps_nameoff = 0;
+ } else {
+ struct stat stats = {};
+ if (stat(path, &stats)) {
+ dt_pid_error(dtp, pcb, dpr, D_PROC_USDT,
+ "failed to stat() %s", path);
+ dtrace_errmsg(dtp, dtrace_errno(dtp));
+ err = -1;
+ break;
+ }
+ psp.pps_mod = mod;
+ psp.pps_dev = stats.st_dev;
+ psp.pps_inum = stats.st_ino;
+ psp.pps_fn = path;
+ psp.pps_fun = no_fun;
+ psp.pps_pid = -1;
+ }
if (pvp->impl->provide_probe(dtp, &psp) < 0) {
dt_pid_error(dtp, pcb, dpr, D_PROC_USDT,
"failed to instantiate probe %s for pid %d: %s",
@@ -1506,6 +1534,48 @@ dt_pid_create_stapsdt_probes_proc(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp,
fclose(fp);
}
+/*
+ * Resolve a bare module name to the full path of the file to instrument.
+ *
+ * A name containing ".so" is treated as a shared library and is looked up in
+ * LD_LIBRARY_PATH, then /usr/lib64 and /usr/lib (must be readable).  Any
+ * other name is treated as a program and is looked up in PATH, then /usr/bin
+ * and /usr/sbin (must be readable and executable).  Access is checked against
+ * the effective user/group IDs.
+ *
+ * On success the first matching regular file is written to path (a buffer of
+ * pathsz bytes) and 0 is returned.  If mod is empty or no candidate matches,
+ * -ENOENT is returned and path is set to the empty string.
+ */
+static int
+expand_modpath(const char *mod, char *path, size_t pathsz)
+{
+	const char	*searches[2] = { NULL, NULL };
+	size_t		i;
+	int		perm;
+
+	if (pathsz > 0)
+		path[0] = '\0';
+
+	/*
+	 * With an empty name every candidate would be "<dir>/", and the first
+	 * accessible search directory would be reported as the match.
+	 */
+	if (mod == NULL || mod[0] == '\0')
+		return -ENOENT;
+
+	if (strstr(mod, ".so")) {
+		searches[0] = getenv("LD_LIBRARY_PATH");
+		searches[1] = "/usr/lib64:/usr/lib";
+		perm = R_OK;
+	} else {
+		searches[0] = getenv("PATH");
+		searches[1] = "/usr/bin:/usr/sbin";
+		perm = R_OK | X_OK;
+	}
+
+	for (i = 0; i < sizeof(searches) / sizeof(searches[0]); i++) {
+		const char	*s, *n;
+
+		/* The environment variable may be unset. */
+		if (searches[i] == NULL)
+			continue;
+
+		for (s = searches[i]; s != NULL; s = n ? n + 1 : NULL) {
+			struct stat	st;
+			size_t		len;
+			int		rc;
+
+			n = strchr(s, ':');
+			len = n ? (size_t)(n - s) : strlen(s);
+
+			/* Drop trailing slashes so we do not emit "dir//mod". */
+			while (len > 1 && s[len - 1] == '/')
+				len--;
+
+			/*
+			 * Skip empty components (leading, trailing or doubled
+			 * ':') instead of probing "/<mod>" in the root
+			 * directory, and components that cannot possibly fit.
+			 */
+			if (len == 0 || len >= pathsz)
+				continue;
+
+			rc = snprintf(path, pathsz, "%.*s/%s", (int)len, s, mod);
+			/* A truncated path would name some other file. */
+			if (rc < 0 || (size_t)rc >= pathsz)
+				continue;
+
+			/* make sure accessible */
+			if (faccessat(AT_FDCWD, path, perm, AT_EACCESS) < 0)
+				continue;
+
+			/* Directories pass an X_OK check; want a real file. */
+			if (stat(path, &st) < 0 || !S_ISREG(st.st_mode))
+				continue;
+
+			dt_dprintf("%s: found full path '%s'\n", mod, path);
+			return 0;
+		}
+	}
+
+	if (pathsz > 0)
+		path[0] = '\0';
+	return -ENOENT;
+}
+
+
static int
dt_pid_create_stapsdt_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp, dt_pcb_t *pcb)
{
@@ -1522,14 +1592,25 @@ dt_pid_create_stapsdt_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp, dt_pcb_
pidstr = &pdp->prv[len];
- while (isdigit(*(pidstr - 1)))
- pidstr--;
- if (strlen(pidstr) == 0)
- return 0;
-
pvp = dt_provider_lookup(dtp, "stapsdt");
assert(pvp != NULL);
+ while (isdigit(*(pidstr - 1)))
+ pidstr--;
+ if (strlen(pidstr) == 0) {
+ char m[PATH_MAX];
+
+ /* only full pid wildcards are supported. */
+ if (*(pidstr - 1) != '*')
+ return 0;
+ if (isdigit(*(pidstr - 2)))
+ return 0;
+ if (dt_probe_lookup(dtp, pdp) != NULL)
+ return 0;
+ if (expand_modpath(pdp->mod, m, sizeof(m)))
+ return 0;
+ return dt_stapsdt_parse(dtp, NULL, pdp, pcb, pvp, m, 0);
+ }
pid = atoll(pidstr);
if (pid <= 0)
return 0;
@@ -1612,8 +1693,13 @@ dt_pid_create_usdt_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp, dt_pcb_t *
free(globpat);
globfree(&globbuf);
- if (err == 0)
+ if (err == 0) {
err = dt_pid_create_stapsdt_probes(pdp, dtp, pcb);
+ if (err != 0) {
+ dt_dprintf("stapsdt probe creation %s:%s:%s:%s failed: %d\n",
+ pdp->prv, pdp->mod, pdp->fun, pdp->prb, err);
+ }
+ }
/* If no errors, report success. */
if (err == 0)
diff --git a/libdtrace/dt_prov_uprobe.c b/libdtrace/dt_prov_uprobe.c
index e94827f2..631c3557 100644
--- a/libdtrace/dt_prov_uprobe.c
+++ b/libdtrace/dt_prov_uprobe.c
@@ -507,7 +507,7 @@ clean_usdt_probes(dtrace_hdl_t *dtp)
list_probe_t *pup = prp->prv_data;
dt_uprobe_t *upp = pup->probe->prv_data;
- if (Pexists(upp->pid))
+ if (upp->pid == -1 || Pexists(upp->pid))
continue;
}
@@ -629,7 +629,7 @@ static int add_probe_usdt(dtrace_hdl_t *dtp, dt_probe_t *prp)
assert(0); // FIXME do something here
/* Even though we just enabled this, check it's still live. */
- if (!Pexists(pid)) {
+ if (pid != -1 && !Pexists(pid)) {
probe_disable(dtp, prp);
dt_bpf_map_delete(fd, &pdp->id);
@@ -944,6 +944,7 @@ static int provide_probe(dtrace_hdl_t *dtp, const pid_probespec_t *psp,
return -1;
upp = uprp->prv_data;
+ upp->pid = psp->pps_pid;
upp->flags |= flags;
/* Look up the overlying probe. */
@@ -1552,7 +1553,7 @@ static int uprobe_create(dtrace_hdl_t *dtp, const dt_uprobe_t *upp,
attr.uprobe_path = (uint64_t)upp->fn;
attr.probe_offset = upp->off;
- return dt_perf_event_open(&attr, upp->pid, -1, -1, 0);
+ return dt_perf_event_open(&attr, upp->pid, upp->pid == -1 ? 0 : -1, -1, 0);
}
static int attach(dtrace_hdl_t *dtp, const dt_probe_t *uprp, int bpf_fd)
@@ -1563,7 +1564,10 @@ static int attach(dtrace_hdl_t *dtp, const dt_probe_t *uprp, int bpf_fd)
assert(upp->fn != NULL);
upp->fd = uprobe_create(dtp, upp, upp->refcntr_off);
-
+ if (upp->fd < 0) {
+ dt_dprintf("uprobe_create failed: %d\n", upp->fd);
+ return upp->fd;
+ }
/* attach BPF program to the probe */
if (ioctl(upp->fd, PERF_EVENT_IOC_SET_BPF, bpf_fd) < 0)
return -errno;
--
2.43.5
More information about the DTrace-devel
mailing list