[DTrace-devel] [PATCH v2 16/20] usdt: DTrace userspace side

Nick Alcock nick.alcock at oracle.com
Wed Sep 7 13:00:03 UTC 2022


This is implemented almost entirely in the pid provider, which is
reassuringly similar to how it was done in the in-kernel days. pid and
USDT probes are really very closely-related beasts, and the same code
can handle both easily enough.

This does several things that are sufficiently intertwined that putting
them in one commit seems most readable:

 - implements USDT probe discovery, ripping out a lot of old ioctl stuff
   and obsolete code handling stuff like structure-copying thunks in the
   Solaris C library and a bunch of obsolete functions around DOF
   acquisition (keeping one which might well be revived in the next
   phase), and adding dt_pid_create_usdt_probes, which scans the
   systemwide uprobe list and creates DTrace-side underlying probes for
   all of them that are relevant (see below).  Right now it can only
   create probes for specific named processes, but in future it'll grow
   the ability to make probes for everything dtprobed has spotted probes
   for.  Because it is driven by the systemwide uprobe list, it can
   create probes for processes that started before DTrace did, just like
   the old in-kernel model.  There's some rather hairy textual parsing
   in here, but I think it's just simple enough that we don't need to
   stick it in a seccomp jail (but if we do, we can do that easily
   enough now!)

 - rejigs dt_prov_pid to use the new uprobe_create mechanism to make pid
   probes, with names consistent with those dtprobed creates for USDT
   probes (but without the USDT probe name addendum portion, since there
   is none); the uprobes have names like dt_pid_$dev_$ino_$addr (USDT
   uprobes look like dt_pid_$dev_$ino_$addr_$encoded-probe-name).

 - adjusts provide_pid so it can be called at USDT uprobe discovery time
   to create underlying probes that user-requested USDT probes will then
   utilize.  The changes here look rather extreme but are actually quite
   small: the underlying probe stuff is split into a new function,
   provide_pid_underlying(), and provide_pid can be asked to do nothing
   but call this; the probe name follows the new dtprobed rules, looking
   like pid:$inum::$offset, and thus is identical for probes created for
   USDT and for pid at the same offset.

   The struct pid_probe attached to the underlying probe gains a device
   number (which it should always have had), keeps track of the
   underlying uprobe name from uprobe_create or USDT probe discovery,
   and remembers whether or not DTrace created it (if dtprobed created
   it, DTrace must not delete it).

   provide_pid itself is adjusted to call provide_pid_underlying as
   needed, but also to do more work if the probe already exists: USDT
   probes can be associated with more than one underlying probe (if the
   probe appears repeatedly in a program), so if it is repeatedly
   provide_pid'ed with different offsets, we spot this and chain it into
   all the necessary underlying probes.  (This means a bit of flailing
   about, since we suddenly need a new tiny structure whose sole purpose
   is to be in this list and point at the real probe.)

 - enabling gets a little more complex.  We can no longer get away with
   just enabling the underlying probe, because things not in the
   enablings list don't get entries in the probename strtab, and their
   probename is liable to be empty.  So we intern both the overlying
   *and* the underlying probe, and arrange for the overlying probe to
   have no trampoline: in conjunction with a prior commit in this series
   this causes only the underlying probe to have any BPF code generated
   for it.

 - Trampoline generation has to adapt to this, but also has to use a
   less kludgy way of figuring out the pids the trampoline applies to:
   rather than parsing the name apart on the spot, we ask dt_pid, which
   already has code to *properly* parse apart both pid and usdt names
   and extract the pid from them.

Signed-off-by: Nick Alcock <nick.alcock at oracle.com>
---
 libdtrace/dt_pid.c      | 312 ++++++++++++++++++++++++++++--------
 libdtrace/dt_pid.h      |   9 +-
 libdtrace/dt_prov_pid.c | 343 ++++++++++++++++++++++++++--------------
 3 files changed, 480 insertions(+), 184 deletions(-)

diff --git a/libdtrace/dt_pid.c b/libdtrace/dt_pid.c
index 4b143966ff3c..1ce2559aab7a 100644
--- a/libdtrace/dt_pid.c
+++ b/libdtrace/dt_pid.c
@@ -15,9 +15,11 @@
 #include <libgen.h>
 #include <stddef.h>
 #include <sys/ioctl.h>
+#include <sys/sysmacros.h>
 
 #include <mutex.h>
 #include <port.h>
+#include <uprobes.h>
 
 #include <dt_impl.h>
 #include <dt_program.h>
@@ -34,7 +36,8 @@ typedef struct dt_pid_probe {
 	const char *dpp_func;
 	const char *dpp_name;
 	const char *dpp_obj;
-	ino_t dpp_ino;
+	dev_t dpp_dev;
+	ino_t dpp_inum;
 	char *dpp_fname;
 	uintptr_t dpp_pc;
 	uintptr_t dpp_vaddr;
@@ -98,9 +101,9 @@ dt_pid_create_fbt_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp,
 
 	psp->pps_type = type;
 	psp->pps_pc = (uintptr_t)symp->st_value;
+	psp->pps_addr = psp->pps_pc - psp->pps_vaddr;
 	psp->pps_size = (size_t)symp->st_size;
-	psp->pps_glen = 0;		/* no glob pattern */
-	psp->pps_gstr[0] = '\0';
+	psp->pps_gstr[0] = '\0';		/* no glob pattern */
 
 	/* Make sure we have a PID provider. */
 	pvp = dtp->dt_prov_pid;
@@ -125,10 +128,10 @@ dt_pid_create_glob_offset_probes(struct ps_prochandle *P, dtrace_hdl_t *dtp,
 {
 	psp->pps_type = DTPPT_OFFSETS;
 	psp->pps_pc = (uintptr_t)symp->st_value;
+	psp->pps_addr = psp->pps_pc - psp->pps_vaddr;
 	psp->pps_size = (size_t)symp->st_size;
-	psp->pps_glen = strlen(pattern);
 
-	strncpy(psp->pps_gstr, pattern, psp->pps_glen + 1);
+	strcpy(psp->pps_gstr, pattern);
 
 	/* Create a probe using 'psp'. */
 
@@ -168,7 +171,8 @@ dt_pid_per_sym(dt_pid_probe_t *pp, const GElf_Sym *symp, const char *func)
 
 	psp->pps_pid = pid;
 	psp->pps_mod = dt_pid_objname(pp->dpp_lmid, pp->dpp_obj);
-	psp->pps_ino = pp->dpp_ino;
+	psp->pps_dev = pp->dpp_dev;
+	psp->pps_inum = pp->dpp_inum;
 	psp->pps_fn = strdup(pp->dpp_fname);
 	psp->pps_vaddr = pp->dpp_vaddr;
 	strcpy_safe(psp->pps_fun, sizeof(psp->pps_fun), func);
@@ -306,7 +310,8 @@ dt_pid_per_mod(void *arg, const prmap_t *pmp, const char *obj)
 
 	dt_Plmid(pp->dpp_dtp, pid, pmp->pr_vaddr, &pp->dpp_lmid);
 
-	pp->dpp_ino = pmp->pr_inum;
+	pp->dpp_dev = pmp->pr_dev;
+	pp->dpp_inum = pmp->pr_inum;
 	pp->dpp_vaddr = pmp->pr_file->first_segment->pr_vaddr;
 
 	/*
@@ -459,7 +464,8 @@ dt_pid_fix_mod(dt_pid_probe_t *pp, dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp,
 		return NULL;
 
 	dt_Pobjname(dtp, pid, pmp->pr_vaddr, m, sizeof(m));
-	pp->dpp_fname = strdup(m);
+	if (pp)
+		pp->dpp_fname = strdup(m);
 	if ((obj = strrchr(m, '/')) == NULL)
 		obj = &m[0];
 	else
@@ -484,7 +490,8 @@ dt_pid_create_pid_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp,
 	pp.dpp_pr = dpr->dpr_proc;
 	pp.dpp_pcb = pcb;
 	pp.dpp_nmatches = 0;
-	pp.dpp_ino = 0;
+	pp.dpp_dev = makedev(0, 0);
+	pp.dpp_inum = 0;
 
 	/*
 	 * Prohibit self-grabs.  (This is banned anyway by libproc, but this way
@@ -570,7 +577,227 @@ dt_pid_create_pid_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp,
 	return ret;
 }
 
-#if 0
+/*
+ * Rescan the PID uprobe list and create suitable underlying probes.
+ *
+ * If dpr is set, just set up probes relating to mappings found in that one
+ * process.  (dpr must in this case be locked.)
+ *
+ * If pdp is set, create overlying USDT probes for the specified probe
+ * description.
+ *
+ * Return 0 on success or -1 on error.  (Failure to create specific underlying
+ * probes is not an error.)
+ */
+static int
+dt_pid_create_usdt_probes(dtrace_hdl_t *dtp, dt_proc_t *dpr,
+			  dtrace_probedesc_t *pdp, dt_pcb_t *pcb)
+{
+	const dt_provider_t *pvp;
+	FILE *f;
+	char *buf = NULL, *p, *next_p;
+	size_t sz;
+	int ret = 0;
+
+	pvp = dtp->dt_prov_pid;
+	if (!pvp) {
+		pvp = dt_provider_lookup(dtp, "pid");
+		assert(pvp != NULL);
+		dtp->dt_prov_pid = pvp;
+	}
+
+	assert(pvp->impl != NULL && pvp->impl->provide_pid != NULL);
+
+	f = fopen(TRACEFS "uprobe_events", "r");
+	if (!f) {
+		dt_dprintf("cannot open " TRACEFS "uprobe_events: %s\n",
+		    strerror(errno));
+		return -1;
+	}
+
+	/*
+	 * Systemwide probing: not yet implemented.
+	 */
+	assert(dpr != NULL);
+
+	dt_dprintf("Scanning for usdt probes matching %i\n", dpr->dpr_pid);
+
+	/*
+	 * We are only interested in pid uprobes, not any other uprobes that may
+	 * exist.  Some of these may be for pid probes, some for usdt: we create
+	 * underlying probes for all of them, except that we may only be
+	 * interested in probes belonging to mappings in a particular process,
+	 * in which case we create probes for that process only.
+	 */
+	while (getline(&buf, &sz, f) >= 0) {
+		dev_t dev;
+		ino_t inum;
+		uint64_t offset;
+		char *dev_str, *inum_str;
+		char *start_prb = NULL;
+		pid_probespec_t psp;
+		char *uprobe_name;
+		char *decoded_name = NULL;
+
+#define UPROBE_PREFIX "p:uprobes/dt_pid_"
+		if (strncmp(buf, UPROBE_PREFIX, strlen(UPROBE_PREFIX)) != 0)
+			continue;
+
+		p = strchr(buf, ' ');
+		if (!p)
+			continue;
+
+		*p = 0;
+		uprobe_name = buf + 2;		/* shave off p: */
+		decoded_name = uprobe_decode_name(buf + strlen(UPROBE_PREFIX));
+		if (!decoded_name)
+			goto oom;
+#undef UPROBE_PREFIX
+		p = decoded_name;
+		next_p = strchr(p, '_');
+		if (!next_p)
+			goto next;
+		*next_p = 0;
+		dev_str = p;
+
+		p = ++next_p;
+		next_p = strchr(p, '_');
+		if (!next_p)
+			goto next;
+		*next_p = 0;
+		inum_str = p;
+
+		/*
+		 * Maybe there is a probe name component after this _; if so,
+		 * extract it and use it as the underlying probe's name.
+		 */
+		p = ++next_p;
+		start_prb = strchr(p, '_');
+
+		errno = 0;
+		dev = strtoul(dev_str, NULL, 16);
+		inum = strtoul(inum_str, NULL, 16);
+		offset = strtoul(p, NULL, 16);
+		if (errno != 0) {
+			dt_dprintf("dev/ino/offset invalid: %lx, %lx, %s",
+				   dev, inum, p);
+			goto next;
+		}
+
+		/*
+		 * Make the underlying probe, if not already present.
+		 */
+		memset(&psp, 0, sizeof(pid_probespec_t));
+
+		psp.pps_type = DTPPT_UNDERLYING;
+		psp.pps_mod = inum_str;
+		psp.pps_fun[0] = '\0';
+		psp.pps_prb = p;
+		psp.pps_dev = dev;
+		psp.pps_inum = inum;
+		psp.pps_addr = offset;
+		psp.pps_uprobe_name = uprobe_name;
+		psp.pps_usdt_mod = "";
+		psp.pps_usdt_prb = "";
+
+		/*
+		 * The filename is only needed when creating a spec for uprobes:
+		 * but in this case the uprobe already exists.
+		 */
+		psp.pps_fn = "";
+
+		/*
+		 * Filter out probes not related to the process of interest.
+		 */
+		if (dpr && dpr->dpr_proc) {
+			assert(MUTEX_HELD(&dpr->dpr_lock));
+			if (Pinode_to_map(dpr->dpr_proc, dev, inum) == NULL) {
+				dt_dprintf("inode -> map for %i %lx:%lx failed\n", dpr->dpr_pid, dev, inum);
+				goto next;
+			}
+		}
+
+		/*
+		 * If we have a full probe name, parse it out and use it for the
+		 * underlying probe name (and the module name of the overlying
+		 * probe).  start_prb at this point points to the underscore
+		 * before the probe name.
+		 */
+		if (start_prb) {
+			start_prb++;
+			next_p = strchr(start_prb, ':');
+			if (!next_p)
+				goto next;
+			*next_p = 0;
+			psp.pps_usdt_mod = start_prb;
+			p = ++next_p;
+
+			next_p = strchr(p, ':');
+			if (!next_p)
+				goto next;
+			*next_p = 0;
+			strcpy_safe(psp.pps_fun, sizeof(psp.pps_fun), p);
+			p = ++next_p;
+			psp.pps_usdt_prb = p;
+		}
+
+		/*
+		 * Does this match the overlying probe we are meant to be
+		 * creating?
+		 *
+		 * TODO: wildcard probes get handled here.
+		 */
+		if (pdp) {
+			if ((pdp->mod[0] == 0 || strcmp(pdp->mod, psp.pps_usdt_mod) == 0) &&
+			    (pdp->fun[0] == 0 || strcmp(pdp->fun, psp.pps_fun) == 0) &&
+			    (pdp->prb[0] == 0 || strcmp(pdp->prb, psp.pps_usdt_prb) == 0)) {
+				psp.pps_type = DTPPT_USDT;
+				psp.pps_prv = pdp->prv;
+				psp.pps_mod = psp.pps_usdt_mod;
+				psp.pps_prb = psp.pps_usdt_prb;
+			}
+			else
+				goto next;
+		}
+
+		/*
+		 * Create a probe using psp, if not already present.
+		 *
+		 * If we are creating an overlying probe, complain about it if
+		 * probe creation fails.  Otherwise, this is just an underlying
+		 * probe: we'll complain later if we use it for anything.
+		 */
+
+		if (pvp->impl->provide_pid(dtp, &psp) < 0 && pdp) {
+			dt_pid_error(dtp, pcb, dpr, D_PROC_USDT,
+				     "failed to instantiate probe %s for pid %d: %s",
+				     pdp->prb, dpr->dpr_pid, strerror(errno));
+			ret = -1;
+		}
+
+		if (pdp && dpr) {
+			/*
+			 * Put the module name in its canonical form.
+			 */
+			dt_pid_fix_mod(NULL, pdp, dtp, dpr->dpr_pid);
+		}
+
+	next:
+		free(decoded_name);
+		continue;
+	oom:
+		dt_dprintf("out of memory allocating probe name");
+		free(decoded_name);
+		free(buf);
+		return -1;
+
+	}
+	free(buf);
+
+	return ret;
+}
+
+#if 0 /* Almost certainly unnecessary in this form */
 static int
 dt_pid_usdt_mapping(void *data, const prmap_t *pmp, const char *oname)
 {
@@ -585,10 +812,8 @@ dt_pid_usdt_mapping(void *data, const prmap_t *pmp, const char *oname)
 	int fd = -1;
 
 	/*
-	 * The symbol ___SUNW_dof is for lazy-loaded DOF sections, and
-	 * __SUNW_dof is for actively-loaded DOF sections. We try to force
-	 * in both types of DOF section since the process may not yet have
-	 * run the code to instantiate these providers.
+	 * We try to force-load the DOF since the process may not yet have run
+	 * the code to instantiate these providers.
 	 */
 	for (i = 0; i < 2; i++) {
 		if (dt_Pxlookup_by_name(dpr->dpr_hdl, dpr->dpr_pid, PR_LMID_EVERY,
@@ -628,32 +853,9 @@ dt_pid_usdt_mapping(void *data, const prmap_t *pmp, const char *oname)
 
 	return 0;
 }
-
-static int
-dt_pid_create_usdt_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp,
-    dt_pcb_t *pcb, dt_proc_t *dpr)
-{
-	int ret = 0;
-
-	assert(MUTEX_HELD(&dpr->dpr_lock));
-
-	if (dt_Pobject_iter(dtp, dpr->dpr_pid, dt_pid_usdt_mapping, dpr) != 0) {
-		ret = -1;
-		dt_pid_error(dtp, pcb, dpr, D_PROC_USDT,
-		    "failed to instantiate probes for pid %d: %s",
-		    dpr->dpr_pid, strerror(errno));
-	}
-
-	/*
-	 * Put the module name in its canonical form.
-	 */
-	dt_pid_fix_mod(pdp, dtp, dpr->dpr_pid);
-
-	return ret;
-}
 #endif
 
-static pid_t
+pid_t
 dt_pid_get_pid(const dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp, dt_pcb_t *pcb,
 	       dt_proc_t *dpr)
 {
@@ -700,8 +902,7 @@ dt_pid_create_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp, dt_pcb_t *pcb)
 	snprintf(provname, sizeof(provname), PID_PRVNAME, (int)pid);
 
 	if (gmatch(provname, pdp->prv) != 0) {
-		pid = dt_proc_grab_lock(dtp, pid, DTRACE_PROC_WAITING);
-		if (pid < 0) {
+		if (dt_proc_grab_lock(dtp, pid, DTRACE_PROC_WAITING) < 0) {
 			dt_pid_error(dtp, pcb, NULL, D_PROC_GRAB,
 			    "failed to grab process %d", (int)pid);
 			return -1;
@@ -711,14 +912,6 @@ dt_pid_create_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp, dt_pcb_t *pcb)
 		assert(dpr != NULL);
 
 		err = dt_pid_create_pid_probes(pdp, dtp, pcb, dpr);
-		if (err == 0) {
-			/*
-			 * Alert other retained enablings which may match
-			 * against the newly created probes.
-			 */
-			dt_ioctl(dtp, DTRACEIOC_ENABLE, NULL);
-		}
-
 		dt_proc_release_unlock(dtp, pid);
 	}
 
@@ -726,8 +919,7 @@ dt_pid_create_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp, dt_pcb_t *pcb)
 	 * If it's not strictly a pid provider, we might match a USDT provider.
 	 */
 	if (strcmp(provname, pdp->prv) != 0) {
-		pid = dt_proc_grab_lock(dtp, pid, DTRACE_PROC_WAITING);
-		if (pid < 0) {
+		if (dt_proc_grab_lock(dtp, pid, DTRACE_PROC_WAITING) < 0) {
 			dt_pid_error(dtp, pcb, NULL, D_PROC_GRAB,
 			    "failed to grab process %d", (int)pid);
 			return -1;
@@ -736,16 +928,16 @@ dt_pid_create_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp, dt_pcb_t *pcb)
 		dpr = dt_proc_lookup(dtp, pid);
 		assert(dpr != NULL);
 
-#ifdef FIXME
 		if (!dpr->dpr_usdt) {
-			err = dt_pid_create_usdt_probes(pdp, dtp, pcb, dpr);
+			err = dt_pid_create_usdt_probes(dtp, dpr, pdp, pcb);
 			dpr->dpr_usdt = B_TRUE;
 		}
-#endif
 
 		dt_proc_release_unlock(dtp, pid);
 	}
 
+	/* (USDT systemwide probing goes here.)  */
+
 	return err ? -1 : 0;
 }
 
@@ -756,7 +948,7 @@ dt_pid_create_probes_module(dtrace_hdl_t *dtp, dt_proc_t *dpr)
 	dt_stmt_t *stp;
 	dtrace_probedesc_t *pdp;
 	pid_t pid;
-	int ret = 0, found = B_FALSE;
+	int ret = 0;
 	char provname[DTRACE_PROVNAMELEN];
 
 	snprintf(provname, sizeof(provname), "pid%d", (int)dpr->dpr_pid);
@@ -772,8 +964,6 @@ dt_pid_create_probes_module(dtrace_hdl_t *dtp, dt_proc_t *dpr)
 			if (pid != dpr->dpr_pid)
 				continue;
 
-			found = B_TRUE;
-
 			pd = *pdp;
 			pd.fun = strdup(pd.fun);	/* we may change it */
 
@@ -781,26 +971,22 @@ dt_pid_create_probes_module(dtrace_hdl_t *dtp, dt_proc_t *dpr)
 			    dt_pid_create_pid_probes(&pd, dtp, NULL, dpr) != 0)
 				ret = 1;
 
-#ifdef FIXME
 			/*
 			 * If it's not strictly a pid provider, we might match
 			 * a USDT provider.
 			 */
 			if (strcmp(provname, pdp->prv) != 0 &&
-			    dt_pid_create_usdt_probes(&pd, dtp, NULL, dpr) != 0)
+			    dt_pid_create_usdt_probes(dtp, dpr, pdp, NULL) < 0)
 				ret = 1;
-#endif
 
 			free((char *)pd.fun);
 		}
 	}
 
 	/*
-	 * Give DTrace a shot to the ribs to get it to check
-	 * out the newly created probes.
+	 * XXX systemwide: rescan for new probes here?  We have to do it
+	 * at some point, but when?
 	 */
-	if (found)
-		dt_ioctl(dtp, DTRACEIOC_ENABLE, NULL);
 
 	return ret;
 }
diff --git a/libdtrace/dt_pid.h b/libdtrace/dt_pid.h
index b5ba9598a425..d82e85c1aabc 100644
--- a/libdtrace/dt_pid.h
+++ b/libdtrace/dt_pid.h
@@ -1,6 +1,6 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2022, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -16,14 +16,13 @@
 extern "C" {
 #endif
 
-#define	DT_PROC_ERR	(-1)
-#define	DT_PROC_ALIGN	(-2)
-
 #define PID_PRVNAME	"pid%d"
 
 extern int dt_pid_create_probes(dtrace_probedesc_t *, dtrace_hdl_t *,
-				dt_pcb_t *pcb);
+				dt_pcb_t *);
 extern int dt_pid_create_probes_module(dtrace_hdl_t *, dt_proc_t *);
+extern pid_t dt_pid_get_pid(const dtrace_probedesc_t *, dtrace_hdl_t *, dt_pcb_t *,
+			    dt_proc_t *);
 
 #ifdef	__cplusplus
 }
diff --git a/libdtrace/dt_prov_pid.c b/libdtrace/dt_prov_pid.c
index f747f49741e3..a986e91490f6 100644
--- a/libdtrace/dt_prov_pid.c
+++ b/libdtrace/dt_prov_pid.c
@@ -6,6 +6,7 @@
  *
  * The PID provider for DTrace.
  */
+#include <sys/types.h>
 #include <assert.h>
 #include <errno.h>
 #include <string.h>
@@ -18,22 +19,29 @@
 #include "dt_provider.h"
 #include "dt_probe.h"
 #include "dt_pid.h"
+#include "dt_string.h"
+#include "uprobes.h"
 
 static const char		prvname[] = "pid";
 
 #define UPROBE_EVENTS		TRACEFS "uprobe_events"
 
-#define PID_GROUP_FMT		GROUP_FMT "_%lx"
-#define PID_GROUP_DATA		GROUP_DATA, pp->ino
-
 typedef struct pid_probe {
-	ino_t		ino;
+	dev_t		dev;
+	ino_t		inum;
 	char		*fn;
 	uint64_t	off;
 	tp_probe_t	*tp;
 	dt_list_t	probes;
+	char		*uprobe_name;
+	int		dtrace_created;
 } pid_probe_t;
 
+typedef struct pid_overlying_probe {
+	dt_list_t	list;			/* forward/back pointers */
+	dt_probe_t	*probe;			/* the probe in question */
+} pid_overlying_probe_t;
+
 static const dtrace_pattr_t	pattr = {
 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
@@ -47,37 +55,134 @@ dt_provimpl_t	dt_pid_proc;
 static int populate(dtrace_hdl_t *dtp)
 {
 	dt_provider_create(dtp, prvname, &dt_pid, &pattr);
+	/* XXX systemwide probing: dt_pid_create_usdt_probes(dtp, NULL, NULL, NULL); */
 	return 0;
 }
 
 static void probe_destroy(dtrace_hdl_t *dtp, void *datap)
 {
-	pid_probe_t	*pp = datap;
-	tp_probe_t	*tpp = pp->tp;
+	pid_probe_t		*pp = datap;
+	tp_probe_t		*tpp = pp->tp;
+	pid_overlying_probe_t	*pop, *pop_next;
 
 	dt_tp_destroy(dtp, tpp);
 	dt_free(dtp, pp->fn);
+	dt_free(dtp, pp->uprobe_name);
+	for (pop = dt_list_next(&pp->probes); pop != NULL;
+	     pop = pop_next) {
+		pop_next = dt_list_next(pop);
+		dt_free(dtp, pop);
+	}
 	dt_free(dtp, pp);
 }
 
+/*
+ * Look up or create an underlying (real) probe, corresponding directly to a
+ * uprobe.  Since multiple pid and USDT probes may all map onto the same
+ * underlying probe, we may already have one in the system.
+ *
+ * If not found, we create a new probe.
+ */
+static dt_probe_t *provide_pid_underlying(dtrace_hdl_t *dtp,
+					  const pid_probespec_t *psp)
+{
+	dtrace_probedesc_t	pd;
+	dt_probe_t		*prp;
+	pid_probe_t		*pp;
+
+	/*
+	 * The module for these probes is the mapping inode number, in hex
+	 * (filled in by the caller): the function is unset, and the probe name
+	 * is the offset.  (This is amenable to change if it turns out to be
+	 * inconvenient: but it's more or less arbitrary anyway.  It just needs
+	 * to be something that is determinable both for pid and USDT probes.)
+	 *
+	 * XXX does this mean we lump together all probes with a given offset
+	 * when doing wildcarding?  This is hardly ideal.  Do we want underlying
+	 * probes to be wildcardable at all?  If we do, how should we structure
+	 * this stuff?
+	 */
+
+	pd.id = DTRACE_IDNONE;
+	pd.prv = prvname;
+	pd.mod = psp->pps_mod;
+	pd.fun = "";
+	pd.prb = psp->pps_prb;
+	prp = dt_probe_lookup(dtp, &pd);
+	if (prp == NULL) {
+		dt_provider_t *pvp;
+
+		/* Get the pid provider. */
+		pvp = dt_provider_lookup(dtp, prvname);
+		if (pvp == NULL)
+			return NULL;
+
+		/* Set up the pid probe data. */
+		pp = dt_zalloc(dtp, sizeof(pid_probe_t));
+		if (pp == NULL)
+			return NULL;
+
+		if (psp->pps_uprobe_name) {
+			pp->uprobe_name = strdup(psp->pps_uprobe_name);
+			if (pp->uprobe_name == NULL)
+				goto fail;
+		}
+
+		pp->dev = psp->pps_dev;
+		pp->inum = psp->pps_inum;
+		pp->fn = strdup(psp->pps_fn);
+		pp->off = psp->pps_addr;
+		pp->tp = dt_tp_alloc(dtp);
+		if (pp->tp == NULL)
+			goto fail;
+
+		prp = dt_probe_insert(dtp, pvp, prvname, pd.mod, pp->fn,
+				      pd.prb, pp);
+		if (prp == NULL)
+			goto fail;
+	}
+
+	return prp;
+fail:
+	probe_destroy(dtp, pp);
+	free(pp->uprobe_name);
+	return NULL;
+}
+
 static int provide_pid(dtrace_hdl_t *dtp, const pid_probespec_t *psp)
 {
 	char			prv[DTRACE_PROVNAMELEN];
 	char			mod[DTRACE_MODNAMELEN];
 	char			prb[DTRACE_NAMELEN];
-	dt_provider_t		*pidpvp;
+	char			underlying_prb[DTRACE_NAMELEN];
 	dt_provider_t		*pvp;
 	dtrace_probedesc_t	pd;
 	pid_probe_t		*pp;
-	dt_probe_t		*prp;
-	uint64_t		off = psp->pps_pc - psp->pps_vaddr;
+	dt_probe_t		*prp, *uprp;
+	uint64_t		off = psp->pps_addr;
+	pid_probespec_t		pp_psp;
+	pid_overlying_probe_t	*pop;
+
+	if (psp->pps_type == DTPPT_UNDERLYING) {
+		if (provide_pid_underlying(dtp, psp))
+			return 0;
+		else
+			return -1;
+	}
 
 	/*
-	 * First check whether this pid probe already exists.  If so, there is
-	 * nothing left to do.
+	 * First check whether this probe already exists.  If so, there is
+	 * nothing left to do other than to make sure this probe exists
+	 * in the underlying probe's list of probes.
 	 */
-	snprintf(prv, sizeof(prv), PID_PRVNAME, psp->pps_pid);
+	if (psp->pps_type != DTPPT_USDT)
+		snprintf(prv, sizeof(prv), PID_PRVNAME, psp->pps_pid);
+	else
+		strcpy_safe(prv, sizeof(prv), psp->pps_prv);
 
+	snprintf(underlying_prb, sizeof(underlying_prb), "%lx", off);
+
+	strcpy(mod, psp->pps_mod);
 	switch (psp->pps_type) {
 	case DTPPT_ENTRY:
 		strncpy(prb, "entry", sizeof(prb));
@@ -86,92 +191,83 @@ static int provide_pid(dtrace_hdl_t *dtp, const pid_probespec_t *psp)
 		strncpy(prb, "return", sizeof(prb));
 		break;
 	case DTPPT_OFFSETS:
-		snprintf(prb, sizeof(prb), "%lx", off);
+		strcpy(prb, underlying_prb);
+		break;
+	case DTPPT_USDT:
+		strcpy(prb, psp->pps_usdt_prb);
+		strcpy(mod, psp->pps_usdt_mod);
 		break;
 	default:
-		return 0;
+		dt_dprintf("pid: unknown probe type %i\n", psp->pps_type);
+		return -1;
 	}
 
 	pd.id = DTRACE_IDNONE;
 	pd.prv = prv;
-	pd.mod = psp->pps_mod;
+	pd.mod = mod;
 	pd.fun = psp->pps_fun;
 	pd.prb = prb;
 
-	prp = dt_probe_lookup(dtp, &pd);
-	if (prp != NULL)
-		return 1;		/* probe found */
-
-	/* Get the main (real) pid provider. */
-	pidpvp = dt_provider_lookup(dtp, prvname);
-	if (pidpvp == NULL)
-		return 0;
-
 	/* Get (or create) the provider for the PID of the probe. */
 	pvp = dt_provider_lookup(dtp, prv);
 	if (pvp == NULL) {
 		pvp = dt_provider_create(dtp, prv, &dt_pid_proc, &pattr);
 		if (pvp == NULL)
-			return 0;
+			return -1;
 	}
 
 	/* Mark the provider as a PID provider. */
 	pvp->pv_flags |= DT_PROVIDER_PID;
 
-	/*
-	 * Fill in the probe description for the main (real) probe.  The
-	 * module is the inode number (in hex), the function name is as
-	 * specified for the pid probe, and the probe name is "entry",
-	 * "return", or the offset into the function (in hex).
-	 */
-	snprintf(mod, sizeof(mod), "%lx", psp->pps_ino);
+	snprintf(mod, sizeof(mod), "%lx", psp->pps_inum);
+
+	/* Get (or create) the underlying probe.  */
+	memcpy(&pp_psp, psp, sizeof(pid_probespec_t));
+	pp_psp.pps_type = DTPPT_UNDERLYING;
+	pp_psp.pps_mod = mod;
+	pp_psp.pps_prb = underlying_prb;
+	uprp = provide_pid_underlying(dtp, &pp_psp);
+
+	if (uprp == NULL)
+		return -1;
+
+	pp = uprp->prv_data;
 
-	/*
-	 * Try to lookup the main (real) probe.  Since multiple pid probes may
-	 * all map onto the same underlying main (real) probe, we may already
-	 * have one in the system.
-	 *
-	 * If not found, we create a new probe.
-	 */
-	pd.id = DTRACE_IDNONE;
-	pd.prv = prvname;
-	pd.mod = mod;
-	pd.fun = psp->pps_fun;
-	pd.prb = prb;
 	prp = dt_probe_lookup(dtp, &pd);
-	if (prp == NULL) {
-		/* Set up the pid probe data. */
-		pp = dt_zalloc(dtp, sizeof(pid_probe_t));
-		if (pp == NULL)
-			return 0;
+	if (prp != NULL) {
+		/*
+		 * Probe already exists.  If it's already in the underlying
+		 * probe's probe list, there is nothing left to do.
+		 */
+		for (pop = dt_list_next(&pp->probes); pop != NULL;
+		     pop = dt_list_next(pop)) {
+			if (pop->probe == prp)
+				return 0;
+		}
+	}
 
-		pp->ino = psp->pps_ino;
-		pp->fn = strdup(psp->pps_fn);
-		pp->off = off;
-		pp->tp = dt_tp_alloc(dtp);
-		if (pp->tp == NULL)
-			goto fail;
+	pop = dt_zalloc(dtp, sizeof(pid_overlying_probe_t));
+	if (pop == NULL)
+		return -1;
 
-		prp = dt_probe_insert(dtp, pidpvp, prvname, mod, psp->pps_fun,
-				      prb, pp);
-		if (prp == NULL)
-			goto fail;
-	} else
-		pp = prp->prv_data;
+	/* Add the pid probe, if we need to. */
 
-	/* Try to add the pid probe. */
-	prp = dt_probe_insert(dtp, pvp, prv, psp->pps_mod, psp->pps_fun, prb,
-			      prp);
-	if (prp == NULL)
-		goto fail;
+	if (prp ==  NULL)
+		prp = dt_probe_insert(dtp, pvp, prv, psp->pps_mod, psp->pps_fun,
+		    prb, uprp);
 
-	/* Add the pid probe to the list of probes for the main (real) probe. */
-	dt_list_append(&pp->probes, prp);
+	if (prp == NULL) {
+		dt_free(dtp, pop);
+		return -1;
+	}
 
-	return 1;
+	pop->probe = prp;
+
+	/*
+	 * Add the pid probe to the list of probes for the underlying probe.
+	 */
+	dt_list_append(&pp->probes, pop);
 
-fail:
-	probe_destroy(dtp, pp);
 	return 0;
 }
 
@@ -179,8 +275,19 @@ static void enable(dtrace_hdl_t *dtp, dt_probe_t *prp)
 {
 	assert(prp->prov->impl == &dt_pid_proc);
 
-	/* We need to enable the main (real) probe (if not enabled yet). */
+	/*
+	 * We need to enable the main (real) probe (if not enabled yet).
+	 */
 	dt_probe_enable(dtp, (dt_probe_t *)prp->prv_data);
+
+
+	/*
+	 * Finally, ensure we're in the list of enablings as well.
+	 * (This ensures that, among other things, the probes map
+	 * gains entries for us.)
+	 */
+	if (!dt_in_list(&dtp->dt_enablings, prp))
+		dt_list_append(&dtp->dt_enablings, prp);
 }
 
 /*
@@ -196,11 +303,11 @@ static void enable(dtrace_hdl_t *dtp, dt_probe_t *prp)
  */
 static void trampoline(dt_pcb_t *pcb)
 {
-	dt_irlist_t		*dlp = &pcb->pcb_ir;
-	const dt_probe_t	*prp = pcb->pcb_probe;
-	const dt_probe_t	*pprp;
-	const pid_probe_t	*pp = prp->prv_data;
-	uint_t			lbl_exit = pcb->pcb_exitlbl;
+	dt_irlist_t			*dlp = &pcb->pcb_ir;
+	const dt_probe_t		*prp = pcb->pcb_probe;
+	const pid_probe_t		*pp = prp->prv_data;
+	const pid_overlying_probe_t	*pop;
+	uint_t				lbl_exit = pcb->pcb_exitlbl;
 
 	dt_cg_tramp_prologue(pcb);
 
@@ -241,12 +348,16 @@ static void trampoline(dt_pcb_t *pcb)
 	 * are no assignments to %r0 possible in between the conditional
 	 * statements.
 	 */
-	for (pprp = dt_list_next(&pp->probes); pprp != NULL;
-	     pprp = dt_list_next(pprp)) {
-		uint_t		lbl_next = dt_irlist_label(dlp);
-		pid_t		pid = strtoul(pprp->desc->prv + 3, NULL, 10);
-		char		pn[DTRACE_FULLNAMELEN + 1];
-		dt_ident_t	*idp;
+	for (pop = dt_list_next(&pp->probes); pop != NULL;
+	     pop = dt_list_next(pop)) {
+		const dt_probe_t	*pprp = pop->probe;
+		uint_t			lbl_next = dt_irlist_label(dlp);
+		pid_t			pid;
+		char			pn[DTRACE_FULLNAMELEN + 1];
+		dt_ident_t		*idp;
+
+		pid = dt_pid_get_pid(pprp->desc, pcb->pcb_hdl, pcb, NULL);
+		assert(pid != -1);
 
 		snprintf(pn, DTRACE_FULLNAMELEN, "%s:%s:%s:%s",
 			 pprp->desc->prv, pprp->desc->mod, pprp->desc->fun,
@@ -255,7 +366,7 @@ static void trampoline(dt_pcb_t *pcb)
 		assert(idp != NULL);
 		/*
 		 * Check whether this pid-provider probe serves the current
-		 * process.  This loop creates a sequence
+		 * process, and emit a sequence of clauses for it when it does.
 		 */
 		emit(dlp,  BPF_BRANCH_IMM(BPF_JNE, BPF_REG_0, pid, lbl_next));
 		emite(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_7, DMST_PRID, pprp->desc->id), idp);
@@ -276,35 +387,28 @@ static int attach(dtrace_hdl_t *dtp, const dt_probe_t *prp, int bpf_fd)
 	if (!dt_tp_is_created(tpp)) {
 		char	*fn;
 		FILE	*f;
-		size_t	len;
-		int	fd, rc = -1;
-
-		/* add the uprobe */
-		fd = open(UPROBE_EVENTS, O_WRONLY | O_APPEND);
-		if (fd != -1) {
-			rc = dprintf(fd,
-				     "%c:" PID_GROUP_FMT "/%s_%s %s:0x%lx\n",
-				     prp->desc->prb[0] == 'e' ? 'p' : 'r',
-				     PID_GROUP_DATA, prp->desc->fun,
-				     prp->desc->prb, pp->fn, pp->off);
-			close(fd);
+		char	*prb;
+		char	*spec;
+		int	rc = -1;
+
+		if (pp->uprobe_name == NULL) {
+			if (asprintf(&spec, "%s:0x%lx", pp->fn ? pp->fn : "", pp->off) < 0)
+				return -ENOENT;
+
+			prb = uprobe_create(pp->dev, pp->inum, pp->off, spec, NULL,
+			    prp->desc->prb[0] == 'r');
+			free(spec);
+			if (prb == NULL)
+				return -ENOENT;
+			pp->uprobe_name = prb;
+			pp->dtrace_created = 1;
 		}
-		if (rc == -1)
-			return -ENOENT;
 
 		/* open format file */
-		len = snprintf(NULL, 0, "%s" PID_GROUP_FMT "/%s_%s/format",
-			       EVENTSFS, PID_GROUP_DATA, prp->desc->fun,
-			       prp->desc->prb) + 1;
-		fn = dt_alloc(dtp, len);
-		if (fn == NULL)
+		if (asprintf(&fn, "%s%s/format", EVENTSFS, pp->uprobe_name) < 0)
 			return -ENOENT;
-
-		snprintf(fn, len, "%s" PID_GROUP_FMT "/%s_%s/format",
-			 EVENTSFS, PID_GROUP_DATA, prp->desc->fun,
-			 prp->desc->prb);
 		f = fopen(fn, "r");
-		dt_free(dtp, fn);
+		free(fn);
 		if (f == NULL)
 			return -ENOENT;
 
@@ -334,10 +438,10 @@ static int probe_info(dtrace_hdl_t *dtp, const dt_probe_t *prp,
  *
  * If there is an event FD, we close it.
  *
- * We also try to remove any uprobe that may have been created for the probe.
- * This is harmless for probes that didn't get created.  If the removal fails
- * for some reason we are out of luck - fortunately it is not harmful to the
- * system as a whole.
+ * We also try to remove any uprobe that may have been created for the probe
+ * (but only if we created it, not if dtprobed did).  This is harmless for
+ * probes that didn't get created.  If the removal fails for some reason we are
+ * out of luck - fortunately it is not harmful to the system as a whole.
  */
 static void detach(dtrace_hdl_t *dtp, const dt_probe_t *prp)
 {
@@ -350,15 +454,20 @@ static void detach(dtrace_hdl_t *dtp, const dt_probe_t *prp)
 
 	dt_tp_detach(dtp, tpp);
 
+	if (!pp->dtrace_created)
+		return;
+
 	fd = open(UPROBE_EVENTS, O_WRONLY | O_APPEND);
 	if (fd == -1)
 		return;
 
-	dprintf(fd, "-:" PID_GROUP_FMT "/%s_%s\n", PID_GROUP_DATA,
-		prp->desc->fun, prp->desc->prb);
+	dprintf(fd, "-:%s", pp->uprobe_name);
 	close(fd);
 }
 
+/*
+ * Used for underlying probes.
+ */
 dt_provimpl_t	dt_pid = {
 	.name		= prvname,
 	.prog_type	= BPF_PROG_TYPE_KPROBE,
@@ -371,10 +480,12 @@ dt_provimpl_t	dt_pid = {
 	.probe_destroy	= &probe_destroy,
 };
 
+/*
+ * Used for pid probes for specific processes.
+ */
 dt_provimpl_t	dt_pid_proc = {
 	.name		= prvname,
 	.prog_type	= BPF_PROG_TYPE_KPROBE,
 	.provide_pid	= &provide_pid,
-	.enable		= &enable,
-	.trampoline	= &trampoline,
+	.enable		= &enable
 };
-- 
2.37.1.265.g363c192786.dirty




More information about the DTrace-devel mailing list