[DTrace-devel] [PATCH v2 4/7] uprobe: Implement PID-specific uprobes

Kris Van Hees kris.van.hees at oracle.com
Tue Nov 18 22:14:27 UTC 2025


On Tue, Nov 18, 2025 at 04:47:56PM -0500, Eugene Loh wrote:
> Reviewed-by: Eugene Loh <eugene.loh at oracle.com>
> 
> If you're open to some little nits:
> 
> * I think it's a good idea to have a .r results file that just has "success"
> in it.  (I think) there have been cases in the past where a test script
> stopped partway and yet passed since the script returned a 0.  So the
> one-line file helps ensure that the script reaches its normal end.
> 
> * The test has a line:
> 
>     # Confirm that dtrace is still running (otherwise trigger run forever).

Thanks.  Done both.

> Now that it's a single trigger, s/run/runs/.
> 
> On 11/18/25 11:38, Kris Van Hees via DTrace-devel wrote:
> 
> > The mechanism to create uprobes by writing to $TRACEFS/uprobe_events
> > caused probes to be placed in the dev/inode based mapping.  This means
> > that all tasks that use that mapping are subject to the probes firing.
> > 
> > The kernel supports placing uprobes for a specific task (by PID), which
> > avoids impacting all other tasks that share the same code but are not
> > the target of the tracing.
> > 
> > This new mechanism places uprobes using the perf_event_open interface.
> > Perf event attribute configuration data is read from
> > /sys/bus/event_source/devices/uprobe/ as needed (and cached to ease
> > repeated use).  Underlying probes are now organized by PID-specific
> > providers (uprobe$PID), and attach/detach no longer depends on the
> > generic tracepoint support.
> > 
> > The usdt_prids BPF map is no longer needed because USDT BPF programs
> > are now task-specific.  The trampoline generation for USDT probes
> > discovered after tracing started can now perform a simple loop over
> > all compiled clauses, adding those that match the probe description
> > to the program.
> > 
> > Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> > ---
> >   libdtrace/dt_bpf.c                      |  14 +-
> >   libdtrace/dt_bpf_maps.h                 |   9 -
> >   libdtrace/dt_dlibs.c                    |   1 -
> >   libdtrace/dt_impl.h                     |   1 -
> >   libdtrace/dt_pid.c                      |   5 +-
> >   libdtrace/dt_probe.c                    |  28 +
> >   libdtrace/dt_probe.h                    |   1 +
> >   libdtrace/dt_program.c                  |  15 -
> >   libdtrace/dt_program.h                  |   3 -
> >   libdtrace/dt_prov_uprobe.c              | 724 ++++++++----------------
> >   libdtrace/dtrace.h                      |   2 -
> >   test/unittest/usdt/tst.defer-Z-basic.sh | 102 ++++
> >   12 files changed, 362 insertions(+), 543 deletions(-)
> >   create mode 100755 test/unittest/usdt/tst.defer-Z-basic.sh
> > 
> > diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
> > index 28eb890e..0a57b7d2 100644
> > --- a/libdtrace/dt_bpf.c
> > +++ b/libdtrace/dt_bpf.c
> > @@ -974,19 +974,12 @@ gmap_create_probes(dtrace_hdl_t *dtp)
> >   }
> >   /*
> > - * Create the 'usdt_names' and 'usdt_prids' BPF maps.
> > + * Create the 'usdt_names' BPF map.
> >    *
> >    * 'usdt_names':  a global hash map indexed by PRID and whose value has probe
> >    *                name elements at fixed offsets within the value.  This map
> >    *                is used for get_bvar() to look up probe name elements for
> >    *                any prid that was created after dtrace_go().
> > - *
> > - * 'usdt_prids':  a global hash map indexed by (pid, underlying probe ID).
> > - *                The value is a probe ID for the overlying USDT probe and
> > - *                a bit mask indicating which clauses to execute for this pid.
> > - *
> > - *                For a given (pid, PRID) key, there can be at most one
> > - *                overlying USDT probe.
> >    */
> >   static int
> >   gmap_create_usdt(dtrace_hdl_t *dtp)
> > @@ -998,11 +991,6 @@ gmap_create_usdt(dtrace_hdl_t *dtp)
> >   	if (dtp->dt_usdt_namesmap_fd == -1)
> >   		return -1;
> > -	dtp->dt_usdt_pridsmap_fd = create_gmap(dtp, "usdt_prids", BPF_MAP_TYPE_HASH,
> > -	    sizeof(usdt_prids_map_key_t), sizeof(usdt_prids_map_val_t), nusdtprobes);
> > -	if (dtp->dt_usdt_pridsmap_fd == -1)
> > -		return -1;
> > -
> >   	dtp->dt_nprobes = dtp->dt_probe_id;
> >   	return 0;
> > diff --git a/libdtrace/dt_bpf_maps.h b/libdtrace/dt_bpf_maps.h
> > index 884dc398..2f93c2b3 100644
> > --- a/libdtrace/dt_bpf_maps.h
> > +++ b/libdtrace/dt_bpf_maps.h
> > @@ -42,15 +42,6 @@ struct dt_bpf_cpuinfo {
> >   	uint64_t	lockstat_stime;	/* lockstat: spin time */
> >   };
> > -typedef struct usdt_prids_map_key {
> > -	int		pid;		/* should be pid_t, unistd.h? */
> > -	uint32_t	uprid;		/* should be dtrace_id_t, sys/dtrace_types.h */
> > -} usdt_prids_map_key_t;
> > -typedef struct usdt_prids_map_val {
> > -	uint32_t	prid;		/* should be dtrace_id_t, sys/dtrace_types.h */
> > -	long long	mask;
> > -} usdt_prids_map_val_t;
> > -
> >   #ifdef  __cplusplus
> >   }
> >   #endif
> > diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
> > index 21df22a8..161e2106 100644
> > --- a/libdtrace/dt_dlibs.c
> > +++ b/libdtrace/dt_dlibs.c
> > @@ -72,7 +72,6 @@ static const dt_ident_t		dt_bpf_symbols[] = {
> >   	DT_BPF_SYMBOL(strtab, DT_IDENT_PTR),
> >   	DT_BPF_SYMBOL(tuples, DT_IDENT_PTR),
> >   	DT_BPF_SYMBOL(usdt_names, DT_IDENT_PTR),
> > -	DT_BPF_SYMBOL(usdt_prids, DT_IDENT_PTR),
> >   	/* BPF internal identifiers */
> >   	DT_BPF_SYMBOL_ID(PRID, DT_IDENT_SCALAR, DT_CONST_PRID),
> > diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
> > index 3b0b2358..5282efbd 100644
> > --- a/libdtrace/dt_impl.h
> > +++ b/libdtrace/dt_impl.h
> > @@ -400,7 +400,6 @@ struct dtrace_hdl {
> >   	int *dt_aggmap_ids;	/* ids for the 'aggN' BPF maps */
> >   	int dt_genmap_fd;	/* file descriptor for the 'agggen' BPF map */
> >   	int dt_cpumap_fd;	/* file descriptor for the 'cpuinfo' BPF map */
> > -	int dt_usdt_pridsmap_fd; /* file descriptor for the 'usdt_prids' BPF map */
> >   	int dt_usdt_namesmap_fd; /* file descriptor for the 'usdt_names' BPF map */
> >   	dtrace_handle_err_f *dt_errhdlr; /* error handler, if any */
> >   	void *dt_errarg;	/* error handler argument */
> > diff --git a/libdtrace/dt_pid.c b/libdtrace/dt_pid.c
> > index 7d6cfb4d..08133466 100644
> > --- a/libdtrace/dt_pid.c
> > +++ b/libdtrace/dt_pid.c
> > @@ -1078,8 +1078,9 @@ dt_pid_create_usdt_probes_proc(dtrace_hdl_t *dtp, pid_t pid, dt_proc_t *dpr,
> >   			if (tp->tracepoint.args[0] != 0)
> >   				psp.pps_sargv = tp->tracepoint.args;
> > -			dt_dprintf("providing %s:%s:%s:%s for pid %d\n", psp.pps_prv,
> > -				   psp.pps_mod, psp.pps_fun, psp.pps_prb, psp.pps_pid);
> > +			dt_dprintf("providing %s:%s:%s:%s for pid %d @ %lx\n",
> > +				   psp.pps_prv, psp.pps_mod, psp.pps_fun,
> > +				   psp.pps_prb, psp.pps_pid, psp.pps_off);
> >   			if (pvp->impl->provide_probe(dtp, &psp) < 0) {
> >   				dt_pid_error(dtp, pcb, dpr, D_PROC_USDT,
> >   					     "failed to instantiate %sprobe %s for pid %d: %s",
> > diff --git a/libdtrace/dt_probe.c b/libdtrace/dt_probe.c
> > index 28a1133f..65316f51 100644
> > --- a/libdtrace/dt_probe.c
> > +++ b/libdtrace/dt_probe.c
> > @@ -1213,6 +1213,34 @@ dt_probe_add_stmt(dtrace_hdl_t *dtp, dt_probe_t *prp, dtrace_stmtdesc_t *sdp)
> >   	return 0;
> >   }
> > +int
> > +dt_probe_add_stmt_matchall(dtrace_hdl_t *dtp, dt_probe_t *prp)
> > +{
> > +	int	i, rc = 0;
> > +
> > +	for (i = 0; i < dtp->dt_stmt_nextid; i++) {
> > +		dtrace_stmtdesc_t	*sdp = dtp->dt_stmts[i];
> > +
> > +		if (sdp == NULL)
> > +			continue;
> > +
> > +		if (dt_gmatch(prp->desc->prv,
> > +			      sdp->dtsd_ecbdesc->dted_probe.prv) &&
> > +		    dt_gmatch(prp->desc->mod,
> > +			      sdp->dtsd_ecbdesc->dted_probe.mod) &&
> > +		    dt_gmatch(prp->desc->fun,
> > +			      sdp->dtsd_ecbdesc->dted_probe.fun) &&
> > +		    dt_gmatch(prp->desc->prb,
> > +			      sdp->dtsd_ecbdesc->dted_probe.prb)) {
> > +			rc = dt_probe_add_stmt(dtp, prp, sdp);
> > +			if (rc < 0)
> > +				break;
> > +		}
> > +	}
> > +
> > +	return rc;
> > +}
> > +
> >   int
> >   dt_probe_stmt_iter(dtrace_hdl_t *dtp, const dt_probe_t *prp, dt_stmt_f *func, void *arg)
> >   {
> > diff --git a/libdtrace/dt_probe.h b/libdtrace/dt_probe.h
> > index fe9babf3..54053cd3 100644
> > --- a/libdtrace/dt_probe.h
> > +++ b/libdtrace/dt_probe.h
> > @@ -91,6 +91,7 @@ extern int dt_probe_iter(dtrace_hdl_t *dtp, const dtrace_probedesc_t *pdp,
> >   extern int dt_probe_add_stmt(dtrace_hdl_t *dtp, dt_probe_t *prp,
> >   			     dtrace_stmtdesc_t *sdp);
> > +extern int dt_probe_add_stmt_matchall(dtrace_hdl_t *dtp, dt_probe_t *prp);
> >   typedef int dt_stmt_f(dtrace_hdl_t *dtp, dtrace_stmtdesc_t *sdp, void *arg);
> >   extern int dt_probe_stmt_iter(dtrace_hdl_t *dtp, const dt_probe_t *prp,
> >   			      dt_stmt_f *func, void *arg);
> > diff --git a/libdtrace/dt_program.c b/libdtrace/dt_program.c
> > index a2d1918a..38feefef 100644
> > --- a/libdtrace/dt_program.c
> > +++ b/libdtrace/dt_program.c
> > @@ -20,21 +20,6 @@
> >   #include <dt_probe.h>
> >   #include <dt_bpf.h>
> > -int
> > -dt_stmt_clsflag_set(dtrace_stmtdesc_t *stp, int flags) {
> > -	stp->dtsd_clauseflags |= flags;
> > -
> > -	return 0;
> > -}
> > -
> > -int
> > -dt_stmt_clsflag_test(dtrace_stmtdesc_t *stp, int flags) {
> > -	if (stp->dtsd_clauseflags & flags)
> > -		return 1;
> > -
> > -	return 0;
> > -}
> > -
> >   dtrace_prog_t *
> >   dt_program_create(dtrace_hdl_t *dtp)
> >   {
> > diff --git a/libdtrace/dt_program.h b/libdtrace/dt_program.h
> > index 29450d99..70cea993 100644
> > --- a/libdtrace/dt_program.h
> > +++ b/libdtrace/dt_program.h
> > @@ -28,9 +28,6 @@ struct dtrace_prog {
> >   	uint8_t dp_dofversion;	/* DOF version this program requires */
> >   };
> > -extern int dt_stmt_clsflag_set(dtrace_stmtdesc_t *stp, int flags);
> > -extern int dt_stmt_clsflag_test(dtrace_stmtdesc_t *stp, int flags);
> > -
> >   extern dtrace_prog_t *dt_program_create(dtrace_hdl_t *);
> >   extern void dt_program_destroy(dtrace_hdl_t *, dtrace_prog_t *);
> > diff --git a/libdtrace/dt_prov_uprobe.c b/libdtrace/dt_prov_uprobe.c
> > index 6cea7f4c..e94827f2 100644
> > --- a/libdtrace/dt_prov_uprobe.c
> > +++ b/libdtrace/dt_prov_uprobe.c
> > @@ -26,6 +26,7 @@
> >    * Finally, note that upp->probes is a dt_list_t of overlying probes.
> >    */
> >   #include <sys/types.h>
> > +#include <sys/ioctl.h>
> >   #include <assert.h>
> >   #include <ctype.h>
> >   #include <errno.h>
> > @@ -278,6 +279,7 @@ get_asm_reg(dt_provider_t *pvp, const char *name)
> >   #define PP_IS_MAPPED	0x10
> >   typedef struct dt_uprobe {
> > +	pid_t		pid;
> >   	dev_t		dev;
> >   	ino_t		inum;
> >   	char		*fn;		/* object full file name */
> > @@ -285,7 +287,7 @@ typedef struct dt_uprobe {
> >   	uint64_t	off;
> >   	uint64_t	refcntr_off;	/* optional reference counter offset */
> >   	int		flags;
> > -	tp_probe_t	*tp;
> > +	int		fd;		/* perf event fd (-1 if not created) */
> >   	int		argc;		/* number of args */
> >   	dt_argdesc_t	*args;		/* args array (points into argvbuf) */
> >   	char		*argvbuf;	/* arg strtab */
> > @@ -299,10 +301,11 @@ typedef struct list_probe {
> >   	dt_probe_t	*probe;
> >   } list_probe_t;
> > -typedef struct list_key {
> > -	dt_list_t		list;
> > -	usdt_prids_map_key_t	key;
> > -} list_key_t;
> > +typedef struct uprobe_data {
> > +	int	perf_type;
> > +	int	ret_flag;
> > +	int	ref_shift;
> > +} uprobe_data_t;
> >   static const dtrace_pattr_t	pattr = {
> >   { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
> > @@ -316,11 +319,71 @@ dt_provimpl_t	dt_pid;
> >   dt_provimpl_t	dt_usdt;
> >   dt_provimpl_t	dt_stapsdt;
> > +#define UPROBE_CONFIG	"/sys/bus/event_source/devices/uprobe/"
> > +#define PERF_TYPE_FILE	(UPROBE_CONFIG "type")
> > +#define RET_FLAG_FILE	(UPROBE_CONFIG "format/retprobe")
> > +#define REF_SHIFT_FILE	(UPROBE_CONFIG "format/ref_ctr_offset")
> > +
> > +static int get_perf_type()
> > +{
> > +	FILE	*f;
> > +	int	val;
> > +
> > +	f = fopen(PERF_TYPE_FILE, "r");
> > +	if (f == NULL)
> > +		return -1;
> > +	if (fscanf(f, "%d\n", &val) != 1)
> > +		val = -1;
> > +
> > +	fclose(f);
> > +	return val;
> > +}
> > +
> > +static int get_retprobe_flag()
> > +{
> > +	FILE	*f;
> > +	int	val;
> > +
> > +	f = fopen(RET_FLAG_FILE, "r");
> > +	if (f == NULL)
> > +		return -1;
> > +	if (fscanf(f, "config:%d\n", &val) == 1)
> > +		val = 1 << val;
> > +	else
> > +		val = -1;
> > +
> > +	fclose(f);
> > +	return val;
> > +}
> > +
> > +static int get_refcnt_shift()
> > +{
> > +	FILE	*f;
> > +	int	val;
> > +
> > +	f = fopen(REF_SHIFT_FILE, "r");
> > +	if (f == NULL)
> > +		return -1;
> > +	if (fscanf(f, "config:%d-%*d\n", &val) != 1)
> > +		val = -1;
> > +
> > +	fclose(f);
> > +	return val;
> > +}
> > +
> >   static int populate(dtrace_hdl_t *dtp)
> >   {
> > +	uprobe_data_t	*udp = dt_alloc(dtp, sizeof(uprobe_data_t));
> > +
> > +	udp->perf_type = -1;			/* not initialized */
> > +	udp->ret_flag = -1;			/* not initialized */
> > +	udp->ref_shift = -1;			/* not initialized */
> > +
> >   	if (dt_provider_create(dtp, dt_uprobe.name, &dt_uprobe, &pattr,
> > -			       NULL) == NULL ||
> > -	    dt_provider_create(dtp, dt_pid.name, &dt_pid, &pattr,
> > +			       udp) == NULL)
> > +		return -1;
> > +
> > +	if (dt_provider_create(dtp, dt_pid.name, &dt_pid, &pattr,
> >   			       NULL) == NULL ||
> >   	    dt_provider_create(dtp, dt_stapsdt.name, &dt_stapsdt, &pattr,
> >   			       NULL) == NULL)
> > @@ -355,9 +418,7 @@ static void free_probe_list(dtrace_hdl_t *dtp, list_probe_t *elem)
> >   static void probe_destroy_underlying(dtrace_hdl_t *dtp, void *datap)
> >   {
> >   	dt_uprobe_t	*upp = datap;
> > -	tp_probe_t	*tpp = upp->tp;
> > -	dt_tp_destroy(dtp, tpp);
> >   	free_probe_list(dtp, dt_list_next(&upp->probes));
> >   	dt_free(dtp, upp->fn);
> >   	dt_free(dtp, upp->func);
> > @@ -375,6 +436,17 @@ static void probe_destroy(dtrace_hdl_t *dtp, void *datap)
> >   	free_probe_list(dtp, datap);
> >   }
> > +static void detach(dtrace_hdl_t *dtp, const dt_probe_t *uprp)
> > +{
> > +	dt_uprobe_t	*upp = uprp->prv_data;
> > +
> > +	if (upp->fd == -1)
> > +		return;
> > +
> > +	close(upp->fd);
> > +	upp->fd = -1;
> > +}
> > +
> >   /*
> >    * Disable an overlying USDT probe.
> >    */
> > @@ -392,6 +464,7 @@ static void probe_disable(dtrace_hdl_t *dtp, dt_probe_t *prp)
> >   	/* Free up its list of underlying probes. */
> >   	while ((pup = dt_list_next(prp->prv_data)) != NULL) {
> >   		dt_list_delete(prp->prv_data, pup);
> > +		detach(dtp, pup->probe);
> >   		dt_free(dtp, pup);
> >   	}
> >   	dt_free(dtp, prp->prv_data);
> > @@ -401,182 +474,57 @@ static void probe_disable(dtrace_hdl_t *dtp, dt_probe_t *prp)
> >   /*
> >    * Clean up stale pids from among the USDT probes.
> >    */
> > -static int
> > -clean_usdt_probes(dtrace_hdl_t *dtp)
> > -{
> > -	int			fdprids = dtp->dt_usdt_pridsmap_fd;
> > -	int			fdnames = dtp->dt_usdt_namesmap_fd;
> > -	usdt_prids_map_key_t	key, nxt;
> > -	usdt_prids_map_val_t	val;
> > -	list_key_t		keys_to_delete, *elem, *elem_next;
> > -	dt_probe_t		*prp, *prp_next;
> > -
> > -	/* Initialize list of usdt_prids keys to delete. */
> > -	memset(&keys_to_delete, 0, sizeof(keys_to_delete));
> > -
> > -	/* Initialize usdt_prids key to a pid/uprid that cannot be found. */
> > -	key.pid = 0;
> > -	key.uprid = 0;
> > -
> > -	/* Loop over usdt_prids entries. */
> > -	while (dt_bpf_map_next_key(fdprids, &key, &nxt) == 0) {
> > -		memcpy(&key, &nxt, sizeof(usdt_prids_map_key_t));
> > -
> > -		if (dt_bpf_map_lookup(fdprids, &key, &val) == -1)
> > -			return dt_set_errno(dtp, EDT_BPF);
> > -
> > -		/* Check if the process is still running. */
> > -		if (!Pexists(key.pid)) {
> > -			/*
> > -			 * Delete the usdt_names entry.
> > -			 *
> > -			 * Note that a PRID might correspond to multiple
> > -			 * sites.  So, as we loop over usdt_prids entries,
> > -			 * we might delete the same usdt_names entry
> > -			 * multiple times.  That's okay.
> > -			 */
> > -			dt_bpf_map_delete(fdnames, &val.prid);
> > -
> > -			/*
> > -			 * Delete the usdt_prids entry.
> > -			 *
> > -			 * Note that we do not want to disrupt the iterator.
> > -			 * So we just add the key to a list and will walk
> > -			 * the list later for actual deletion.
> > -			 */
> > -			elem = calloc(1, sizeof(list_key_t));
> > -			elem->key.pid = key.pid;
> > -			elem->key.uprid = key.uprid;
> > -			dt_list_append((dt_list_t *)&keys_to_delete, elem);
> > -
> > -			continue;
> > -		}
> > -
> > -		/*
> > -		 * FIXME.  There might be another case, where the process
> > -		 * is still running, but some of its USDT probes are gone?
> > -		 * So maybe we have to check for the existence of one of
> > -		 *     dtrace_probedesc_t *pdp = dtp->dt_probes[val.prid]->desc;
> > -		 *     char *prv = ...pdp->prv minus the numerial part;
> > -		 *
> > -		 *     /run/dtrace/probes/$pid/$pdp->prv/$pdp->mod/$pdp->fun/$pdp->prb
> > -		 *     /run/dtrace/stash/dof-pid/$pid/0/parsed/$prv:$pdp->mod:$pdp->fun:$pdp->prb
> > -		 *     /run/dtrace/stash/dof-pid/$pid/.../parsed/$prv:$pdp->mod:$pdp->fun:$pdp->prb
> > -		 */
> > -	}
> > -
> > -	/*
> > -	 * Delete the usdt_prids keys in our list.
> > -	 */
> > -	for (elem = dt_list_next(&keys_to_delete); elem != NULL; elem = elem_next) {
> > -		elem_next = dt_list_next(elem);
> > -
> > -		dt_bpf_map_delete(fdprids, &elem->key);
> > -		free(elem);
> > -	}
> > -
> > -	/* Clean up enablings. */
> > -	for (prp = dt_list_next(&dtp->dt_enablings); prp != NULL; prp = prp_next) {
> > -		pid_t		pid;
> > -
> > -		prp_next = dt_list_next(prp);
> > -
> > -		/* Make sure it is an overlying USDT, stapsdt probe. */
> > -		if (prp->prov->impl != &dt_usdt && prp->prov->impl != &dt_stapsdt)
> > -			continue;
> > -
> > -		/* FIXME passing in NULL pcb and dpr wreaks havoc on error reporting? */
> > -		/*
> > -		 * Nick writes:
> > -		 * This is a general problem with running compiler-adjacent things outside
> > -		 * compile time. I think we should adjust dt_pid_error() so that it works
> > -		 * with NULL pcb and dpr at once, probably by using the code path for
> > -		 * pcb != NULL and augmenting it so that it passes in NULL for the region and
> > -		 * filename args and 0 for the lineno if pcb is NULL. (dt_set_errmsg can
> > -		 * already handle this case.)
> > -		 */
> > -		pid = dt_pid_get_pid(prp->desc, dtp, NULL, NULL);
> > -
> > -		if (Pexists(pid))
> > -			continue;
> > -
> > -		probe_disable(dtp, prp);
> > -	}
> > -
> > -	return 0;
> > -}
> > +typedef struct del_list {
> > +	dt_list_t	list;
> > +	dt_probe_t	*probe;
> > +} del_list_t;
> > -/*
> > - * Judge whether clause "n" could ever be called as a USDT probe
> > - * for this underlying probe.  We can pass uprp==NULL to see if
> > - * the clause can be excluded for every probe.
> > - */
> >   static int
> > -ignore_clause(dtrace_hdl_t *dtp, int n, const dt_probe_t *uprp)
> > +clean_usdt_probes(dtrace_hdl_t *dtp)
> >   {
> > -	dtrace_stmtdesc_t	*stp = dtp->dt_stmts[n];
> > -	dtrace_probedesc_t	*pdp = &stp->dtsd_ecbdesc->dted_probe;
> > +	int		fdnames = dtp->dt_usdt_namesmap_fd;
> > +	uint32_t	key, nxt;
> > +	del_list_t	dlist = { 0, };
> > +	del_list_t	*del, *ndel;
> > +	dt_probe_t	*prp;
> > -	if (stp == NULL)
> > -		return 1;
> > +	/* Initialize key to a probe id that cannot be found. */
> > +	key = DTRACE_IDNONE;
> > -	/*
> > -	 * Some clauses could never be called for a USDT probe,
> > -	 * regardless of the underlying probe uprp.  Cache this
> > -	 * status in the clause flags for dt_stmts[n].
> > -	 */
> > -	if (dt_stmt_clsflag_test(stp, DT_CLSFLAG_USDT_INCLUDE | DT_CLSFLAG_USDT_EXCLUDE) == 0) {
> > -		size_t	len = strlen(pdp->prv);
> > +	/* Loop over usdt_names entries. */
> > +	while (dt_bpf_map_next_key(fdnames, &key, &nxt) == 0) {
> > +		dtrace_probedesc_t	pd = { 0, };
> > -		/*
> > -		 * If the last char in the provider description is
> > -		 * neither '*' nor a digit, it cannot be a USDT probe.
> > -		 */
> > -		if (len > 1) {
> > -			char	lastchar = (pdp->prv[0] != '\0' ? pdp->prv[len - 1] : '*');
> > -
> > -			if (lastchar != '*' && !isdigit(lastchar)) {
> > -				dt_stmt_clsflag_set(stp, DT_CLSFLAG_USDT_EXCLUDE);
> > -				return 1;
> > -			}
> > -		}
> > +		key = nxt;
> > +		pd.id = key;
> >   		/*
> > -		 * If the provider description is "pid[0-9]*", it
> > -		 * is a pid probe, not USDT.
> > +		 * If the probe exists (as it should), and the process exists,
> > +		 * we should keep it.
> >   		 */
> > -		if (strncmp(pdp->prv, "pid", 3) == 0) {
> > -			int i, l = strlen(pdp->prv);
> > -
> > -			for (i = 3; i < l; i++)
> > -				if (!isdigit((pdp->prv[i])))
> > -					break;
> > +		prp = dt_probe_lookup(dtp, &pd);
> > +		if (prp != NULL) {
> > +			list_probe_t		*pup = prp->prv_data;
> > +			dt_uprobe_t		*upp = pup->probe->prv_data;
> > -			if (i == l) {
> > -				dt_stmt_clsflag_set(stp, DT_CLSFLAG_USDT_EXCLUDE);
> > -				return 1;
> > -			}
> > +			if (Pexists(upp->pid))
> > +				continue;
> >   		}
> > -		/* Otherwise, it is possibly a USDT probe. */
> > -		dt_stmt_clsflag_set(stp, DT_CLSFLAG_USDT_INCLUDE);
> > +		/* Add the probe to the delete list. */
> > +		del = dt_zalloc(dtp, sizeof(del_list_t));
> > +		del->probe = prp;
> > +		dt_list_append((dt_list_t *)&dlist, del);
> >   	}
> > -	if (dt_stmt_clsflag_test(stp, DT_CLSFLAG_USDT_EXCLUDE) == 1)
> > -		return 1;
> > -	if (uprp == NULL)
> > -		return 0;
> > -	/*
> > -	 * If we cannot ignore this statement, try to use uprp.
> > -	 */
> > -
> > -	/* We know what function we're in.  It must match the probe description (unless "-"). */
> > -	if (strcmp(pdp->fun, "-") != 0) {
> > -		dt_uprobe_t	*upp = uprp->prv_data;
> > +	/* Really delete entries from usdt_names. */
> > +	for (del = dt_list_next(&dlist); del != NULL; del = ndel) {
> > +		ndel = dt_list_next(del);
> > +		prp = del->probe;
> > -		assert(upp->func);  // never a return probe
> > -		if (!dt_gmatch(upp->func, pdp->fun))
> > -			return 1;
> > +		dt_bpf_map_delete(fdnames, &prp->desc->id);
> > +		probe_disable(dtp, prp);
> > +		dt_free(dtp, del);
> >   	}
> >   	return 0;
> > @@ -640,8 +588,8 @@ static int add_probe_uprobe(dtrace_hdl_t *dtp, dt_probe_t *prp)
> >   	if (prp->prov->impl->attach)
> >   		rc = prp->prov->impl->attach(dtp, prp, fd);
> > +	close(fd);
> >   	if (rc < 0) {
> > -		close(fd);
> >   		dt_attach_error(dtp, rc, prp->desc->prv, prp->desc->mod,
> >   					 prp->desc->fun, prp->desc->prb);
> >   		goto fail;
> > @@ -652,6 +600,7 @@ static int add_probe_uprobe(dtrace_hdl_t *dtp, dt_probe_t *prp)
> >   fail:
> >   	dt_difo_free(dtp, prp->difo);
> >   	prp->difo = NULL;
> > +
> >   	return 0;	// FIXME in dt_bpf_make_progs() this is a fatal error; should we do the same here?
> >   }
> > @@ -661,8 +610,9 @@ static int add_probe_usdt(dtrace_hdl_t *dtp, dt_probe_t *prp)
> >   	char				probnam[DTRACE_FULLNAMELEN], *p;
> >   	const dtrace_probedesc_t	*pdp = prp->desc;
> >   	int				fd = dtp->dt_usdt_namesmap_fd;
> > -	pid_t				pid;
> > -	list_probe_t			*pup;
> > +	list_probe_t			*pup = prp->prv_data;
> > +	dt_uprobe_t			*upp = pup->probe->prv_data;
> > +	pid_t				pid = upp->pid;
> >   	/* Add probe name elements to usdt_names map. */
> >   	p = probnam;
> > @@ -674,21 +624,10 @@ static int add_probe_usdt(dtrace_hdl_t *dtp, dt_probe_t *prp)
> >   	snprintf(p, DTRACE_FUNCNAMELEN, "%s", pdp->fun);
> >   	p += DTRACE_FUNCNAMELEN;
> >   	snprintf(p, DTRACE_NAMELEN, "%s", pdp->prb);
> > +
> >   	if (dt_bpf_map_update(fd, &pdp->id, probnam) == -1)
> >   		assert(0);   // FIXME do something here
> > -	/* FIXME passing in NULL pcb and dpr wreaks havoc on error reporting? */
> > -	/*
> > -	 * Nick writes:
> > -	 * This is a general problem with running compiler-adjacent things outside
> > -	 * compile time. I think we should adjust dt_pid_error() so that it works
> > -	 * with NULL pcb and dpr at once, probably by using the code path for
> > -	 * pcb != NULL and augmenting it so that it passes in NULL for the region and
> > -	 * filename args and 0 for the lineno if pcb is NULL. (dt_set_errmsg can
> > -	 * already handle this case.)
> > -	 */
> > -	pid = dt_pid_get_pid(prp->desc, dtp, NULL, NULL);
> > -
> >   	/* Even though we just enabled this, check it's still live. */
> >   	if (!Pexists(pid)) {
> >   		probe_disable(dtp, prp);
> > @@ -697,55 +636,6 @@ static int add_probe_usdt(dtrace_hdl_t *dtp, dt_probe_t *prp)
> >   		return 0;
> >   	}
> > -	/* Add prid and bit mask to usdt_prids map. */
> > -	for (pup = prp->prv_data; pup != NULL; pup = dt_list_next(pup)) {
> > -		dt_probe_t		*uprp = pup->probe;
> > -		long long		mask = 0, bit = 1;
> > -		usdt_prids_map_key_t	key;
> > -		usdt_prids_map_val_t	val;
> > -		dt_uprobe_t		*upp = uprp->prv_data;
> > -
> > -		/*
> > -		 * For is-enabled probes, the bit mask does not matter.
> > -		 * It is possible that we have this underlying probe due to
> > -		 * an overlying pid-offset probe and that we will not know
> > -		 * until later, when some new pid is created, that we also
> > -		 * have an overlying USDT is-enabled probe, but missing this
> > -		 * optimization opportunity is okay.
> > -		 */
> > -		if (uprp->prov->impl == &dt_uprobe && !(upp->flags & PP_IS_ENABLED)) {
> > -			int n;
> > -
> > -			for (n = 0; n < dtp->dt_stmt_nextid; n++) {
> > -				dtrace_stmtdesc_t *stp;
> > -
> > -				stp = dtp->dt_stmts[n];
> > -				if (stp == NULL)
> > -					continue;
> > -
> > -				if (ignore_clause(dtp, n, uprp))
> > -					continue;
> > -
> > -				if (dt_gmatch(prp->desc->prv, stp->dtsd_ecbdesc->dted_probe.prv) &&
> > -				    dt_gmatch(prp->desc->mod, stp->dtsd_ecbdesc->dted_probe.mod) &&
> > -				    dt_gmatch(prp->desc->fun, stp->dtsd_ecbdesc->dted_probe.fun) &&
> > -				    dt_gmatch(prp->desc->prb, stp->dtsd_ecbdesc->dted_probe.prb))
> > -					mask |= bit;
> > -
> > -				bit <<= 1;
> > -			}
> > -		}
> > -
> > -		key.pid = pid;
> > -		key.uprid = uprp->desc->id;
> > -
> > -		val.prid = prp->desc->id;
> > -		val.mask = mask;
> > -
> > -		// FIXME Check return value, but how should errors be handled?
> > -		dt_bpf_map_update(dtp->dt_usdt_pridsmap_fd, &key, &val);
> > -	}
> > -
> >   	return 0;
> >   }
> > @@ -770,8 +660,6 @@ static int discover(dtrace_hdl_t *dtp)
> >   	 */
> >   	memset(&pcb, 0, sizeof(dt_pcb_t));
> >   	for (i = 0; i < dtp->dt_stmt_nextid; i++) {
> > -		if (ignore_clause(dtp, i, NULL))
> > -			continue;
> >   		dt_pid_create_usdt_probes(&dtp->dt_stmts[i]->dtsd_ecbdesc->dted_probe, dtp, &pcb);
> >   	}
> > @@ -878,6 +766,7 @@ static int populate_args(dtrace_hdl_t *dtp, const pid_probespec_t *psp,
> >   static dt_probe_t *create_underlying(dtrace_hdl_t *dtp,
> >   				     const pid_probespec_t *psp)
> >   {
> > +	char			prv[DTRACE_PROVNAMELEN];
> >   	char			mod[DTRACE_MODNAMELEN];
> >   	char			fun[DTRACE_FUNCNAMELEN];
> >   	char			prb[DTRACE_NAMELEN];
> > @@ -893,12 +782,13 @@ static dt_probe_t *create_underlying(dtrace_hdl_t *dtp,
> >   	 *
> >   	 * The probe description for non-return probes is:
> >   	 *
> > -	 *	uprobe:<dev>_<inode>::<offset>
> > +	 *	uprobe<PID>:<dev>_<inode>::<offset>
> >   	 *
> >   	 * The probe description for return probes is:
> >   	 *
> > -	 *	uprobe:<dev>_<inode>:<func>:return
> > +	 *	uprobe<PID>:<dev>_<inode>:<func>:return
> >   	 */
> > +	snprintf(prv, sizeof(prv), "%s%d", dt_uprobe.name, psp->pps_pid);
> >   	snprintf(mod, sizeof(mod), "%lx_%lx", psp->pps_dev, psp->pps_inum);
> >   	fun[0] = '\0';
> > @@ -921,37 +811,40 @@ static dt_probe_t *create_underlying(dtrace_hdl_t *dtp,
> >   	}
> >   	pd.id = DTRACE_IDNONE;
> > -	pd.prv = prvname;
> > +	pd.prv = prv;
> >   	pd.mod = mod;
> >   	pd.fun = fun;
> >   	pd.prb = prb;
> > -	dt_dprintf("Providing underlying probe %s:%s:%s:%s @ %lx\n", psp->pps_prv,
> > -		   psp->pps_mod, psp->pps_fun, psp->pps_prb, psp->pps_off);
> > +	dt_dprintf("Providing underlying probe %s:%s:%s:%s\n",
> > +		   prv, mod, fun, prb);
> >   	uprp = dt_probe_lookup(dtp, &pd);
> >   	if (uprp == NULL) {
> >   		dt_provider_t	*pvp;
> >   		/* Get the provider for underlying probes. */
> >   		pvp = dt_provider_lookup(dtp, pd.prv);
> > -		if (pvp == NULL)
> > -			return NULL;
> > +		if (pvp == NULL) {
> > +			pvp = dt_provider_create(dtp, pd.prv, &dt_uprobe,
> > +						 &pattr, NULL);
> > +			if (pvp == NULL)
> > +				return NULL;
> > +		}
> >   		/* Set up the probe data. */
> >   		upp = dt_zalloc(dtp, sizeof(dt_uprobe_t));
> >   		if (upp == NULL)
> >   			return NULL;
> > +		upp->pid = psp->pps_pid;
> >   		upp->dev = psp->pps_dev;
> >   		upp->inum = psp->pps_inum;
> >   		upp->off = psp->pps_off;
> >   		upp->refcntr_off = psp->pps_refcntr_off;
> > +		upp->fd = -1;			/* not created yet */
> >   		upp->fn = strdup(psp->pps_fn);
> >   		upp->func = NULL;
> >   		upp->argc = -1;			/* no argument data yet */
> > -		upp->tp = dt_tp_alloc(dtp);
> > -		if (upp->tp == NULL)
> > -			goto fail;
> >   		uprp = dt_probe_insert(dtp, pvp, pd.prv, pd.mod, pd.fun, pd.prb,
> >   				       upp);
> > @@ -1491,13 +1384,12 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> >   	dtrace_hdl_t		*dtp = pcb->pcb_hdl;
> >   	dt_irlist_t		*dlp = &pcb->pcb_ir;
> >   	const dt_probe_t	*uprp = pcb->pcb_probe;
> > +	dt_probe_t		*usdtp = NULL;
> >   	const dt_uprobe_t	*upp = uprp->prv_data;
> >   	const list_probe_t	*pop;
> > -	uint_t			lbl_exit = pcb->pcb_exitlbl;
> > -	dt_ident_t		*usdt_prids = dt_dlib_get_map(dtp, "usdt_prids");
> > -	int			n;
> > +	dt_ident_t		*usdt_names = dt_dlib_get_map(dtp, "usdt_names");
> > -	assert(usdt_prids != NULL);
> > +	assert(usdt_names != NULL);
> >   	dt_cg_tramp_prologue(pcb);
> > @@ -1508,95 +1400,73 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> >   	 */
> >   	dt_cg_tramp_copy_regs(pcb);
> > -	/*
> > -	 * Hold the PID of the process that caused the probe to fire in %r6.
> > -	 */
> > -	emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_get_current_pid_tgid));
> > -	emit(dlp,  BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32));
> > -	emit(dlp,  BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
> > -
> >   	/*
> >   	 * pid probes.
> >   	 *
> >   	 * Loop over overlying pid probes, calling clauses for those that match:
> >   	 *
> > -	 *	for overlying pid probes (that match except possibly for pid)
> > -	 *		if (pid matches) {
> > -	 *			dctx->mst->prid = PRID1;
> > -	 *			< any number of clause calls >
> > -	 *		}
> > +	 *	for overlying pid probes
> > +	 *		dctx->mst->prid = PRID;
> > +	 *		< any number of clause calls >
> > +	 *
> > +	 * For efficiency, we'll also record if we find an overlying USDT probe
> > +	 * in the list (there can only be one).
> >   	 */
> >   	for (pop = dt_list_next(&upp->probes); pop != NULL;
> >   	     pop = dt_list_next(pop)) {
> > -		const dt_probe_t	*prp = pop->probe;
> > -		uint_t			lbl_next = dt_irlist_label(dlp);
> > -		pid_t			pid;
> > +		dt_probe_t	*prp = pop->probe;
> > -		if (prp->prov->impl != &dt_pid)
> > +		if (prp->prov->impl == &dt_usdt ||
> > +		    prp->prov->impl == &dt_stapsdt) {
> > +			usdtp = prp;
> >   			continue;
> > +		}
> > -		pid = dt_pid_get_pid(prp->desc, pcb->pcb_hdl, pcb, NULL);
> > -		assert(pid != -1);
> > -
> > -		/*
> > -		 * Populate probe arguments.
> > -		 */
> > +		/* Populate probe arguments.  */
> >   		if (upp->flags & PP_IS_RETURN)
> >   			dt_cg_tramp_copy_rval_from_regs(pcb);
> >   		else
> >   			dt_cg_tramp_copy_args_from_regs(pcb, 1);
> > -		/*
> > -		 * Check whether this pid-provider probe serves the current
> > -		 * process, and emit a sequence of clauses for it when it does.
> > -		 */
> > -		emit(dlp,  BPF_BRANCH_IMM(BPF_JNE, BPF_REG_6, pid, lbl_next));
> > +		/* Set PRID and call the clauses for the overlying probe. */
> >   		emit(dlp,  BPF_STORE_IMM(BPF_W, BPF_REG_7, DMST_PRID, prp->desc->id));
> >   		dt_cg_tramp_call_clauses(pcb, prp, DT_ACTIVITY_ACTIVE);
> > -		emitl(dlp, lbl_next,
> > -			   BPF_NOP());
> >   	}
> > +	/* If no USDT probe was found, we are done. */
> > +	if (usdtp == NULL)
> > +		goto out;
> > +
> >   	/*
> >   	 * USDT.
> >   	 */
> > -	/* In some cases, we know there are no USDT probes. */  // FIXME: add more checks
> > -	if (upp->flags & PP_IS_RETURN)
> > -		goto out;
> > -
> > +	/*
> > +	 * First check whether the USDT probe is active, i.e. its probe ID is
> > +	 * in the usdt_names BPF map.  If not, ignore it for now.
> > +	 */
> > +	emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_FP, DT_TRAMP_SP_SLOT(0), usdtp->desc->id));
> > +	dt_cg_xsetx(dlp, usdt_names, DT_LBL_NONE, BPF_REG_1, usdt_names->di_id);
> > +	emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> > +	emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_SLOT(0)));
> > +	emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> > +	emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, pcb->pcb_exitlbl));
> > +
> > +	/* Set up probe arguments. */
> >   	if (upp->sargc)
> >   		copy_args(pcb, upp);
> >   	else
> >   		dt_cg_tramp_copy_args_from_regs(pcb, 0);
> > -	/*
> > -	 * Retrieve the PID of the process that caused the probe to fire.
> > -	 */
> > -	emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_get_current_pid_tgid));
> > -	emit(dlp,  BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32));
> > -
> > -	/*
> > -	 * Look up in the BPF 'usdt_prids' map.  The key should fit into
> > -	 * trampoline stack slot 0.
> > -	 */
> > -	assert(sizeof(usdt_prids_map_key_t) <= DT_STK_SLOT_SZ);
> > -	emit(dlp,  BPF_STORE(BPF_W, BPF_REG_FP, DT_TRAMP_SP_SLOT(0), BPF_REG_0));
> > -	emit(dlp,  BPF_STORE_IMM(BPF_W, BPF_REG_FP, DT_TRAMP_SP_SLOT(0) + (int)sizeof(pid_t), uprp->desc->id));
> > -	dt_cg_xsetx(dlp, usdt_prids, DT_LBL_NONE, BPF_REG_1, usdt_prids->di_id);
> > -	emit(dlp,  BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> > -	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_SLOT(0)));
> > -	emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> > -	emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_exit));
> > -
> >   	if (upp->flags & PP_IS_ENABLED) {
> >   		/*
> > -		 * Generate a BPF trampoline for an is-enabled probe.  The is-enabled probe
> > -		 * prototype looks like:
> > +		 * Generate a BPF trampoline for an is-enabled probe.  The
> > +		 * is-enabled probe prototype looks like:
> >   		 *
> >   		 *	int is_enabled(int *arg)
> >   		 *
> > -		 * The trampoline writes 1 into the location pointed to by the passed-in arg.
> > +		 * The trampoline writes 1 into the location pointed to by the
> > +		 * passed-in arg.
> >   		 */
> >   		emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_FP, DT_TRAMP_SP_SLOT(0), 1));
> >   		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
> > @@ -1608,17 +1478,6 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> >   		goto out;
> >   	}
> > -	/*
> > -	 * Continue with normal USDT probes.
> > -	 */
> > -
> > -	/* Read the PRID from the table lookup and store to mst->prid. */
> > -	emit(dlp,  BPF_LOAD(BPF_W, BPF_REG_1, BPF_REG_0, 0));
> > -	emit(dlp,  BPF_STORE(BPF_W, BPF_REG_7, DMST_PRID, BPF_REG_1));
> > -
> > -	/* Read the bit mask from the table lookup in %r6. */    // FIXME someday, extend this past 64 bits
> > -	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_0, offsetof(usdt_prids_map_val_t, mask)));
> > -
> >   	/*
> >   	 * Apply arg mappings, if needed.
> >   	 */
> > @@ -1629,51 +1488,21 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> >   	}
> >   	/*
> > -	 * Hold the bit mask in %r6 between clause calls.
> > +	 * If the probe does not have clauses (yet), it was recently discovered
> > +	 * and we need to populate the clause list with any that match the
> > +	 * probe specification.
> >   	 */
> > -	for (n = 0; n < dtp->dt_stmt_nextid; n++) {
> > -		dtrace_stmtdesc_t *stp;
> > -		dt_ident_t	*idp;
> > -		uint_t		lbl_next;
> > +	if (dt_list_next(&usdtp->stmts) == NULL)
> > +		dt_probe_add_stmt_matchall(dtp, usdtp);
> > -		stp = dtp->dt_stmts[n];
> > -		if (stp == NULL)
> > -			continue;
> > -
> > -		if (ignore_clause(dtp, n, uprp))
> > -			continue;
> > -
> > -		idp = stp->dtsd_clause;
> > -		lbl_next = dt_irlist_label(dlp);
> > -
> > -		/* If the lowest %r6 bit is 0, skip over this clause. */
> > -		emit(dlp,  BPF_MOV_REG(BPF_REG_1, BPF_REG_6));
> > -		emit(dlp,  BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 1));
> > -		emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, lbl_next));
> > -
> > -		/*
> > -		 *      if (*dctx.act != act)   // ldw %r0, [%r9 + DCTX_ACT]
> > -		 *	      goto exit;      // ldw %r0, [%r0 + 0]
> > -		 *			      // jne %r0, act, lbl_exit
> > -		 */
> > -		emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_9, DCTX_ACT));
> > -		emit(dlp,  BPF_LOAD(BPF_W, BPF_REG_0, BPF_REG_0, 0));
> > -		emit(dlp,  BPF_BRANCH_IMM(BPF_JNE, BPF_REG_0, DT_ACTIVITY_ACTIVE, lbl_exit));
> > -
> > -		/* dctx.mst->scratch_top = 8 */
> > -		emit(dlp,  BPF_STORE_IMM(BPF_W, BPF_REG_7, DMST_SCRATCH_TOP, 8));
> > -
> > -		/* Call clause. */
> > -		emit(dlp,  BPF_MOV_REG(BPF_REG_1, BPF_REG_9));
> > -		emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
> > -
> > -		/* Finished this clause. */
> > -		emitl(dlp, lbl_next,
> > -			   BPF_NOP());
> > -
> > -		/* Right-shift %r6. */
> > -		emit(dlp,  BPF_ALU64_IMM(BPF_RSH, BPF_REG_6, 1));
> > -	}
> > +	/*
> > +	 * Call the clauses for the USDT probe:
> > +	 *
> > +	 *	dctx->mst->prid = PRID;
> > +	 *	< any number of clause calls >
> > +	 */
> > +	emit(dlp,  BPF_STORE_IMM(BPF_W, BPF_REG_7, DMST_PRID, usdtp->desc->id));
> > +	dt_cg_tramp_call_clauses(pcb, usdtp, DT_ACTIVITY_ACTIVE);
> >   out:
> >   	dt_cg_tramp_return(pcb);
> > @@ -1681,111 +1510,65 @@ out:
> >   	return 0;
> >   }
> > -static char *uprobe_name(dev_t dev, ino_t ino, uint64_t addr, int flags)
> > +static int uprobe_create(dtrace_hdl_t *dtp, const dt_uprobe_t *upp,
> > +			 uint64_t refcntr_off)
> >   {
> > -	char	*name;
> > +	struct perf_event_attr	attr = { 0, };
> > +	dt_provider_t		*pvp = dt_provider_lookup(dtp, dt_uprobe.name);
> > +	uprobe_data_t		*udp;
> > -	if (asprintf(&name, "dt_pid/%c_%llx_%llx_%lx",
> > -		     flags & PP_IS_RETURN ? 'r' : 'p', (unsigned long long)dev,
> > -		     (unsigned long long)ino, (unsigned long)addr) < 0)
> > -		return NULL;
> > +	if (pvp == NULL)
> > +		return -1;
> > +	udp = pvp->prv_data;
> > +	assert(udp != NULL);
> > -	return name;
> > -}
> > +	attr.size = sizeof(attr);
> > -/*
> > - * Create a uprobe for a given dev/ino, mapping filename, and address: the
> > - * uprobe may be a uretprobe.  Return the probe's name as
> > - * a new dynamically-allocated string, or NULL on error.
> > - *
> > - * An optional refcntr_off - used by stapsdt probes to identify semaphore
> > - * address - can also be supplied.
> > - */
> > -static char *uprobe_create(dev_t dev, ino_t ino, const char *mapping_fn,
> > -			   uint64_t addr, uint64_t refcntr_off, int flags)
> > -{
> > -	int	fd = -1;
> > -	int	rc = -1;
> > -	char	*name;
> > -	char	*spec;
> > +	if (udp->perf_type == -1) {
> > +		udp->perf_type = get_perf_type();
> > +		if (udp->perf_type == -1)
> > +			return -1;
> > +	}
> > +	attr.type = udp->perf_type;
> >   	if (refcntr_off) {
> > -		if (asprintf(&spec, "%s:0x%lx(0x%lx)", mapping_fn, addr, refcntr_off) < 0)
> > -			return NULL;
> > -	} else {
> > -		if (asprintf(&spec, "%s:0x%lx", mapping_fn, addr) < 0)
> > -			return NULL;
> > +		if (udp->ref_shift == -1) {
> > +			udp->ref_shift = get_refcnt_shift();
> > +			if (udp->ref_shift == -1)
> > +				return -1;
> > +		}
> > +		attr.config = refcntr_off << udp->ref_shift;
> >   	}
> > -	name = uprobe_name(dev, ino, addr, flags);
> > -	if (!name)
> > -		goto out;
> > -
> > -	/* Add the uprobe. */
> > -	fd = open(TRACEFS "uprobe_events", O_WRONLY | O_APPEND);
> > -	if (fd == -1)
> > -		goto out;
> > -
> > -	rc = dprintf(fd, "%c:%s %s\n", flags & PP_IS_RETURN ? 'r' : 'p', name, spec);
> > -
> > -out:
> > -	free(spec);
> > -	if (fd != -1)
> > -		close(fd);
> > -	if (rc < 0) {
> > -		free(name);
> > -		return NULL;
> > +	if (upp->flags & PP_IS_RETURN) {
> > +		if (udp->ret_flag == -1) {
> > +			udp->ret_flag = get_retprobe_flag();
> > +			if (udp->ret_flag == -1)
> > +				return -1;
> > +		}
> > +		attr.config |= udp->ret_flag;
> >   	}
> > -	return name;
> > +	attr.uprobe_path = (uint64_t)upp->fn;
> > +	attr.probe_offset = upp->off;
> > +
> > +	return dt_perf_event_open(&attr, upp->pid, -1, -1, 0);
> >   }
> >   static int attach(dtrace_hdl_t *dtp, const dt_probe_t *uprp, int bpf_fd)
> >   {
> >   	dt_uprobe_t	*upp = uprp->prv_data;
> > -	tp_probe_t	*tpp = upp->tp;
> > -	FILE		*f;
> > -	char		*fn;
> > -	char		*prb = NULL;
> > -	int		rc = -1;
> > -
> > -	if (dt_tp_has_info(tpp))
> > -		goto attach_bpf;
> > +	assert(upp->fd == -1);
> >   	assert(upp->fn != NULL);
> > -	prb = uprobe_create(upp->dev, upp->inum, upp->fn, upp->off,
> > -			    upp->refcntr_off, upp->flags);
> > -
> > -	/*
> > -	 * If the uprobe creation failed, it is possible it already
> > -	 * existed because someone else created it.  Try to access its
> > -	 * tracefs info and if that fails, we really failed.
> > -	 */
> > +	upp->fd = uprobe_create(dtp, upp, upp->refcntr_off);
> > -	if (prb == NULL)
> > -		prb = uprobe_name(upp->dev, upp->inum, upp->off,
> > -				  upp->flags);
> > -
> > -	/* open format file */
> > -	rc = asprintf(&fn, "%s%s/format", EVENTSFS, prb);
> > -	free(prb);
> > -	if (rc < 0)
> > -		return -ENOENT;
> > -	f = fopen(fn, "r");
> > -	free(fn);
> > -	if (f == NULL)
> > -		return -ENOENT;
> > -
> > -	rc = dt_tp_event_info(dtp, f, 0, tpp, NULL, NULL);
> > -	fclose(f);
> > -
> > -	if (rc < 0)
> > -		return -ENOENT;
> > -
> > -attach_bpf:
> >   	/* attach BPF program to the probe */
> > -	return dt_tp_attach(dtp, tpp, bpf_fd);
> > +	if (ioctl(upp->fd, PERF_EVENT_IOC_SET_BPF, bpf_fd) < 0)
> > +		return -errno;
> > +
> > +	return 0;
> >   }
> >   static int probe_info(dtrace_hdl_t *dtp, const dt_probe_t *prp,
> > @@ -1936,59 +1719,6 @@ oom:
> >   	return dt_set_errno(dtp, EDT_NOMEM);
> >   }
> > -/*
> > - * Destroy a uprobe for a given device and address.
> > - */
> > -static int
> > -uprobe_delete(dev_t dev, ino_t ino, uint64_t addr, int flags)
> > -{
> > -	int	fd = -1;
> > -	int	rc = -1;
> > -	char	*name;
> > -
> > -	name = uprobe_name(dev, ino, addr, flags);
> > -	if (!name)
> > -		goto out;
> > -
> > -	fd = open(TRACEFS "uprobe_events", O_WRONLY | O_APPEND);
> > -	if (fd == -1)
> > -		goto out;
> > -
> > -
> > -	rc = dprintf(fd, "-:%s\n", name);
> > -
> > -out:
> > -	if (fd != -1)
> > -		close(fd);
> > -	free(name);
> > -
> > -	return rc < 0 ? -1 : 0;
> > -}
> > -
> > -/*
> > - * Try to clean up system resources that may have been allocated for this
> > - * probe.
> > - *
> > - * If there is an event FD, we close it.
> > - *
> > - * We also try to remove any uprobe that may have been created for the probe
> > - * (but only if we created it, not if dtprobed did).  This is harmless for
> > - * probes that didn't get created.  If the removal fails for some reason we are
> > - * out of luck - fortunately it is not harmful to the system as a whole.
> > - */
> > -static void detach(dtrace_hdl_t *dtp, const dt_probe_t *uprp)
> > -{
> > -	dt_uprobe_t	*upp = uprp->prv_data;
> > -	tp_probe_t	*tpp = upp->tp;
> > -
> > -	if (!dt_tp_has_info(tpp))
> > -		return;
> > -
> > -	dt_tp_detach(dtp, tpp);
> > -
> > -	uprobe_delete(upp->dev, upp->inum, upp->off, upp->flags);
> > -}
> > -
> >   /* Clean up the private provider data. */
> >   static void destroy(dtrace_hdl_t *dtp, void *arg)
> >   {
> > diff --git a/libdtrace/dtrace.h b/libdtrace/dtrace.h
> > index ef8f730a..82965fbd 100644
> > --- a/libdtrace/dtrace.h
> > +++ b/libdtrace/dtrace.h
> > @@ -162,8 +162,6 @@ typedef struct dtrace_stmtdesc {
> >   #define DT_CLSFLAG_DESTRUCT		0x0020	/* destructive */
> >   #define DT_CLSFLAG_RETURN		0x0040	/* return action */
> >   #define DT_CLSFLAG_AGGREGATION		0x0080	/* aggregation */
> > -#define DT_CLSFLAG_USDT_INCLUDE		0x0100	/* could be used in USDT clause */
> > -#define DT_CLSFLAG_USDT_EXCLUDE		0x0200	/* could not be used in USDT clause */
> >   typedef int dtrace_stmt_f(dtrace_hdl_t *dtp, dtrace_prog_t *pgp,
> >       dtrace_stmtdesc_t *sdp, void *data);
> > diff --git a/test/unittest/usdt/tst.defer-Z-basic.sh b/test/unittest/usdt/tst.defer-Z-basic.sh
> > new file mode 100755
> > index 00000000..a7d1d015
> > --- /dev/null
> > +++ b/test/unittest/usdt/tst.defer-Z-basic.sh
> > @@ -0,0 +1,102 @@
> > +#!/bin/bash
> > +#
> > +# Oracle Linux DTrace.
> > +# Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
> > +# Licensed under the Universal Permissive License v 1.0 as shown at
> > +# http://oss.oracle.com/licenses/upl.
> > +#
> > +# This test verifies that DTrace discovers and processes USDT probes in a
> > +# process that gets executed after the DTrace session has started (-Z is
> > +# required).
> > +
> > +dtrace=$1
> > +trigger=`pwd`/test/triggers/usdt-tst-defer
> > +
> > +# Set up test directory.
> > +DIRNAME=$tmpdir/defer-Z-basic.$$.$RANDOM
> > +mkdir -p $DIRNAME
> > +cd $DIRNAME
> > +
> > +# Make a private copy of the trigger so that we get our own DOF stash.
> > +cp $trigger main
> > +
> > +# Start dtrace.
> > +$dtrace $dt_flags -Zq -o dtrace.out -n '
> > +testprov*:::foo,
> > +testprov*:::bar
> > +{
> > +	printf("%s:%s %d\n", probemod, probename, pid);
> > +}' &
> > +dtpid=$!
> > +
> > +# Wait up to half of the timeout period for dtrace to start up.
> > +iter=$((timeout / 2))
> > +while [ $iter -gt 0 ]; do
> > +	sleep 1
> > +	if [ -e dtrace.out ]; then
> > +		break
> > +	fi
> > +	iter=$((iter - 1))
> > +done
> > +if [[ $iter -eq 0 ]]; then
> > +	echo ERROR starting DTrace job
> > +	cat dtrace.out
> > +	exit 1
> > +fi
> > +
> > +# Start trigger process.
> > +./main > main.out &
> > +tpid=$!
> > +
> > +# Confirm that dtrace is still running (otherwise trigger runs forever).
> > +sleep 2
> > +if [[ ! -d /proc/$dtpid ]]; then
> > +	echo ERROR dtrace died after trigger started
> > +	kill -USR1 $tpid
> > +	wait $tpid
> > +	exit 1
> > +fi
> > +
> > +# Wait for process to complete.
> > +wait $tpid
> > +
> > +# Kill the dtrace process.
> > +kill $dtpid
> > +wait
> > +
> > +# Check the program output (main.out).
> > +echo "$tpid: undefined 1 0 10 10 10" > main.out.expected
> > +awk '{ $2 = "undefined"; print }' main.out > main.out.post
> > +if ! diff -q main.out.post main.out.expected; then
> > +	echo program output looks wrong
> > +	echo === was ===
> > +	cat main.out
> > +	echo === got ===
> > +	cat main.out.post
> > +	echo === expected ===
> > +	cat main.out.expected
> > +	exit 1
> > +fi
> > +
> > +# Regularize the DTrace output, and check it.
> > +awk 'NF > 0 { map[$2 " " $1]++; }
> > +     END { for (i in map) printf "%s %d\n", i, map[i]; }' dtrace.out > dtrace.out.post
> > +
> > +echo "$tpid main:bar 10" > dtrace.out.expected
> > +
> > +if ! sort dtrace.out.expected | diff -q - dtrace.out.post; then
> > +	echo dtrace output looks wrong
> > +	echo === was ===
> > +	cat dtrace.out
> > +	echo === got ===
> > +	cat dtrace.out.post
> > +	echo === expected ===
> > +	sort dtrace.out.expected
> > +	echo === diff ===
> > +	sort dtrace.out.expected | diff - dtrace.out.post
> > +	exit 1
> > +fi
> > +
> > +echo success
> > +
> > +exit 0



More information about the DTrace-devel mailing list