[DTrace-devel] [PATCH 10/23] dt_pid, dtprobed: move uprobe creation to dtrace

Nick Alcock nick.alcock at oracle.com
Thu Feb 22 18:39:07 UTC 2024


Now that the DOF stash is being created and populated, we can move all probe
creation to DTrace. This solves multiple problems:

 - removal of probes is difficult: dtprobed knows when all processes that
   registered DOF have unregistered it again, but that doesn't mean it's
   allowed to remove any of the corresponding uprobes. You can't remove a
   uprobe if it has any BPF associated with it, and the thing that does that
   is dtrace itself, which might well remove the BPF long after the
   processes that contained the corresponding uprobes are all dead: so
   dtrace is the only thing that can safely remove uprobes.

 - insertion of uprobes is limiting: passing the DTrace-side probe name down
   via probe arguments was clever, but alas probe arguments have much
   shorter maximum lengths than DTrace probe names, so some valid DTrace
   probes cannot be registered as uprobes and end up with truncated names.

 - it doesn't scale: the new probe registration/removal grinds to a halt
   after a few tens of thousands of uprobes are registered, and doing it
   in dtprobed means that every probe that might potentially get used is
   created, even though any given DTrace invocation is only likely to use
   a subset of them.

The solution to this is to remove all the uprobe registration code from
dtprobed (goodbye, libcommon/uprobes.c) and migrate it all into
libdtrace/dt_prov_uprobe.c.  This is convenient because it's where PID probe
registration already happens, and it turns out we can reuse almost all the
code.

As for figuring out which probes to create, instead of pre-scanning the
/sys/kernel/debug/tracing/uprobe_events list and then looking for probes
that relate to the current process, we can just scan the /run/dtrace/probes
subdirectory in the DOF stash that relates to the specific PID we are
interested in (using globbing to identify the probes, taking advantage of
the fact that probespec globs follow the same rules as filesystem globs),
pull in the DOF, and generate probes from it.  Much simpler and much less
work.  (We do need to allow for processes dying as DTrace is processing
stash entries relating to them.)

The pre-parsing has one tiny caveat, which we can't even hit until we start
to reparse probes at runtime rather than just at DTrace startup. All the DOF
we need is already parsed by dtprobed, but if dtprobed was upgraded since
this instance of DTrace started, and the new dtprobed has a newer struct
dof_parsed, we want to ignore any such (too-new) parsed DOF and not register
the corresponding probes.  Earlier commits in this series introduced a new
DOF_PARSED_VERSION #define which is bumped whenever struct dof_parsed
changes and is written at the start of all parsed DOF in the DOF stash: we
just need to compare that to what's baked into this copy of DTrace, and
avoid using any parsed DOF for which the two differ.

Signed-off-by: Nick Alcock <nick.alcock at oracle.com>
Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>
---
 dtprobed/dtprobed.c        |  46 ++--
 libcommon/Build            |   4 +-
 libcommon/uprobes.c        | 343 -------------------------
 libcommon/uprobes.h        |  35 ---
 libdtrace/dt_impl.h        |   9 +-
 libdtrace/dt_open.c        |   4 +-
 libdtrace/dt_options.c     |  18 ++
 libdtrace/dt_pid.c         | 500 ++++++++++++++++++++-----------------
 libdtrace/dt_pid.h         |   3 +-
 libdtrace/dt_proc.h        |   3 +-
 libdtrace/dt_prov_uprobe.c | 130 +++++++---
 11 files changed, 409 insertions(+), 686 deletions(-)
 delete mode 100644 libcommon/uprobes.c
 delete mode 100644 libcommon/uprobes.h

diff --git a/dtprobed/dtprobed.c b/dtprobed/dtprobed.c
index b191f8e3..9775fda5 100644
--- a/dtprobed/dtprobed.c
+++ b/dtprobed/dtprobed.c
@@ -1,10 +1,23 @@
 /*
- * Oracle Linux DTrace; DOF-consumption and USDT-probe-creation daemon.
+ * Oracle Linux DTrace; DOF-consumption and storage daemon.
  * Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
 
+/*
+ * dtprobed's purpose is simple: listen for ioctls on /dev/dtrace/helper and
+ * keep track of USDT probes live in running processes.  dtrace(1) cannot do
+ * this because it isn't going to be running all the time, and is almost
+ * certainly not going to be running in early boot when most daemons start up.
+ * It records this DOF in the DOF stash under /run/dtrace (see dof_stash.c in
+ * this directory), and also tracks the identity of the probes contained in it.
+ *
+ * The DOF is recorded in a pre-parsed form, and parsed on receipt by a forked
+ * helper jailed by strict-mode seccomp to prevent DOF contributed by hostile
+ * binaries from compromising the system.
+ */
+
 #include <sys/param.h>
 #include <sys/uio.h>
 #include <sys/wait.h>
@@ -54,7 +67,6 @@
 
 #include <dt_list.h>
 #include "dof_parser.h"
-#include "uprobes.h"
 #include "dof_stash.h"
 #include "libproc.h"
 
@@ -427,26 +439,6 @@ dof_read(pid_t pid, int in)
 	return reply;
 }
 
-/*
- * Create probes as requested by the dof_parsed_t parsed from the DOF.
- * The DOF parser has already applied the l_addr offset derived from the client
- * process's dynamic linker.
- */
-static void
-create_probe(ps_prochandle *P, dof_parsed_t *provider, dof_parsed_t *probe,
-    dof_parsed_t *tp)
-{
-	const char *mod, *fun, *prb;
-
-	mod = probe->probe.name;
-	fun = mod + strlen(mod) + 1;
-	prb = fun + strlen(fun) + 1;
-
-	free(uprobe_create_from_addr(P, tp->tracepoint.addr,
-		tp->tracepoint.is_enabled, provider->provider.name,
-		mod, fun, prb));
-}
-
 /*
  * Get the (dev, inum) pair for the mapping the passed-in addr belongs to in the
  * given pid.  (If there are multiple, it doesn't matter which we choose as long
@@ -501,7 +493,7 @@ helper_ioctl(fuse_req_t req, int cmd, void *arg,
 	const void *buf;
 	dev_t dev = 0;
 	ino_t inum = 0;
-	int gen = 0;
+	int gen;
 
 	/*
 	 * We can just ignore FUSE_IOCTL_COMPAT: the 32-bit and 64-bit versions
@@ -738,7 +730,7 @@ helper_ioctl(fuse_req_t req, int cmd, void *arg,
 }
 
 /*
- * Process some DOF, passing it to the parser and creating probes from it.
+ * Process some DOF, passing it to the parser and stashing it away for later.
  *
  * If reparsing is set, we are re-parsing existing DOF and should only update
  * the parsed DOF representation.
@@ -753,7 +745,6 @@ process_dof(pid_t pid, int out, int in, dev_t dev, ino_t inum, dof_helper_t *dh,
 	int gen = 0;
 	const char *errmsg;
 	dt_list_t accum = {0};
-	ps_prochandle *P = NULL; /* temporary */
 
 	do {
 		errmsg = "DOF parser write failed";
@@ -802,11 +793,6 @@ process_dof(pid_t pid, int out, int in, dev_t dev, ino_t inum, dof_helper_t *dh,
 			if (!tp || tp->type != DIT_TRACEPOINT)
 				goto err;
 
-			/*
-			 * Ignore errors here: we want to create as many probes
-			 * as we can, even if creation of some of them fails.
-			 */
-			create_probe(P, provider, probe, tp);
 			if (dof_stash_push_parsed(&accum, tp) < 0)
 				goto oom;
 		}
diff --git a/libcommon/Build b/libcommon/Build
index c481cea3..ad858737 100644
--- a/libcommon/Build
+++ b/libcommon/Build
@@ -1,5 +1,5 @@
 # Oracle Linux DTrace.
-# Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
 # Licensed under the Universal Permissive License v 1.0 as shown at
 # http://oss.oracle.com/licenses/upl.
 
@@ -9,5 +9,5 @@ LIBS += libcommon
 libcommon_TARGET = libcommon
 libcommon_DIR := $(current-dir)
 libcommon_CPPFLAGS := -Ilibcommon -Ilibproc
-libcommon_SOURCES = dof_parser.c dof_parser_host.c uprobes.c dt_list.c
+libcommon_SOURCES = dof_parser.c dof_parser_host.c dt_list.c
 libcommon_LIBSOURCES = libcommon
diff --git a/libcommon/uprobes.c b/libcommon/uprobes.c
deleted file mode 100644
index c3b21d60..00000000
--- a/libcommon/uprobes.c
+++ /dev/null
@@ -1,343 +0,0 @@
-/*
- * Oracle Linux DTrace.
- * Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
- * Licensed under the Universal Permissive License v 1.0 as shown at
- * http://oss.oracle.com/licenses/upl.
- */
-
-#include <ctype.h>
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#include <libproc.h>
-#include <assert.h>
-#include <tracefs.h>
-
-/*
- * Return a uprobe spec for a given address in a given process handle.
- */
-char *
-uprobe_spec_by_addr(ps_prochandle *P, uint64_t addr, prmap_t *mapp_)
-{
-	char			*spec = NULL;
-	const prmap_t		*mapp, *first_mapp;
-	char			*mapfile_name = NULL;
-
-	mapp = Paddr_to_map(P, addr);
-	if (mapp == NULL)
-		goto out;
-
-	first_mapp = mapp->pr_file->first_segment;
-
-	/*
-	 * Use a name in /proc/$pid/map_files: this will work even if the
-	 * destination is in a different filesystem namespace.  Never use the
-	 * absolute path: not only might this not exist, but an *entirely
-	 * different file* might be found there in the namespace in which we
-	 * are running: prf_mapname is derived from /proc/$pid/maps, and the
-	 * names in there are not relative to the namespace of the reader
-	 * at all.
-	 */
-	mapfile_name = Pmap_mapfile_name(P, mapp);
-	if (!mapfile_name)
-		goto out;
-
-	/*
-	 * No need for error-checking here: we do the same on error
-	 * and success.
-	 */
-	asprintf(&spec, "%s:0x%lx", mapfile_name, addr - first_mapp->pr_vaddr);
-
-	if (mapp_)
-		memcpy(mapp_, mapp, sizeof(prmap_t));
-
-out:
-	free(mapfile_name);
-	return spec;
-}
-
-static const char hexdigits[] = "0123456789abcdef";
-
-/*
- * Encode a NAME suitably for representation in a uprobe.  All non-alphanumeric,
- * non-_ characters are replaced with __XX where XX is the hex encoding of the
- * ASCII code of the byte. __ itself is replaced with ___.
- */
-char *
-uprobe_encode_name(const char *name)
-{
-	const char *p = name;
-	char *out_p;
-	char *encoded;
-	size_t sz = strlen(name);
-
-	/*
-	 * Compute size changes needed.
-	 */
-
-	while ((p = strstr(p, "__")) != NULL) {
-		sz++;
-		p += 2;
-	}
-
-	for (p = name; *p != '\0'; p++) {
-		if (!isalpha(*p) && !isdigit(*p) && *p != '_')
-			sz += 3;
-	}
-
-	encoded = malloc(sz + 1);
-	if (!encoded)
-		return NULL;
-	out_p = encoded;
-
-	/* Apply translations.  */
-
-	for (p = name; *p != '\0'; p++) {
-		int hexencode = 0, underencode = 0;
-
-		if (!isalpha(*p) && !isdigit(*p) && *p != '_')
-			hexencode = 1;
-		if (p[0] == '_' && p[1] == '_' && p[2] != '\0')
-			underencode = 1;
-
-		if (underencode) {
-			*out_p++ = '_';
-			*out_p++ = '_';
-			*out_p++ = '_';
-			p++;
-			continue;
-		}
-
-		if (hexencode) {
-			*out_p++ = '_';
-			*out_p++ = '_';
-			*out_p++ = hexdigits[*p >> 4];
-			*out_p++ = hexdigits[*p & 0xf];
-		}
-		else
-			*out_p++ = *p;
-	}
-	*out_p = '\0';
-
-	return encoded;
-}
-
-/*
- * Decode a NAME: the converse of uprobe_encode_name.
- */
-char *
-uprobe_decode_name(const char *name)
-{
-	const char *p = name;
-	char *new_p, *out_p;
-	char *decoded;
-	size_t sz = strlen(name);
-
-	/*
-	 * Compute size changes needed.
-	 */
-
-	while ((p = strstr(p, "__")) != NULL) {
-		if (p[3] == '_') {
-			sz--;
-			p += 3;
-		}
-		else if (strspn(&p[2], hexdigits) >= 2) {
-			sz -= 3;
-			p += 4;
-		}
-	}
-
-	decoded = malloc(sz + 1);
-	if (!decoded)
-		return NULL;
-	out_p = decoded;
-
-	/* Apply translations.  */
-
-	p = name;
-	while ((new_p = strstr(p, "__")) != NULL) {
-
-		/*
-		 * Copy unchanged bytes.
-		 */
-		memcpy(out_p, p, new_p - p);
-		out_p += new_p - p;
-		p = new_p;
-
-		if (p[3] == '_') {
-			*out_p++ = '_';
-			*out_p++ = '_';
-			p += 3;
-		} else if (strspn(&p[2], hexdigits) >= 2) {
-			if (isdigit(p[2]))
-				*out_p = (p[2] - '0') << 4;
-			else
-				*out_p = (p[2] - 'a' + 10) << 4;
-			if (isdigit(p[3]))
-				*out_p += p[3] - '0';
-			else
-				*out_p += p[3] - 'a' + 10;
-			p += 4;
-			out_p++;
-		}
-		else {
-			*out_p++ = '_';
-			*out_p++ = '_';
-			p += 2;
-		}
-	}
-	/*
-	 * Copy the remainder.
-	 */
-	strcpy(out_p, p);
-
-	return decoded;
-}
-
-char *
-uprobe_name(dev_t dev, ino_t ino, uint64_t addr, int isret, int is_enabled)
-{
-	char	*name;
-
-	if (asprintf(&name, "dt_pid%s/%c_%llx_%llx_%lx", is_enabled?"_is_enabled":"",
-		     isret ? 'r' : 'p', (unsigned long long)dev,
-		     (unsigned long long)ino, (unsigned long)addr) < 0)
-		return NULL;
-
-	return name;
-}
-
-/*
- * Create a uprobe for a given device, address, and spec: the uprobe may be a
- * uretprobe.  Return the probe's name as a new dynamically-allocated string, or
- * NULL on error.  If prv/mod/fun/prb are all set, they are passed down as the
- * name of the corresponding DTrace probe.
- */
-char *
-uprobe_create_named(dev_t dev, ino_t ino, uint64_t addr, const char *spec, int isret,
-		    int is_enabled, const char *prv, const char *mod, const char *fun,
-		    const char *prb)
-{
-	int	fd = -1;
-	int	rc = -1;
-	char	*name, *args = NULL;
-
-	if (prv && mod && fun && prb) {
-		char *eprv, *emod, *efun, *eprb;
-		int failed = 0;
-
-		eprv = uprobe_encode_name(prv);
-		emod = uprobe_encode_name(mod);
-		efun = uprobe_encode_name(fun);
-		eprb = uprobe_encode_name(prb);
-
-		if (eprv && emod && efun && eprb) {
-			if (asprintf(&args, "P%s=\\1 M%s=\\2 F%s=\\3 N%s=\\4",
-				     eprv, emod, efun, eprb) < 0)
-				failed = 1;
-		} else
-			failed = 1;
-
-		free(eprv);
-		free(emod);
-		free(efun);
-		free(eprb);
-
-		if (failed)
-			return NULL;
-	}
-
-	name = uprobe_name(dev, ino, addr, isret, is_enabled);
-	if (!name)
-		goto out;
-
-	/* Add the uprobe. */
-	fd = open(TRACEFS "uprobe_events", O_WRONLY | O_APPEND);
-	if (fd == -1)
-		goto out;
-
-	rc = dprintf(fd, "%c:%s %s %s\n", isret ? 'r' : 'p', name, spec,
-		     args ? args : "");
-
-out:
-	if (fd != -1)
-		close(fd);
-	free(args);
-	if (rc < 0) {
-		free(name);
-		return NULL;
-	}
-
-	return name;
-}
-
-/*
- * Like uprobe_create_named, but do not specify the name of a corresponding DTrace
- * probe.  (Used when the caller already knows what probe will be needed, and
- * there is no possibility of another DTrace having to pick it up from the
- * systemwide uprobe list.)
- */
-char *
-uprobe_create(dev_t dev, ino_t ino, uint64_t addr, const char *spec, int isret,
-	int is_enabled)
-{
-	return uprobe_create_named(dev, ino, addr, spec, isret, is_enabled,
-				   NULL, NULL, NULL, NULL);
-}
-
-/*
- * Create a uprobe given a particular process and address.  Return the probe's
- * name as a new dynamically-allocated string, or NULL on error.  If
- * prv/mod/fun/prb are set, they are passed down as the name of the
- * corresponding DTrace probe.
- */
-char *
-uprobe_create_from_addr(ps_prochandle *P, uint64_t addr, int is_enabled,
-			const char *prv, const char *mod, const char *fun,
-			const char *prb)
-{
-	char *spec;
-	char *name;
-	prmap_t mapp;
-
-	spec = uprobe_spec_by_addr(P, addr, &mapp);
-	if (!spec)
-		return NULL;
-
-	addr -= mapp.pr_file->first_segment->pr_vaddr;
-	name = uprobe_create_named(mapp.pr_dev, mapp.pr_inum, addr, spec, 0,
-				   is_enabled, prv, mod, fun, prb);
-	free(spec);
-	return name;
-}
-
-/*
- * Destroy a uprobe for a given device and address.
- */
-int
-uprobe_delete(dev_t dev, ino_t ino, uint64_t addr, int isret, int is_enabled)
-{
-	int	fd = -1;
-	int	rc = -1;
-	char	*name;
-
-	name = uprobe_name(dev, ino, addr, isret, is_enabled);
-	if (!name)
-		goto out;
-
-	fd = open(TRACEFS "uprobe_events", O_WRONLY | O_APPEND);
-	if (fd == -1)
-		goto out;
-
-	rc = dprintf(fd, "-:%s\n", name);
-
-out:
-	if (fd != -1)
-		close(fd);
-	free(name);
-
-	return rc < 0 ? -1 : 0;
-}
diff --git a/libcommon/uprobes.h b/libcommon/uprobes.h
deleted file mode 100644
index dac2872e..00000000
--- a/libcommon/uprobes.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Oracle Linux DTrace; simple uprobe helper functions
- * Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved.
- * Licensed under the Universal Permissive License v 1.0 as shown at
- * http://oss.oracle.com/licenses/upl.
- */
-
-#ifndef	_UPROBES_H
-#define	_UPROBES_H
-
-#include <sys/types.h>
-#include <inttypes.h>
-#include <libproc.h>
-#include <unistd.h>
-
-extern char *uprobe_spec_by_addr(ps_prochandle *P, uint64_t addr,
-				 prmap_t *mapp);
-extern char *uprobe_name(dev_t dev, ino_t ino, uint64_t addr, int isret,
-			 int is_enabled);
-extern char *uprobe_create_named(dev_t dev, ino_t ino, uint64_t addr,
-				 const char *spec, int isret, int is_enabled,
-				 const char *prv, const char *mod,
-				 const char *fun, const char *prb);
-extern char *uprobe_create(dev_t dev, ino_t ino, uint64_t addr, const char *spec,
-			   int isret, int is_enabled);
-extern char *uprobe_create_from_addr(ps_prochandle *P, uint64_t addr,
-				     int is_enabled, const char *prv,
-				     const char *mod, const char *fun,
-				     const char *prb);
-extern int uprobe_delete(dev_t dev, ino_t ino, uint64_t addr, int isret,
-			 int is_enabled);
-extern char *uprobe_encode_name(const char *);
-extern char *uprobe_decode_name(const char *);
-
-#endif /* _UPROBES_H */
diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
index 4932f374..32ccfe44 100644
--- a/libdtrace/dt_impl.h
+++ b/libdtrace/dt_impl.h
@@ -88,7 +88,6 @@ struct dt_pfdict;		/* see <dt_printf.h> */
 struct dt_arg;			/* see below */
 struct dt_provider;		/* see <dt_provider.h> */
 struct dt_probe;		/* see <dt_probe.h> */
-struct pid_probespec;		/* see <pid.h> */
 struct dt_pebset;		/* see <dt_peb.h> */
 struct dt_xlator;		/* see <dt_xlator.h> */
 
@@ -362,13 +361,6 @@ struct dtrace_hdl {
 	size_t dt_probes_sz;	/* size of array of probes */
 	uint32_t dt_probe_id;	/* next available probe id */
 
-	/*
-	 * uprobes potentially of interest: some may be instantiated as
-	 * dtrace probes.
-	 */
-	struct pid_probespec *dt_uprobespecs;
-	size_t dt_uprobespecs_sz; /* size of array of uprobes */
-
 	struct dt_probe *dt_error; /* ERROR probe */
 
 	dt_htab_t *dt_provs;	/* hash table of dt_provider_t's */
@@ -406,6 +398,7 @@ struct dtrace_hdl {
 	dt_list_t dt_lib_path;	/* linked-list forming library search path */
 	char *dt_module_path;	/* pathname of kernel module root */
 	dt_version_t dt_kernver;/* kernel version, used in the libpath */
+	char *dt_dofstash_path;	/* Path to the DOF stash.  */
 	uid_t dt_useruid;	/* lowest non-system uid: set via -xuseruid */
 	char *dt_sysslice;	/* the systemd system slice: set via -xsysslice */
 	uint_t dt_lazyload;	/* boolean:  set via -xlazyload */
diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
index a0a2cefb..53db0d6d 100644
--- a/libdtrace/dt_open.c
+++ b/libdtrace/dt_open.c
@@ -612,6 +612,7 @@ const dtrace_pattr_t _dtrace_prvdesc = {
 static const char *_dtrace_defcpp = "cpp"; /* default cpp(1) to invoke */
 static const char *_dtrace_defld = "ld";   /* default ld(1) to invoke */
 static const char *_dtrace_defproc = "/proc";   /* default /proc path */
+static const char *_dtrace_defdofstash = "/run/dtrace";   /* default DOF stash path */
 static const char *_dtrace_defsysslice = ":/system.slice/"; /* default systemd
 							       system slice */
 
@@ -747,6 +748,7 @@ dt_vopen(int version, int flags, int *errp,
 	dtp->dt_ld_path = strdup(_dtrace_defld);
 	Pset_procfs_path(_dtrace_defproc);
 	dtp->dt_sysslice = strdup(_dtrace_defsysslice);
+	dtp->dt_dofstash_path = strdup(_dtrace_defdofstash);
 	dtp->dt_useruid = DTRACE_USER_UID;
 	dtp->dt_vector = vector;
 	dtp->dt_varg = arg;
@@ -1303,7 +1305,6 @@ dtrace_close(dtrace_hdl_t *dtp)
 	dt_pfdict_destroy(dtp);
 	dt_dof_fini(dtp);
 	dt_probe_fini(dtp);
-	dt_pid_free_uprobespecs(dtp);
 
 	/*
 	 * FIXME:
@@ -1326,6 +1327,7 @@ dtrace_close(dtrace_hdl_t *dtp)
 	free(dtp->dt_cpp_path);
 	free(dtp->dt_ld_path);
 	free(dtp->dt_sysslice);
+	free(dtp->dt_dofstash_path);
 
 	free(dtp->dt_freopen_filename);
 	free(dtp->dt_sprintf_buf);
diff --git a/libdtrace/dt_options.c b/libdtrace/dt_options.c
index 2261dc3a..997f11a2 100644
--- a/libdtrace/dt_options.c
+++ b/libdtrace/dt_options.c
@@ -421,6 +421,23 @@ dt_opt_disasm(dtrace_hdl_t *dtp, const char *arg, uintptr_t option)
 	return 0;
 }
 
+/*ARGSUSED*/
+static int
+dt_opt_dofstash_path(dtrace_hdl_t *dtp, const char *arg, uintptr_t option)
+{
+	char *path;
+
+	if (arg == NULL)
+		return dt_set_errno(dtp, EDT_BADOPTVAL);
+
+	if ((path = strdup(arg)) == NULL)
+		return dt_set_errno(dtp, EDT_NOMEM);
+	free(dtp->dt_dofstash_path);
+	dtp->dt_dofstash_path = path;
+
+	return 0;
+}
+
 /*ARGSUSED*/
 static int
 dt_opt_evaltime(dtrace_hdl_t *dtp, const char *arg, uintptr_t option)
@@ -1123,6 +1140,7 @@ static const dt_option_t _dtrace_ctoptions[] = {
 	{ "debugassert", dt_opt_debug_assert },
 	{ "define", dt_opt_cpp_opts, (uintptr_t)"-D" },
 	{ "disasm", dt_opt_disasm },
+	{ "dofstashpath", dt_opt_dofstash_path },
 	{ "droptags", dt_opt_droptags },
 	{ "dtypes", dt_opt_dtypes },
 	{ "empty", dt_opt_cflags, DTRACE_C_EMPTY },
diff --git a/libdtrace/dt_pid.c b/libdtrace/dt_pid.c
index a6d58857..ffec3eed 100644
--- a/libdtrace/dt_pid.c
+++ b/libdtrace/dt_pid.c
@@ -5,7 +5,15 @@
  * http://oss.oracle.com/licenses/upl.
  */
 
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <stddef.h>
 #include <assert.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <glob.h>
 #include <string.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -22,7 +30,7 @@
 #endif
 
 #include <port.h>
-#include <uprobes.h>
+#include <dof_parser.h>
 
 #include <dt_impl.h>
 #include <dt_program.h>
@@ -31,7 +39,7 @@
 #include <dt_string.h>
 
 /*
- * Information on a PID or USDT probe.
+ * Information on a PID probe.
  */
 typedef struct dt_pid_probe {
 	dtrace_hdl_t *dpp_dtp;
@@ -62,15 +70,12 @@ static char *
 dt_pid_objname(Lmid_t lmid, const char *obj)
 {
 	char *buf;
-	int len;
 
 	if (lmid == LM_ID_BASE)
 		return strdup(obj);
 
-	len = snprintf(NULL, 0, "LM%lx`%s", lmid, obj) + 1;
-	buf = malloc(len);
-	if (buf)
-		snprintf(buf, len, "LM%lx`%s", lmid, obj);
+	if (asprintf(&buf, "LM%lx`%s", lmid, obj) < 0)
+		return NULL;
 
 	return buf;
 }
@@ -99,25 +104,6 @@ dt_pid_error(dtrace_hdl_t *dtp, dt_pcb_t *pcb, dt_proc_t *dpr,
 	return 1;
 }
 
-void
-dt_pid_free_uprobespecs(dtrace_hdl_t *dtp)
-{
-	size_t i;
-
-	if (!dtp->dt_uprobespecs)
-		return;
-
-	for (i = 0; i < dtp->dt_uprobespecs_sz; i++) {
-		free(dtp->dt_uprobespecs[i].pps_prv);
-		free(dtp->dt_uprobespecs[i].pps_mod);
-		free(dtp->dt_uprobespecs[i].pps_fun);
-		free(dtp->dt_uprobespecs[i].pps_prb);
-	}
-
-	free(dtp->dt_uprobespecs);
-	dtp->dt_uprobespecs = NULL;
-}
-
 static int
 dt_pid_create_one_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp,
     pid_probespec_t *psp, const GElf_Sym *symp, pid_probetype_t type)
@@ -716,173 +702,83 @@ dt_pid_create_pid_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp,
 
 	return ret;
 }
-
 /*
- * Scan the uprobe list and remember its contents.
- *
- * This avoids us having to rescan the whole thing every time we create every
- * single probe in turn.
+ * Read a file into a buffer and return it.
  */
-static int
-dt_pid_scan_uprobes(dtrace_hdl_t *dtp, dt_pcb_t *pcb)
+static void *
+read_file(const char *name, size_t *size)
 {
-	typedef struct uprobe_line
-	{
-		dt_list_t list;
-		char *line;
-		int is_enabled;
-	} uprobe_line_t;
-	dt_list_t lines = {0};
-	uprobe_line_t *linep, *old_linep;
-	size_t i = 0;
-	int ret = 0;
-
-	FILE *f;
+	int fd;
+	struct stat s;
 	char *buf = NULL;
-	size_t sz;
+	char *bufptr;
+	int len;
 
-	f = fopen(TRACEFS "uprobe_events", "r");
-	if (!f) {
-		dt_dprintf("cannot open " TRACEFS "uprobe_events: %s\n",
-		    strerror(errno));
-		return -1;
+	if ((fd = open(name, O_RDONLY | O_CLOEXEC)) < 0) {
+		dt_dprintf("cannot open %s while scanning for USDT DOF: %s\n",
+			   name, strerror(errno));
+		return NULL;
 	}
 
-	/*
-	 * We are only interested in pid uprobes, not any other uprobes that may
-	 * exist.  Some of these may be for pid probes, some for usdt: we keep
-	 * track of all of them regardless.
-	 *
-	 * Suck in the list of uprobes in one go, since we need to run over it
-	 * twice (once to count pids and allocate space, once to populate them)
-	 * and it might change between reads.
-	 */
-
-#define UPROBE_PREFIX "p:dt_pid/p_"
-#define UPROBE_IS_ENABLED_PREFIX "p:dt_pid_is_enabled/p_"
-
-	while (getline(&buf, &sz, f) >= 0) {
-		uprobe_line_t *line;
-		int is_enabled;
-
-		if (strncmp(buf, UPROBE_PREFIX,
-			strlen(UPROBE_PREFIX)) == 0)
-			is_enabled = 0;
-		else if (strncmp(buf, UPROBE_IS_ENABLED_PREFIX,
-			strlen(UPROBE_IS_ENABLED_PREFIX)) == 0)
-			is_enabled = 1;
-		else
+	if (fstat(fd, &s) < 0) {
+		dt_dprintf("cannot stat while scanning for USDT DOF: %s\n",
+			   strerror(errno));
+		goto err;
+	}
+	if ((buf = malloc(s.st_size)) == NULL) {
+		dt_dprintf("Out of memory allocating %zi bytes while scanning for USDT DOF\n",
+			   s.st_size);
+		goto err;
+	}
+	*size = s.st_size;
+
+	bufptr = buf;
+	while ((len = read(fd, bufptr, s.st_size)) < s.st_size) {
+		if (len < 0) {
+			if (errno != EINTR) {
+				dt_dprintf("Cannot read USDT DOF: %s\n",
+					   strerror(errno));
+				goto err;
+			}
 			continue;
-
-		line = dt_zalloc(dtp, sizeof (struct uprobe_line));
-		if (!line) {
-			fclose(f);
-			goto err; 		/* errno is set for us. */
 		}
-
-		line->line = buf;
-		line->is_enabled = is_enabled;
-		dt_list_append(&lines, line);
-		sz = 0;
-		dtp->dt_uprobespecs_sz++;
-		buf = NULL;
+		s.st_size -= len;
+		bufptr += len;
 	}
-	fclose(f);
-
-	dtp->dt_uprobespecs = dt_calloc(dtp, dtp->dt_uprobespecs_sz,
-	    sizeof(pid_probespec_t));
-	if (!dtp->dt_uprobespecs)
-		goto err;			/* errno is set for us.  */
-
-	/*
-	 * Now we know how many specs exist, parse and create them.
-	 */
-	for (linep = dt_list_next(&lines); linep != NULL;
-	     linep = dt_list_next(linep)) {
-		uint64_t off;
-		const char *fmt;
-		unsigned long long dev, inum;
-		char *spec = NULL;
-		char *eprv = NULL, *emod = NULL, *efun = NULL, *eprb = NULL;
-		char *prv = NULL, *mod = NULL, *fun = NULL, *prb = NULL;
-		pid_probespec_t *psp;
-
-#define UPROBE_PROBE_FMT "%llx_%llx_%lx %ms P%m[^= ]=\\1 M%m[^= ]=\\2 F%m[^= ]=\\3 N%m[^= ]=\\4"
-#define UPROBE_FMT UPROBE_PREFIX UPROBE_PROBE_FMT
-#define UPROBE_IS_ENABLED_FMT UPROBE_IS_ENABLED_PREFIX UPROBE_PROBE_FMT
-
-		if (!linep->is_enabled)
-			fmt = UPROBE_FMT;
-		else
-			fmt = UPROBE_IS_ENABLED_FMT;
-
-		switch (sscanf(linep->line, fmt, &dev, &inum,
-			       &off, &spec, &eprv, &emod, &efun, &eprb)) {
-		case 8: /* Includes dtrace probe names: decode them. */
-			prv = uprobe_decode_name(eprv);
-			mod = uprobe_decode_name(emod);
-			fun = uprobe_decode_name(efun);
-			prb = uprobe_decode_name(eprb);
-			break;
-		case 4: /* No dtrace probe name - not a USDT probe. */
-			goto next;
-		default:
-			if ((strlen(linep->line) > 0) &&
-			    (linep->line[strlen(linep->line)-1] == '\n'))
-				linep->line[strlen(linep->line)-1] = 0;
-			dt_dprintf("Cannot parse %s as a DTrace uprobe name\n",
-			    linep->line);
-			dtp->dt_uprobespecs_sz--;
-			goto next;
-		}
-
-		psp = &dtp->dt_uprobespecs[i++];
-		psp->pps_type = linep->is_enabled ? DTPPT_IS_ENABLED : DTPPT_OFFSETS;
-		psp->pps_nameoff = 0;
-
-		/*
-		 * These components are only used for creation of an underlying
-		 * probe with no overlying counterpart: usually these are those
-		 * not explicitly listed in the D program, which will never be
-		 * enabled.  In future this may change.
-		 */
-		psp->pps_prv = prv;
-		psp->pps_mod = mod;
-		psp->pps_fun = fun;
-		psp->pps_prb = prb;
+	close(fd);
+	return buf;
+err:
+	free(buf);
+	close(fd);
+	return NULL;
+}
 
-		/*
-		 * Always used.
-		 */
-		psp->pps_dev = dev;
-		psp->pps_inum = inum;
-		psp->pps_off = off;
-	next:
-		free(eprv); free(emod); free(efun); free(eprb);
-		free(spec);
+/*
+ * A quick check that a parsed DOF record read hasn't incurred a buffer overrun
+ * and is of the type expected.
+ */
+static int
+validate_dof_record(const char *path, const dof_parsed_t *parsed,
+		    dof_parsed_info_t expected, size_t buf_size,
+		    size_t seen_size)
+{
+	if (buf_size < seen_size) {
+		dt_dprintf("DOF too small when adding probes (seen %zi bytes)\n",
+			   seen_size);
+		return 0;
 	}
 
-	goto out;
-
-err:
-	ret = -1;
-
-out:
-	old_linep = NULL;
-
-	for (linep = dt_list_next(&lines); linep != NULL;
-	     linep = dt_list_next(linep)) {
-		free(linep->line);
-		free(old_linep);
-		old_linep = linep;
+	if (parsed->type != expected) {
+		dt_dprintf("%s format invalid: expected %i, got %i\n", path,
+			   expected, parsed->type);
+		return 0;
 	}
-	free(old_linep);
-
-	return ret;
+	return 1;
 }
 
+
 /*
- * Rescan the PID uprobe list and create suitable underlying probes.
+ * Create underlying probes relating to the probespec passed on input.
  *
  * If dpr is set, just set up probes relating to mappings found in that one
  * process.  (dpr must in this case be locked.)
@@ -891,81 +787,227 @@ out:
  * probes is not an error.)
  */
 static int
-dt_pid_create_usdt_probes(dtrace_hdl_t *dtp, dt_proc_t *dpr, dt_pcb_t *pcb)
+dt_pid_create_usdt_probes(dtrace_hdl_t *dtp, dt_proc_t *dpr, dtrace_probedesc_t *pdp,
+			  dt_pcb_t *pcb)
 {
 	const dt_provider_t *pvp;
-	size_t i;
 	int ret = 0;
+	char *probepath = NULL;
+	glob_t probeglob = {0};
 
 	/*
 	 * Systemwide probing: not yet implemented.
 	 */
-	assert(dpr != NULL);
+	assert(dpr != NULL && dpr->dpr_proc);
+	assert(MUTEX_HELD(&dpr->dpr_lock));
 
-	dt_dprintf("Scanning for usdt probes matching %i\n", dpr->dpr_pid);
+	dt_dprintf("Scanning for usdt probes in %i matching %s:%s:%s\n",
+		   dpr->dpr_pid, pdp->mod, pdp->fun, pdp->prb);
+
+	pvp = dt_provider_lookup(dtp, "usdt");
+	assert(pvp != NULL);
+
+	if (Pstate(dpr->dpr_proc) == PS_DEAD)
+		return 0;
 
 	/*
-	 * For now, we only read the list of probes once.  In time we will
-	 * reread it whenever necessary.
+	 * Look for DOF matching this probe in the global probe DOF stash, in
+	 * /run/dtrace/probes/$pid/$pid$prv/$mod/$fun/$prb: glob expansion means
+	 * that this may relate to multiple probes.  (This is why we retain
+	 * a run-together $pid$prv component, because the glob may match text on
+	 * both sides of the boundary between $pid and $prv.)
+	 *
+	 * Using this is safe because the parsed DOF is guaranteed up to date
+	 * with the current DTrace, being reparsed by the currently-running
+	 * daemon, and was parsed in a seccomp jail.  The most a process can do
+	 * by messing with this is force probes to be dropped in the wrong place
+	 * in itself: and if a process wants to perturb tracing of itself there
+	 * are many simpler ways, such as overwriting the DOF symbol before the
+	 * ELF constructor runs, etc.
+	 *
+	 * Note: future use of parsed DOF (after DTrace has been running for a
+	 * while) may not be safe, since the daemon may be newer than DTrace
+	 * and thus have newer parsed DOF. A version comparison will suffice to
+	 * check that: for safety we do it here too.
 	 */
-	pvp = dtp->dt_prov_usdt;
-	if (!pvp) {
-		pvp = dt_provider_lookup(dtp, "usdt");
-		assert(pvp != NULL);
-		dtp->dt_prov_usdt = pvp;
-		if (dt_pid_scan_uprobes(dtp, pcb) < 0)
-			return -1;		/* errno is set for us.  */
-	}
 
 	assert(pvp->impl != NULL && pvp->impl->provide_probe != NULL);
 
-	/*
-	 * We are only interested in pid uprobes, not any other uprobes that may
-	 * exist.  Some of these may be for pid probes, some for usdt: we create
-	 * underlying probes for all of them, if we are interested in creating
-	 * mappings for that process at all.
-	 */
-	for (i = 0; i < dtp->dt_uprobespecs_sz; i++) {
-		pid_probespec_t *psp = &dtp->dt_uprobespecs[i];
+	if (strchr(pdp->prv, '.') != NULL ||
+	    strchr(pdp->mod, '.') != NULL ||
+	    strchr(pdp->fun, '.') != NULL ||
+	    strchr(pdp->prb, '.') != NULL) {
+		dt_dprintf("Probe component contains dots: cannot be a USDT probe.\n");
+		return 0;
+	}
+
+	if (asprintf(&probepath, "%s/probes/%i/%s/%s/%s/%s", dtp->dt_dofstash_path,
+		     dpr->dpr_pid, pdp->prv[0] == '\0' ? "*" : pdp->prv,
+		     pdp->mod[0] == '\0' ? "*" : pdp->mod,
+		     pdp->fun[0] == '\0' ? "*" : pdp->fun,
+		     pdp->prb[0] == '\0' ? "*" : pdp->prb) < 0)
+		goto scan_err;
 
+	switch(glob(probepath, GLOB_NOSORT | GLOB_ERR | GLOB_PERIOD, NULL, &probeglob)) {
+	case GLOB_NOSPACE:
+	case GLOB_ABORTED:
 		/*
-		 * Filter out probes not related to the process of interest.
+		 * Directory missing?  PID not present or has no DOF, which is
+		 * fine, though it might lead to a match failure later on.
 		 */
-		if (dpr && dpr->dpr_proc) {
-			assert(MUTEX_HELD(&dpr->dpr_lock));
-			if (Pinode_to_file_map(dpr->dpr_proc, psp->pps_dev,
-				psp->pps_inum) == NULL)
-				continue;
+		if (errno == ENOENT)
+			return 0;
 
-			/*
-			 * This is overwritten repeatedly with each relevant PID
-			 * in turn.
-			 */
-			psp->pps_pid = Pgetpid(dpr->dpr_proc);
+		dt_dprintf("Cannot glob probe components in %s: %s\n", probepath, strerror(errno));
+		goto scan_err;
+	case GLOB_NOMATCH:
+		/* No probes match, which is fine. */
+		return 0;
+	}
+
+	for (size_t i = 0; i < probeglob.gl_pathc; i++) {
+		char *dof_buf = NULL, *p;
+		struct stat s;
+		char *path;
+		size_t dof_buf_size, seen_size = 0;
+		uint64_t *dof_version;
+		char *prv, *mod, *fun, *prb;
+		dof_parsed_t *provider, *probe;
+
+		/*
+		 * Regular files only: in particular, skip . and ..,
+		 * which can appear due to GLOB_PERIOD.
+		 */
+		if ((lstat(probeglob.gl_pathv[i], &s) < 0) ||
+		    (!S_ISREG(s.st_mode)))
+			continue;
+
+		path = strdup(probeglob.gl_pathv[i]);
+		if (path == NULL)
+			goto per_mapping_err;
+
+		dof_buf = read_file(path, &dof_buf_size);
+		if (dof_buf == NULL)
+			goto per_mapping_err;
+		dof_version = (uint64_t *) dof_buf;
+		if (*dof_version != DOF_PARSED_VERSION) {
+			dt_dprintf("Parsed DOF version incorrect (daemon / running DTrace version skew?) %lli (daemon) versus %i (DTrace)\n",
+				   (long long) *dof_version, DOF_PARSED_VERSION);
+			goto per_mapping_err;
 		}
+		p = dof_buf + sizeof(uint64_t);
+		dof_buf_size -= sizeof(uint64_t);
 
 		/*
-		 * Create an underlying probe using psp, if not already present.
-		 *
-		 * Complain if any probe cannot be created: at this stage we
-		 * cannot reliably tell whether a corresponding overlying probe
-		 * will be created (since dt_setcontext only calls us for the
-		 * first one in any given provider).
+		 * The first two pieces of parsed DOF are always provider and
+		 * probe.
+		 */
+		provider = (dof_parsed_t *) p;
+		if (!validate_dof_record(path, provider, DIT_PROVIDER, dof_buf_size,
+					 seen_size))
+                        goto parse_err;
+
+		prv = provider->provider.name;
+
+		p += provider->size;
+		seen_size += provider->size;
+
+		probe = (dof_parsed_t *) p;
+		if (!validate_dof_record(path, probe, DIT_PROBE, dof_buf_size,
+					 seen_size))
+                        goto parse_err;
+
+                mod = probe->probe.name;
+		fun = mod + strlen(mod) + 1;
+		prb = fun + strlen(fun) + 1;
+
+		p += probe->size;
+		seen_size += probe->size;
+
+                /*
+		 * Now the parsed DOF for this probe's tracepoints.
 		 */
+		for (size_t j = 0; j < probe->probe.ntp; j++) {
+			dof_parsed_t *tp = (dof_parsed_t *) p;
+			pid_probespec_t psp = {0};
+			const prmap_t *pmp;
+
+			if (!validate_dof_record(path, tp, DIT_TRACEPOINT,
+						 dof_buf_size, seen_size))
+				goto parse_err;
 
-		dt_dprintf("providing %s:%s:%s:%s\n", psp->pps_prv, psp->pps_mod,
-			   psp->pps_fun, psp->pps_prb);
-		if (pvp->impl->provide_probe(dtp, psp) < 0) {
-			dt_pid_error(dtp, pcb, dpr, D_PROC_USDT,
-				     "failed to instantiate %sprobe %s for pid %d: %s",
-				     psp->pps_type == DTPPT_IS_ENABLED ?
-				     "is-enabled ": "", psp->pps_prb, dpr->dpr_pid,
-				     dtrace_errmsg(dtp, dtrace_errno(dtp)));
-			ret = -1;
+			p += tp->size;
+			seen_size += tp->size;
+
+			/*
+			 * Check for process death in the inner loop to handle
+			 * the process dying while its DOF is being pulled in.
+			 */
+			if (Pstate(dpr->dpr_proc) == PS_DEAD)
+				continue;
+
+			pmp = Paddr_to_map(dpr->dpr_proc, tp->tracepoint.addr);
+			if (!pmp) {
+				dt_dprintf("%i: cannot determine 0x%lx's mapping\n",
+					   Pgetpid(dpr->dpr_proc), tp->tracepoint.addr);
+				continue;
+			}
+
+			psp.pps_fn = Pmap_mapfile_name(dpr->dpr_proc, pmp);
+			if (psp.pps_fn == NULL) {
+				dt_pid_error(dtp, pcb, dpr, D_PROC_USDT,
+					     "Cannot get name of mapping containing "
+					     "%sprobe %s for pid %d\n",
+					     tp->tracepoint.is_enabled ? "is-enabled ": "",
+					     prb, dpr->dpr_pid); /* psp.pps_prb is not set yet */
+				goto oom;
+			}
+
+			psp.pps_type = tp->tracepoint.is_enabled ? DTPPT_IS_ENABLED : DTPPT_OFFSETS;
+			psp.pps_prv = prv;
+			psp.pps_mod = mod;
+			psp.pps_fun = fun;
+			psp.pps_prb = prb;
+			psp.pps_dev = pmp->pr_dev;
+			psp.pps_inum = pmp->pr_inum;
+			psp.pps_pid = dpr->dpr_pid;
+			psp.pps_off = tp->tracepoint.addr - pmp->pr_file->first_segment->pr_vaddr;
+			psp.pps_nameoff = 0;
+
+			dt_dprintf("providing %s:%s:%s:%s for pid %d\n", psp.pps_prv,
+				   psp.pps_mod, psp.pps_fun, psp.pps_prb, psp.pps_pid);
+			if (pvp->impl->provide_probe(dtp, &psp) < 0) {
+				dt_pid_error(dtp, pcb, dpr, D_PROC_USDT,
+					     "failed to instantiate %sprobe %s for pid %d: %s",
+					     tp->tracepoint.is_enabled ? "is-enabled ": "",
+					     psp.pps_prb, psp.pps_pid,
+					     dtrace_errmsg(dtp, dtrace_errno(dtp)));
+				ret = -1;
+			}
+			free(psp.pps_fn);
 		}
+
+		free(path);
+		free(dof_buf);
+		continue;
+
+	  parse_err:
+		dt_dprintf("Parsed DOF corrupt. This should never happen.\n");
+	  oom: ;
+	  per_mapping_err:
+		free(path);
+		free(dof_buf);
+		globfree(&probeglob);
+		return -1;
 	}
 
+	globfree(&probeglob);
 	return ret;
+
+scan_err:
+	dt_dprintf("Cannot read DOF stash directory %s: %s\n",
+		   probepath, strerror(errno));
+	return -1;
 }
 
 #if 0 /* Almost certainly unnecessary in this form */
@@ -1093,7 +1135,8 @@ dt_pid_create_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp, dt_pcb_t *pcb)
 	 * If it's not strictly a pid provider, we might match a USDT provider.
 	 */
 	if (strcmp(provname, pdp->prv) != 0) {
-		if (dt_proc_grab_lock(dtp, pid, DTRACE_PROC_WAITING) < 0) {
+		if (dt_proc_grab_lock(dtp, pid, DTRACE_PROC_WAITING |
+				      DTRACE_PROC_SHORTLIVED) < 0) {
 			dt_pid_error(dtp, pcb, NULL, D_PROC_GRAB,
 			    "failed to grab process %d", (int)pid);
 			return -1;
@@ -1102,10 +1145,7 @@ dt_pid_create_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp, dt_pcb_t *pcb)
 		dpr = dt_proc_lookup(dtp, pid);
 		assert(dpr != NULL);
 
-		if (!dpr->dpr_usdt) {
-			err = dt_pid_create_usdt_probes(dtp, dpr, pcb);
-			dpr->dpr_usdt = B_TRUE;
-		}
+		err = dt_pid_create_usdt_probes(dtp, dpr, pdp, pcb);
 
 		/*
 		 * Put the module name in its canonical form.
@@ -1155,7 +1195,7 @@ dt_pid_create_probes_module(dtrace_hdl_t *dtp, dt_proc_t *dpr)
 			 * a USDT provider.
 			 */
 			if (strcmp(provname, pdp->prv) != 0) {
-				if (dt_pid_create_usdt_probes(dtp, dpr, NULL) < 0)
+				if (dt_pid_create_usdt_probes(dtp, dpr, pdp, NULL) < 0)
 					ret = 1;
 				else
 					dt_pid_fix_mod(NULL, pdp, dtp, dpr->dpr_pid);
diff --git a/libdtrace/dt_pid.h b/libdtrace/dt_pid.h
index e06186eb..497c7751 100644
--- a/libdtrace/dt_pid.h
+++ b/libdtrace/dt_pid.h
@@ -1,6 +1,6 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2006, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2024, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -21,7 +21,6 @@ extern int dt_pid_create_probes(dtrace_probedesc_t *, dtrace_hdl_t *,
 extern int dt_pid_create_probes_module(dtrace_hdl_t *, dt_proc_t *);
 extern pid_t dt_pid_get_pid(const dtrace_probedesc_t *, dtrace_hdl_t *, dt_pcb_t *,
 			    dt_proc_t *);
-extern void dt_pid_free_uprobespecs(dtrace_hdl_t *);
 
 #ifdef	__cplusplus
 }
diff --git a/libdtrace/dt_proc.h b/libdtrace/dt_proc.h
index 1ca1aeb6..77611f7f 100644
--- a/libdtrace/dt_proc.h
+++ b/libdtrace/dt_proc.h
@@ -1,6 +1,6 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2007, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2024, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -41,7 +41,6 @@ typedef struct dt_proc {
 	uint_t dpr_refs;		/* reference count */
 	uint8_t dpr_stop;		/* stop mask: see flag bits below */
 	uint8_t dpr_done;		/* done flag: ctl thread has exited */
-	uint8_t dpr_usdt;		/* usdt flag: usdt probes created */
 	uint8_t dpr_created;            /* proc flag: true if we created this
 					   process, false if we grabbed it */
 	uint8_t dpr_monitoring;		/* true if we should background-monitor
diff --git a/libdtrace/dt_prov_uprobe.c b/libdtrace/dt_prov_uprobe.c
index 9206b084..e1f7e23f 100644
--- a/libdtrace/dt_prov_uprobe.c
+++ b/libdtrace/dt_prov_uprobe.c
@@ -20,16 +20,14 @@
 #include "dt_probe.h"
 #include "dt_pid.h"
 #include "dt_string.h"
-#include "uprobes.h"
 
 /* Provider name for the underlying probes. */
 static const char	prvname[] = "uprobe";
 static const char	prvname_is_enabled[] = "uprobe__is_enabled";
 
-#define PP_IS_MINE	1
-#define PP_IS_RETURN	2
-#define PP_IS_FUNCALL	4
-#define PP_IS_ENABLED	8
+#define PP_IS_RETURN	1
+#define PP_IS_FUNCALL	2
+#define PP_IS_ENABLED	4
 
 typedef struct dt_uprobe {
 	dev_t		dev;
@@ -179,8 +177,7 @@ static dt_probe_t *create_underlying(dtrace_hdl_t *dtp,
 		upp->dev = psp->pps_dev;
 		upp->inum = psp->pps_inum;
 		upp->off = psp->pps_off;
-		if (psp->pps_fn)
-			upp->fn = strdup(psp->pps_fn);
+		upp->fn = strdup(psp->pps_fn);
 		upp->tp = dt_tp_alloc(dtp);
 		if (upp->tp == NULL)
 			goto fail;
@@ -231,9 +228,9 @@ static int provide_probe(dtrace_hdl_t *dtp, const pid_probespec_t *psp,
 	pd.prb = prb;
 
 	/* Get (or create) the provider for the PID of the probe. */
-	pvp = dt_provider_lookup(dtp, prv);
+	pvp = dt_provider_lookup(dtp, pd.prv);
 	if (pvp == NULL) {
-		pvp = dt_provider_create(dtp, prv, pvops, &pattr, NULL);
+		pvp = dt_provider_create(dtp, pd.prv, pvops, &pattr, NULL);
 		if (pvp == NULL)
 			return -1;
 	}
@@ -318,7 +315,7 @@ static int provide_pid_probe(dtrace_hdl_t *dtp, const pid_probespec_t *psp)
 		return -1;
 	}
 
-	return provide_probe(dtp, psp, prb, &dt_usdt, PP_IS_MINE);
+	return provide_probe(dtp, psp, prb, &dt_pid, 0);
 }
 
 static int provide_usdt_probe(dtrace_hdl_t *dtp, const pid_probespec_t *psp)
@@ -587,6 +584,57 @@ static int trampoline_is_enabled(dt_pcb_t *pcb, uint_t exitlbl)
 	return 0;
 }
 
+/* Return a newly allocated uprobe name for dev/ino/addr and flags, or NULL. */
+static char *uprobe_name(dev_t dev, ino_t ino, uint64_t addr, int flags)
+{
+	const char	*suffix = (flags & PP_IS_ENABLED) ? "_is_enabled" : "";
+	char		*result;
+
+	if (asprintf(&result, "dt_pid%s/%c_%llx_%llx_%lx", suffix,
+		     (flags & PP_IS_RETURN) ? 'r' : 'p', (unsigned long long)dev,
+		     (unsigned long long)ino, (unsigned long)addr) < 0)
+		return NULL;
+	return result;
+}
+
+/*
+ * Create a uprobe for a given dev/ino, mapping filename, and address: the
+ * uprobe may be a uretprobe or an is-enabled probe.  Return the probe's name as
+ * a new dynamically-allocated string (the caller frees it), or NULL on error.
+ */
+static char *uprobe_create(dev_t dev, ino_t ino, const char *mapping_fn,
+			   uint64_t addr, int flags)
+{
+	int	fd = -1;
+	int	rc = -1;
+	char	*name;
+	char	*spec;
+
+	if (asprintf(&spec, "%s:0x%lx", mapping_fn, (unsigned long)addr) < 0)
+		return NULL;
+
+	name = uprobe_name(dev, ino, addr, flags);
+	if (!name)
+		goto out;
+
+	/* Add the uprobe. */
+	fd = open(TRACEFS "uprobe_events", O_WRONLY | O_APPEND);
+	if (fd == -1)
+		goto out;
+
+	rc = dprintf(fd, "%c:%s %s\n", flags & PP_IS_RETURN ? 'r' : 'p', name, spec);
+
+out:
+	free(spec);	/* Only needed to build the command written above. */
+	if (fd != -1)
+		close(fd);
+	if (rc < 0) {
+		free(name);
+		return NULL;
+	}
+	return name;
+}
+
 static int attach(dtrace_hdl_t *dtp, const dt_probe_t *prp, int bpf_fd)
 {
 	dt_uprobe_t	*upp = prp->prv_data;
@@ -599,29 +647,20 @@ static int attach(dtrace_hdl_t *dtp, const dt_probe_t *prp, int bpf_fd)
 	if (dt_tp_is_created(tpp))
 		goto attach_bpf;
 
-	if (upp->flags & PP_IS_MINE) {
-		char	*spec;
+	assert(upp->fn != NULL);
 
-		assert(upp->fn != NULL);
+	prb = uprobe_create(upp->dev, upp->inum, upp->fn, upp->off,
+			    upp->flags);
 
-		if (asprintf(&spec, "%s:0x%lx", upp->fn, upp->off) < 0)
-			return -ENOENT;
-
-		prb = uprobe_create(upp->dev, upp->inum, upp->off, spec,
-				    upp->flags & PP_IS_RETURN, 0);
-		free(spec);
-
-		/*
-		 * If the uprobe creation failed, it is possible it already
-		 * existed because someone else created it.  Try to access its
-		 * tracefs info and if that fail, we really failed.
-		 */
-	}
+	/*
+	 * If the uprobe creation failed, it is possible it already
+	 * existed because someone else created it.  Try to access its
+	 * tracefs info and if that fails, we really failed.
+	 */
 
 	if (prb == NULL)
 		prb = uprobe_name(upp->dev, upp->inum, upp->off,
-				  upp->flags & PP_IS_RETURN,
-				  upp->flags & PP_IS_ENABLED);
+				  upp->flags);
 
 	/* open format file */
 	rc = asprintf(&fn, "%s%s/format", EVENTSFS, prb);
@@ -653,6 +692,35 @@ static int probe_info(dtrace_hdl_t *dtp, const dt_probe_t *prp,
 	return 0;
 }
 
+/*
+ * Remove the uprobe named after dev/ino/addr/flags by writing a "-:<name>"
+ * command to the uprobe_events file under TRACEFS.
+ *
+ * Returns 0 if the command was written, or -1 if the name could not be built,
+ * the file could not be opened, or the write failed.
+ */
+static int
+uprobe_delete(dev_t dev, ino_t ino, uint64_t addr, int flags)
+{
+	char	*probe_name = uprobe_name(dev, ino, addr, flags);
+	int	events_fd;
+	int	written = -1;
+
+	if (probe_name == NULL)
+		return -1;
+
+	events_fd = open(TRACEFS "uprobe_events", O_WRONLY | O_APPEND);
+	if (events_fd != -1) {
+		written = dprintf(events_fd, "-:%s\n", probe_name);
+		close(events_fd);
+	}
+
+	free(probe_name);
+	if (written < 0)
+		return -1;
+	return 0;
+}
+
 /*
  * Try to clean up system resources that may have been allocated for this
  * probe.
@@ -674,11 +742,7 @@ static void detach(dtrace_hdl_t *dtp, const dt_probe_t *prp)
 
 	dt_tp_detach(dtp, tpp);
 
-	if (!(upp->flags & PP_IS_MINE))
-		return;
-
-	uprobe_delete(upp->dev, upp->inum, upp->off, upp->flags & PP_IS_RETURN,
-		      upp->flags & PP_IS_ENABLED);
+	uprobe_delete(upp->dev, upp->inum, upp->off, upp->flags);
 }
 
 /*
-- 
2.42.0




More information about the DTrace-devel mailing list