[DTrace-devel] [PATCH 4/4] dtprobed: empty the DOF stash for exec()ed processes

Nick Alcock nick.alcock at oracle.com
Thu Aug 10 20:31:36 UTC 2023


The DOF stash, and USDT in general, currently have problems with fork()ed
and exec()ed processes. The worst of these problems is with exec().  Since
exec() replaces a process image with another atomically, ELF destructors do
not get to run, so dtprobed never receives a DTRACEHIOC_REMOVE request for
any of the DOF in it.  If the exec()ed process also contains DOF, this DOF
will add to the DOF already in the stash, and DTrace will think that the
exec()ed process contains all the probes in the original process in addition
to its own.

The fix is fairly simple, though it involves a bit of annoying refactoring:
record the (dev, ino) of the executable mapping of every process
contributing DOF in a new dof-pid file "exec-mapping", then compare this
with the executable mapping of the contributing process when new DOF is
contributed.  Any exec() will necessarily change this and we can clean out
the old DOF dir (hence the refactoring, since this adds a second thing
beyond stale process cleanup that wants to delete whole PIDs out of the DOF
stash).

Testing this is a little painful because DTrace proper does not detect
exec() and rescan for new USDT probes (it would also need to wait until the
new process had run its drti ELF constructor); but it can be done.

Technically there is still one case in which the DOF can go stale: if a
process does dlopen()s, then exec()s itself.   This is fixable (by detecting
new-DOF contributions from a mapping that we already know about for this
PID), but is a slightly distinct fix with a different testcase, so is best
put in a separate commit.

Signed-off-by: Nick Alcock <nick.alcock at oracle.com>
---
 dtprobed/dof_stash.c                          | 379 ++++++++++++------
 dtprobed/dof_stash.h                          |  22 +-
 dtprobed/dtprobed.c                           |  38 +-
 test/unittest/usdt/tst.exec-dof-replacement.r |   1 +
 .../usdt/tst.exec-dof-replacement.r.p         |   2 +
 .../unittest/usdt/tst.exec-dof-replacement.sh |  99 +++++
 6 files changed, 391 insertions(+), 150 deletions(-)
 create mode 100644 test/unittest/usdt/tst.exec-dof-replacement.r
 create mode 100755 test/unittest/usdt/tst.exec-dof-replacement.r.p
 create mode 100755 test/unittest/usdt/tst.exec-dof-replacement.sh

diff --git a/dtprobed/dof_stash.c b/dtprobed/dof_stash.c
index 0e25f85bcd7be..e42331ebe503a 100644
--- a/dtprobed/dof_stash.c
+++ b/dtprobed/dof_stash.c
@@ -34,6 +34,10 @@
  * until its deletion (short-lived processes, mostly).  Deleted at daemon
  * startup.
  *
+ * /run/dtrace/stash/dof-pid/$pid/exec-mapping: a dev/ino pair in the form
+ * $dev-$ino (as in the $dev-$ino directory entries): the dev/ino of the
+ * process's primary text mapping, as given by libproc.
+ *
  * /run/dtrace/stash/dof-pid/$pid/$gen: symlink to a $dev-$ino dir. "$gen" is an
  * incrementing generation counter starting at zero for every unique PID, used
  * by clients to uniquely identify DOF pieces to remove.
@@ -418,17 +422,72 @@ early_err:
 }
 
 /*
- * Write out the DOF helper.  A trivial wrapper.
+ * Read a file into a buffer and return it.  If READ_SIZE is non-negative, read
+ * only that many bytes (and silently fail if the file isn't at least that
+ * long).
+ */
+static void *
+read_file(int fd, ssize_t read_size, size_t *size)
+{
+	struct stat s;
+	char *buf;
+	char *bufptr;
+	int len;
+
+        if (fstat(fd, &s) < 0) {
+		fuse_log(FUSE_LOG_ERR, "cannot stat: %s\n", strerror(errno));
+		return NULL;
+	}
+	if (read_size >= 0) {
+		if (s.st_size < read_size)
+			return NULL;
+	} else
+		read_size = s.st_size;
+
+	if ((buf = malloc(read_size)) == NULL) {
+		fuse_log(FUSE_LOG_ERR, "out of memory allocating %zi bytes\n",
+			 read_size);
+		return NULL;
+	}
+
+	if (size)
+		*size = read_size;
+
+        bufptr = buf;
+	while ((len = read(fd, bufptr, read_size)) < read_size) {
+		if (len < 0) {
+			if (errno != EINTR) {
+				fuse_log(FUSE_LOG_ERR, "cannot read: %s",
+					 strerror(errno));
+				free(buf);
+				return NULL;
+			}
+			continue;
+		}
+		read_size -= len;
+		bufptr += len;
+	}
+	return buf;
+}
+
+/*
+ * Write out a buffer into a file. A trivial wrapper.
+ *
+ * If exists_ok is set, just do nothing if the file already exists.
  */
 static
-int dof_stash_write_dh(int dirfd, const dof_helper_t *dh)
+int dof_stash_write_file(int dirfd, const char *name, const void *buf,
+			 size_t bufsiz, int exists_ok)
 {
 	int fd;
 
-        if ((fd = openat(dirfd, "dh", O_CREAT | O_EXCL | O_WRONLY, 0644)) < 0)
+        if ((fd = openat(dirfd, name, O_CREAT | O_EXCL | O_WRONLY, 0644)) < 0) {
+		if (exists_ok && errno == EEXIST)
+			return 0;
 		goto err;
+	}
 
-	if (write_chunk(fd, dh, sizeof(dof_helper_t *)) < 0) {
+	if (write_chunk(fd, buf, bufsiz) < 0) {
 		close(fd);
 		goto err;
 	}
@@ -439,19 +498,82 @@ int dof_stash_write_dh(int dirfd, const dof_helper_t *dh)
 	return 0;
 
  err:
-	fuse_log(FUSE_LOG_ERR, "dtprobed: cannot write out DOF helper: %s\n",
+	fuse_log(FUSE_LOG_ERR, "dtprobed: cannot write out %s: %s\n", name,
 		 strerror(errno));
 	return -1;
 }
 
 /*
- * Add a piece of raw DOF from a given (pid, dev, ino) triplet.
+ * Figure out if a PID's entry in the DOF stash has been invalidated by an
+ * exec(), by comparing the passed-in (dev, ino) against that recorded in
+ * the exec-mapping.  Having no exec-mapping is perfectly valid and
+ * indicates that no DOF has previously been seen for this PID at all.
+ *
+ * Returns nonzero if an exec() was detected, and zero if not, or if the
+ * exec-mapping cannot be read or parsed (in which case an error is logged).
+ */
+static int
+dof_stash_execed(pid_t pid, int perpid_dir, dev_t dev, ino_t ino)
+{
+	char *exec_mapping;
+	char *terminated;
+	size_t size;
+	int fd;
+	int execed;
+	dev_t old_dev;
+	ino_t old_ino;
+
+	if ((fd = openat(perpid_dir, "exec-mapping", O_RDONLY)) < 0) {
+		if (errno == ENOENT)
+			return 0;
+		goto err;
+	}
+
+	exec_mapping = read_file(fd, -1, &size);
+	if (exec_mapping == NULL)
+		goto err_close;
+
+	/*
+	 * The exec-mapping is written out without a trailing NUL, and
+	 * read_file() does not add one: terminate it before parsing it as a
+	 * string.
+	 */
+	if ((terminated = realloc(exec_mapping, size + 1)) == NULL)
+		goto err_free;
+	exec_mapping = terminated;
+	exec_mapping[size] = '\0';
+
+	if (split_dof_name(exec_mapping, &old_dev, &old_ino) < 0) {
+		fuse_log(FUSE_LOG_ERR, "PID %i, exec mapping \"%s\" unparseable\n",
+			 pid, exec_mapping);
+		goto err_free;
+	}
+	execed = !((dev == old_dev) && (ino == old_ino));
+
+	/* Success: release the buffer and fd rather than leaking them. */
+	free(exec_mapping);
+	close(fd);
+	return execed;
+
+  err_free:
+	free(exec_mapping);
+  err_close:
+	close(fd);
+  err:
+	fuse_log(FUSE_LOG_ERR, "Cannot determine if PID %i has execed; assuming not: %s\n",
+		 pid, strerror(errno));
+	return 0;
+}
+
+/*
+ * Add a piece of raw DOF from a given (pid, dev, ino) triplet.  May remove
+ * stale DOF in the process.
  *
  * Return the new DOF generation number, or -1 on error.
  */
 int
-dof_stash_add(pid_t pid, dev_t dev, ino_t ino, const dof_helper_t *dh,
-	      const void *dof, size_t size)
+dof_stash_add(pid_t pid, dev_t dev, ino_t ino, dev_t exec_dev, dev_t exec_ino,
+	      const dof_helper_t *dh, const void *dof, size_t size)
 {
 	char *dof_name = make_dof_name(dev, ino);
 	char *pid_name = make_numeric_name(pid);
@@ -464,17 +566,33 @@ dof_stash_add(pid_t pid, dev_t dev, ino_t ino, const dof_helper_t *dh,
 	int new_dof = 0;
 	int err = -1;
 
-	if (!pid_name || !dof_name)
+	if (!pid_name || !dof_name) {
+		fuse_log(FUSE_LOG_ERR, "Out of memory stashing DOF\n");
 		goto out_free;
-
-
-	/* Make the directories. */
+	}
 
         perpid_dir = make_state_dirat(pid_dir, pid_name, "PID", 0);
 
         if (perpid_dir < 0)
 		goto out_free;
 
+	/* Figure out if the executable mapping has changed: if it has, purge
+	   the entire PID and recreate it.  */
+
+	if (dof_stash_execed(pid, perpid_dir, exec_dev, exec_ino)) {
+		fuse_log(FUSE_LOG_DEBUG, "%i: exec() detected, removing old DOF\n", pid);
+		if (dof_stash_remove_pid(pid) < 0) {
+			fuse_log(FUSE_LOG_ERR, "PID %i exec()ed, but cannot remove dead DOF\n",
+				 pid);
+			goto err_unlink_nomsg;
+		}
+
+		perpid_dir = make_state_dirat(pid_dir, pid_name, "PID", 0);
+
+		if (perpid_dir < 0)
+			goto out_free;
+	}
+
         perpid_dof_dir = make_state_dirat(perpid_dir, dof_name, "per-pid DOF", 0);
 
         if (perpid_dof_dir < 0)
@@ -518,9 +636,34 @@ dof_stash_add(pid_t pid, dev_t dev, ino_t ino, const dof_helper_t *dh,
         if (linkat(dof_dir, dof_name, perpid_dof_dir, "raw", 0) < 0)
 		goto err_unlink_msg;
 
-	if (dof_stash_write_dh(perpid_dof_dir, dh) < 0)
+	if (dof_stash_write_file(perpid_dof_dir, "dh", dh,
+				 sizeof(dof_helper_t), 0) < 0)
 		goto err_unlink_nomsg;
 
+	if (new_dof) {
+		char *exec_mapping = make_dof_name(exec_dev, exec_ino);
+
+                if (!exec_mapping) {
+			fuse_log(FUSE_LOG_ERR, "Out of memory stashing new DOF\n");
+			goto err_unlink_nomsg;
+		}
+
+		/*
+		 * The exec-mapping wants writing out iff this is the first new
+		 * DOF written for this PID.  We already checked for exec()
+		 * above, so if exec-mapping still exists, its content must be
+		 * identical to what we're about to write out: so treat an
+		 * already-existing file as a do-nothing condition.
+		 */
+                if (dof_stash_write_file(perpid_dir, "exec-mapping", exec_mapping,
+					 strlen(exec_mapping), 1) < 0) {
+			free(exec_mapping);
+			goto err_unlink_nomsg;
+		}
+
+                free(exec_mapping);
+	}
+
 	/*
 	 * Update the generation counter and mark this DOF's generation.  On
 	 * error after this point we leak generation counter values.
@@ -680,7 +823,9 @@ dof_stash_remove(pid_t pid, int gen)
 	 * non-per-PID stuff; if zero, unlink that too.
 	 */
 
-	fuse_log(FUSE_LOG_DEBUG, "gen_name: %s; gen_linkname: %s; perpid_dof_dir: %i\n", gen_name, gen_linkname, perpid_dof_dir);
+	fuse_log(FUSE_LOG_DEBUG, "%i: gen_name: %s; gen_linkname: %s; perpid_dof_dir: %i\n",
+		 pid, gen_name, gen_linkname, perpid_dof_dir);
+
         if (unlinkat(perpid_dof_dir, "raw", 0) != 0 && errno != ENOENT) {
 		fuse_log(FUSE_LOG_ERR, "dtprobed: cannot unlink per-PID raw DOF for PID %i generation %i: %s\n",
 			 pid, gen, strerror(errno));
@@ -703,13 +848,12 @@ dof_stash_remove(pid_t pid, int gen)
 		unlink_err = 1;
 	}
 
-	if (unlink_err)
-		fuse_log(FUSE_LOG_ERR, "dtprobed: cannot unlink per-PID DOF %s for PID %i generation %i: %s\n",
-			 unlink_errfn, pid, gen, strerror(errno));
+	if (refcount_cleanup(dof_dir, gen_linkname, 0) < 0) {
+		unlink_errfn = gen_linkname;
+		unlink_err = 1;
+	}
 
-	refcount_cleanup(dof_dir, gen_linkname, 0);
-
-	/*
+        /*
 	 * Clean up the PID directory itself, if it is now empty.  We can't just
 	 * use refcount_cleanup here because we also have to delete the
 	 * generation counter right before deleting the directory.
@@ -727,9 +871,20 @@ dof_stash_remove(pid_t pid, int gen)
 				 pid, strerror(errno));
 			goto err;
 		}
+		if (unlinkat(perpid_dir, "exec-mapping", 0) < 0
+			&& errno != ENOENT) {
+			fuse_log(FUSE_LOG_ERR, "dtprobed: cannot clean up exec-mapping in per-PID dir for PID %i: %s\n",
+				 pid, strerror(errno));
+			goto err;
+		}
 		refcount_cleanup(pid_dir, pid_name, 1);
 	}
 
+	if (unlink_err)
+		fuse_log(FUSE_LOG_ERR, "dtprobed: cannot unlink per-PID DOF %s for PID %i generation %i: %s\n",
+			 unlink_errfn, pid, gen, strerror(errno));
+
+
  out:
 	err = 0;
 
@@ -748,6 +903,84 @@ oom:
 	goto err;
 }
 
+/*
+ * Remove all DOF registered for a given PID.  Used when new DOF arrives and the
+ * primary text mapping is found to have changed (i.e. after an exec()), and
+ * when processes die without deregistering (see below).
+ *
+ * Returns 0 on success, or -1 (after logging an error) on failure.
+ */
+int
+dof_stash_remove_pid(pid_t pid)
+{
+	char *pid_name = make_numeric_name(pid);
+	struct dirent *ent;
+	DIR *dir;
+	int tmp;
+	int err = -1;
+
+	if (!pid_name) {
+		fuse_log(FUSE_LOG_ERR, "Out of memory removing DOF for PID %i\n",
+			 pid);
+		return -1;
+	}
+
+        if ((tmp = openat(pid_dir, pid_name, O_RDONLY | O_CLOEXEC)) < 0) {
+                fuse_log(FUSE_LOG_ERR, "cannot open per-PID DOF mappings directory for pid %s for cleanup: %s\n",
+                         pid_name, strerror(errno));
+                goto out;
+        }
+
+        if ((dir = fdopendir(tmp)) == NULL) {
+                fuse_log(FUSE_LOG_ERR, "cannot clean up per-PID DOF mappings for PID %s: %s\n",
+                         pid_name, strerror(errno));
+                close(tmp);
+                goto out;
+        }
+
+        fuse_log(FUSE_LOG_DEBUG, "pruning dead/execed PID %s\n", pid_name);
+
+        /* Work over all the mappings in this PID. */
+
+        while (errno = 0, (ent = readdir(dir)) != NULL) {
+                int gen;
+                char *end_gen;
+
+                if (ent->d_type != DT_LNK)
+                        continue;
+
+                fuse_log(FUSE_LOG_DEBUG, "Working over generation %s\n",
+                         ent->d_name);
+
+                gen = strtol(ent->d_name, &end_gen, 10);
+                if (*end_gen != '\0')
+                        continue;
+
+                if (dof_stash_remove(pid, gen) < 0) {
+			fuse_log(FUSE_LOG_ERR, "cannot remove dead pid %i\n", pid);
+			closedir(dir);
+			goto out;
+		}
+        }
+
+	/*
+	 * readdir() returns NULL both at end-of-directory and on error: only
+	 * errno distinguishes the two, and it can only be checked out here.
+	 */
+	if (errno != 0) {
+		fuse_log(FUSE_LOG_ERR, "cannot read per-PID DOF mappings for PID %i for cleanup: %s\n",
+			 pid, strerror(errno));
+		closedir(dir);
+		goto out;
+	}
+        closedir(dir);
+	err = 0;
+
+  out:
+	free(pid_name);
+	return err;
+}
+
 /*
  * Prune dead processes out of the DOF stash.  This is not a correctness
  * operation, just a space-waste reducer. If a process with DOF died uncleanly
@@ -767,8 +988,8 @@ oom:
 void
 dof_stash_prune_dead(void)
 {
-	DIR *all_pids_dir;
-	struct dirent *pid_ent;
+	DIR *dir;
+	struct dirent *ent;
 	int tmp;
 
 	fuse_log(FUSE_LOG_DEBUG, "Pruning dead PIDs\n");
@@ -778,34 +999,32 @@ dof_stash_prune_dead(void)
 		return;
 	}
 
-        if ((all_pids_dir = fdopendir(tmp)) == NULL) {
+        if ((dir = fdopendir(tmp)) == NULL) {
 		close(tmp);
 		fuse_log(FUSE_LOG_ERR, "cannot clean up per-PID DOF directory: %s\n",
 			 strerror(errno));
 		return;
 	}
-	rewinddir(all_pids_dir);
+	rewinddir(dir);
 
 	/*
 	 * Work over all the PIDs.
 	 */
-	while (errno = 0, (pid_ent = readdir(all_pids_dir)) != NULL) {
-		DIR *perpid_dir;
-		struct dirent *mapping_ent;
+	while (errno = 0, (ent = readdir(dir)) != NULL) {
 		char *end_pid;
 		pid_t pid;
 
 		if (errno != 0)
 			goto scan_failure;
 
-		if (pid_ent->d_type != DT_DIR)
+		if (ent->d_type != DT_DIR)
 			continue;
 
 		/*
 		 * Only directories with numeric names can be PIDs: skip all the
 		 * rest.
 		 */
-		pid = strtol(pid_ent->d_name, &end_pid, 10);
+		pid = strtol(ent->d_name, &end_pid, 10);
 		if (*end_pid != '\0')
 			continue;
 
@@ -818,46 +1037,7 @@ dof_stash_prune_dead(void)
 		 * mapping in turn.
 		 */
 
-		if ((tmp = openat(pid_dir, pid_ent->d_name, O_RDONLY | O_CLOEXEC)) < 0) {
-			fuse_log(FUSE_LOG_ERR,"cannot open per-PID DOF mappings directory for cleanup: %s\n",
-				 strerror(errno));
-			goto out;
-		}
-
-		if ((perpid_dir = fdopendir(tmp)) == NULL) {
-			fuse_log(FUSE_LOG_ERR, "cannot clean up per-PID DOF mappings: %s\n",
-				 strerror(errno));
-			close(tmp);
-			goto out;
-		}
-
-		fuse_log(FUSE_LOG_DEBUG, "Pruning dead PID %s\n", pid_ent->d_name);
-
-		/* Work over all the mappings in this PID. */
-
-                while (errno = 0, (mapping_ent = readdir(perpid_dir)) != NULL) {
-			int gen;
-			char *end_gen;
-
-                        if (errno != 0) {
-				fuse_log(FUSE_LOG_ERR, "cannot read per-PID DOF mappings for cleanup: %s\n",
-					 strerror(errno));
-				closedir(perpid_dir);
-				goto out;
-			}
-
-			if (mapping_ent->d_type != DT_LNK)
-				continue;
-
-			fuse_log(FUSE_LOG_DEBUG, "Working over generation %s\n", mapping_ent->d_name);
-
-			gen = strtol(mapping_ent->d_name, &end_gen, 10);
-			if (*end_gen != '\0')
-				continue;
-
-                        dof_stash_remove(pid, gen);
-		}
-		closedir(perpid_dir);
+		dof_stash_remove_pid(pid);
 		errno = 0;
 	}
 
@@ -865,7 +1045,7 @@ dof_stash_prune_dead(void)
 		goto scan_failure;
 
 out:
-	closedir(all_pids_dir);
+	closedir(dir);
 	return;
 
 scan_failure:
@@ -874,55 +1054,6 @@ scan_failure:
 	goto out;
 }
 
-/*
- * Read a file into a buffer and return it.  If READ_SIZE is non-negative, read
- * only that many bytes (and silently fail if the file isn't at least that
- * long).
- */
-static void *
-read_file(int fd, ssize_t read_size, size_t *size)
-{
-	struct stat s;
-	char *buf;
-	char *bufptr;
-	int len;
-
-        if (fstat(fd, &s) < 0) {
-		fuse_log(FUSE_LOG_ERR, "cannot stat: %s\n", strerror(errno));
-		return NULL;
-	}
-	if (read_size >= 0) {
-		if (s.st_size < read_size)
-			return NULL;
-	} else
-		read_size = s.st_size;
-
-	if ((buf = malloc(read_size)) == NULL) {
-		fuse_log(FUSE_LOG_ERR, "out of memory allocating %zi bytes\n",
-			 read_size);
-		return NULL;
-	}
-
-	if (size)
-		*size = read_size;
-
-        bufptr = buf;
-	while ((len = read(fd, bufptr, read_size)) < read_size) {
-		if (len < 0) {
-			if (errno != EINTR) {
-				fuse_log(FUSE_LOG_ERR, "cannot read: %s",
-					 strerror(errno));
-				free(buf);
-				return NULL;
-			}
-			continue;
-		}
-		read_size -= len;
-		bufptr += len;
-	}
-	return buf;
-}
-
 /*
  * Reparse all DOF.  Invoked at daemon startup, so reports errors down the
  * sync_fd for printing in the parent.  Mappings that cannot be reparsed are
@@ -934,9 +1065,9 @@ read_file(int fd, ssize_t read_size, size_t *size)
  */
 int
 reparse_dof(int out, int in,
-	    int (*reparse)(int pid, int out, int in, dev_t dev, ino_t inum,
-			   dof_helper_t *dh, const void *in_buf, size_t in_bufsz,
-			   int reparsing),
+	    int (*reparse)(int pid, int out, int in, dev_t dev, ino_t ino,
+			   dev_t unused1, ino_t unused2, dof_helper_t *dh,
+			   const void *in_buf, size_t in_bufsz, int reparsing),
 	    int force)
 {
 	DIR *all_pids_dir;
@@ -1069,7 +1200,7 @@ reparse_dof(int out, int in,
 			}
 
 			if (split_dof_name(mapping_ent->d_name, &dev, &ino) < 0) {
-				fuse_log(FUSE_LOG_ERR, "when reparsing DOF for PID %s, cannot derive dev/inum from %s: ignored\n",
+				fuse_log(FUSE_LOG_ERR, "when reparsing DOF for PID %s, cannot derive dev/ino from %s: ignored\n",
 					 pid_ent->d_name, mapping_ent->d_name);
 				continue;
 			}
@@ -1106,7 +1237,7 @@ reparse_dof(int out, int in,
 
 			fuse_log(FUSE_LOG_DEBUG, "Reparsing DOF for PID %s, mapping %s\n",
 				 pid_ent->d_name, mapping_ent->d_name);
-			if (reparse(pid, out, in, dev, ino, dh, dof, dof_size, 1) < 0)
+			if (reparse(pid, out, in, dev, ino, 0, 0, dh, dof, dof_size, 1) < 0)
 				fuse_log(FUSE_LOG_ERR, "when reparsing DOF, cannot parse DOF for PID %s, mapping %s: ignored\n",
 					    pid_ent->d_name, mapping_ent->d_name);
 			free(dof);
diff --git a/dtprobed/dof_stash.h b/dtprobed/dof_stash.h
index 2754a8180622e..854cea82c1ffd 100644
--- a/dtprobed/dof_stash.h
+++ b/dtprobed/dof_stash.h
@@ -22,23 +22,21 @@ typedef struct dof_parsed_list {
 int dof_stash_init(const char *statedir);
 
 int dof_stash_push_parsed(dt_list_t *accum, dof_parsed_t *parsed);
-
 int dof_stash_write_parsed(pid_t pid, dev_t dev, ino_t ino, dt_list_t *accum);
-
 void dof_stash_flush(dt_list_t *accum);
 
-int dof_stash_add(pid_t pid, dev_t dev, ino_t ino, const dof_helper_t *dh,
-		  const void *dof, size_t size);
-
+int dof_stash_add(pid_t pid, dev_t dev, ino_t ino, dev_t exec_dev,
+		  dev_t exec_ino, const dof_helper_t *dh, const void *dof,
+		  size_t size);
 int dof_stash_remove(pid_t pid, int gen);
+int dof_stash_remove_pid(pid_t pid);
 
 void dof_stash_prune_dead(void);
-
-int
-reparse_dof(int out, int in,
-	    int (*reparse)(int pid, int out, int in, dev_t dev, ino_t inum,
-			   dof_helper_t *dh, const void *in_buf,
-			   size_t in_bufsz, int reparsing),
-	    int force);
+int reparse_dof(int out, int in,
+		int (*reparse)(int pid, int out, int in, dev_t dev, ino_t ino,
+			       dev_t unused1, ino_t unused2, dof_helper_t *dh,
+			       const void *in_buf, size_t in_bufsz,
+			       int reparsing),
+		int force);
 
 #endif
diff --git a/dtprobed/dtprobed.c b/dtprobed/dtprobed.c
index 104313a7bfd5e..62b1814daf2da 100644
--- a/dtprobed/dtprobed.c
+++ b/dtprobed/dtprobed.c
@@ -88,8 +88,9 @@ static const struct cuse_lowlevel_ops dtprobed_clop = {
 };
 
 static int
-process_dof(pid_t pid, int out, int in, dev_t dev, ino_t inum, dof_helper_t *dh,
-	    const void *in_buf, size_t in_bufsz, int reparsing);
+process_dof(pid_t pid, int out, int in, dev_t dev, ino_t inum, dev_t exec_dev,
+	    dev_t exec_inum, dof_helper_t *dh, const void *in_buf,
+	    size_t in_bufsz, int reparsing);
 
 static void
 log_msg(enum fuse_log_level level, const char *fmt, va_list ap)
@@ -432,13 +433,16 @@ dof_read(pid_t pid, int in)
 }
 
 /*
- * Get the prmap_t of a passed-in address's mapping.
+ * Get the prmap_t of a passed-in address's mapping, and of its executable
+ * mapping.  (If there are multiple, it doesn't matter which we choose as long
+ * as we are consistent.)
  */
 static const int
-mapping_dev_inum(pid_t pid, uintptr_t addr, dev_t *dev, ino_t *inum)
+mapping_dev_inums(pid_t pid, uintptr_t addr, dev_t *dev, ino_t *inum,
+		  dev_t *exec_dev, ino_t *exec_inum)
 {
 	ps_prochandle *P;
-	const prmap_t *mapp;
+	const prmap_t *mapp, *exec_mapp;
 	int err = 0;
 
 	if ((P = Pgrab(pid, 2, 0, NULL, &err)) == NULL) {
@@ -448,9 +452,10 @@ mapping_dev_inum(pid_t pid, uintptr_t addr, dev_t *dev, ino_t *inum)
 	}
 
 	mapp = Paddr_to_map(P, addr);
+	exec_mapp = Plmid_to_map(P, LM_ID_BASE, PR_OBJ_EXEC);
 
 	err = -1;
-        if (mapp == NULL) {
+        if (mapp == NULL || exec_mapp == NULL) {
 		fuse_log(FUSE_LOG_ERR, "%i: dtprobed: cannot look up mapping (process dead?)\n",
 			 pid);
 		goto out;
@@ -458,6 +463,8 @@ mapping_dev_inum(pid_t pid, uintptr_t addr, dev_t *dev, ino_t *inum)
 
         *dev = mapp->pr_dev;
 	*inum = mapp->pr_inum;
+        *exec_dev = exec_mapp->pr_dev;
+	*exec_inum = exec_mapp->pr_inum;
 
 	err = 0;
 out:
@@ -481,8 +488,8 @@ helper_ioctl(fuse_req_t req, int cmd, void *arg,
 	dtprobed_userdata_t *userdata = get_userdata(pid);
 	const char *errmsg;
 	const void *buf;
-	dev_t dev = 0;
-	ino_t inum = 0;
+	dev_t dev = 0, exec_dev = 0;
+	ino_t inum = 0, exec_inum = 0;
 	int gen;
 
 	/*
@@ -670,12 +677,13 @@ helper_ioctl(fuse_req_t req, int cmd, void *arg,
 	if (userdata->buf)
 		buf = userdata->buf;
 
-	if ((mapping_dev_inum(pid, userdata->dh.dofhp_dof, &dev, &inum)) < 0)
+	if ((mapping_dev_inums(pid, userdata->dh.dofhp_dof, &dev, &inum,
+		     &exec_dev, &exec_inum)) < 0)
 		goto process_err;
 
 	if ((gen = process_dof(pid, parser_out_pipe, parser_in_pipe,
-			       dev, inum, &userdata->dh, buf,
-			       userdata->dof_hdr.dofh_loadsz, 0)) < 0)
+			       dev, inum, exec_dev, exec_inum, &userdata->dh,
+			       buf, userdata->dof_hdr.dofh_loadsz, 0)) < 0)
 		goto process_err;
 
 	if (fuse_reply_ioctl(req, gen, NULL, 0) < 0)
@@ -726,8 +734,9 @@ helper_ioctl(fuse_req_t req, int cmd, void *arg,
  * the parsed DOF representation.
  */
 static int
-process_dof(pid_t pid, int out, int in, dev_t dev, ino_t inum, dof_helper_t *dh,
-	    const void *in_buf, size_t in_bufsz, int reparsing)
+process_dof(pid_t pid, int out, int in, dev_t dev, ino_t inum, dev_t exec_dev,
+	    dev_t exec_inum, dof_helper_t *dh, const void *in_buf,
+	    size_t in_bufsz, int reparsing)
 {
 	dof_parsed_t *provider;
 	size_t i;
@@ -789,7 +798,8 @@ process_dof(pid_t pid, int out, int in, dev_t dev, ino_t inum, dof_helper_t *dh,
 	}
 
 	if (!reparsing)
-		if ((gen = dof_stash_add(pid, dev, inum, dh, in_buf, in_bufsz)) < 0)
+		if ((gen = dof_stash_add(pid, dev, inum, exec_dev, exec_inum, dh,
+					 in_buf, in_bufsz)) < 0)
 			goto fileio;
 
 	if (dof_stash_write_parsed(pid, dev, inum, &accum) < 0) {
diff --git a/test/unittest/usdt/tst.exec-dof-replacement.r b/test/unittest/usdt/tst.exec-dof-replacement.r
new file mode 100644
index 0000000000000..7547f85e55af5
--- /dev/null
+++ b/test/unittest/usdt/tst.exec-dof-replacement.r
@@ -0,0 +1 @@
+PID test_prov test2 main succeeded
diff --git a/test/unittest/usdt/tst.exec-dof-replacement.r.p b/test/unittest/usdt/tst.exec-dof-replacement.r.p
new file mode 100755
index 0000000000000..1a5871f73d1f1
--- /dev/null
+++ b/test/unittest/usdt/tst.exec-dof-replacement.r.p
@@ -0,0 +1,2 @@
+#!/bin/sh
+grep -v '^ *ID' | sed 's,^[0-9]*,PID,; s,prov[0-9]*,prov,g; s,  *, ,g'
diff --git a/test/unittest/usdt/tst.exec-dof-replacement.sh b/test/unittest/usdt/tst.exec-dof-replacement.sh
new file mode 100755
index 0000000000000..8b65cd29b504d
--- /dev/null
+++ b/test/unittest/usdt/tst.exec-dof-replacement.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2006, 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+#
+# Make sure that when a program with DOF exec()s another program with
+# different DOF, the first program's DOF does not survive.
+
+if [ $# != 1 ]; then
+	echo expected one argument: '<'dtrace-path'>'
+	exit 2
+fi
+
+dtrace=$1
+CC=/usr/bin/gcc
+CFLAGS=
+
+DIRNAME="$tmpdir/usdt-exec-dof-replacement.$$.$RANDOM"
+mkdir -p $DIRNAME
+cd $DIRNAME
+
+cat > prov1.d <<EOF
+provider test_prov {
+	probe failed(int);
+};
+EOF
+
+cat > prov2.d <<EOF
+provider test_prov {
+	probe succeeded();
+};
+EOF
+
+if ! { $dtrace -h -s prov1.d && $dtrace -h -s prov2.d; } then
+	echo "failed to generate header files" >&2
+	exit 1
+fi
+
+cat > test1.c <<EOF
+#include <errno.h>
+#include <unistd.h>
+#include "prov1.h"
+
+int
+main(int argc, char **argv)
+{
+	execl("test2", "test2", NULL);
+        TEST_PROV_FAILED(errno);
+        return 1;
+}
+EOF
+
+cat > test2.c <<EOF
+#include <unistd.h>
+#include "prov2.h"
+
+int
+main(int argc, char **argv)
+{
+        while(1) {
+		sleep(1);
+	        TEST_PROV_SUCCEEDED();
+	}
+}
+EOF
+
+if ! { ${CC} ${CFLAGS} -c test1.c && ${CC} ${CFLAGS} -c test2.c; } then
+	echo "failed to compile test programs" >&2
+	exit 1
+fi
+if ! { $dtrace -G -s prov1.d test1.o && $dtrace -G -s prov2.d test2.o; } then
+	echo "failed to create DOF" >& 2
+	exit 1
+fi
+if ! { ${CC} ${CFLAGS} -o test1 test1.o prov1.o && ${CC} ${CFLAGS} -o test2 test2.o prov2.o; } then
+	echo "failed to link final executables" >& 2
+	exit 1
+fi
+
+./test1 &
+PROC=$!
+
+# Wait for the exec, then list all the target's probes.
+# We cannot use pure dtrace to do this because it doesn't check
+# for new probes enough to hook up new ones, even with -Z and
+# even if a target exec()s.
+while [[ -d /proc/$PROC ]] && [[ "$(readlink /proc/$PROC/exe)" =~ test1$ ]]; do
+    sleep 1
+done
+$dtrace -p $PROC '-Ptest_prov$target' -l
+status=$?
+kill $PROC
+
+if [[ $status -ne 0 ]]; then
+	exit 1
+fi
+exit 0
-- 
2.41.0.270.g68fa1d84b5




More information about the DTrace-devel mailing list