[DTrace-devel] [PATCH] dtprobed: prevent inter-request contamination of state

Nick Alcock nick.alcock at oracle.com
Wed May 24 14:18:57 UTC 2023


On fairly rare occasions, one sometimes sees dtprobed saying things like
this:

46157: dtprobed: helper size incorrect: expected at least 80, not 0

or

25815: dtprobed: DOF too small: expected at least 64, not 0

The message varies, but the size is always 0.  Whenever this happens
one object's-worth of incoming DOF is thrown out.

The cause is, it turns out, simple.  CUSE ioctls are handled via a
multistage request/response protocol, where the CUSE server (dtprobed)
repeatedly calls fuse_reply_ioctl_retry() and returns and is immediately
called back with the result of extracting a bit more info from the
client's address space: this process is fast but still involves a
protocol roundtrip to the FUSE device, so it's not instantaneous.  To
carry state across this call FUSE has a userdata pointer that is shared
across calls to all the FUSE callbacks.

Now *obviously* CUSE isn't going to interleave the results of this
request/response process with further requests coming in from new
clients, right? I mean even the FUSE examples are written assuming that
this won't happen.

It does, and this problem is, while not terribly common, common enough
that I'm not the only one who's seen it. You can be halfway through a
request/response cycle and then some new PID can come blundering in and
you have to handle that as well, in parallel.

So we can't just pass a single userdata state vector around and have it
track everything because all our requests are serialized, since they're
*not* serialized.  So switch to managing them by hand: maintain a
dt_list_t of manually-malloced userdatas, reverse-insertion-sorted by
pid so that unless the pid wraps the pid we want is almost always at the
head of the list.  We need to clean up this list periodically because
processes can be killed in the middle of their ioctl() so we can get
stale stuff in the list: to save a bit of time and make sure the cleanup
code doesn't rust, *only* free in a periodic cleanup cycle: the cycle
goes through the list and frees every element with no associated process
or whose state indicates that it's not in the middle of a
request/response cycle right now.

For now, clean up every 128 requests -- rare enough that it's
amortized-free, common enough that even when pids wrap we hardly have to
traverse any stale ones to get to the one we care about.

Add a test for the cleanup code (and a bit of a load-test for dtprobed)
that just kicks off a bunch of processes with probes.  This won't fail
itself but if things go wrong it might provoke a dtprobed crash and a
failure of later usdt tests, which is not a valueless thing (since the
cleanup code is otherwise not tested at all, and it found a bug when I
wrote it, so let's keep it).

Signed-off-by: Nick Alcock <nick.alcock at oracle.com>
---
 dtprobed/dtprobed.c                 | 94 +++++++++++++++++++++++++++--
 test/unittest/usdt/tst.manyprocs.sh | 70 +++++++++++++++++++++
 2 files changed, 158 insertions(+), 6 deletions(-)
 create mode 100755 test/unittest/usdt/tst.manyprocs.sh

diff --git a/dtprobed/dtprobed.c b/dtprobed/dtprobed.c
index 29cddba1d9658..6f77553b64246 100644
--- a/dtprobed/dtprobed.c
+++ b/dtprobed/dtprobed.c
@@ -51,6 +51,7 @@
 #include <systemd/sd-daemon.h>
 #endif
 
+#include <dt_list.h>
 #include "dof_parser.h"
 #include "uprobes.h"
 
@@ -59,6 +60,8 @@
 #define DOF_MAXSZ 512 * 1024 * 1024
 #define DOF_CHUNKSZ 64 * 1024
 
+#define CLEANUP_INTERVAL 128
+
 static struct fuse_session *cuse_session;
 
 int _dtrace_debug = 0;
@@ -69,6 +72,7 @@ static pid_t parser_pid;
 static int parser_in_pipe[2];
 static int parser_out_pipe[2];
 static int timeout = 5000; 			/* In seconds.  */
+static int rq_count = 0;
 
 static void helper_ioctl(fuse_req_t req, int cmd, void *arg,
 			 struct fuse_file_info *fi, unsigned int flags,
@@ -147,6 +151,8 @@ typedef enum dtprobed_fuse_state {
  * State crossing calls to CUSE request functions.
  */
 typedef struct dtprobed_userdata {
+	dt_list_t list;
+	pid_t pid;
 	dtprobed_fuse_state_t state;
 	dof_helper_t dh;
 	dof_hdr_t dof_hdr;
@@ -154,8 +160,10 @@ typedef struct dtprobed_userdata {
 	char *buf;
 } dtprobed_userdata_t;
 
+static dt_list_t all_userdata;
+
 struct fuse_session *
-setup_helper_device(int argc, char **argv, char *devname, dtprobed_userdata_t *userdata)
+setup_helper_device(int argc, char **argv, char *devname)
 {
 	struct cuse_info ci;
 	struct fuse_session *cs;
@@ -175,7 +183,7 @@ setup_helper_device(int argc, char **argv, char *devname, dtprobed_userdata_t *u
 	ci.dev_info_argv = dev_info_argv;
 
 	cs = cuse_lowlevel_setup(argc, argv, &ci, &dtprobed_clop,
-				 &multithreaded, userdata);
+				 &multithreaded, NULL);
 
 	if (cs == NULL) {
 		perror("allocating helper device");
@@ -204,6 +212,73 @@ teardown_device(void)
 	cuse_lowlevel_teardown(cuse_session);
 }
 
+/*
+ * Allocate a userdata for the given PID, or return an existing one
+ * if possible.  Userdatas are insertion-sorted into descending order
+ * to make old stale ones from processes killed at the wrong time a
+ * bit less likely to impose overhead.
+ */
+static dtprobed_userdata_t *
+get_userdata(pid_t pid)
+{
+	dtprobed_userdata_t *userdatap;
+	dtprobed_userdata_t *new;
+	int prepend = 0;
+
+	for (userdatap = dt_list_next(&all_userdata); userdatap != NULL;
+	     userdatap = dt_list_next(userdatap)) {
+		if (userdatap->pid == pid)
+			return userdatap;
+
+		if (userdatap->pid < pid)
+			break;
+	}
+
+	if ((new = calloc(1, sizeof(struct dtprobed_userdata))) == NULL)
+		return NULL;
+
+	/*
+	 * Go back one, to the last list entry with a pid higher than this one,
+	 * since dt_list_insert inserts *after* the entry passed in.
+	 */
+	if (userdatap) {
+		userdatap = dt_list_prev(userdatap);
+		if (userdatap == NULL)
+			prepend = 1;
+	}
+
+	new->pid = pid;
+
+	if (!prepend)
+		dt_list_insert(&all_userdata, userdatap, new);
+	else
+		dt_list_prepend(&all_userdata, new);
+
+	return new;
+}
+
+/*
+ * Clean up userdatas no longer involved in ioctls or no longer corresponding to
+ * live pids.
+ */
+static void
+cleanup_userdata(void)
+{
+	dtprobed_userdata_t *userdatap;
+	dtprobed_userdata_t *last_userdatap = NULL;
+
+	for (userdatap = dt_list_next(&all_userdata); userdatap != NULL;
+	     userdatap = dt_list_next(userdatap)) {
+		if (userdatap->state == DTP_IOCTL_START || kill(userdatap->pid, 0) < 0) {
+			dt_list_delete(&all_userdata, userdatap);
+			free(userdatap->buf);
+			free(last_userdatap);
+			last_userdatap = userdatap;
+		}
+	}
+	free(last_userdatap);
+}
+
 /*
  * Parse a piece of DOF.  Return 0 iff the pipe has closed and no more parsing
  * is possible.
@@ -360,9 +435,9 @@ helper_ioctl(fuse_req_t req, int cmd, void *arg,
 	     struct fuse_file_info *fi, unsigned int flags,
 	     const void *in_buf, size_t in_bufsz, size_t out_bufsz)
 {
-	dtprobed_userdata_t *userdata = fuse_req_userdata(req);
 	struct iovec in;
 	pid_t pid = fuse_req_ctx(req)->pid;
+	dtprobed_userdata_t *userdata = get_userdata(pid);
 	const char *errmsg;
 	const void *buf;
 
@@ -554,9 +629,17 @@ helper_ioctl(fuse_req_t req, int cmd, void *arg,
 
 	free(userdata->buf);
 	userdata->buf = NULL;
-
 	userdata->state = DTP_IOCTL_START;
 
+	/*
+	 * Periodically clean up old userdata (applying to pids with no live
+	 * transaction, or pids which no longer exist).
+	 */
+	if (rq_count++ > CLEANUP_INTERVAL) {
+		cleanup_userdata();
+		rq_count = 0;
+	}
+
 	return;
 
   fuse_errmsg:
@@ -750,7 +833,6 @@ main(int argc, char *argv[])
 	int sync_fd = -1;
 	int ret;
 	struct sigaction sa = {0};
-	dtprobed_userdata_t userdata = {0};
 
 	/*
 	 * These are "command-line" arguments to FUSE itself: our args are
@@ -797,7 +879,7 @@ main(int argc, char *argv[])
 	close_range(3, ~0U, 0);
 
 	if ((cuse_session = setup_helper_device(fuse_argc, fuse_argv,
-						devname, &userdata)) == NULL)
+						devname)) == NULL)
 		exit(1);
 
 	if (!foreground) {
diff --git a/test/unittest/usdt/tst.manyprocs.sh b/test/unittest/usdt/tst.manyprocs.sh
new file mode 100755
index 0000000000000..6ebd873672b4b
--- /dev/null
+++ b/test/unittest/usdt/tst.manyprocs.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2006, 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+#
+# Verify that dtprobed can handle lots of processes.  We don't check that
+# it's cleaning them up, and we don't explicitly check that it's not dead,
+# but if the dead-process-cleanup process fails and kills dtprobed we will
+# find out when later tests fail due to lack of dtprobed.
+#
+if [ $# != 1 ]; then
+	echo expected one argument: '<'dtrace-path'>'
+	exit 2
+fi
+
+dtrace=$1
+CC=/usr/bin/gcc
+CFLAGS=
+
+DIRNAME="$tmpdir/usdt-manyprocs.$$.$RANDOM"
+mkdir -p $DIRNAME
+cd $DIRNAME
+
+cat > prov.d <<EOF
+provider test_prov {
+	probe go();
+};
+EOF
+
+$dtrace -h -s prov.d
+if [ $? -ne 0 ]; then
+	echo "failed to generate header file" >& 2
+	exit 1
+fi
+
+cat > test.c <<EOF
+#include "prov.h"
+
+int
+main(int argc, char **argv)
+{
+	TEST_PROV_GO();
+
+	return 0;
+}
+EOF
+
+${CC} ${CFLAGS} -c test.c
+if [ $? -ne 0 ]; then
+	echo "failed to compile test.c" >& 2
+	exit 1
+fi
+$dtrace -G -s prov.d test.o
+if [ $? -ne 0 ]; then
+	echo "failed to create DOF" >& 2
+	exit 1
+fi
+${CC} ${CFLAGS} -o test test.o prov.o
+if [ $? -ne 0 ]; then
+	echo "failed to link final executable" >& 2
+	exit 1
+fi
+
+for ((i=0; i < 1024; i++)); do
+    ./test
+done
+
+exit 0

base-commit: f543fa6706c0b31364356c01bf3de63e3cce8ad1
prerequisite-patch-id: fa667248b7b02e92ee6b3807123ebcf2ac0acc38
prerequisite-patch-id: 0cec5f1a31d528a493d6143644c1e6b78c7f403b
prerequisite-patch-id: 1e1ced5fcec01a0c2e38d12f1ae5a124c8f93860
prerequisite-patch-id: eb290f098e38981181d85240150e1713979dc505
prerequisite-patch-id: c25b38ab68e815a63f064aff7496408684f94a36
prerequisite-patch-id: b86eb7e00b5883a586bd4dc7143ab25094912288
prerequisite-patch-id: 85d3c176dc4a7105e4f04dbcdb4ccbdf18741795
prerequisite-patch-id: 94206b53a67820bb1c9e828c11e57a0cb36949b6
prerequisite-patch-id: c5a64d246ff26047723e5e1a9c16b42a1d5fbf87
prerequisite-patch-id: 89cf4c6b9242525883ac02bb8ef90856ef908d0f
prerequisite-patch-id: 0992642ac657a1da617588a6d7b28f3669960374
prerequisite-patch-id: e8a65c309b3eaf3341ceac421ef602bcc3f057fa
prerequisite-patch-id: 4d38462a4bf2b242f8fcec3bf28f1ad97fa40ca7
prerequisite-patch-id: 6430030eaa9b5f09eedf6d41b698549096905c56
prerequisite-patch-id: aefef52e1438f34dd49e0cde010e791d96ef06d9
prerequisite-patch-id: f9ca834410ab770ec2d0b4f4f1ea70d5bea37c84
prerequisite-patch-id: a2e41bc39d3eb5bea3c1543928d1a695320cce52
prerequisite-patch-id: 5998403c58e4b48f0cb0d87971e553d2999421fc
prerequisite-patch-id: 8766128335d133bdd548ed5978aa9eefeb6f7b6d
prerequisite-patch-id: 35e83414bbe54be31fe7cc15c7d6368543cd4e8f
prerequisite-patch-id: fe12afceb640e98f3e1c71915e005f114677b22c
prerequisite-patch-id: 3793aeb5329f974f12647158fe55329f5a801d06
-- 
2.39.1.268.g9de2f9a303




More information about the DTrace-devel mailing list