[DTrace-devel] [PATCH 3/6] dtprobed: support DOF > 64KiB

Nick Alcock nick.alcock at oracle.com
Mon May 22 20:20:14 UTC 2023


dtprobed's ioctl handler is a state machine that reads in client DOF in
multiple phases:

 DTP_IOCTL_START: read the struct dof_helper_t

 DTP_IOCTL_HDR: read the struct dof_hdr_t, giving us the size of the DOF

 DTP_IOCTL_DOFHDR: read the DOF itself

 DTP_IOCTL_DOF: (everything is read; actually parse the DOF)

These various phases are triggered by issuing a fuse_reply_ioctl_retry()
to ask FUSE to go away and read things out of the ioctl() caller for us.
We can read from any address in the client (we are root), but that
doesn't mean we can read any *length* -- the kernel refuses to read
beyond a certain length and alas that length is customizable via mount
options (or, for CUSE, via kernel command-line options) and we can't
easily tell what it is.

I think it reasonable to say that anyone who *reduces* the length below
the default is asking for it: I can't find any CUSE or FUSE-using code
anywhere on the Internet that does anything more than hardwire this
value to 128KiB. For paranoia, we use 64KiB ourselves -- that's still a
lot of probes in one go.

We add a new state, DTP_IOCTL_DOFCHUNK, and read in one piece of DOF
per chunk until all the chunks are read in, concatenating them before
handing it off to the parser.

A new test checks this stuff: it has 206KiB of DOF which is enough to
test "first chunk", "neither first nor last chunk" and "last chunk"
cases: the DOF itself isn't formatted such that this equates to "first
probe, probe in the middle, last probe" but if any of those chunks is
missing or misaligned the string tables will be fubared and all the
probe names will be wrong, triggering a test failure (at best).

Orabug: 35411920
Signed-off-by: Nick Alcock <nick.alcock at oracle.com>
---
 dtprobed/dtprobed.c                  | 105 ++++++++++++++++++++++++---
 test/unittest/usdt/tst.manyprobes.r  |   4 +
 test/unittest/usdt/tst.manyprobes.sh |  77 ++++++++++++++++++++
 3 files changed, 177 insertions(+), 9 deletions(-)
 create mode 100644 test/unittest/usdt/tst.manyprobes.r
 create mode 100755 test/unittest/usdt/tst.manyprobes.sh

diff --git a/dtprobed/dtprobed.c b/dtprobed/dtprobed.c
index 9eadf8cffc15d..01a30ab95129b 100644
--- a/dtprobed/dtprobed.c
+++ b/dtprobed/dtprobed.c
@@ -5,6 +5,7 @@
  * http://oss.oracle.com/licenses/upl.
  */
 
+#include <sys/param.h>
 #include <sys/uio.h>
 #include <sys/wait.h>
 #include <errno.h>
@@ -56,6 +57,7 @@
 #include "seccomp-assistance.h"
 
 #define DOF_MAXSZ 512 * 1024 * 1024
+#define DOF_CHUNKSZ 64 * 1024
 
 static struct fuse_session *cuse_session;
 
@@ -137,7 +139,8 @@ typedef enum dtprobed_fuse_state {
 	DTP_IOCTL_START = 0,
 	DTP_IOCTL_HDR = 1,
 	DTP_IOCTL_DOFHDR = 2,
-	DTP_IOCTL_DOF = 3
+	DTP_IOCTL_DOFCHUNK = 3,
+	DTP_IOCTL_DOF = 4
 } dtprobed_fuse_state_t;
 
 /*
@@ -147,6 +150,8 @@ typedef struct dtprobed_userdata {
 	dtprobed_fuse_state_t state;
 	dof_helper_t dh;
 	dof_hdr_t dof_hdr;
+	size_t next_chunk;
+	char *buf;
 } dtprobed_userdata_t;
 
 struct fuse_session *
@@ -357,6 +362,7 @@ helper_ioctl(fuse_req_t req, int cmd, void *arg,
 	struct iovec in;
 	pid_t pid = fuse_req_ctx(req)->pid;
 	const char *errmsg;
+	const void *buf;
 
 	/*
 	 * We can just ignore FUSE_IOCTL_COMPAT: the 32-bit and 64-bit versions
@@ -385,6 +391,13 @@ helper_ioctl(fuse_req_t req, int cmd, void *arg,
 		errmsg = "error reading ioctl size";
 		if (fuse_reply_ioctl_retry(req, &in, 1, NULL, 0) < 0)
 			goto fuse_errmsg;
+
+		/*
+		 * userdata->buf should already be freed, but if somehow it is
+		 * non-NULL make absolutely sure it is cleared out.
+		 */
+		userdata->buf = NULL;
+		userdata->next_chunk = 0;
 		userdata->state = DTP_IOCTL_HDR;
 		return;
 	}
@@ -425,17 +438,18 @@ helper_ioctl(fuse_req_t req, int cmd, void *arg,
 	}
 
 	/*
-	 * Third call: validate the DOF length and get the DOF itself.
+	 * Third call: validate the DOF length and stash it away.
 	 */
 	if (userdata->state == DTP_IOCTL_DOFHDR) {
 		/*
 		 * Too much data is as bad as too little.
 		 */
-		if (in_bufsz > sizeof(dof_hdr_t)) {
-			errmsg = "DOF header size incorrect";
-			fuse_log(FUSE_LOG_ERR, "%i: dtprobed: %s: %zi, not %zi\n",
-				 pid, errmsg, in_bufsz, sizeof(dof_hdr_t));
-			goto fuse_err;
+		if (userdata->state == DTP_IOCTL_DOFHDR &&
+		    (in_bufsz > sizeof(dof_hdr_t))) {
+			    errmsg = "DOF header size incorrect";
+			    fuse_log(FUSE_LOG_ERR, "%i: dtprobed: %s: %zi, not %zi\n",
+				     pid, errmsg, in_bufsz, sizeof(dof_hdr_t));
+			    goto fuse_err;
 		}
 		memcpy(&userdata->dof_hdr, in_buf, sizeof(dof_hdr_t));
 
@@ -443,16 +457,78 @@ helper_ioctl(fuse_req_t req, int cmd, void *arg,
 			fuse_log(FUSE_LOG_WARNING, "%i: dtprobed: DOF size of %zi longer than is sane\n",
 				 pid, userdata->dof_hdr.dofh_loadsz);
 
+		/* Fall through. */
+	}
+
+	/*
+	 * Third and possibly further calls: get the DOF itself, in chunks if
+	 * it's too big.
+	 */
+	if (userdata->state == DTP_IOCTL_DOFHDR ||
+		userdata->state == DTP_IOCTL_DOFCHUNK) {
+		int by_chunks = 0;
+
 		in.iov_base = (void *) userdata->dh.dofhp_dof;
 		in.iov_len = userdata->dof_hdr.dofh_loadsz;
 
+		/*
+		 * If the data being read in is too large, read by chunks.  We
+		 * cannot determine the chunk size we can use: the first failure
+		 * returns -ENOMEM to the caller of ioctl().
+		 */
+		if (userdata->dof_hdr.dofh_loadsz > DOF_CHUNKSZ) {
+			by_chunks = 1;
+
+			if (userdata->state == DTP_IOCTL_DOFHDR) {
+				userdata->buf = malloc(userdata->dof_hdr.dofh_loadsz);
+				in.iov_len = DOF_CHUNKSZ;
+
+				if (userdata->buf == NULL) {
+					errmsg = "out of memory allocating DOF";
+					goto fuse_errmsg;
+				}
+			} else {
+				ssize_t remaining;
+
+				remaining = userdata->dof_hdr.dofh_loadsz -
+					    userdata->next_chunk;
+
+				if (remaining < 0)
+					remaining = 0;
+
+				/*
+				 * We've already read a chunk: preserve it and
+				 * go on to the next, until we are out.
+				 */
+
+				memcpy(userdata->buf + userdata->next_chunk, in_buf,
+				       MIN(in_bufsz, remaining));
+
+				userdata->next_chunk += in_bufsz;
+				remaining -= in_bufsz;
+
+				if (remaining <= 0) {
+					userdata->state = DTP_IOCTL_DOF;
+					goto chunks_done;
+				}
+
+				in.iov_base = (char *) in.iov_base + userdata->next_chunk;
+				in.iov_len = MIN(DOF_CHUNKSZ, remaining);
+			}
+		}
+
 		errmsg = "cannot read DOF";
 		if (fuse_reply_ioctl_retry(req, &in, 1, NULL, 0) < 0)
 			goto fuse_errmsg;
-		userdata->state = DTP_IOCTL_DOF;
+
+		if (by_chunks)
+			userdata->state = DTP_IOCTL_DOFCHUNK;
+		else
+			userdata->state = DTP_IOCTL_DOF;
 		return;
 	}
 
+  chunks_done:
 	if (userdata->state != DTP_IOCTL_DOF) {
 		errmsg = "FUSE internal state incorrect";
 		goto fuse_errmsg;
@@ -463,13 +539,20 @@ helper_ioctl(fuse_req_t req, int cmd, void *arg,
 	 * to unblock the ioctl() caller and return to start state.
 	 */
 
+	buf = in_buf;
+	if (userdata->buf)
+		buf = userdata->buf;
+
 	if (process_dof(req, parser_in_pipe[1], parser_out_pipe[0], pid,
-			&userdata->dh, in_buf) < 0)
+			&userdata->dh, buf) < 0)
 		goto process_err;
 
 	if (fuse_reply_ioctl(req, 0, NULL, 0) < 0)
 		goto process_err;
 
+	free(userdata->buf);
+	userdata->buf = NULL;
+
 	userdata->state = DTP_IOCTL_START;
 
 	return;
@@ -482,6 +565,8 @@ helper_ioctl(fuse_req_t req, int cmd, void *arg,
 		fuse_log(FUSE_LOG_ERR, "%i: dtprobed: %s\n", pid,
 			 "cannot send error to ioctl caller: %s",
 			errmsg);
+	free(userdata->buf);
+	userdata->buf = NULL;
 	userdata->state = DTP_IOCTL_START;
 	return;
 
@@ -489,6 +574,8 @@ helper_ioctl(fuse_req_t req, int cmd, void *arg,
 	if (fuse_reply_err(req, EINVAL) < 0)
 		fuse_log(FUSE_LOG_ERR, "%i: cannot unblock caller\n",
 			 pid);
+	free(userdata->buf);
+	userdata->buf = NULL;
 	userdata->state = DTP_IOCTL_START;
 	return;
 }
diff --git a/test/unittest/usdt/tst.manyprobes.r b/test/unittest/usdt/tst.manyprobes.r
new file mode 100644
index 0000000000000..5c128019946ac
--- /dev/null
+++ b/test/unittest/usdt/tst.manyprobes.r
@@ -0,0 +1,4 @@
+test:main:test1
+test:main:test750
+test:main:test1999
+
diff --git a/test/unittest/usdt/tst.manyprobes.sh b/test/unittest/usdt/tst.manyprobes.sh
new file mode 100755
index 0000000000000..0f3fcb2b50fd9
--- /dev/null
+++ b/test/unittest/usdt/tst.manyprobes.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+#
+if [ $# != 1 ]; then
+	echo expected one argument: '<'dtrace-path'>'
+	exit 2
+fi
+
+dtrace=$1
+CC=/usr/bin/gcc
+CFLAGS="-I${PWD}/uts/common"
+DIRNAME="$tmpdir/usdt-manyprobes.$$.$RANDOM"
+mkdir -p $DIRNAME
+cd $DIRNAME
+
+gen_test()
+{
+    local -i n=$1
+    echo "#include <sys/sdt.h>"
+    echo "void main(void) {"
+    for ((i=0; i<$n; i++)); do
+      echo 'DTRACE_PROBE1(manyprobes, 'test$i, '"foo");'
+    done
+    echo "}"
+}
+
+gen_provider()
+{
+    local -i n=$1
+    echo "provider manyprobes {"
+    for ((i=0; i<$n; i++)); do
+      echo "probe test$i(string);"
+    done
+    echo "};"
+}
+
+nprobes=2000
+gen_test $nprobes > test.c
+gen_provider $nprobes > manyprobes.d
+
+${CC} ${CFLAGS} -c test.c
+if [ $? -ne 0 ]; then
+	echo "failed to compile test.c" >& 2
+	exit 1
+fi
+$dtrace -G -s manyprobes.d test.o
+if [ $? -ne 0 ]; then
+	echo "failed to create DOF" >& 2
+	exit 1
+fi
+${CC} ${CFLAGS} -o test test.o manyprobes.o
+if [ $? -ne 0 ]; then
+	echo "failed to link final executable" >& 2
+	exit 1
+fi
+
+script()
+{
+	$dtrace -c ./test -qs /dev/stdin <<EOF
+	manyprobes\$target:::test1, manyprobes\$target:::test750, manyprobes\$target:::test1999
+	{
+		printf("%s:%s:%s\n", probemod, probefunc, probename);
+	}
+EOF
+}
+
+script
+status=$?
+
+# Temporary, until autocleanup is implemented
+echo - > /sys/kernel/tracing/uprobe_events 2>/dev/null
+
+exit $status
-- 
2.39.1.268.g9de2f9a303




More information about the DTrace-devel mailing list