[DTrace-devel] [PATCH 3/6] dtprobed: support DOF > 64KiB

Kris Van Hees kris.van.hees at oracle.com
Thu May 25 21:00:30 UTC 2023


On Mon, May 22, 2023 at 09:20:14PM +0100, Nick Alcock via DTrace-devel wrote:
> dtprobed's ioctl handler is a state machine that reads in client DOF in
> multiple phases:
> 
>  DTP_IOCTL_START: read the struct dof_helper_t
> 
>  DTP_IOCTL_HDR: read the struct dof_hdr_t, giving us the size of the DOF
> 
>  DTP_IOCTL_DOFHDR: read the DOF itself
> 
>  DTP_IOCTL_DOF: (everything is read; actually parse the DOF)
> 
> These various phases are triggered by issuing a fuse_reply_ioctl_retry()
> to ask FUSE to go away and read things out of the ioctl() caller for us.
> We can read from any address in the client (we are root), but that
> doesn't mean we can read any *length* -- the kernel refuses to read
> beyond a certain length and alas that length is customizable via mount
> options (or, for CUSE, via kernel command-line options) and we can't
> easily tell what it is.
> 
> I think it reasonable to say that anyone who *reduces* the length below
> the default is asking for it: I can't find any CUSE or FUSE-using code
> anywhere on the Internet that does anything more than hardwire this
> value to 128KiB. For paranoia, we use 64KiB ourselves -- that's still a
> lot of probes in one go.
> 
> We add a new state, DTP_IOCTL_DOFCHUNK, and read in one piece of DOF
> per chunk until all the chunks are read in, concatenating them before
> handing it off to the parser.
> 
> A new test checks this stuff: it has 206KiB of DOF which is enough to
> test "first chunk", "neither first nor last chunk" and "last chunk"
> cases: the DOF itself isn't formatted such that this equates to "first
> probe, probe in the middle, last probe" but if any of those chunks is
> missing or misaligned the string tables will be fubared and all the
> probe names will be wrong, triggering a test failure (at best).
> 
> Orabug: 35411920
> Signed-off-by: Nick Alcock <nick.alcock at oracle.com>

Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>

... with Eugene's comments addressed.

> ---
>  dtprobed/dtprobed.c                  | 105 ++++++++++++++++++++++++---
>  test/unittest/usdt/tst.manyprobes.r  |   4 +
>  test/unittest/usdt/tst.manyprobes.sh |  77 ++++++++++++++++++++
>  3 files changed, 177 insertions(+), 9 deletions(-)
>  create mode 100644 test/unittest/usdt/tst.manyprobes.r
>  create mode 100755 test/unittest/usdt/tst.manyprobes.sh
> 
> diff --git a/dtprobed/dtprobed.c b/dtprobed/dtprobed.c
> index 9eadf8cffc15d..01a30ab95129b 100644
> --- a/dtprobed/dtprobed.c
> +++ b/dtprobed/dtprobed.c
> @@ -5,6 +5,7 @@
>   * http://oss.oracle.com/licenses/upl.
>   */
>  
> +#include <sys/param.h>
>  #include <sys/uio.h>
>  #include <sys/wait.h>
>  #include <errno.h>
> @@ -56,6 +57,7 @@
>  #include "seccomp-assistance.h"
>  
>  #define DOF_MAXSZ 512 * 1024 * 1024
> +#define DOF_CHUNKSZ 64 * 1024
>  
>  static struct fuse_session *cuse_session;
>  
> @@ -137,7 +139,8 @@ typedef enum dtprobed_fuse_state {
>  	DTP_IOCTL_START = 0,
>  	DTP_IOCTL_HDR = 1,
>  	DTP_IOCTL_DOFHDR = 2,
> -	DTP_IOCTL_DOF = 3
> +	DTP_IOCTL_DOFCHUNK = 3,
> +	DTP_IOCTL_DOF = 4
>  } dtprobed_fuse_state_t;
>  
>  /*
> @@ -147,6 +150,8 @@ typedef struct dtprobed_userdata {
>  	dtprobed_fuse_state_t state;
>  	dof_helper_t dh;
>  	dof_hdr_t dof_hdr;
> +	size_t next_chunk;
> +	char *buf;
>  } dtprobed_userdata_t;
>  
>  struct fuse_session *
> @@ -357,6 +362,7 @@ helper_ioctl(fuse_req_t req, int cmd, void *arg,
>  	struct iovec in;
>  	pid_t pid = fuse_req_ctx(req)->pid;
>  	const char *errmsg;
> +	const void *buf;
>  
>  	/*
>  	 * We can just ignore FUSE_IOCTL_COMPAT: the 32-bit and 64-bit versions
> @@ -385,6 +391,13 @@ helper_ioctl(fuse_req_t req, int cmd, void *arg,
>  		errmsg = "error reading ioctl size";
>  		if (fuse_reply_ioctl_retry(req, &in, 1, NULL, 0) < 0)
>  			goto fuse_errmsg;
> +
> +		/*
> +		 * userdata->buf should already be freed, but if somehow it is
> +		 * non-NULL make absolutely sure it is cleared out.
> +		 */
> +		userdata->buf = NULL;
> +		userdata->next_chunk = 0;
>  		userdata->state = DTP_IOCTL_HDR;
>  		return;
>  	}
> @@ -425,17 +438,18 @@ helper_ioctl(fuse_req_t req, int cmd, void *arg,
>  	}
>  
>  	/*
> -	 * Third call: validate the DOF length and get the DOF itself.
> +	 * Third call: validate the DOF length and stash it away.
>  	 */
>  	if (userdata->state == DTP_IOCTL_DOFHDR) {
>  		/*
>  		 * Too much data is as bad as too little.
>  		 */
> -		if (in_bufsz > sizeof(dof_hdr_t)) {
> -			errmsg = "DOF header size incorrect";
> -			fuse_log(FUSE_LOG_ERR, "%i: dtprobed: %s: %zi, not %zi\n",
> -				 pid, errmsg, in_bufsz, sizeof(dof_hdr_t));
> -			goto fuse_err;
> +		if (userdata->state == DTP_IOCTL_DOFHDR &&
> +		    (in_bufsz > sizeof(dof_hdr_t))) {
> +			    errmsg = "DOF header size incorrect";
> +			    fuse_log(FUSE_LOG_ERR, "%i: dtprobed: %s: %zi, not %zi\n",
> +				     pid, errmsg, in_bufsz, sizeof(dof_hdr_t));
> +			    goto fuse_err;
>  		}
>  		memcpy(&userdata->dof_hdr, in_buf, sizeof(dof_hdr_t));
>  
> @@ -443,16 +457,78 @@ helper_ioctl(fuse_req_t req, int cmd, void *arg,
>  			fuse_log(FUSE_LOG_WARNING, "%i: dtprobed: DOF size of %zi longer than is sane\n",
>  				 pid, userdata->dof_hdr.dofh_loadsz);
>  
> +		/* Fall through. */
> +	}
> +
> +	/*
> +	 * Third and possibly further calls: get the DOF itself, in chunks if
> +	 * it's too big.
> +	 */
> +	if (userdata->state == DTP_IOCTL_DOFHDR ||
> +		userdata->state == DTP_IOCTL_DOFCHUNK) {
> +		int by_chunks = 0;
> +
>  		in.iov_base = (void *) userdata->dh.dofhp_dof;
>  		in.iov_len = userdata->dof_hdr.dofh_loadsz;
>  
> +		/*
> +		 * If the data being read in is too large, read by chunks.  We
> +		 * cannot determine the chunk size we can use: the first failure
> +		 * returns -ENOMEM to the caller of ioctl().
> +		 */
> +		if (userdata->dof_hdr.dofh_loadsz > DOF_CHUNKSZ) {
> +			by_chunks = 1;
> +
> +			if (userdata->state == DTP_IOCTL_DOFHDR) {
> +				userdata->buf = malloc(userdata->dof_hdr.dofh_loadsz);
> +				in.iov_len = DOF_CHUNKSZ;
> +
> +				if (userdata->buf == NULL) {
> +					errmsg = "out of memory allocating DOF";
> +					goto fuse_errmsg;
> +				}
> +			} else {
> +				ssize_t remaining;
> +
> +				remaining = userdata->dof_hdr.dofh_loadsz -
> +					    userdata->next_chunk;
> +
> +				if (remaining < 0)
> +					remaining = 0;
> +
> +				/*
> +				 * We've already read a chunk: preserve it and
> +				 * go on to the next, until we are out.
> +				 */
> +
> +				memcpy(userdata->buf + userdata->next_chunk, in_buf,
> +				       MIN(in_bufsz, remaining));
> +
> +				userdata->next_chunk += in_bufsz;
> +				remaining -= in_bufsz;
> +
> +				if (remaining <= 0) {
> +					userdata->state = DTP_IOCTL_DOF;
> +					goto chunks_done;
> +				}
> +
> +				in.iov_base = (char *) in.iov_base + userdata->next_chunk;
> +				in.iov_len = MIN(DOF_CHUNKSZ, remaining);
> +			}
> +		}
> +
>  		errmsg = "cannot read DOF";
>  		if (fuse_reply_ioctl_retry(req, &in, 1, NULL, 0) < 0)
>  			goto fuse_errmsg;
> -		userdata->state = DTP_IOCTL_DOF;
> +
> +		if (by_chunks)
> +			userdata->state = DTP_IOCTL_DOFCHUNK;
> +		else
> +			userdata->state = DTP_IOCTL_DOF;
>  		return;
>  	}
>  
> +  chunks_done:
>  	if (userdata->state != DTP_IOCTL_DOF) {
>  		errmsg = "FUSE internal state incorrect";
>  		goto fuse_errmsg;
> @@ -463,13 +539,20 @@ helper_ioctl(fuse_req_t req, int cmd, void *arg,
>  	 * to unblock the ioctl() caller and return to start state.
>  	 */
>  
> +	buf = in_buf;
> +	if (userdata->buf)
> +		buf = userdata->buf;
> +
>  	if (process_dof(req, parser_in_pipe[1], parser_out_pipe[0], pid,
> -			&userdata->dh, in_buf) < 0)
> +			&userdata->dh, buf) < 0)
>  		goto process_err;
>  
>  	if (fuse_reply_ioctl(req, 0, NULL, 0) < 0)
>  		goto process_err;
>  
> +	free(userdata->buf);
> +	userdata->buf = NULL;
> +
>  	userdata->state = DTP_IOCTL_START;
>  
>  	return;
> @@ -482,6 +565,8 @@ helper_ioctl(fuse_req_t req, int cmd, void *arg,
>  		fuse_log(FUSE_LOG_ERR, "%i: dtprobed: %s\n", pid,
>  			 "cannot send error to ioctl caller: %s",
>  			errmsg);
> +	free(userdata->buf);
> +	userdata->buf = NULL;
>  	userdata->state = DTP_IOCTL_START;
>  	return;
>  
> @@ -489,6 +574,8 @@ helper_ioctl(fuse_req_t req, int cmd, void *arg,
>  	if (fuse_reply_err(req, EINVAL) < 0)
>  		fuse_log(FUSE_LOG_ERR, "%i: cannot unblock caller\n",
>  			 pid);
> +	free(userdata->buf);
> +	userdata->buf = NULL;
>  	userdata->state = DTP_IOCTL_START;
>  	return;
>  }
> diff --git a/test/unittest/usdt/tst.manyprobes.r b/test/unittest/usdt/tst.manyprobes.r
> new file mode 100644
> index 0000000000000..5c128019946ac
> --- /dev/null
> +++ b/test/unittest/usdt/tst.manyprobes.r
> @@ -0,0 +1,4 @@
> +test:main:test1
> +test:main:test750
> +test:main:test1999
> +
> diff --git a/test/unittest/usdt/tst.manyprobes.sh b/test/unittest/usdt/tst.manyprobes.sh
> new file mode 100755
> index 0000000000000..0f3fcb2b50fd9
> --- /dev/null
> +++ b/test/unittest/usdt/tst.manyprobes.sh
> @@ -0,0 +1,77 @@
> +#!/bin/bash
> +#
> +# Oracle Linux DTrace.
> +# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> +# Licensed under the Universal Permissive License v 1.0 as shown at
> +# http://oss.oracle.com/licenses/upl.
> +#
> +if [ $# != 1 ]; then
> +	echo expected one argument: '<'dtrace-path'>'
> +	exit 2
> +fi
> +
> +dtrace=$1
> +CC=/usr/bin/gcc
> +CFLAGS="-I${PWD}/uts/common"
> +DIRNAME="$tmpdir/usdt-manyprobes.$$.$RANDOM"
> +mkdir -p $DIRNAME
> +cd $DIRNAME
> +
> +gen_test()
> +{
> +    local -i n=$1
> +    echo "#include <sys/sdt.h>"
> +    echo "void main(void) {"
> +    for ((i=0; i<$n; i++)); do
> +      echo 'DTRACE_PROBE1(manyprobes, 'test$i, '"foo");'
> +    done
> +    echo "}"
> +}
> +
> +gen_provider()
> +{
> +    local -i n=$1
> +    echo "provider manyprobes {"
> +    for ((i=0; i<$n; i++)); do
> +      echo "probe test$i(string);"
> +    done
> +    echo "};"
> +}
> +
> +nprobes=2000
> +gen_test $nprobes > test.c
> +gen_provider $nprobes > manyprobes.d
> +
> +${CC} ${CFLAGS} -c test.c
> +if [ $? -ne 0 ]; then
> +	echo "failed to compile test.c" >& 2
> +	exit 1
> +fi
> +$dtrace -G -s manyprobes.d test.o
> +if [ $? -ne 0 ]; then
> +	echo "failed to create DOF" >& 2
> +	exit 1
> +fi
> +${CC} ${CFLAGS} -o test test.o manyprobes.o
> +if [ $? -ne 0 ]; then
> +	echo "failed to link final executable" >& 2
> +	exit 1
> +fi
> +
> +script()
> +{
> +	$dtrace -c ./test -qs /dev/stdin <<EOF
> +	manyprobes\$target:::test1, manyprobes\$target:::test750, manyprobes\$target:::test1999
> +	{
> +		printf("%s:%s:%s\n", probemod, probefunc, probename);
> +	}
> +EOF
> +}
> +
> +script
> +status=$?
> +
> +# Temporary, until autocleanup is implemented
> +echo - > /sys/kernel/tracing/uprobe_events 2>/dev/null
> +
> +exit $status
> -- 
> 2.39.1.268.g9de2f9a303
> 
> 
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel



More information about the DTrace-devel mailing list