[DTrace-devel] [PATCH v4 09/19] Use usdt_prids map to call clauses conditionally for USDT probes

Kris Van Hees kris.van.hees at oracle.com
Thu Oct 24 14:21:55 UTC 2024


On Wed, Oct 16, 2024 at 12:01:38PM -0400, eugene.loh at oracle.com wrote:
> From: Eugene Loh <eugene.loh at oracle.com>
> 
> This version supports only up to 64 clauses for an underlying
> probe, but it can be extended to more clauses.
> 
> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>

Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>

... with a very tiny pedantic comment and some comment about future work.

> ---
>  libdtrace/dt_prov_uprobe.c            | 166 ++++++++++++++++++-------
>  test/unittest/usdt/tst.nusdtprobes.r  |   5 +
>  test/unittest/usdt/tst.nusdtprobes.sh | 172 ++++++++++++++++++++++++++
>  3 files changed, 300 insertions(+), 43 deletions(-)
>  create mode 100644 test/unittest/usdt/tst.nusdtprobes.r
>  create mode 100755 test/unittest/usdt/tst.nusdtprobes.sh
> 
> diff --git a/libdtrace/dt_prov_uprobe.c b/libdtrace/dt_prov_uprobe.c
> index b010d8687..268708179 100644
> --- a/libdtrace/dt_prov_uprobe.c
> +++ b/libdtrace/dt_prov_uprobe.c
> @@ -739,11 +739,16 @@ static void enable_usdt(dtrace_hdl_t *dtp, dt_probe_t *prp)
>   */
>  static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
>  {
> +	dtrace_hdl_t		*dtp = pcb->pcb_hdl;
>  	dt_irlist_t		*dlp = &pcb->pcb_ir;
>  	const dt_probe_t	*uprp = pcb->pcb_probe;
>  	const dt_uprobe_t	*upp = uprp->prv_data;
>  	const list_probe_t	*pop;
>  	uint_t			lbl_exit = pcb->pcb_exitlbl;
> +	dt_ident_t		*usdt_prids = dt_dlib_get_map(dtp, "usdt_prids");
> +	int			n;
> +
> +	assert(usdt_prids != NULL);
>  
>  	dt_cg_tramp_prologue(pcb);                             // call this only once... is PRID set/relocated correctly?
>  
> @@ -755,14 +760,17 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
>  	dt_cg_tramp_copy_regs(pcb);                            // call this only once for all clauses?
>  
>  	/*
> -	 * Loop over overlying probes, calling clauses for those that match:
> +	 * pid probes.
> +	 *
> +	 * Loop over overlying pid probes, calling clauses for those that match:
>  	 *
> -	 *	for overlying probes (that match except possibly for pid)
> +	 *	for overlying pid probes (that match except possibly for pid)
>  	 *		if (pid matches) {
>  	 *			dctx->mst->prid = PRID1;
>  	 *			< any number of clause calls >
>  	 *		}
>  	 */
> +

Pedantic - but no empty line is needed here.

>  	for (pop = dt_list_next(&upp->probes); pop != NULL;
>  	     pop = dt_list_next(pop)) {
>  		const dt_probe_t	*prp = pop->probe;
> @@ -770,6 +778,9 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
>  		pid_t			pid;
>  		dt_ident_t		*idp;
>  
> +		if (prp->prov->impl != &dt_pid)
> +			continue;

Stuff for future: put pid probes and USDT probes in separate lists so you
don't need to have a conditional like this.

> +
>  		pid = dt_pid_get_pid(prp->desc, pcb->pcb_hdl, pcb, NULL);
>  		assert(pid != -1);
>  
> @@ -801,6 +812,93 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
>  			   BPF_NOP());
>  	}
>  
> +	/*
> +	 * USDT
> +	 */
> +
> +	/* In some cases, we know there are no USDT probes. */  // FIXME: add more checks
> +	if (upp->flags & PP_IS_RETURN)
> +		goto out;
> +
> +	dt_cg_tramp_copy_args_from_regs(pcb, 0);
> +
> +	/*
> +	 * Retrieve the PID of the process that caused the probe to fire.
> +	 */
> +	emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_get_current_pid_tgid));
> +	emit(dlp,  BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32));
> +
> +	/*
> +	 * Look up in the BPF 'usdt_prids' map.  Space for the look-up key
> +	 * will be used on the BPF stack:
> +	 *
> +	 *     offset                                       value
> +	 *
> +	 *     -sizeof(usdt_prids_map_key_t)                pid (in %r0)
> +	 *
> +	 *     -sizeof(usdt_prids_map_key_t) + sizeof(pid_t)
> +	 *     ==
> +	 *     -sizeof(dtrace_id_t)                         underlying-probe prid
> +	 */
> +	emit(dlp,  BPF_STORE(BPF_W, BPF_REG_9, (int)(-sizeof(usdt_prids_map_key_t)), BPF_REG_0));
> +	emit(dlp,  BPF_STORE_IMM(BPF_W, BPF_REG_9, (int)(-sizeof(dtrace_id_t)), uprp->desc->id));
> +	dt_cg_xsetx(dlp, usdt_prids, DT_LBL_NONE, BPF_REG_1, usdt_prids->di_id);
> +	emit(dlp,  BPF_MOV_REG(BPF_REG_2, BPF_REG_9));
> +	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, (int)(-sizeof(usdt_prids_map_key_t))));
> +	emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> +	emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_exit));
> +
> +	/* Read the PRID from the table lookup and store to mst->prid. */
> +	emit(dlp,  BPF_LOAD(BPF_W, BPF_REG_1, BPF_REG_0, 0));
> +	emit(dlp,  BPF_STORE(BPF_W, BPF_REG_7, DMST_PRID, BPF_REG_1));
> +
> +	/* Read the bit mask from the table lookup in %r6. */    // FIXME someday, extend this past 64 bits
> +	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_0, offsetof(usdt_prids_map_val_t, mask)));
> +
> +	/*
> +	 * Hold the bit mask in %r6 between clause calls.
> +	 */
> +	for (n = 0; n < dtp->dt_stmt_nextid; n++) {
> +		dtrace_stmtdesc_t *stp;
> +		dt_ident_t	*idp;
> +		uint_t		lbl_next = dt_irlist_label(dlp);
> +
> +		stp = dtp->dt_stmts[n];
> +		if (stp == NULL)
> +			continue;
> +
> +		idp = stp->dtsd_clause;
> +
> +		/* If the lowest %r6 bit is 0, skip over this clause. */
> +		emit(dlp,  BPF_MOV_REG(BPF_REG_1, BPF_REG_6));
> +		emit(dlp,  BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 1));
> +		emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, lbl_next));
> +
> +		/*
> +		 *      if (*dctx.act != act)   // ldw %r0, [%r9 + DCTX_ACT]
> +		 *	      goto exit;      // ldw %r0, [%r0 + 0]
> +		 *			      // jne %r0, act, lbl_exit
> +		 */
> +		emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_9, DCTX_ACT));
> +		emit(dlp,  BPF_LOAD(BPF_W, BPF_REG_0, BPF_REG_0, 0));
> +		emit(dlp,  BPF_BRANCH_IMM(BPF_JNE, BPF_REG_0, DT_ACTIVITY_ACTIVE, lbl_exit));
> +
> +		/* dctx.mst->scratch_top = 8 */
> +		emit(dlp,  BPF_STORE_IMM(BPF_W, BPF_REG_7, DMST_SCRATCH_TOP, 8));
> +
> +		/* Call clause. */
> +		emit(dlp,  BPF_MOV_REG(BPF_REG_1, BPF_REG_9));
> +		emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
> +
> +		/* Finished this clause. */
> +		emitl(dlp, lbl_next,
> +			   BPF_NOP());
> +
> +		/* Right-shift %r6. */
> +		emit(dlp,  BPF_ALU64_IMM(BPF_RSH, BPF_REG_6, 1));
> +	}
> +
> +out:
>  	dt_cg_tramp_return(pcb);
>  
>  	return 0;
> @@ -846,10 +944,9 @@ static int trampoline_is_enabled(dt_pcb_t *pcb, uint_t exitlbl)
>  {
>  	dt_irlist_t		*dlp = &pcb->pcb_ir;
>  	const dt_probe_t	*uprp = pcb->pcb_probe;
> -	const dt_uprobe_t	*upp = uprp->prv_data;
> -	const list_probe_t	*pop;
> -	uint_t			lbl_assign = dt_irlist_label(dlp);
> -	uint_t			lbl_exit = pcb->pcb_exitlbl;
> +	dt_ident_t		*usdt_prids = dt_dlib_get_map(pcb->pcb_hdl, "usdt_prids");
> +
> +	assert(usdt_prids != NULL);
>  
>  	dt_cg_tramp_prologue(pcb);
>  
> @@ -858,7 +955,6 @@ static int trampoline_is_enabled(dt_pcb_t *pcb, uint_t exitlbl)
>  	 *				//     (%r7 = dctx->mst)
>  	 *				//     (%r8 = dctx->ctx)
>  	 */
> -
>  	dt_cg_tramp_copy_regs(pcb);
>  
>  	/*
> @@ -876,46 +972,30 @@ static int trampoline_is_enabled(dt_pcb_t *pcb, uint_t exitlbl)
>  	emit(dlp,  BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32));
>  
>  	/*
> -	 * Generate a composite conditional clause, as above, except that rather
> -	 * than emitting call_clauses, we emit copyouts instead, using
> -	 * copyout_val() above:
> +	 * Look up in the BPF 'usdt_prids' map.  Space for the look-up key
> +	 * will be used on the BPF stack:
>  	 *
> -	 *	if (pid == PID1) {
> -	 *		goto assign;
> -	 *	} else if (pid == PID2) {
> -	 *		goto assign;
> -	 *	} else if (pid == ...) {
> -	 *		goto assign;
> -	 *	}
> -	 *	goto exit;
> -	 *	assign:
> -	 *	    *arg0 = 1;
> -	 *	goto exit;
> +	 *     offset                                       value
>  	 *
> -	 * It is valid and safe to use %r0 to hold the pid value because there
> -	 * are no assignments to %r0 possible in between the conditional
> -	 * statements.
> +	 *     -sizeof(usdt_prids_map_key_t)                pid (in %r0)
> +	 *
> +	 *     -sizeof(usdt_prids_map_key_t) + sizeof(pid_t)
> +	 *     ==
> +	 *     -sizeof(dtrace_id_t)                         underlying-probe prid
>  	 */
> -	for (pop = dt_list_next(&upp->probes); pop != NULL;
> -	     pop = dt_list_next(pop)) {
> -		const dt_probe_t	*prp = pop->probe;
> -		pid_t			pid;
> -		dt_ident_t		*idp;
> +	emit(dlp,  BPF_STORE(BPF_W, BPF_REG_9, (int)(-sizeof(usdt_prids_map_key_t)), BPF_REG_0));
> +	emit(dlp,  BPF_STORE_IMM(BPF_W, BPF_REG_9, (int)(-sizeof(dtrace_id_t)), uprp->desc->id));
> +	dt_cg_xsetx(dlp, usdt_prids, DT_LBL_NONE, BPF_REG_1, usdt_prids->di_id);
> +	emit(dlp,  BPF_MOV_REG(BPF_REG_2, BPF_REG_9));
> +	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, (int)(-sizeof(usdt_prids_map_key_t))));
> +	emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> +	emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, pcb->pcb_exitlbl));
>  
> -		pid = dt_pid_get_pid(prp->desc, pcb->pcb_hdl, pcb, NULL);
> -		assert(pid != -1);
> -
> -		idp = dt_dlib_add_probe_var(pcb->pcb_hdl, prp);
> -		assert(idp != NULL);
> -
> -		/*
> -		 * Check whether this pid-provider probe serves the current
> -		 * process, and copy out a 1 into arg 0 if so.
> -		 */
> -		emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, pid, lbl_assign));
> -	}
> -	emit(dlp,  BPF_JUMP(lbl_exit));
> -	copyout_val(pcb, lbl_assign, 1, 0);
> +	/*
> +	 * If we succeeded, then we use copyout_val() above to assign:
> +	 *	    *arg0 = 1;
> +	 */
> +	copyout_val(pcb, DT_LBL_NONE, 1, 0);
>  
>  	dt_cg_tramp_return(pcb);
>  
> diff --git a/test/unittest/usdt/tst.nusdtprobes.r b/test/unittest/usdt/tst.nusdtprobes.r
> new file mode 100644
> index 000000000..d894af92e
> --- /dev/null
> +++ b/test/unittest/usdt/tst.nusdtprobes.r
> @@ -0,0 +1,5 @@
> +try ""
> +try "-xnusdtprobes=40"
> +try "-xnusdtprobes=39"
> +Files check.txt.sorted and dtrace.out.sorted differ
> +success
> diff --git a/test/unittest/usdt/tst.nusdtprobes.sh b/test/unittest/usdt/tst.nusdtprobes.sh
> new file mode 100755
> index 000000000..50f18a6ca
> --- /dev/null
> +++ b/test/unittest/usdt/tst.nusdtprobes.sh
> @@ -0,0 +1,172 @@
> +#!/bin/bash
> +#
> +# Oracle Linux DTrace.
> +# Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
> +# Licensed under the Universal Permissive License v 1.0 as shown at
> +# http://oss.oracle.com/licenses/upl.
> +#
> +# This test verifies the nusdtprobes option.
> +# @@timeout: 100
> +
> +dtrace=$1
> +
> +# Set up test directory.
> +
> +DIRNAME=$tmpdir/nusdtprobes.$$.$RANDOM
> +mkdir -p $DIRNAME
> +cd $DIRNAME
> +
> +# Make the trigger.
> +
> +cat << EOF > prov.d
> +provider testprov {
> +	probe foo0();
> +	probe foo1();
> +	probe foo2();
> +	probe foo3();
> +	probe foo4();
> +	probe foo5();
> +	probe foo6();
> +	probe foo7();
> +	probe foo8();
> +	probe foo9();
> +};
> +EOF
> +
> +cat << EOF > main.c
> +#include <unistd.h>
> +#include "prov.h"
> +
> +int
> +main(int argc, char **argv)
> +{
> +	while (1) {
> +		usleep(1000);
> +
> +		TESTPROV_FOO0();
> +		TESTPROV_FOO1();
> +		TESTPROV_FOO2();
> +		TESTPROV_FOO3();
> +		TESTPROV_FOO4();
> +		TESTPROV_FOO5();
> +		TESTPROV_FOO6();
> +		TESTPROV_FOO7();
> +		TESTPROV_FOO8();
> +		TESTPROV_FOO9();
> +	}
> +
> +	return 0;
> +}
> +EOF
> +
> +# Build the trigger.
> +
> +$dtrace -h -s prov.d
> +if [ $? -ne 0 ]; then
> +	echo "failed to generate header file" >&2
> +	exit 1
> +fi
> +cc $test_cppflags -c main.c
> +if [ $? -ne 0 ]; then
> +	echo "failed to compile test" >&2
> +	exit 1
> +fi
> +$dtrace -G -64 -s prov.d main.o
> +if [ $? -ne 0 ]; then
> +	echo "failed to create DOF" >&2
> +	exit 1
> +fi
> +cc $test_cppflags -o main main.o prov.o
> +if [ $? -ne 0 ]; then
> +	echo "failed to link final executable" >&2
> +	exit 1
> +fi
> +
> +# Test nusdtprobes settings.
> +#
> +# We will start teams of processes, each with 4 members, each in turn
> +# with 10 USDT probes.  So, regardless of how many teams are run in
> +# succession, at any one time DTrace needs room for at least 40 USDT
> +# probes.  The default and -xnusdtprobes=40 settings should work, but
> +# -xnusdtprobes=39 should not.
> +nteams=2
> +nmmbrs=4
> +
> +for nusdt in "" "-xnusdtprobes=40" "-xnusdtprobes=39"; do
> +
> +	echo try '"'$nusdt'"'
> +
> +	# Start DTrace.
> +
> +	rm -f dtrace.out
> +	$dtrace $dt_flags $nusdt -Zq -o dtrace.out -n '
> +	testprov*:::
> +	{
> +		@[probeprov, probemod, probefunc, probename] = count();
> +	}' &
> +	dtpid=$!
> +	sleep 2
> +	if [[ ! -d /proc/$dtpid ]]; then
> +		echo ERROR dtrace died
> +		cat dtrace.out
> +		exit 1
> +	fi
> +
> +	# Start teams of processes, only one team at a time.
> +
> +	rm -f check.txt
> +	for (( iteam = 0; iteam < $nteams; iteam++ )); do
> +		# Start the team, writing out expected output.
> +		sleep 2
> +		for (( immbr = 0; immbr < $nmmbrs; immbr++ )); do
> +			./main &
> +			pids[$immbr]=$!
> +			for j in `seq 0 9`; do
> +				echo testprov${pids[$immbr]} main main foo$j >> check.txt
> +			done
> +		done
> +
> +		# Kill the team.
> +		sleep 3
> +		for (( immbr = 0; immbr < $nmmbrs; immbr++ )); do
> +			kill ${pids[$immbr]}
> +		done
> +	done
> +
> +	# Kill DTrace.
> +
> +	kill $dtpid
> +	wait
> +
> +	# Strip the count() value out since we do not know its exact value.
> +
> +	awk 'NF == 5 { print $1, $2, $3, $4 }' dtrace.out | sort > dtrace.out.sorted
> +
> +	# Check.
> +
> +	sort check.txt > check.txt.sorted
> +	if [ x$nusdt == x"-xnusdtprobes=39" ]; then
> +		# Results should not agree with check.txt.
> +		if diff -q check.txt.sorted dtrace.out.sorted; then
> +			echo ERROR unexpected agreement
> +			cat dtrace.out
> +			exit 0
> +		fi
> +	else
> +		# Results should agree with check.txt.
> +		if ! diff -q check.txt.sorted dtrace.out.sorted; then
> +			echo ERROR output disagrees
> +			echo === expected ===
> +			cat check.txt.sorted
> +			echo === got ===
> +			cat dtrace.out.sorted
> +			echo === diff ===
> +			diff check.txt.sorted dtrace.out.sorted
> +			exit 1
> +		fi
> +	fi
> +done
> +
> +echo success
> +
> +exit 0
> -- 
> 2.43.5
> 



More information about the DTrace-devel mailing list