[DTrace-devel] [PATCH v4 09/19] Use usdt_prids map to call clauses conditionally for USDT probes

eugene.loh at oracle.com eugene.loh at oracle.com
Wed Oct 16 16:01:38 UTC 2024


From: Eugene Loh <eugene.loh at oracle.com>

This version supports at most 64 clauses per underlying probe, because the
clause bit mask is held in a single 64-bit value; it can be extended later.

Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
 libdtrace/dt_prov_uprobe.c            | 166 ++++++++++++++++++-------
 test/unittest/usdt/tst.nusdtprobes.r  |   5 +
 test/unittest/usdt/tst.nusdtprobes.sh | 172 ++++++++++++++++++++++++++
 3 files changed, 300 insertions(+), 43 deletions(-)
 create mode 100644 test/unittest/usdt/tst.nusdtprobes.r
 create mode 100755 test/unittest/usdt/tst.nusdtprobes.sh

diff --git a/libdtrace/dt_prov_uprobe.c b/libdtrace/dt_prov_uprobe.c
index b010d8687..268708179 100644
--- a/libdtrace/dt_prov_uprobe.c
+++ b/libdtrace/dt_prov_uprobe.c
@@ -739,11 +739,16 @@ static void enable_usdt(dtrace_hdl_t *dtp, dt_probe_t *prp)
  */
 static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
 {
+	dtrace_hdl_t		*dtp = pcb->pcb_hdl;
 	dt_irlist_t		*dlp = &pcb->pcb_ir;
 	const dt_probe_t	*uprp = pcb->pcb_probe;
 	const dt_uprobe_t	*upp = uprp->prv_data;
 	const list_probe_t	*pop;
 	uint_t			lbl_exit = pcb->pcb_exitlbl;
+	dt_ident_t		*usdt_prids = dt_dlib_get_map(dtp, "usdt_prids");
+	int			n;
+
+	assert(usdt_prids != NULL);
 
 	dt_cg_tramp_prologue(pcb);                             // call this only once... is PRID set/relocated correctly?
 
@@ -755,14 +760,17 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
 	dt_cg_tramp_copy_regs(pcb);                            // call this only once for all clauses?
 
 	/*
-	 * Loop over overlying probes, calling clauses for those that match:
+	 * pid probes.
+	 *
+	 * Loop over overlying pid probes, calling clauses for those that match:
 	 *
-	 *	for overlying probes (that match except possibly for pid)
+	 *	for overlying pid probes (that match except possibly for pid)
 	 *		if (pid matches) {
 	 *			dctx->mst->prid = PRID1;
 	 *			< any number of clause calls >
 	 *		}
 	 */
+
 	for (pop = dt_list_next(&upp->probes); pop != NULL;
 	     pop = dt_list_next(pop)) {
 		const dt_probe_t	*prp = pop->probe;
@@ -770,6 +778,9 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
 		pid_t			pid;
 		dt_ident_t		*idp;
 
+		if (prp->prov->impl != &dt_pid)
+			continue;
+
 		pid = dt_pid_get_pid(prp->desc, pcb->pcb_hdl, pcb, NULL);
 		assert(pid != -1);
 
@@ -801,6 +812,93 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
 			   BPF_NOP());
 	}
 
+	/*
+	 * USDT
+	 */
+
+	/* In some cases, we know there are no USDT probes. */  // FIXME: add more checks
+	if (upp->flags & PP_IS_RETURN)
+		goto out;
+
+	dt_cg_tramp_copy_args_from_regs(pcb, 0);
+
+	/*
+	 * Retrieve the PID of the process that caused the probe to fire.
+	 */
+	emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_get_current_pid_tgid));
+	emit(dlp,  BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32));
+
+	/*
+	 * Look up in the BPF 'usdt_prids' map.  The look-up key is assembled
+	 * on the BPF stack, at these offsets relative to %r9:
+	 *
+	 *     offset                                       value
+	 *
+	 *     -sizeof(usdt_prids_map_key_t)                pid (in %r0)
+	 *
+	 *     -sizeof(usdt_prids_map_key_t) + sizeof(pid_t)
+	 *     ==
+	 *     -sizeof(dtrace_id_t)                         underlying-probe prid
+	 */
+	emit(dlp,  BPF_STORE(BPF_W, BPF_REG_9, (int)(-sizeof(usdt_prids_map_key_t)), BPF_REG_0));
+	emit(dlp,  BPF_STORE_IMM(BPF_W, BPF_REG_9, (int)(-sizeof(dtrace_id_t)), uprp->desc->id));
+	dt_cg_xsetx(dlp, usdt_prids, DT_LBL_NONE, BPF_REG_1, usdt_prids->di_id);
+	emit(dlp,  BPF_MOV_REG(BPF_REG_2, BPF_REG_9));
+	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, (int)(-sizeof(usdt_prids_map_key_t))));
+	emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+	emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_exit));
+
+	/* Read the PRID from the table lookup and store to mst->prid. */
+	emit(dlp,  BPF_LOAD(BPF_W, BPF_REG_1, BPF_REG_0, 0));
+	emit(dlp,  BPF_STORE(BPF_W, BPF_REG_7, DMST_PRID, BPF_REG_1));
+
+	/* Read the bit mask from the table lookup into %r6. */    // FIXME someday, extend this past 64 bits
+	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_0, offsetof(usdt_prids_map_val_t, mask)));
+
+	/*
+	 * Hold the bit mask in %r6 between clause calls.
+	 */
+	for (n = 0; n < dtp->dt_stmt_nextid; n++) {
+		dtrace_stmtdesc_t *stp;
+		dt_ident_t	*idp;
+		uint_t		lbl_next = dt_irlist_label(dlp);
+
+		stp = dtp->dt_stmts[n];
+		if (stp == NULL)
+			continue;
+
+		idp = stp->dtsd_clause;
+
+		/* If the lowest %r6 bit is 0, skip over this clause. */
+		emit(dlp,  BPF_MOV_REG(BPF_REG_1, BPF_REG_6));
+		emit(dlp,  BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 1));
+		emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, lbl_next));
+
+		/*
+		 *      if (*dctx.act != act)   // lddw %r0, [%r9 + DCTX_ACT]
+		 *	      goto exit;      // ldw %r0, [%r0 + 0]
+		 *			      // jne %r0, act, lbl_exit
+		 */
+		emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_9, DCTX_ACT));
+		emit(dlp,  BPF_LOAD(BPF_W, BPF_REG_0, BPF_REG_0, 0));
+		emit(dlp,  BPF_BRANCH_IMM(BPF_JNE, BPF_REG_0, DT_ACTIVITY_ACTIVE, lbl_exit));
+
+		/* dctx.mst->scratch_top = 8 */
+		emit(dlp,  BPF_STORE_IMM(BPF_W, BPF_REG_7, DMST_SCRATCH_TOP, 8));
+
+		/* Call clause. */
+		emit(dlp,  BPF_MOV_REG(BPF_REG_1, BPF_REG_9));
+		emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
+
+		/* Finished this clause. */
+		emitl(dlp, lbl_next,
+			   BPF_NOP());
+
+		/* Right-shift %r6. */
+		emit(dlp,  BPF_ALU64_IMM(BPF_RSH, BPF_REG_6, 1));
+	}
+
+out:
 	dt_cg_tramp_return(pcb);
 
 	return 0;
@@ -846,10 +944,9 @@ static int trampoline_is_enabled(dt_pcb_t *pcb, uint_t exitlbl)
 {
 	dt_irlist_t		*dlp = &pcb->pcb_ir;
 	const dt_probe_t	*uprp = pcb->pcb_probe;
-	const dt_uprobe_t	*upp = uprp->prv_data;
-	const list_probe_t	*pop;
-	uint_t			lbl_assign = dt_irlist_label(dlp);
-	uint_t			lbl_exit = pcb->pcb_exitlbl;
+	dt_ident_t		*usdt_prids = dt_dlib_get_map(pcb->pcb_hdl, "usdt_prids");
+
+	assert(usdt_prids != NULL);
 
 	dt_cg_tramp_prologue(pcb);
 
@@ -858,7 +955,6 @@ static int trampoline_is_enabled(dt_pcb_t *pcb, uint_t exitlbl)
 	 *				//     (%r7 = dctx->mst)
 	 *				//     (%r8 = dctx->ctx)
 	 */
-
 	dt_cg_tramp_copy_regs(pcb);
 
 	/*
@@ -876,46 +972,30 @@ static int trampoline_is_enabled(dt_pcb_t *pcb, uint_t exitlbl)
 	emit(dlp,  BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32));
 
 	/*
-	 * Generate a composite conditional clause, as above, except that rather
-	 * than emitting call_clauses, we emit copyouts instead, using
-	 * copyout_val() above:
+	 * Look up in the BPF 'usdt_prids' map.  The look-up key is assembled
+	 * on the BPF stack, at these offsets relative to %r9:
 	 *
-	 *	if (pid == PID1) {
-	 *		goto assign;
-	 *	} else if (pid == PID2) {
-	 *		goto assign;
-	 *	} else if (pid == ...) {
-	 *		goto assign;
-	 *	}
-	 *	goto exit;
-	 *	assign:
-	 *	    *arg0 = 1;
-	 *	goto exit;
+	 *     offset                                       value
 	 *
-	 * It is valid and safe to use %r0 to hold the pid value because there
-	 * are no assignments to %r0 possible in between the conditional
-	 * statements.
+	 *     -sizeof(usdt_prids_map_key_t)                pid (in %r0)
+	 *
+	 *     -sizeof(usdt_prids_map_key_t) + sizeof(pid_t)
+	 *     ==
+	 *     -sizeof(dtrace_id_t)                         underlying-probe prid
 	 */
-	for (pop = dt_list_next(&upp->probes); pop != NULL;
-	     pop = dt_list_next(pop)) {
-		const dt_probe_t	*prp = pop->probe;
-		pid_t			pid;
-		dt_ident_t		*idp;
+	emit(dlp,  BPF_STORE(BPF_W, BPF_REG_9, (int)(-sizeof(usdt_prids_map_key_t)), BPF_REG_0));
+	emit(dlp,  BPF_STORE_IMM(BPF_W, BPF_REG_9, (int)(-sizeof(dtrace_id_t)), uprp->desc->id));
+	dt_cg_xsetx(dlp, usdt_prids, DT_LBL_NONE, BPF_REG_1, usdt_prids->di_id);
+	emit(dlp,  BPF_MOV_REG(BPF_REG_2, BPF_REG_9));
+	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, (int)(-sizeof(usdt_prids_map_key_t))));
+	emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+	emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, pcb->pcb_exitlbl));
 
-		pid = dt_pid_get_pid(prp->desc, pcb->pcb_hdl, pcb, NULL);
-		assert(pid != -1);
-
-		idp = dt_dlib_add_probe_var(pcb->pcb_hdl, prp);
-		assert(idp != NULL);
-
-		/*
-		 * Check whether this pid-provider probe serves the current
-		 * process, and copy out a 1 into arg 0 if so.
-		 */
-		emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, pid, lbl_assign));
-	}
-	emit(dlp,  BPF_JUMP(lbl_exit));
-	copyout_val(pcb, lbl_assign, 1, 0);
+	/*
+	 * If we succeeded, then we use copyout_val() above to assign:
+	 *	    *arg0 = 1;
+	 */
+	copyout_val(pcb, DT_LBL_NONE, 1, 0);
 
 	dt_cg_tramp_return(pcb);
 
diff --git a/test/unittest/usdt/tst.nusdtprobes.r b/test/unittest/usdt/tst.nusdtprobes.r
new file mode 100644
index 000000000..d894af92e
--- /dev/null
+++ b/test/unittest/usdt/tst.nusdtprobes.r
@@ -0,0 +1,5 @@
+try ""
+try "-xnusdtprobes=40"
+try "-xnusdtprobes=39"
+Files check.txt.sorted and dtrace.out.sorted differ
+success
diff --git a/test/unittest/usdt/tst.nusdtprobes.sh b/test/unittest/usdt/tst.nusdtprobes.sh
new file mode 100755
index 000000000..50f18a6ca
--- /dev/null
+++ b/test/unittest/usdt/tst.nusdtprobes.sh
@@ -0,0 +1,172 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+#
+# This test verifies the nusdtprobes option.
+# @@timeout: 100
+
+dtrace=$1
+
+# Set up a private scratch directory under $tmpdir and work from there.
+
+DIRNAME=$tmpdir/nusdtprobes.$$.$RANDOM
+mkdir -p $DIRNAME
+cd $DIRNAME
+
+# Make the trigger: a provider with 10 probes and a program that fires them all.
+
+cat << EOF > prov.d
+provider testprov {
+	probe foo0();
+	probe foo1();
+	probe foo2();
+	probe foo3();
+	probe foo4();
+	probe foo5();
+	probe foo6();
+	probe foo7();
+	probe foo8();
+	probe foo9();
+};
+EOF
+
+cat << EOF > main.c
+#include <unistd.h>
+#include "prov.h"
+
+int
+main(int argc, char **argv)
+{
+	while (1) {
+		usleep(1000);
+
+		TESTPROV_FOO0();
+		TESTPROV_FOO1();
+		TESTPROV_FOO2();
+		TESTPROV_FOO3();
+		TESTPROV_FOO4();
+		TESTPROV_FOO5();
+		TESTPROV_FOO6();
+		TESTPROV_FOO7();
+		TESTPROV_FOO8();
+		TESTPROV_FOO9();
+	}
+
+	return 0;
+}
+EOF
+
+# Build the trigger: header file, object file, DOF object, then the final link.
+
+$dtrace -h -s prov.d
+if [ $? -ne 0 ]; then
+	echo "failed to generate header file" >&2
+	exit 1
+fi
+cc $test_cppflags -c main.c
+if [ $? -ne 0 ]; then
+	echo "failed to compile test" >&2
+	exit 1
+fi
+$dtrace -G -64 -s prov.d main.o
+if [ $? -ne 0 ]; then
+	echo "failed to create DOF" >&2
+	exit 1
+fi
+cc $test_cppflags -o main main.o prov.o
+if [ $? -ne 0 ]; then
+	echo "failed to link final executable" >&2
+	exit 1
+fi
+
+# Test nusdtprobes settings.
+#
+# We will start teams of processes, each with 4 members, each in turn
+# with 10 USDT probes.  So, regardless of how many teams are run in
+# succession, at any one time DTrace needs room for at least 40 USDT
+# probes.  The default and -xnusdtprobes=40 settings should work, but
+# -xnusdtprobes=39 should not.
+nteams=2
+nmmbrs=4
+
+for nusdt in "" "-xnusdtprobes=40" "-xnusdtprobes=39"; do
+
+	echo try '"'$nusdt'"'
+
+	# Start DTrace in the background and make sure it is still alive.
+
+	rm -f dtrace.out
+	$dtrace $dt_flags $nusdt -Zq -o dtrace.out -n '
+	testprov*:::
+	{
+		@[probeprov, probemod, probefunc, probename] = count();
+	}' &
+	dtpid=$!
+	sleep 2
+	if [[ ! -d /proc/$dtpid ]]; then
+		echo ERROR dtrace died
+		cat dtrace.out
+		exit 1
+	fi
+
+	# Start teams of processes, only one team at a time.
+
+	rm -f check.txt
+	for (( iteam = 0; iteam < $nteams; iteam++ )); do
+		# Start the team, writing out expected output.
+		sleep 2
+		for (( immbr = 0; immbr < $nmmbrs; immbr++ )); do
+			./main &
+			pids[$immbr]=$!
+			for j in `seq 0 9`; do
+				echo testprov${pids[$immbr]} main main foo$j >> check.txt
+			done
+		done
+
+		# Kill the team.
+		sleep 3
+		for (( immbr = 0; immbr < $nmmbrs; immbr++ )); do
+			kill ${pids[$immbr]}
+		done
+	done
+
+	# Kill DTrace.
+
+	kill $dtpid
+	wait
+
+	# Strip the count() value out since we do not know its exact value.
+
+	awk 'NF == 5 { print $1, $2, $3, $4 }' dtrace.out | sort > dtrace.out.sorted
+
+	# Check.
+
+	sort check.txt > check.txt.sorted
+	if [ x$nusdt == x"-xnusdtprobes=39" ]; then
+		# Results should not agree with check.txt; agreement is a failure.
+		if diff -q check.txt.sorted dtrace.out.sorted; then
+			echo ERROR unexpected agreement
+			cat dtrace.out
+			exit 1
+		fi
+	else
+		# Results should agree with check.txt.
+		if ! diff -q check.txt.sorted dtrace.out.sorted; then
+			echo ERROR output disagrees
+			echo === expected ===
+			cat check.txt.sorted
+			echo === got ===
+			cat dtrace.out.sorted
+			echo === diff ===
+			diff check.txt.sorted dtrace.out.sorted
+			exit 1
+		fi
+	fi
+done
+
+echo success
+
+exit 0
-- 
2.43.5




More information about the DTrace-devel mailing list