[DTrace-devel] [PATCH v4 09/19] Use usdt_prids map to call clauses conditionally for USDT probes
eugene.loh at oracle.com
eugene.loh at oracle.com
Wed Oct 16 16:01:38 UTC 2024
From: Eugene Loh <eugene.loh at oracle.com>
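Use the usdt_prids BPF map, keyed by pid and underlying-probe id, to
decide in the underlying-probe trampoline which clauses to call for
USDT probes.
This version supports only up to 64 clauses for an underlying
probe, because the per-probe clause bit mask is a single 64-bit value,
but it can be extended to more clauses.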
Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
libdtrace/dt_prov_uprobe.c | 166 ++++++++++++++++++-------
test/unittest/usdt/tst.nusdtprobes.r | 5 +
test/unittest/usdt/tst.nusdtprobes.sh | 172 ++++++++++++++++++++++++++
3 files changed, 300 insertions(+), 43 deletions(-)
create mode 100644 test/unittest/usdt/tst.nusdtprobes.r
create mode 100755 test/unittest/usdt/tst.nusdtprobes.sh
diff --git a/libdtrace/dt_prov_uprobe.c b/libdtrace/dt_prov_uprobe.c
index b010d8687..268708179 100644
--- a/libdtrace/dt_prov_uprobe.c
+++ b/libdtrace/dt_prov_uprobe.c
@@ -739,11 +739,16 @@ static void enable_usdt(dtrace_hdl_t *dtp, dt_probe_t *prp)
*/
static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
{
+ dtrace_hdl_t *dtp = pcb->pcb_hdl;
dt_irlist_t *dlp = &pcb->pcb_ir;
const dt_probe_t *uprp = pcb->pcb_probe;
const dt_uprobe_t *upp = uprp->prv_data;
const list_probe_t *pop;
uint_t lbl_exit = pcb->pcb_exitlbl;
+ dt_ident_t *usdt_prids = dt_dlib_get_map(dtp, "usdt_prids");
+ int n;
+
+ assert(usdt_prids != NULL);
dt_cg_tramp_prologue(pcb); // call this only once... is PRID set/relocated correctly?
@@ -755,14 +760,17 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
dt_cg_tramp_copy_regs(pcb); // call this only once for all clauses?
/*
- * Loop over overlying probes, calling clauses for those that match:
+ * pid probes.
+ *
+ * Loop over overlying pid probes, calling clauses for those that match:
*
- * for overlying probes (that match except possibly for pid)
+ * for overlying pid probes (that match except possibly for pid)
* if (pid matches) {
* dctx->mst->prid = PRID1;
* < any number of clause calls >
* }
*/
+
for (pop = dt_list_next(&upp->probes); pop != NULL;
pop = dt_list_next(pop)) {
const dt_probe_t *prp = pop->probe;
@@ -770,6 +778,9 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
pid_t pid;
dt_ident_t *idp;
+ if (prp->prov->impl != &dt_pid)
+ continue;
+
pid = dt_pid_get_pid(prp->desc, pcb->pcb_hdl, pcb, NULL);
assert(pid != -1);
@@ -801,6 +812,93 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
BPF_NOP());
}
+ /*
+ * USDT
+ */
+
+ /* In some cases, we know there are no USDT probes. */ // FIXME: add more checks
+ if (upp->flags & PP_IS_RETURN)
+ goto out;
+
+ dt_cg_tramp_copy_args_from_regs(pcb, 0);
+
+ /*
+ * Retrieve the PID of the process that caused the probe to fire.
+ */
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_get_current_pid_tgid));
+ emit(dlp, BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32));
+
+ /*
+ * Look up in the BPF 'usdt_prids' map. Space for the look-up key
+ * will be used on the BPF stack:
+ *
+ * offset value
+ *
+ * -sizeof(usdt_prids_map_key_t) pid (in %r0)
+ *
+ * -sizeof(usdt_prids_map_key_t) + sizeof(pid_t)
+ * ==
+ * -sizeof(dtrace_id_t) underlying-probe prid
+ */
+ emit(dlp, BPF_STORE(BPF_W, BPF_REG_9, (int)(-sizeof(usdt_prids_map_key_t)), BPF_REG_0));
+ emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_9, (int)(-sizeof(dtrace_id_t)), uprp->desc->id));
+ dt_cg_xsetx(dlp, usdt_prids, DT_LBL_NONE, BPF_REG_1, usdt_prids->di_id);
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_9));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, (int)(-sizeof(usdt_prids_map_key_t))));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_exit));
+
+ /* Read the PRID from the table lookup and store to mst->prid. */
+ emit(dlp, BPF_LOAD(BPF_W, BPF_REG_1, BPF_REG_0, 0));
+ emit(dlp, BPF_STORE(BPF_W, BPF_REG_7, DMST_PRID, BPF_REG_1));
+
+ /* Read the bit mask from the table lookup into %r6. */ // FIXME someday, extend this past 64 bits
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_0, offsetof(usdt_prids_map_val_t, mask)));
+
+ /*
+ * Hold the bit mask in %r6 between clause calls.
+ */
+ for (n = 0; n < dtp->dt_stmt_nextid; n++) {
+ dtrace_stmtdesc_t *stp;
+ dt_ident_t *idp;
+ uint_t lbl_next = dt_irlist_label(dlp);
+
+ stp = dtp->dt_stmts[n];
+ if (stp == NULL)
+ continue;
+
+ idp = stp->dtsd_clause;
+
+ /* If the lowest %r6 bit is 0, skip over this clause. */
+ emit(dlp, BPF_MOV_REG(BPF_REG_1, BPF_REG_6));
+ emit(dlp, BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 1));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, lbl_next));
+
+ /*
+ * if (*dctx.act != DT_ACTIVITY_ACTIVE) // lddw %r0, [%r9 + DCTX_ACT]
+ * goto exit; // ldw %r0, [%r0 + 0]
+ * // jne %r0, DT_ACTIVITY_ACTIVE, lbl_exit
+ */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_9, DCTX_ACT));
+ emit(dlp, BPF_LOAD(BPF_W, BPF_REG_0, BPF_REG_0, 0));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JNE, BPF_REG_0, DT_ACTIVITY_ACTIVE, lbl_exit));
+
+ /* dctx.mst->scratch_top = 8 */
+ emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_7, DMST_SCRATCH_TOP, 8));
+
+ /* Call clause. */
+ emit(dlp, BPF_MOV_REG(BPF_REG_1, BPF_REG_9));
+ emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
+
+ /* Finished this clause. */
+ emitl(dlp, lbl_next,
+ BPF_NOP());
+
+ /* Right-shift %r6. */
+ emit(dlp, BPF_ALU64_IMM(BPF_RSH, BPF_REG_6, 1));
+ }
+
+out:
dt_cg_tramp_return(pcb);
return 0;
@@ -846,10 +944,9 @@ static int trampoline_is_enabled(dt_pcb_t *pcb, uint_t exitlbl)
{
dt_irlist_t *dlp = &pcb->pcb_ir;
const dt_probe_t *uprp = pcb->pcb_probe;
- const dt_uprobe_t *upp = uprp->prv_data;
- const list_probe_t *pop;
- uint_t lbl_assign = dt_irlist_label(dlp);
- uint_t lbl_exit = pcb->pcb_exitlbl;
+ dt_ident_t *usdt_prids = dt_dlib_get_map(pcb->pcb_hdl, "usdt_prids");
+
+ assert(usdt_prids != NULL);
dt_cg_tramp_prologue(pcb);
@@ -858,7 +955,6 @@ static int trampoline_is_enabled(dt_pcb_t *pcb, uint_t exitlbl)
* // (%r7 = dctx->mst)
* // (%r8 = dctx->ctx)
*/
-
dt_cg_tramp_copy_regs(pcb);
/*
@@ -876,46 +972,30 @@ static int trampoline_is_enabled(dt_pcb_t *pcb, uint_t exitlbl)
emit(dlp, BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32));
/*
- * Generate a composite conditional clause, as above, except that rather
- * than emitting call_clauses, we emit copyouts instead, using
- * copyout_val() above:
+ * Look up in the BPF 'usdt_prids' map. Space for the look-up key
+ * will be used on the BPF stack:
*
- * if (pid == PID1) {
- * goto assign;
- * } else if (pid == PID2) {
- * goto assign;
- * } else if (pid == ...) {
- * goto assign;
- * }
- * goto exit;
- * assign:
- * *arg0 = 1;
- * goto exit;
+ * offset value
*
- * It is valid and safe to use %r0 to hold the pid value because there
- * are no assignments to %r0 possible in between the conditional
- * statements.
+ * -sizeof(usdt_prids_map_key_t) pid (in %r0)
+ *
+ * -sizeof(usdt_prids_map_key_t) + sizeof(pid_t)
+ * ==
+ * -sizeof(dtrace_id_t) underlying-probe prid
*/
- for (pop = dt_list_next(&upp->probes); pop != NULL;
- pop = dt_list_next(pop)) {
- const dt_probe_t *prp = pop->probe;
- pid_t pid;
- dt_ident_t *idp;
+ emit(dlp, BPF_STORE(BPF_W, BPF_REG_9, (int)(-sizeof(usdt_prids_map_key_t)), BPF_REG_0));
+ emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_9, (int)(-sizeof(dtrace_id_t)), uprp->desc->id));
+ dt_cg_xsetx(dlp, usdt_prids, DT_LBL_NONE, BPF_REG_1, usdt_prids->di_id);
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_9));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, (int)(-sizeof(usdt_prids_map_key_t))));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, pcb->pcb_exitlbl));
- pid = dt_pid_get_pid(prp->desc, pcb->pcb_hdl, pcb, NULL);
- assert(pid != -1);
-
- idp = dt_dlib_add_probe_var(pcb->pcb_hdl, prp);
- assert(idp != NULL);
-
- /*
- * Check whether this pid-provider probe serves the current
- * process, and copy out a 1 into arg 0 if so.
- */
- emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, pid, lbl_assign));
- }
- emit(dlp, BPF_JUMP(lbl_exit));
- copyout_val(pcb, lbl_assign, 1, 0);
+ /*
+ * If we succeeded, then we use copyout_val() above to assign:
+ * *arg0 = 1;
+ */
+ copyout_val(pcb, DT_LBL_NONE, 1, 0);
dt_cg_tramp_return(pcb);
diff --git a/test/unittest/usdt/tst.nusdtprobes.r b/test/unittest/usdt/tst.nusdtprobes.r
new file mode 100644
index 000000000..d894af92e
--- /dev/null
+++ b/test/unittest/usdt/tst.nusdtprobes.r
@@ -0,0 +1,5 @@
+try ""
+try "-xnusdtprobes=40"
+try "-xnusdtprobes=39"
+Files check.txt.sorted and dtrace.out.sorted differ
+success
diff --git a/test/unittest/usdt/tst.nusdtprobes.sh b/test/unittest/usdt/tst.nusdtprobes.sh
new file mode 100755
index 000000000..50f18a6ca
--- /dev/null
+++ b/test/unittest/usdt/tst.nusdtprobes.sh
@@ -0,0 +1,172 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+#
+# This test verifies the nusdtprobes option.
+# @@timeout: 100
+
+dtrace=$1
+
+# Set up test directory.
+
+DIRNAME=$tmpdir/nusdtprobes.$$.$RANDOM
+mkdir -p $DIRNAME
+cd $DIRNAME
+
+# Make the trigger.
+
+cat << EOF > prov.d
+provider testprov {
+ probe foo0();
+ probe foo1();
+ probe foo2();
+ probe foo3();
+ probe foo4();
+ probe foo5();
+ probe foo6();
+ probe foo7();
+ probe foo8();
+ probe foo9();
+};
+EOF
+
+cat << EOF > main.c
+#include <unistd.h>
+#include "prov.h"
+
+int
+main(int argc, char **argv)
+{
+ while (1) {
+ usleep(1000);
+
+ TESTPROV_FOO0();
+ TESTPROV_FOO1();
+ TESTPROV_FOO2();
+ TESTPROV_FOO3();
+ TESTPROV_FOO4();
+ TESTPROV_FOO5();
+ TESTPROV_FOO6();
+ TESTPROV_FOO7();
+ TESTPROV_FOO8();
+ TESTPROV_FOO9();
+ }
+
+ return 0;
+}
+EOF
+
+# Build the trigger.
+
+$dtrace -h -s prov.d
+if [ $? -ne 0 ]; then
+ echo "failed to generate header file" >&2
+ exit 1
+fi
+cc $test_cppflags -c main.c
+if [ $? -ne 0 ]; then
+ echo "failed to compile test" >&2
+ exit 1
+fi
+$dtrace -G -64 -s prov.d main.o
+if [ $? -ne 0 ]; then
+ echo "failed to create DOF" >&2
+ exit 1
+fi
+cc $test_cppflags -o main main.o prov.o
+if [ $? -ne 0 ]; then
+ echo "failed to link final executable" >&2
+ exit 1
+fi
+
+# Test nusdtprobes settings.
+#
+# We will start teams of processes, each with 4 members, each in turn
+# with 10 USDT probes. So, regardless of how many teams are run in
+# succession, at any one time DTrace needs room for at least 40 USDT
+# probes. The default and -xnusdtprobes=40 settings should work, but
+# -xnusdtprobes=39 should not.
+nteams=2
+nmmbrs=4
+
+for nusdt in "" "-xnusdtprobes=40" "-xnusdtprobes=39"; do
+
+ echo try '"'$nusdt'"'
+
+ # Start DTrace.
+
+ rm -f dtrace.out
+ $dtrace $dt_flags $nusdt -Zq -o dtrace.out -n '
+ testprov*:::
+ {
+ @[probeprov, probemod, probefunc, probename] = count();
+ }' &
+ dtpid=$!
+ sleep 2
+ if [[ ! -d /proc/$dtpid ]]; then
+ echo ERROR dtrace died
+ cat dtrace.out
+ exit 1
+ fi
+
+ # Start teams of processes, only one team at a time.
+
+ rm -f check.txt
+ for (( iteam = 0; iteam < $nteams; iteam++ )); do
+ # Start the team, writing out expected output.
+ sleep 2
+ for (( immbr = 0; immbr < $nmmbrs; immbr++ )); do
+ ./main &
+ pids[$immbr]=$!
+ for j in `seq 0 9`; do
+ echo testprov${pids[$immbr]} main main foo$j >> check.txt
+ done
+ done
+
+ # Kill the team.
+ sleep 3
+ for (( immbr = 0; immbr < $nmmbrs; immbr++ )); do
+ kill ${pids[$immbr]}
+ done
+ done
+
+ # Kill DTrace.
+
+ kill $dtpid
+ wait
+
+ # Strip the count() value out since we do not know its exact value.
+
+ awk 'NF == 5 { print $1, $2, $3, $4 }' dtrace.out | sort > dtrace.out.sorted
+
+ # Check.
+
+ sort check.txt > check.txt.sorted
+ if [ x$nusdt == x"-xnusdtprobes=39" ]; then
+ # Too few USDT slots: results must NOT match check.txt (the "Files ... differ" line from diff -q is expected output).
+ if diff -q check.txt.sorted dtrace.out.sorted; then
+ echo ERROR unexpected agreement
+ cat dtrace.out
+ exit 1
+ fi
+ else
+ # Enough USDT slots: every expected probe must have fired, so results must match check.txt.
+ if ! diff -q check.txt.sorted dtrace.out.sorted; then
+ echo ERROR output disagrees
+ echo === expected ===
+ cat check.txt.sorted
+ echo === got ===
+ cat dtrace.out.sorted
+ echo === diff ===
+ diff check.txt.sorted dtrace.out.sorted
+ exit 1
+ fi
+ fi
+done
+
+echo success
+
+exit 0
--
2.43.5
More information about the DTrace-devel
mailing list