[DTrace-devel] [PATCH v4 09/19] Use usdt_prids map to call clauses conditionally for USDT probes
Kris Van Hees
kris.van.hees at oracle.com
Thu Oct 24 14:21:55 UTC 2024
On Wed, Oct 16, 2024 at 12:01:38PM -0400, eugene.loh at oracle.com wrote:
> From: Eugene Loh <eugene.loh at oracle.com>
>
> This version supports only up to 64 clauses for an underlying
> probe, but it can be extended to more clauses.
>
> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>
... with a very tiny pedantic comment and some comment about future work.
> ---
> libdtrace/dt_prov_uprobe.c | 166 ++++++++++++++++++-------
> test/unittest/usdt/tst.nusdtprobes.r | 5 +
> test/unittest/usdt/tst.nusdtprobes.sh | 172 ++++++++++++++++++++++++++
> 3 files changed, 300 insertions(+), 43 deletions(-)
> create mode 100644 test/unittest/usdt/tst.nusdtprobes.r
> create mode 100755 test/unittest/usdt/tst.nusdtprobes.sh
>
> diff --git a/libdtrace/dt_prov_uprobe.c b/libdtrace/dt_prov_uprobe.c
> index b010d8687..268708179 100644
> --- a/libdtrace/dt_prov_uprobe.c
> +++ b/libdtrace/dt_prov_uprobe.c
> @@ -739,11 +739,16 @@ static void enable_usdt(dtrace_hdl_t *dtp, dt_probe_t *prp)
> */
> static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> {
> + dtrace_hdl_t *dtp = pcb->pcb_hdl;
> dt_irlist_t *dlp = &pcb->pcb_ir;
> const dt_probe_t *uprp = pcb->pcb_probe;
> const dt_uprobe_t *upp = uprp->prv_data;
> const list_probe_t *pop;
> uint_t lbl_exit = pcb->pcb_exitlbl;
> + dt_ident_t *usdt_prids = dt_dlib_get_map(dtp, "usdt_prids");
> + int n;
> +
> + assert(usdt_prids != NULL);
>
> dt_cg_tramp_prologue(pcb); // call this only once... is PRID set/relocated correctly?
>
> @@ -755,14 +760,17 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> dt_cg_tramp_copy_regs(pcb); // call this only once for all clauses?
>
> /*
> - * Loop over overlying probes, calling clauses for those that match:
> + * pid probes.
> + *
> + * Loop over overlying pid probes, calling clauses for those that match:
> *
> - * for overlying probes (that match except possibly for pid)
> + * for overlying pid probes (that match except possibly for pid)
> * if (pid matches) {
> * dctx->mst->prid = PRID1;
> * < any number of clause calls >
> * }
> */
> +
Pedantic - but no empty line is needed here.
> for (pop = dt_list_next(&upp->probes); pop != NULL;
> pop = dt_list_next(pop)) {
> const dt_probe_t *prp = pop->probe;
> @@ -770,6 +778,9 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> pid_t pid;
> dt_ident_t *idp;
>
> + if (prp->prov->impl != &dt_pid)
> + continue;
Stuff for future: put pid probes and USDT probes in separate lists so you
don't need to have a conditional like this.
> +
> pid = dt_pid_get_pid(prp->desc, pcb->pcb_hdl, pcb, NULL);
> assert(pid != -1);
>
> @@ -801,6 +812,93 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> BPF_NOP());
> }
>
> + /*
> + * USDT
> + */
> +
> + /* In some cases, we know there are no USDT probes. */ // FIXME: add more checks
> + if (upp->flags & PP_IS_RETURN)
> + goto out;
> +
> + dt_cg_tramp_copy_args_from_regs(pcb, 0);
> +
> + /*
> + * Retrieve the PID of the process that caused the probe to fire.
> + */
> + emit(dlp, BPF_CALL_HELPER(BPF_FUNC_get_current_pid_tgid));
> + emit(dlp, BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32));
> +
> + /*
> + * Look up in the BPF 'usdt_prids' map. Space for the look-up key
> + * will be used on the BPF stack:
> + *
> + * offset value
> + *
> + * -sizeof(usdt_prids_map_key_t) pid (in %r0)
> + *
> + * -sizeof(usdt_prids_map_key_t) + sizeof(pid_t)
> + * ==
> + * -sizeof(dtrace_id_t) underlying-probe prid
> + */
> + emit(dlp, BPF_STORE(BPF_W, BPF_REG_9, (int)(-sizeof(usdt_prids_map_key_t)), BPF_REG_0));
> + emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_9, (int)(-sizeof(dtrace_id_t)), uprp->desc->id));
> + dt_cg_xsetx(dlp, usdt_prids, DT_LBL_NONE, BPF_REG_1, usdt_prids->di_id);
> + emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_9));
> + emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, (int)(-sizeof(usdt_prids_map_key_t))));
> + emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> + emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_exit));
> +
> + /* Read the PRID from the table lookup and store to mst->prid. */
> + emit(dlp, BPF_LOAD(BPF_W, BPF_REG_1, BPF_REG_0, 0));
> + emit(dlp, BPF_STORE(BPF_W, BPF_REG_7, DMST_PRID, BPF_REG_1));
> +
> + /* Read the bit mask from the table lookup in %r6. */ // FIXME someday, extend this past 64 bits
> + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_0, offsetof(usdt_prids_map_val_t, mask)));
> +
> + /*
> + * Hold the bit mask in %r6 between clause calls.
> + */
> + for (n = 0; n < dtp->dt_stmt_nextid; n++) {
> + dtrace_stmtdesc_t *stp;
> + dt_ident_t *idp;
> + uint_t lbl_next = dt_irlist_label(dlp);
> +
> + stp = dtp->dt_stmts[n];
> + if (stp == NULL)
> + continue;
> +
> + idp = stp->dtsd_clause;
> +
> + /* If the lowest %r6 bit is 0, skip over this clause. */
> + emit(dlp, BPF_MOV_REG(BPF_REG_1, BPF_REG_6));
> + emit(dlp, BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 1));
> + emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, lbl_next));
> +
> + /*
> + * if (*dctx.act != act) // ldw %r0, [%r9 + DCTX_ACT]
> + * goto exit; // ldw %r0, [%r0 + 0]
> + * // jne %r0, act, lbl_exit
> + */
> + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_9, DCTX_ACT));
> + emit(dlp, BPF_LOAD(BPF_W, BPF_REG_0, BPF_REG_0, 0));
> + emit(dlp, BPF_BRANCH_IMM(BPF_JNE, BPF_REG_0, DT_ACTIVITY_ACTIVE, lbl_exit));
> +
> + /* dctx.mst->scratch_top = 8 */
> + emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_7, DMST_SCRATCH_TOP, 8));
> +
> + /* Call clause. */
> + emit(dlp, BPF_MOV_REG(BPF_REG_1, BPF_REG_9));
> + emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
> +
> + /* Finished this clause. */
> + emitl(dlp, lbl_next,
> + BPF_NOP());
> +
> + /* Right-shift %r6. */
> + emit(dlp, BPF_ALU64_IMM(BPF_RSH, BPF_REG_6, 1));
> + }
> +
> +out:
> dt_cg_tramp_return(pcb);
>
> return 0;
> @@ -846,10 +944,9 @@ static int trampoline_is_enabled(dt_pcb_t *pcb, uint_t exitlbl)
> {
> dt_irlist_t *dlp = &pcb->pcb_ir;
> const dt_probe_t *uprp = pcb->pcb_probe;
> - const dt_uprobe_t *upp = uprp->prv_data;
> - const list_probe_t *pop;
> - uint_t lbl_assign = dt_irlist_label(dlp);
> - uint_t lbl_exit = pcb->pcb_exitlbl;
> + dt_ident_t *usdt_prids = dt_dlib_get_map(pcb->pcb_hdl, "usdt_prids");
> +
> + assert(usdt_prids != NULL);
>
> dt_cg_tramp_prologue(pcb);
>
> @@ -858,7 +955,6 @@ static int trampoline_is_enabled(dt_pcb_t *pcb, uint_t exitlbl)
> * // (%r7 = dctx->mst)
> * // (%r8 = dctx->ctx)
> */
> -
> dt_cg_tramp_copy_regs(pcb);
>
> /*
> @@ -876,46 +972,30 @@ static int trampoline_is_enabled(dt_pcb_t *pcb, uint_t exitlbl)
> emit(dlp, BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32));
>
> /*
> - * Generate a composite conditional clause, as above, except that rather
> - * than emitting call_clauses, we emit copyouts instead, using
> - * copyout_val() above:
> + * Look up in the BPF 'usdt_prids' map. Space for the look-up key
> + * will be used on the BPF stack:
> *
> - * if (pid == PID1) {
> - * goto assign;
> - * } else if (pid == PID2) {
> - * goto assign;
> - * } else if (pid == ...) {
> - * goto assign;
> - * }
> - * goto exit;
> - * assign:
> - * *arg0 = 1;
> - * goto exit;
> + * offset value
> *
> - * It is valid and safe to use %r0 to hold the pid value because there
> - * are no assignments to %r0 possible in between the conditional
> - * statements.
> + * -sizeof(usdt_prids_map_key_t) pid (in %r0)
> + *
> + * -sizeof(usdt_prids_map_key_t) + sizeof(pid_t)
> + * ==
> + * -sizeof(dtrace_id_t) underlying-probe prid
> */
> - for (pop = dt_list_next(&upp->probes); pop != NULL;
> - pop = dt_list_next(pop)) {
> - const dt_probe_t *prp = pop->probe;
> - pid_t pid;
> - dt_ident_t *idp;
> + emit(dlp, BPF_STORE(BPF_W, BPF_REG_9, (int)(-sizeof(usdt_prids_map_key_t)), BPF_REG_0));
> + emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_9, (int)(-sizeof(dtrace_id_t)), uprp->desc->id));
> + dt_cg_xsetx(dlp, usdt_prids, DT_LBL_NONE, BPF_REG_1, usdt_prids->di_id);
> + emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_9));
> + emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, (int)(-sizeof(usdt_prids_map_key_t))));
> + emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> + emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, pcb->pcb_exitlbl));
>
> - pid = dt_pid_get_pid(prp->desc, pcb->pcb_hdl, pcb, NULL);
> - assert(pid != -1);
> -
> - idp = dt_dlib_add_probe_var(pcb->pcb_hdl, prp);
> - assert(idp != NULL);
> -
> - /*
> - * Check whether this pid-provider probe serves the current
> - * process, and copy out a 1 into arg 0 if so.
> - */
> - emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, pid, lbl_assign));
> - }
> - emit(dlp, BPF_JUMP(lbl_exit));
> - copyout_val(pcb, lbl_assign, 1, 0);
> + /*
> + * If we succeeded, then we use copyout_val() above to assign:
> + * *arg0 = 1;
> + */
> + copyout_val(pcb, DT_LBL_NONE, 1, 0);
>
> dt_cg_tramp_return(pcb);
>
> diff --git a/test/unittest/usdt/tst.nusdtprobes.r b/test/unittest/usdt/tst.nusdtprobes.r
> new file mode 100644
> index 000000000..d894af92e
> --- /dev/null
> +++ b/test/unittest/usdt/tst.nusdtprobes.r
> @@ -0,0 +1,5 @@
> +try ""
> +try "-xnusdtprobes=40"
> +try "-xnusdtprobes=39"
> +Files check.txt.sorted and dtrace.out.sorted differ
> +success
> diff --git a/test/unittest/usdt/tst.nusdtprobes.sh b/test/unittest/usdt/tst.nusdtprobes.sh
> new file mode 100755
> index 000000000..50f18a6ca
> --- /dev/null
> +++ b/test/unittest/usdt/tst.nusdtprobes.sh
> @@ -0,0 +1,172 @@
> +#!/bin/bash
> +#
> +# Oracle Linux DTrace.
> +# Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
> +# Licensed under the Universal Permissive License v 1.0 as shown at
> +# http://oss.oracle.com/licenses/upl.
> +#
> +# This test verifies the nusdtprobes option.
> +# @@timeout: 100
> +
> +dtrace=$1
> +
> +# Set up test directory.
> +
> +DIRNAME=$tmpdir/nusdtprobes.$$.$RANDOM
> +mkdir -p $DIRNAME
> +cd $DIRNAME
> +
> +# Make the trigger.
> +
> +cat << EOF > prov.d
> +provider testprov {
> + probe foo0();
> + probe foo1();
> + probe foo2();
> + probe foo3();
> + probe foo4();
> + probe foo5();
> + probe foo6();
> + probe foo7();
> + probe foo8();
> + probe foo9();
> +};
> +EOF
> +
> +cat << EOF > main.c
> +#include <unistd.h>
> +#include "prov.h"
> +
> +int
> +main(int argc, char **argv)
> +{
> + while (1) {
> + usleep(1000);
> +
> + TESTPROV_FOO0();
> + TESTPROV_FOO1();
> + TESTPROV_FOO2();
> + TESTPROV_FOO3();
> + TESTPROV_FOO4();
> + TESTPROV_FOO5();
> + TESTPROV_FOO6();
> + TESTPROV_FOO7();
> + TESTPROV_FOO8();
> + TESTPROV_FOO9();
> + }
> +
> + return 0;
> +}
> +EOF
> +
> +# Build the trigger.
> +
> +$dtrace -h -s prov.d
> +if [ $? -ne 0 ]; then
> + echo "failed to generate header file" >&2
> + exit 1
> +fi
> +cc $test_cppflags -c main.c
> +if [ $? -ne 0 ]; then
> + echo "failed to compile test" >&2
> + exit 1
> +fi
> +$dtrace -G -64 -s prov.d main.o
> +if [ $? -ne 0 ]; then
> + echo "failed to create DOF" >&2
> + exit 1
> +fi
> +cc $test_cppflags -o main main.o prov.o
> +if [ $? -ne 0 ]; then
> + echo "failed to link final executable" >&2
> + exit 1
> +fi
> +
> +# Test nusdtprobes settings.
> +#
> +# We will start teams of processes, each with 4 members, each in turn
> +# with 10 USDT probes. So, regardless of how many teams are run in
> +# succession, at any one time DTrace needs room for at least 40 USDT
> +# probes. The default and -xnusdtprobes=40 settings should work, but
> +# -xnusdtprobes=39 should not.
> +nteams=2
> +nmmbrs=4
> +
> +for nusdt in "" "-xnusdtprobes=40" "-xnusdtprobes=39"; do
> +
> + echo try '"'$nusdt'"'
> +
> + # Start DTrace.
> +
> + rm -f dtrace.out
> + $dtrace $dt_flags $nusdt -Zq -o dtrace.out -n '
> + testprov*:::
> + {
> + @[probeprov, probemod, probefunc, probename] = count();
> + }' &
> + dtpid=$!
> + sleep 2
> + if [[ ! -d /proc/$dtpid ]]; then
> + echo ERROR dtrace died
> + cat dtrace.out
> + exit 1
> + fi
> +
> + # Start teams of processes, only one team at a time.
> +
> + rm -f check.txt
> + for (( iteam = 0; iteam < $nteams; iteam++ )); do
> + # Start the team, writing out expected output.
> + sleep 2
> + for (( immbr = 0; immbr < $nmmbrs; immbr++ )); do
> + ./main &
> + pids[$immbr]=$!
> + for j in `seq 0 9`; do
> + echo testprov${pids[$immbr]} main main foo$j >> check.txt
> + done
> + done
> +
> + # Kill the team.
> + sleep 3
> + for (( immbr = 0; immbr < $nmmbrs; immbr++ )); do
> + kill ${pids[$immbr]}
> + done
> + done
> +
> + # Kill DTrace.
> +
> + kill $dtpid
> + wait
> +
> + # Strip the count() value out since we do not know its exact value.
> +
> + awk 'NF == 5 { print $1, $2, $3, $4 }' dtrace.out | sort > dtrace.out.sorted
> +
> + # Check.
> +
> + sort check.txt > check.txt.sorted
> + if [ x$nusdt == x"-xnusdtprobes=39" ]; then
> + # Results should not agree with check.txt.
> + if diff -q check.txt.sorted dtrace.out.sorted; then
> + echo ERROR unexpected agreement
> + cat dtrace.out
> + exit 0
> + fi
> + else
> + # Results should agree with check.txt.
> + if ! diff -q check.txt.sorted dtrace.out.sorted; then
> + echo ERROR output disagrees
> + echo === expected ===
> + cat check.txt.sorted
> + echo === got ===
> + cat dtrace.out.sorted
> + echo === diff ===
> + diff check.txt.sorted dtrace.out.sorted
> + exit 1
> + fi
> + fi
> +done
> +
> +echo success
> +
> +exit 0
> --
> 2.43.5
>
More information about the DTrace-devel
mailing list