[DTrace-devel] [PATCH v2] cg, printf: allow storing [u]stack() into vars and printf using %k
Eugene Loh
eugene.loh at oracle.com
Mon Oct 6 03:33:51 UTC 2025
On 10/3/25 23:18, Kris Van Hees wrote:
> diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> index 3ce13c74..75cd8b9e 100644
> --- a/libdtrace/dt_cg.c
> +++ b/libdtrace/dt_cg.c
> @@ -2647,11 +2647,16 @@ dt_cg_act_speculate(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
> dt_regset_free(drp, dnp->dn_reg);
> }
>
> -static uint64_t
> -dt_cg_stack_arg(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_actkind_t kind)
> +/*
> + * Extract [u]stack() argument data, storing number of frames and optional
> + * string data in 'nframes' and 'strsz', and returning the storage size for
> + * the call stack data (not including the size of the optional string data).
> + */
> +static uint_t
If this returns uint_t, then in dt_cg_agg() I would think that "size"
should become uint as well.
> +dt_cg_stack_arg(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_actkind_t kind,
> + uint_t *nframes, uint_t *strsz)
> {
> - int nframes;
> - int strsize = 0;
> + uint_t val;
"val" is a very generic name... not particularly descriptive. I would
think "nframes" would be much better. And so the arguments being passed
in would be better as "nframesp" and "strszp".
> dt_node_t *arg0 = dnp->dn_args;
> dt_node_t *arg1 = arg0 != NULL ? arg0->dn_list : NULL;
> int indopt, def, inderr;
> if (arg0 != NULL) {
> @@ -2760,17 +2772,18 @@ dt_cg_act_stack_sub(dt_pcb_t *pcb, dt_node_t *dnp, int reg, int off,
> dt_regset_free_args(drp);
> /* mov32 %r0, %r0 effectively masks the lower 32 bits. */
> emit(dlp, BPF_MOV32_REG(BPF_REG_0, BPF_REG_0));
> - emit(dlp, BPF_STORE(BPF_DW, reg, off, BPF_REG_0));
> + emit(dlp, BPF_STORE(BPF_W, reg, aloff + 12, BPF_REG_0));
It really seems to me that the MOV32_REG can be dropped.
I tried an experiment in which I preset the DW of some storage location,
then had a register filled with other contents, then stored BPF_W. I
saw that only the lower W was stored and the upper W was untouched.
Plus, if we were concerned about stuff at aloff+16 being written to,
it's about to get overwritten anyhow.
So, what is the rationale for keep the MOV32_REG?
> dt_regset_free(drp, BPF_REG_0);
> - }
> + } else
> + emit(dlp, BPF_STORE_IMM(BPF_W, reg, aloff + 12, 0));
>
> /* Call bpf_get_stack(ctx, buf, size, flags). */
> if (dt_regset_xalloc_args(drp) == -1)
> longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> dt_cg_access_dctx(BPF_REG_1, dlp, drp, DCTX_CTX);
> emit(dlp, BPF_MOV_REG(BPF_REG_2, reg));
> - emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, off + prefsz));
> - emit(dlp, BPF_MOV_IMM(BPF_REG_3, stacksize));
> + emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, aloff + 16));
> + emit(dlp, BPF_MOV_IMM(BPF_REG_3, nframes * sizeof(uint64_t)));
> if (kind == DTRACEACT_USTACK)
> emit(dlp, BPF_MOV_IMM(BPF_REG_4, BPF_F_USER_STACK));
> else {
> diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
> index ba5463a0..bcaa4221 100644
> --- a/libdtrace/dt_open.c
> +++ b/libdtrace/dt_open.c
> @@ -1058,16 +1058,33 @@ dt_vopen(int version, int flags, int *errp,
> "<DYN>", ctf_lookup_by_name(dmp->dm_ctfp, "void"));
>
> /*
> - * The stack type is added as a typedef of uint64_t[MAXFRAMES]. The
> - * final value of MAXFRAMES may be adjusted with the "stackframes"
> - * option.
> + * Define:
> + * typedef struct dt_stack {
> + * uint32_t frames;
> + * uint32_t strsz; // optional string blob size
> + * uint32_t type; // type of stack (kernel/user)
I guess this is now is_user.
> + * uint32_t pid; // process id (or 0 for kernel)
> + * uint64_t addrs[n]; // stack trace addresses
> + * } dt_stack_t;
> + *
> + * It is done in two stages because we won't know the size of the addrs
> + * array until runtime options have been processed. We add all members
> + * (except for addrs) here, and then append the addrs array in
> + * dtrace_init().
> */
> - ctr.ctr_contents = ctf_lookup_by_name(dmp->dm_ctfp, "uint64_t");
> - ctr.ctr_index = ctf_lookup_by_name(dmp->dm_ctfp, "long");
> - ctr.ctr_nelems = _dtrace_stackframes;
> -
> - dtp->dt_type_stack = ctf_add_typedef(dmp->dm_ctfp, CTF_ADD_ROOT,
> - "dt_stack", ctf_add_array(dmp->dm_ctfp, CTF_ADD_ROOT, &ctr));
> + {
> + ctf_id_t stid, mbid;
> +
> + stid = ctf_add_struct(dmp->dm_ctfp, CTF_ADD_ROOT, "dt_stack");
> + dtp->dt_type_stack = ctf_add_typedef(dmp->dm_ctfp, CTF_ADD_ROOT,
> + "dt_stack_t", stid);
> +
> + mbid = ctf_lookup_by_name(dmp->dm_ctfp, "uint32_t");
> + ctf_add_member(dmp->dm_ctfp, stid, "depth", mbid);
> + ctf_add_member(dmp->dm_ctfp, stid, "strsz", mbid);
> + ctf_add_member(dmp->dm_ctfp, stid, "is_user", mbid);
> + ctf_add_member(dmp->dm_ctfp, stid, "pid", mbid);
> + }
>
> dtp->dt_type_symaddr = ctf_add_typedef(dmp->dm_ctfp, CTF_ADD_ROOT,
> "_symaddr", ctf_lookup_by_name(dmp->dm_ctfp, "void"));
> diff --git a/libdtrace/dt_printf.c b/libdtrace/dt_printf.c
> index a3e15397..36169ed3 100644
> --- a/libdtrace/dt_printf.c
> +++ b/libdtrace/dt_printf.c
> @@ -569,18 +570,41 @@ dt_print_stack_user(dtrace_hdl_t *dtp, FILE *fp, const char *format,
> return err;
> }
>
> -/*ARGSUSED*/
> +/*
> + * The data at vaddr is structured as follows:
> + * uint32_t depth
> + * uint32_t strsz
> + * uint32_t type
Again, now "is_user".
> + * uint32_t pid
> + * uint64_t addrs[depth & 0x7fffffff]
> + *
> + * The 'depth' member provides the maximum number of addresses in the stack
> + * trace.
> + * The 'strsz' member provides the size of the optional string blob that is
> + * appended to the stack trace data.
> + * The 'type' member identifies the stack trace as kernel space (0) or
> + * userspace (1).
"is_user"
> + * The 'pid' member provides the userspace process id for userspace stack
> + * traces and otherwise will be 0.
> + * The 'addrs' array provides the stack traces addresses. If the stack trace
> + * is shorter than 'depth', remaining addresses will be 0 and can be ignored.
> + */
> static int
> pfprint_stack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
> const dt_pfargd_t *pfd, const void *vaddr, size_t size,
> uint64_t normal, uint64_t sig)
> {
> int width;
> - const dtrace_recdesc_t *rec = pfd->pfd_rec;
> - caddr_t addr = (caddr_t)vaddr;
> - uint32_t depth = DTRACE_STACK_NFRAMES(rec->dtrd_arg);
> + uint32_t *vals = (uint32_t *)vaddr;
> + uint32_t depth = vals[0];
> + uint32_t strsz = vals[1];
> + uint32_t is_user = vals[2];
> + caddr_t addr = (caddr_t)(vals + (is_user ? 2 : 4));
> int err = 0;
>
> + if (depth > dtp->dt_options[DTRACEOPT_MAXFRAMES])
> + return dt_set_errno(dtp, EDT_DSIZE);
> +
> if (depth == 0)
> return 0;
>
> @@ -2487,9 +2511,8 @@ dt_print_stack(dtrace_hdl_t *dtp, FILE *fp, void *fmtdata,
> else
> format = ((dt_pfargv_t *)fmtdata)->pfv_format;
>
> - /* pfprint_stack() uses pfd_rec, pfd_flags, and pfd_width only */
> + /* pfprint_stack() uses pfd_flags, and pfd_width only */
The comma can now be dropped.
> memset(&pfd, 0, sizeof(pfd));
> - pfd.pfd_rec = recs;
> pfd.pfd_flags = DT_PFCONV_LEFT;
>
> if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
> diff --git a/test/unittest/funcs/stack/tst.asgn_dvar.r.p b/test/unittest/funcs/stack/tst.asgn_dvar.r.p
> new file mode 100755
> index 00000000..31491c78
> --- /dev/null
> +++ b/test/unittest/funcs/stack/tst.asgn_dvar.r.p
> @@ -0,0 +1,35 @@
> +#!/usr/bin/gawk -f
> +
> +/hrtimer_nanosleep/ {
> + # check probe
> + if ( $1 != "hrtimer_nanosleep:entry" ) {
> + print "ERROR: expected fun:prb = hrtimer_nanosleep:entry";
> + exit(0);
> + }
> +
> + # check stack(3)
> + getline;
> + if (index($1, "`hrtimer_nanosleep+0x") == 0 &&
> + match($1, "`hrtimer_nanosleep") == 0) {
The match() should have a "$" at the end of nanosleep. That is,
match($1, "`hrtimer_nanosleep$")
Otherwise, there would be no point in the index() check.
> + print "ERROR: expected leaf frame to be hrtimer_nanosleep";
> + exit(0);
> + }
> + getline;
> + if (NF == 0) {
> + print "ERROR: missing second frame";
> + exit(0);
> + }
> + getline;
> + if (NF == 0) {
> + print "ERROR: missing third frame";
> + exit(0);
> + }
> + getline;
> + if (NF > 0) {
> + print "ERROR: expected stack(3) to have only three frames";
> + exit(0);
> + }
> +
> + print "success";
> + exit(0);
> +}
> diff --git a/test/unittest/funcs/stack/tst.store.d b/test/unittest/funcs/stack/tst.store.d
> new file mode 100644
> index 00000000..68577e27
> --- /dev/null
> +++ b/test/unittest/funcs/stack/tst.store.d
> @@ -0,0 +1,28 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +/*
> + * ASSERTION: It is possible to store to members of dt_stack_t.
> + */
> +
> +/* @@trigger: periodic_output */
> +
> +fbt::hrtimer_nanosleep:entry
> +{
> + v = stack(5);
I see a problem here with some older kernels. The stack is often:
vmlinux`hrtimer_nanosleep+0x5
vmlinux`common_nsleep+0x40
vmlinux`__x64_sys_clock_nanosleep+0xdf
vmlinux`do_syscall_64+0x35
vmlinux`entry_SYSCALL_64+0xa6
So it is only 5 frames deep and "stack(5)" doesn't clip anything. But
that's not the problem. The problem is that on some older kernels I get:
vmlinux`hrtimer_nanosleep+0x1
vmlinux`x64_sys_call+0x1367
vmlinux`do_syscall_64+0x58
vmlinux`entry_SYSCALL_64+0x1b3
There are only four frames. So we get the error:
ERROR: missing stack(5) frame addrs[4]
I think it makes most sense to drop the "5" to "3".
> + printf("%k", v);
> + v.depth = 2;
> + printf("%k", v);
> + v.depth = 5;
> + printf("%k", v);
> + exit(0);
> +}
> +
> +ERROR
> +{
> + exit(1);
> +}
> diff --git a/test/unittest/funcs/ustack/tst.asgn_dvar.r.p b/test/unittest/funcs/ustack/tst.asgn_dvar.r.p
> new file mode 100755
> index 00000000..3b69efa4
> --- /dev/null
> +++ b/test/unittest/funcs/ustack/tst.asgn_dvar.r.p
> @@ -0,0 +1,45 @@
> +#!/usr/bin/gawk -f
> +
> +/myfunc_z/ {
> + # check probe
> + if ( $1 != "myfunc_z:entry" ) {
> + print "ERROR: expected fun:prb = myfunc_z:entry";
> + exit(0);
> + }
> +
> + # check stack(3)
> + getline;
> + if (index($1, "`myfunc_z+0x") == 0 &&
> + match($1, "`myfunc_z") == 0) {
Need a $ at the end of myfunc_z: "`myfunc_z$".
> + print "ERROR: expected leaf frame to be myfunc_z";
> + exit(0);
> + }
> + getline;
> + if (NF == 0) {
> + print "ERROR: missing second frame";
> + exit(0);
> + }
> + if (index($1, "`myfunc_x+0x") == 0 &&
> + match($1, "`myfunc_x") == 0) {
Need a $ at the end of myfunc_x: "`myfunc_x$".
> + print "ERROR: expected leaf frame to be myfunc_x";
This fails on newer kernels. The stack is usually
myfunc_z
myfunc_x
myfunc_w
and that's what the check looks for. One might reasonably ask, "What
happened to myfunc_y?" That answer is explained, e.g., in a comment
block in test/unittest/ustack/tst.ustack25_pid.r.p . So the
ustack/asgn_*var tests (maybe just asgn_dvar.r.p) need the same magic to
account for new x86 kernels.
> + exit(0);
> + }
> + getline;
> + if (NF == 0) {
> + print "ERROR: missing third frame";
> + exit(0);
> + }
> + if (index($1, "`myfunc_w+0x") == 0 &&
> + match($1, "`myfunc_w") == 0) {
Need a $ at the end of myfunc_w: "`myfunc_w$".
> + print "ERROR: expected leaf frame to be myfunc_w";
> + exit(0);
> + }
> + getline;
> + if (NF > 0) {
> + print "ERROR: expected stack(3) to have only three frames";
> + exit(0);
> + }
> +
> + print "success";
> + exit(0);
> +}
> diff --git a/test/unittest/funcs/ustack/tst.ref_pid.d b/test/unittest/funcs/ustack/tst.ref_pid.d
> new file mode 100644
> index 00000000..6ed62f12
> --- /dev/null
> +++ b/test/unittest/funcs/ustack/tst.ref_pid.d
> @@ -0,0 +1,37 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +/*
> + * ASSERTION: Test accessing the 'pid' field of s userspace stack.
s/ s / a /
> + */
> +
> +/* @@trigger: ustack-tst-basic */
> +
> +pid$target:a.out:myfunc_z:entry
> +{
> + trace("Expecting ");
> + trace($target);
> + trace(", got ");
> + trace(ustack(7).pid);
> +}
> +
> +pid$target:a.out:myfunc_z:entry
> +/ustack(7).pid == $target/
> +{
> + exit(0);
> +}
> +
> +pid$target:a.out:myfunc_z:entry
> +/ustack(7).pid != $target/
> +{
> + exit(1);
> +}
> +
> +ERROR
> +{
> + exit(1);
> +}
> diff --git a/test/unittest/funcs/ustack/tst.ref_strsz.d b/test/unittest/funcs/ustack/tst.ref_strsz.d
> new file mode 100644
> index 00000000..b78a00c0
> --- /dev/null
> +++ b/test/unittest/funcs/ustack/tst.ref_strsz.d
> @@ -0,0 +1,35 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +/*
> + * ASSERTION: Test accessing the 'strsz' field of s userspace stack.
s/ s / a /
> + */
> +
> +/* @@trigger: ustack-tst-basic */
> +
> +pid$target:a.out:myfunc_z:entry
> +{
> + trace("Expecting 42, got ");
> + trace(ustack(7, 42).strsz);
> +}
> +
> +pid$target:a.out:myfunc_z:entry
> +/ustack(7, 42).strsz == 42/
> +{
> + exit(0);
> +}
> +
> +pid$target:a.out:myfunc_z:entry
> +/ustack(7, 42).strsz != 42/
> +{
> + exit(1);
> +}
> +
> +ERROR
> +{
> + exit(1);
> +}
> diff --git a/test/unittest/funcs/ustack/tst.ref_strsz.r b/test/unittest/funcs/ustack/tst.ref_strsz.r
> new file mode 100644
> index 00000000..840f7e21
> --- /dev/null
> +++ b/test/unittest/funcs/ustack/tst.ref_strsz.r
> @@ -0,0 +1,6 @@
> + FUNCTION:NAME
> + myfunc_z:entry Expecting 42, got 42
> + myfunc_z:entry
> +
> +-- @@stderr --
> +dtrace: script 'test/unittest/funcs/ustack/tst.ref_strsz.d' matched 4 probes
> diff --git a/test/unittest/funcs/ustack/tst.store.d b/test/unittest/funcs/ustack/tst.store.d
Okay, but need .r and .r.p files???
> new file mode 100644
> index 00000000..6a9f7c94
> --- /dev/null
> +++ b/test/unittest/funcs/ustack/tst.store.d
> @@ -0,0 +1,25 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +/*
> + * ASSERTION: It is possible to store to members of dt_stack_t.
> + */
> +
> +/* @@trigger: ustack-tst-basic */
> +
> +pid$target:a.out:myfunc_z:entry
> +{
> + v = ustack(5);
> + v.depth = 2;
> + printf("%k", v);
> + exit(0);
> +}
> +
> +ERROR
> +{
> + exit(1);
> +}
> diff --git a/test/unittest/printf/tst.ustack25_pid.r b/test/unittest/printf/tst.ustack25_pid.r
> new file mode 100644
> index 00000000..e7732fb8
> --- /dev/null
> +++ b/test/unittest/printf/tst.ustack25_pid.r
> @@ -0,0 +1,28 @@
> +
> + ustack-tst-basic`myfunc_z
> + ustack-tst-basic`myfunc_y+{ptr}
> + ustack-tst-basic`myfunc_x+{ptr}
> + ustack-tst-basic`myfunc_w+{ptr}
> + ustack-tst-basic`myfunc_v+{ptr}
> + ustack-tst-basic`myfunc_u+{ptr}
> + ustack-tst-basic`myfunc_t+{ptr}
> + ustack-tst-basic`myfunc_s+{ptr}
> + ustack-tst-basic`myfunc_r+{ptr}
> + ustack-tst-basic`myfunc_q+{ptr}
> + ustack-tst-basic`myfunc_p+{ptr}
> + ustack-tst-basic`myfunc_o+{ptr}
> + ustack-tst-basic`myfunc_n+{ptr}
> + ustack-tst-basic`myfunc_m+{ptr}
> + ustack-tst-basic`myfunc_l+{ptr}
> + ustack-tst-basic`myfunc_k+{ptr}
> + ustack-tst-basic`myfunc_j+{ptr}
> + ustack-tst-basic`myfunc_i+{ptr}
> + ustack-tst-basic`myfunc_h+{ptr}
> + ustack-tst-basic`myfunc_g+{ptr}
> + ustack-tst-basic`myfunc_f+{ptr}
> + ustack-tst-basic`myfunc_e+{ptr}
> + ustack-tst-basic`myfunc_d+{ptr}
> + ustack-tst-basic`myfunc_c+{ptr}
> + ustack-tst-basic`myfunc_b+{ptr}
> + ustack-tst-basic`myfunc_a+{ptr}
Same problem as in v1 of the patch: mismatches. Maybe get rid of the
leading spaces.
> +
> diff --git a/test/unittest/printf/tst.ustack25_pid.r.p b/test/unittest/printf/tst.ustack25_pid.r.p
> new file mode 100755
> index 00000000..c0110e00
> --- /dev/null
> +++ b/test/unittest/printf/tst.ustack25_pid.r.p
> @@ -0,0 +1,37 @@
> +#!/bin/bash
> +
> +# A pid entry probe places a uprobe on the first instruction of a function.
> +# Unfortunately, this is so early in the function preamble that the function
> +# frame pointer has not yet been established and the actual caller of the
> +# traced function is missed.
> +#
> +# In Linux 6.11, x86-specific heuristics are introduced to fix this problem.
> +# See commit cfa7f3d
> +# ("perf,x86: avoid missing caller address in stack traces captured in uprobe")
> +# for both a description of the problem and an explanation of the heuristics.
> +#
> +# Add post processing to these test results to allow for both cases:
> +# caller frame is missing or not missing.
> +
> +missing_caller=1
> +if [ $(uname -m) == "x86_64" ]; then
> + read MAJOR MINOR <<< `uname -r | grep -Eo '^[0-9]+\.[0-9]+' | tr '.' ' '`
> +
> + if [ $MAJOR -ge 6 ]; then
> + if [ $MAJOR -gt 6 -o $MINOR -ge 11 ]; then
> + missing_caller=0
> + fi
> + fi
> +fi
> +
> +if [ $missing_caller -eq 1 ]; then
> + # Add the missing caller function after the current function.
> + awk '{ print }
> + /myfunc_z/ { print " ustack-tst-basic`myfunc_y+{ptr}" }'
Get rid of the leading spaces?
> +else
> + # The .r results file has an extra frame at the end in case
> + # the caller frame is missing and the 25-frame limit goes
> + # "too far." If the caller is not missing, fake that extra frame.
> + awk '{ print }
> + /myfunc_b/ { print " ustack-tst-basic`myfunc_a+{ptr}" }'
Get rid of the leading spaces?
> +fi
More information about the DTrace-devel
mailing list