[DTrace-devel] [PATCH v2] cg, printf: allow storing [u]stack() into vars and printf using %k

Eugene Loh eugene.loh at oracle.com
Mon Oct 6 03:33:51 UTC 2025


On 10/3/25 23:18, Kris Van Hees wrote:

> diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> index 3ce13c74..75cd8b9e 100644
> --- a/libdtrace/dt_cg.c
> +++ b/libdtrace/dt_cg.c
> @@ -2647,11 +2647,16 @@ dt_cg_act_speculate(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
>   	dt_regset_free(drp, dnp->dn_reg);
>   }
>   
> -static uint64_t
> -dt_cg_stack_arg(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_actkind_t kind)
> +/*
> + * Extract [u]stack() argument data, storing number of frames and optional
> + * string data in 'nframes' and 'strsz', and returning the storage size for
> + * the call stack data (not including the size of the optional string data).
> + */
> +static uint_t

If this returns uint_t, then in dt_cg_agg() I would think that "size" 
should become uint as well.

> +dt_cg_stack_arg(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_actkind_t kind,
> +		uint_t *nframes, uint_t *strsz)
>   {
> -	int		nframes;
> -	int		strsize = 0;
> +	uint_t		val;

"val" is a very generic name...  not particularly descriptive.  I would 
think "nframes" would be much better.  And so the arguments being passed 
in would be better as "nframesp" and "strszp".

>   	dt_node_t	*arg0 = dnp->dn_args;
>   	dt_node_t	*arg1 = arg0 != NULL ? arg0->dn_list : NULL;
>   	int		indopt, def, inderr;
>   	if (arg0 != NULL) {
> @@ -2760,17 +2772,18 @@ dt_cg_act_stack_sub(dt_pcb_t *pcb, dt_node_t *dnp, int reg, int off,
>   		dt_regset_free_args(drp);
>   		/* mov32 %r0, %r0 effectively masks the lower 32 bits. */
>   		emit(dlp,  BPF_MOV32_REG(BPF_REG_0, BPF_REG_0));
> -		emit(dlp,  BPF_STORE(BPF_DW, reg, off, BPF_REG_0));
> +		emit(dlp,  BPF_STORE(BPF_W, reg, aloff + 12, BPF_REG_0));

It really seems to me that the MOV32_REG can be dropped.

I tried an experiment in which I preset the DW of some storage location, 
then had a register filled with other contents, then stored BPF_W.  I 
saw that only the lower W was stored and the upper W was untouched.

Plus, if we were concerned about stuff at aloff+16 being written to, 
it's about to get overwritten anyhow.

So, what is the rationale for keep the MOV32_REG?

>   		dt_regset_free(drp, BPF_REG_0);
> -	}
> +	} else
> +		emit(dlp,  BPF_STORE_IMM(BPF_W, reg, aloff + 12, 0));
>   
>   	/* Call bpf_get_stack(ctx, buf, size, flags). */
>   	if (dt_regset_xalloc_args(drp) == -1)
>   		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
>   	dt_cg_access_dctx(BPF_REG_1, dlp, drp, DCTX_CTX);
>   	emit(dlp,  BPF_MOV_REG(BPF_REG_2, reg));
> -	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, off + prefsz));
> -	emit(dlp,  BPF_MOV_IMM(BPF_REG_3, stacksize));
> +	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, aloff + 16));
> +	emit(dlp,  BPF_MOV_IMM(BPF_REG_3, nframes * sizeof(uint64_t)));
>   	if (kind == DTRACEACT_USTACK)
>   		emit(dlp,  BPF_MOV_IMM(BPF_REG_4, BPF_F_USER_STACK));
>   	else {
> diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
> index ba5463a0..bcaa4221 100644
> --- a/libdtrace/dt_open.c
> +++ b/libdtrace/dt_open.c
> @@ -1058,16 +1058,33 @@ dt_vopen(int version, int flags, int *errp,
>   	    "<DYN>", ctf_lookup_by_name(dmp->dm_ctfp, "void"));
>   
>   	/*
> -	 * The stack type is added as a typedef of uint64_t[MAXFRAMES].  The
> -	 * final value of MAXFRAMES may be adjusted with the "stackframes"
> -	 * option.
> +	 * Define:
> +	 *   typedef struct dt_stack {
> +	 *   	uint32_t	frames;
> +	 *	uint32_t	strsz;		// optional string blob size
> +	 *	uint32_t	type;		// type of stack (kernel/user)

I guess this is now is_user.

> +	 *	uint32_t	pid;		// process id (or 0 for kernel)
> +	 *   	uint64_t	addrs[n];	// stack trace addresses
> +	 *   } dt_stack_t;
> +	 *
> +	 * It is done in two stages because we won't know the size of the addrs
> +	 * array until runtime options have been processed.  We add all members
> +	 * (except for addrs) here, and then append the addrs array in
> +	 * dtrace_init().
>   	 */
> -	ctr.ctr_contents = ctf_lookup_by_name(dmp->dm_ctfp, "uint64_t");
> -	ctr.ctr_index = ctf_lookup_by_name(dmp->dm_ctfp, "long");
> -	ctr.ctr_nelems = _dtrace_stackframes;
> -
> -	dtp->dt_type_stack = ctf_add_typedef(dmp->dm_ctfp, CTF_ADD_ROOT,
> -		"dt_stack", ctf_add_array(dmp->dm_ctfp, CTF_ADD_ROOT, &ctr));
> +	{
> +		ctf_id_t	stid, mbid;
> +
> +		stid = ctf_add_struct(dmp->dm_ctfp, CTF_ADD_ROOT, "dt_stack");
> +		dtp->dt_type_stack = ctf_add_typedef(dmp->dm_ctfp, CTF_ADD_ROOT,
> +						     "dt_stack_t", stid);
> +
> +		mbid = ctf_lookup_by_name(dmp->dm_ctfp, "uint32_t");
> +		ctf_add_member(dmp->dm_ctfp, stid, "depth", mbid);
> +		ctf_add_member(dmp->dm_ctfp, stid, "strsz", mbid);
> +		ctf_add_member(dmp->dm_ctfp, stid, "is_user", mbid);
> +		ctf_add_member(dmp->dm_ctfp, stid, "pid", mbid);
> +	}
>   
>   	dtp->dt_type_symaddr = ctf_add_typedef(dmp->dm_ctfp, CTF_ADD_ROOT,
>   	    "_symaddr", ctf_lookup_by_name(dmp->dm_ctfp, "void"));
> diff --git a/libdtrace/dt_printf.c b/libdtrace/dt_printf.c
> index a3e15397..36169ed3 100644
> --- a/libdtrace/dt_printf.c
> +++ b/libdtrace/dt_printf.c
> @@ -569,18 +570,41 @@ dt_print_stack_user(dtrace_hdl_t *dtp, FILE *fp, const char *format,
>   	return err;
>   }
>   
> -/*ARGSUSED*/
> +/*
> + * The data at vaddr is structured as follows:
> + *	uint32_t	depth
> + *	uint32_t	strsz
> + *	uint32_t	type

Again, now "is_user".

> + *	uint32_t	pid
> + *	uint64_t	addrs[depth & 0x7fffffff]
> + *
> + * The 'depth' member provides the maximum number of addresses in the stack
> + * trace.
> + * The 'strsz' member provides the size of the optional string blob that is
> + * appended to the stack trace data.
> + * The 'type' member identifies the stack trace as kernel space (0) or
> + * userspace (1).

"is_user"

> + * The 'pid' member provides the userspace process id for userspace stack
> + * traces and otherwise will be 0.
> + * The 'addrs' array provides the stack traces addresses.  If the stack trace
> + * is shorter than 'depth', remaining addresses will be 0 and can be ignored.
> + */
>   static int
>   pfprint_stack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
>   	      const dt_pfargd_t *pfd, const void *vaddr, size_t size,
>   	      uint64_t normal, uint64_t sig)
>   {
>   	int width;
> -	const dtrace_recdesc_t *rec = pfd->pfd_rec;
> -	caddr_t addr = (caddr_t)vaddr;
> -	uint32_t depth = DTRACE_STACK_NFRAMES(rec->dtrd_arg);
> +	uint32_t *vals = (uint32_t *)vaddr;
> +	uint32_t depth = vals[0];
> +	uint32_t strsz = vals[1];
> +	uint32_t is_user = vals[2];
> +	caddr_t addr = (caddr_t)(vals + (is_user ? 2 : 4));
>   	int err = 0;
>   
> +	if (depth > dtp->dt_options[DTRACEOPT_MAXFRAMES])
> +		return dt_set_errno(dtp, EDT_DSIZE);
> +
>   	if (depth == 0)
>   		return 0;
>   
> @@ -2487,9 +2511,8 @@ dt_print_stack(dtrace_hdl_t *dtp, FILE *fp, void *fmtdata,
>   	else
>   		format = ((dt_pfargv_t *)fmtdata)->pfv_format;
>   
> -	/* pfprint_stack() uses pfd_rec, pfd_flags, and pfd_width only */
> +	/* pfprint_stack() uses pfd_flags, and pfd_width only */

The comma can now be dropped.

>   	memset(&pfd, 0, sizeof(pfd));
> -	pfd.pfd_rec = recs;
>   	pfd.pfd_flags = DT_PFCONV_LEFT;
>   
>   	if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
> diff --git a/test/unittest/funcs/stack/tst.asgn_dvar.r.p b/test/unittest/funcs/stack/tst.asgn_dvar.r.p
> new file mode 100755
> index 00000000..31491c78
> --- /dev/null
> +++ b/test/unittest/funcs/stack/tst.asgn_dvar.r.p
> @@ -0,0 +1,35 @@
> +#!/usr/bin/gawk -f
> +
> +/hrtimer_nanosleep/ {
> +    # check probe
> +    if ( $1 != "hrtimer_nanosleep:entry" ) {
> +        print "ERROR: expected fun:prb = hrtimer_nanosleep:entry";
> +        exit(0);
> +    }
> +
> +    # check stack(3)
> +    getline;
> +    if (index($1, "`hrtimer_nanosleep+0x") == 0 &&
> +        match($1, "`hrtimer_nanosleep") == 0) {

The match() should have a "$" at the end of nanosleep.  That is,

         match($1, "`hrtimer_nanosleep$")

Otherwise, there would be no point in the index() check.

> +        print "ERROR: expected leaf frame to be hrtimer_nanosleep";
> +        exit(0);
> +    }
> +    getline;
> +    if (NF == 0) {
> +        print "ERROR: missing second frame";
> +        exit(0);
> +    }
> +    getline;
> +    if (NF == 0) {
> +        print "ERROR: missing third frame";
> +        exit(0);
> +    }
> +    getline;
> +    if (NF > 0) {
> +        print "ERROR: expected stack(3) to have only three frames";
> +        exit(0);
> +    }
> +
> +    print "success";
> +    exit(0);
> +}
> diff --git a/test/unittest/funcs/stack/tst.store.d b/test/unittest/funcs/stack/tst.store.d
> new file mode 100644
> index 00000000..68577e27
> --- /dev/null
> +++ b/test/unittest/funcs/stack/tst.store.d
> @@ -0,0 +1,28 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +/*
> + * ASSERTION: It is possible to store to members of dt_stack_t.
> + */
> +
> +/* @@trigger: periodic_output */
> +
> +fbt::hrtimer_nanosleep:entry
> +{
> +	v = stack(5);

I see a problem here with some older kernels.  The stack is often:

         vmlinux`hrtimer_nanosleep+0x5
         vmlinux`common_nsleep+0x40
         vmlinux`__x64_sys_clock_nanosleep+0xdf
         vmlinux`do_syscall_64+0x35
         vmlinux`entry_SYSCALL_64+0xa6

So it is only 5 frames deep and "stack(5)" doesn't clip anything. But 
that's not the problem.  The problem is that on some older kernels I get:

         vmlinux`hrtimer_nanosleep+0x1
         vmlinux`x64_sys_call+0x1367
         vmlinux`do_syscall_64+0x58
         vmlinux`entry_SYSCALL_64+0x1b3

There are only four frames.  So we get the error:

         ERROR: missing stack(5) frame addrs[4]

I think it makes most sense to drop the "5" to "3".

> +	printf("%k", v);
> +	v.depth = 2;
> +	printf("%k", v);
> +	v.depth = 5;
> +	printf("%k", v);
> +	exit(0);
> +}
> +
> +ERROR
> +{
> +	exit(1);
> +}
> diff --git a/test/unittest/funcs/ustack/tst.asgn_dvar.r.p b/test/unittest/funcs/ustack/tst.asgn_dvar.r.p
> new file mode 100755
> index 00000000..3b69efa4
> --- /dev/null
> +++ b/test/unittest/funcs/ustack/tst.asgn_dvar.r.p
> @@ -0,0 +1,45 @@
> +#!/usr/bin/gawk -f
> +
> +/myfunc_z/ {
> +    # check probe
> +    if ( $1 != "myfunc_z:entry" ) {
> +        print "ERROR: expected fun:prb = myfunc_z:entry";
> +        exit(0);
> +    }
> +
> +    # check stack(3)
> +    getline;
> +    if (index($1, "`myfunc_z+0x") == 0 &&
> +        match($1, "`myfunc_z") == 0) {

Need a $ at the end of myfunc_z:  "`myfunc_z$".

> +        print "ERROR: expected leaf frame to be myfunc_z";
> +        exit(0);
> +    }
> +    getline;
> +    if (NF == 0) {
> +        print "ERROR: missing second frame";
> +        exit(0);
> +    }
> +    if (index($1, "`myfunc_x+0x") == 0 &&
> +        match($1, "`myfunc_x") == 0) {

Need a $ at the end of myfunc_x:  "`myfunc_x$".

> +        print "ERROR: expected leaf frame to be myfunc_x";

This fails on newer kernels.  The stack is usually
         myfunc_z
         myfunc_x
         myfunc_w
and that's what the check looks for.  One might reasonably ask, "What 
happened to myfunc_y?"  That answer is explained, e.g., in a comment 
block in test/unittest/ustack/tst.ustack25_pid.r.p .  So the 
ustack/asgn_*var tests (maybe just asgn_dvar.r.p) need the same magic to 
account for new x86 kernels.

> +        exit(0);
> +    }
> +    getline;
> +    if (NF == 0) {
> +        print "ERROR: missing third frame";
> +        exit(0);
> +    }
> +    if (index($1, "`myfunc_w+0x") == 0 &&
> +        match($1, "`myfunc_w") == 0) {

Need a $ at the end of myfunc_w:  "`myfunc_w$".

> +        print "ERROR: expected leaf frame to be myfunc_w";
> +        exit(0);
> +    }
> +    getline;
> +    if (NF > 0) {
> +        print "ERROR: expected stack(3) to have only three frames";
> +        exit(0);
> +    }
> +
> +    print "success";
> +    exit(0);
> +}
> diff --git a/test/unittest/funcs/ustack/tst.ref_pid.d b/test/unittest/funcs/ustack/tst.ref_pid.d
> new file mode 100644
> index 00000000..6ed62f12
> --- /dev/null
> +++ b/test/unittest/funcs/ustack/tst.ref_pid.d
> @@ -0,0 +1,37 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +/*
> + * ASSERTION: Test accessing the 'pid' field of s userspace stack.

s/ s / a /

> + */
> +
> +/* @@trigger: ustack-tst-basic */
> +
> +pid$target:a.out:myfunc_z:entry
> +{
> +	trace("Expecting ");
> +	trace($target);
> +	trace(", got ");
> +	trace(ustack(7).pid);
> +}
> +
> +pid$target:a.out:myfunc_z:entry
> +/ustack(7).pid == $target/
> +{
> +	exit(0);
> +}
> +
> +pid$target:a.out:myfunc_z:entry
> +/ustack(7).pid != $target/
> +{
> +	exit(1);
> +}
> +
> +ERROR
> +{
> +	exit(1);
> +}
> diff --git a/test/unittest/funcs/ustack/tst.ref_strsz.d b/test/unittest/funcs/ustack/tst.ref_strsz.d
> new file mode 100644
> index 00000000..b78a00c0
> --- /dev/null
> +++ b/test/unittest/funcs/ustack/tst.ref_strsz.d
> @@ -0,0 +1,35 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +/*
> + * ASSERTION: Test accessing the 'strsz' field of s userspace stack.

s/ s / a /

> + */
> +
> +/* @@trigger: ustack-tst-basic */
> +
> +pid$target:a.out:myfunc_z:entry
> +{
> +	trace("Expecting 42, got ");
> +	trace(ustack(7, 42).strsz);
> +}
> +
> +pid$target:a.out:myfunc_z:entry
> +/ustack(7, 42).strsz == 42/
> +{
> +	exit(0);
> +}
> +
> +pid$target:a.out:myfunc_z:entry
> +/ustack(7, 42).strsz != 42/
> +{
> +	exit(1);
> +}
> +
> +ERROR
> +{
> +	exit(1);
> +}
> diff --git a/test/unittest/funcs/ustack/tst.ref_strsz.r b/test/unittest/funcs/ustack/tst.ref_strsz.r
> new file mode 100644
> index 00000000..840f7e21
> --- /dev/null
> +++ b/test/unittest/funcs/ustack/tst.ref_strsz.r
> @@ -0,0 +1,6 @@
> +                   FUNCTION:NAME
> +                  myfunc_z:entry   Expecting 42, got                         42
> +                  myfunc_z:entry
> +
> +-- @@stderr --
> +dtrace: script 'test/unittest/funcs/ustack/tst.ref_strsz.d' matched 4 probes
> diff --git a/test/unittest/funcs/ustack/tst.store.d b/test/unittest/funcs/ustack/tst.store.d

Okay, but need .r and .r.p files???

> new file mode 100644
> index 00000000..6a9f7c94
> --- /dev/null
> +++ b/test/unittest/funcs/ustack/tst.store.d
> @@ -0,0 +1,25 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +/*
> + * ASSERTION: It is possible to store to members of dt_stack_t.
> + */
> +
> +/* @@trigger: ustack-tst-basic */
> +
> +pid$target:a.out:myfunc_z:entry
> +{
> +	v = ustack(5);
> +	v.depth = 2;
> +	printf("%k", v);
> +	exit(0);
> +}
> +
> +ERROR
> +{
> +	exit(1);
> +}
> diff --git a/test/unittest/printf/tst.ustack25_pid.r b/test/unittest/printf/tst.ustack25_pid.r
> new file mode 100644
> index 00000000..e7732fb8
> --- /dev/null
> +++ b/test/unittest/printf/tst.ustack25_pid.r
> @@ -0,0 +1,28 @@
> +
> +              ustack-tst-basic`myfunc_z
> +              ustack-tst-basic`myfunc_y+{ptr}
> +              ustack-tst-basic`myfunc_x+{ptr}
> +              ustack-tst-basic`myfunc_w+{ptr}
> +              ustack-tst-basic`myfunc_v+{ptr}
> +              ustack-tst-basic`myfunc_u+{ptr}
> +              ustack-tst-basic`myfunc_t+{ptr}
> +              ustack-tst-basic`myfunc_s+{ptr}
> +              ustack-tst-basic`myfunc_r+{ptr}
> +              ustack-tst-basic`myfunc_q+{ptr}
> +              ustack-tst-basic`myfunc_p+{ptr}
> +              ustack-tst-basic`myfunc_o+{ptr}
> +              ustack-tst-basic`myfunc_n+{ptr}
> +              ustack-tst-basic`myfunc_m+{ptr}
> +              ustack-tst-basic`myfunc_l+{ptr}
> +              ustack-tst-basic`myfunc_k+{ptr}
> +              ustack-tst-basic`myfunc_j+{ptr}
> +              ustack-tst-basic`myfunc_i+{ptr}
> +              ustack-tst-basic`myfunc_h+{ptr}
> +              ustack-tst-basic`myfunc_g+{ptr}
> +              ustack-tst-basic`myfunc_f+{ptr}
> +              ustack-tst-basic`myfunc_e+{ptr}
> +              ustack-tst-basic`myfunc_d+{ptr}
> +              ustack-tst-basic`myfunc_c+{ptr}
> +              ustack-tst-basic`myfunc_b+{ptr}
> +              ustack-tst-basic`myfunc_a+{ptr}

Same problem as in v1 of the patch:  mismatches.  Maybe get rid of the 
leading spaces.

> +
> diff --git a/test/unittest/printf/tst.ustack25_pid.r.p b/test/unittest/printf/tst.ustack25_pid.r.p
> new file mode 100755
> index 00000000..c0110e00
> --- /dev/null
> +++ b/test/unittest/printf/tst.ustack25_pid.r.p
> @@ -0,0 +1,37 @@
> +#!/bin/bash
> +
> +# A pid entry probe places a uprobe on the first instruction of a function.
> +# Unfortunately, this is so early in the function preamble that the function
> +# frame pointer has not yet been established and the actual caller of the
> +# traced function is missed.
> +#
> +# In Linux 6.11, x86-specific heuristics are introduced to fix this problem.
> +# See commit cfa7f3d
> +# ("perf,x86: avoid missing caller address in stack traces captured in uprobe")
> +# for both a description of the problem and an explanation of the heuristics.
> +#
> +# Add post processing to these test results to allow for both cases:
> +# caller frame is missing or not missing.
> +
> +missing_caller=1
> +if [ $(uname -m) == "x86_64" ]; then
> +        read MAJOR MINOR <<< `uname -r | grep -Eo '^[0-9]+\.[0-9]+' | tr '.' ' '`
> +
> +        if [ $MAJOR -ge 6 ]; then
> +                if [ $MAJOR -gt 6 -o $MINOR -ge 11 ]; then
> +                        missing_caller=0
> +                fi
> +        fi
> +fi
> +
> +if [ $missing_caller -eq 1 ]; then
> +        # Add the missing caller function after the current function.
> +        awk '{ print }
> +             /myfunc_z/ { print "              ustack-tst-basic`myfunc_y+{ptr}" }'

Get rid of the leading spaces?

> +else
> +        # The .r results file has an extra frame at the end in case
> +        # the caller frame is missing and the 25-frame limit goes
> +        # "too far."  If the caller is not missing, fake that extra frame.
> +        awk '{ print }
> +             /myfunc_b/ { print "              ustack-tst-basic`myfunc_a+{ptr}" }'

Get rid of the leading spaces?

> +fi



More information about the DTrace-devel mailing list