[DTrace-devel] [PATCH v3] cg, printf: allow storing [u]stack() into vars and printf using %k

Tue Oct 7 03:05:26 UTC 2025

On 10/6/25 15:55, Kris Van Hees wrote:

> diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> @@ -2647,11 +2647,16 @@ dt_cg_act_speculate(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
>   	dt_regset_free(drp, dnp->dn_reg);
>   }
>   
> -static uint64_t
> -dt_cg_stack_arg(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_actkind_t kind)
> +/*
> + * Extract [u]stack() argument data, storing number of frames and optional
> + * string data in 'nframes' and 'strsz', and returning the storage size for

Or, in '*nframesp' and '*strszp'.

> + * the call stack data (not including the size of the optional string data).
> + */
> +static uint_t

Again, I would think that "size" should become uint  in dt_cg_agg() as well.

> +dt_cg_stack_arg(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_actkind_t kind,
> +		uint_t *nframesp, uint_t *strszp)
>   {
> -	int		nframes;
> -	int		strsize = 0;
> +	uint_t		nframes;
>   	dt_node_t	*arg0 = dnp->dn_args;
>   	dt_node_t	*arg1 = arg0 != NULL ? arg0->dn_list : NULL;
>   	int		indopt, def, inderr;
> @@ -2760,17 +2772,18 @@ dt_cg_act_stack_sub(dt_pcb_t *pcb, dt_node_t *dnp, int reg, int off,
>   		dt_regset_free_args(drp);
>   		/* mov32 %r0, %r0 effectively masks the lower 32 bits. */
>   		emit(dlp,  BPF_MOV32_REG(BPF_REG_0, BPF_REG_0));

The unnecessary MOV32 instruction is okay, but I'm curious what your 
thoughts are...  why you're in favor of keeping it.

> -		emit(dlp,  BPF_STORE(BPF_DW, reg, off, BPF_REG_0));
> +		emit(dlp,  BPF_STORE(BPF_W, reg, aloff + 12, BPF_REG_0));
>   		dt_regset_free(drp, BPF_REG_0);
> -	}
> +	} else
> +		emit(dlp,  BPF_STORE_IMM(BPF_W, reg, aloff + 12, 0));
>   
>   	/* Call bpf_get_stack(ctx, buf, size, flags). */
>   	if (dt_regset_xalloc_args(drp) == -1)
>   		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
>   	dt_cg_access_dctx(BPF_REG_1, dlp, drp, DCTX_CTX);
>   	emit(dlp,  BPF_MOV_REG(BPF_REG_2, reg));
> -	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, off + prefsz));
> -	emit(dlp,  BPF_MOV_IMM(BPF_REG_3, stacksize));
> +	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, aloff + 16));
> +	emit(dlp,  BPF_MOV_IMM(BPF_REG_3, nframes * sizeof(uint64_t)));
>   	if (kind == DTRACEACT_USTACK)
>   		emit(dlp,  BPF_MOV_IMM(BPF_REG_4, BPF_F_USER_STACK));
>   	else {
> diff --git a/libdtrace/dt_printf.c b/libdtrace/dt_printf.c
> @@ -569,18 +570,41 @@ dt_print_stack_user(dtrace_hdl_t *dtp, FILE *fp, const char *format,
>   	return err;
>   }
>   
> -/*ARGSUSED*/
> +/*
> + * The data at vaddr is structured as follows:
> + *	uint32_t	depth
> + *	uint32_t	strsz
> + *	uint32_t	is_user
> + *	uint32_t	pid
> + *	uint64_t	addrs[depth & 0x7fffffff]
> + *
> + * The 'depth' member provides the maximum number of addresses in the stack
> + * trace.
> + * The 'strsz' member provides the size of the optional string blob that is
> + * appended to the stack trace data.
> + * The 'is_user' member identifies the stack trace as kernel space (0) or
> + * userspace (1).

Or, instead of "1" it's ">0"?

> + * The 'pid' member provides the userspace process id for userspace stack
> + * traces and otherwise will be 0.
> + * The 'addrs' array provides the stack traces addresses.  If the stack trace
> + * is shorter than 'depth', remaining addresses will be 0 and can be ignored.
> + */
>   static int
>   pfprint_stack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
>   	      const dt_pfargd_t *pfd, const void *vaddr, size_t size,
>   	      uint64_t normal, uint64_t sig)
>   {
>   	int width;
> -	const dtrace_recdesc_t *rec = pfd->pfd_rec;
> -	caddr_t addr = (caddr_t)vaddr;
> -	uint32_t depth = DTRACE_STACK_NFRAMES(rec->dtrd_arg);
> +	uint32_t *vals = (uint32_t *)vaddr;
> +	uint32_t depth = vals[0];
> +	uint32_t strsz = vals[1];
> +	uint32_t is_user = vals[2];
> +	caddr_t addr = (caddr_t)(vals + (is_user ? 2 : 4));
>   	int err = 0;
>   
> +	if (depth > dtp->dt_options[DTRACEOPT_MAXFRAMES])
> +		return dt_set_errno(dtp, EDT_DSIZE);
> +
>   	if (depth == 0)
>   		return 0;
>   
> diff --git a/test/unittest/funcs/ustack/tst.asgn_dvar.r.p b/test/unittest/funcs/ustack/tst.asgn_dvar.r.p
> new file mode 100755
> index 00000000..9999094f
> --- /dev/null
> +++ b/test/unittest/funcs/ustack/tst.asgn_dvar.r.p
> @@ -0,0 +1,59 @@
> +#!/usr/bin/gawk -f
> +
> +BEGIN {
> +    cmd = "uname -rm";
> +    cmd | getline;
> +    close(cmd);
> +
> +    if (/x86_64/) {
> +	gsub(/\./, " ");
> +	maj = int($1);
> +	min = int($2);
> +	if (maj < 6 || (maj == 6 && min < 11))
> +	    missing_frame = 1;
> +    }
> +}

Cool.  Nice solution to that problem.  Unfortunately, it fails for me on 
all ARM.  I think the problem is that the "missing frame" problem is on 
all ARM:  even newer kernels don't fix it (just on x86).  So, for arm, 
missing_frame should be 1.

> +/myfunc_z/ {
> +    # check probe
> +    if ( $1 != "myfunc_z:entry" ) {
> +        print "ERROR: expected fun:prb = myfunc_z:entry";
> +        exit(0);
> +    }
> +
> +    # check stack(3)
> +    getline;
> +    if (index($1, "`myfunc_z+0x") == 0 &&
> +        match($1, "`myfunc_z$") == 0) {
> +        print "ERROR: expected leaf frame to be myfunc_z";
> +        exit(0);
> +    }
> +    getline;
> +    if (NF == 0) {
> +        print "ERROR: missing second frame";
> +        exit(0);
> +    }
> +    if (index($1, missing_frame ? "`myfunc_x+0x" : "`myfunc_y+0x") == 0 &&
> +        match($1, missing_frame ? "`myfunc_x$" : "`myfunc_y$") == 0) {
> +        printf("ERROR: expected leaf frame to be %s\n", missing_frame ? "myfunc_x" : "myfunc_y");
> +        exit(0);
> +    }
> +    getline;
> +    if (NF == 0) {
> +        print "ERROR: missing third frame";
> +        exit(0);
> +    }
> +    if (index($1, missing_frame ? "`myfunc_w+0x" : "`myfunc_x+0x") == 0 &&
> +        match($1, missing_frame ? "`myfunc_w$" : "`myfunc_x$") == 0) {
> +        printf("ERROR: expected leaf frame to be %s\n", missing_frame ? "myfunc_w" : "myfunc_x");
> +        exit(0);
> +    }
> +    getline;
> +    if (NF > 0) {
> +        print "ERROR: expected stack(3) to have only three frames";
> +        exit(0);
> +    }
> +
> +    print "success";
> +    exit(0);
> +}
> diff --git a/test/unittest/funcs/ustack/tst.store.d b/test/unittest/funcs/ustack/tst.store.d

Okay.  Just curious again, though.  Is the omission of .r and .r.p files 
for this test intentional, given that the stack version of this test 
does more checking.

> new file mode 100644
> index 00000000..6a9f7c94
> --- /dev/null
> +++ b/test/unittest/funcs/ustack/tst.store.d
> @@ -0,0 +1,25 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +/*
> + * ASSERTION: It is possible to store to members of dt_stack_t.
> + */
> +
> +/* @@trigger: ustack-tst-basic */
> +
> +pid$target:a.out:myfunc_z:entry
> +{
> +	v = ustack(5);
> +	v.depth = 2;
> +	printf("%k", v);
> +	exit(0);
> +}
> +
> +ERROR
> +{
> +	exit(1);
> +}
> diff --git a/test/unittest/printf/tst.stack.d b/test/unittest/printf/tst.stack.d

Looks fine.  Other stack tests seem to be migrating to hrtimer_nanosleep 
-- and dumping the destructive BEGIN{system()} stuff -- but that does 
not seem necessary here.

> new file mode 100644
> index 00000000..db943c29
> --- /dev/null
> +++ b/test/unittest/printf/tst.stack.d
> @@ -0,0 +1,33 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +/*
> + * ASSERTION: Test printf with %k and a stack argument.
> + *
> + * SECTION: Output Formatting/printf()
> + */
> +
> +#pragma D option destructive
> +
> +BEGIN
> +{
> +	system("echo write something > /dev/null");
> +}
> +
> +fbt::ksys_write:entry
> +{
> +	printf("%k", stack(1));
> +	printf("%k", stack(2));
> +	printf("%k", stack(3));
> +	printf("%k", stack());
> +	exit(0);
> +}
> +
> +ERROR
> +{
> +	exit(1);
> +}
> diff --git a/test/unittest/printf/tst.ustack25_pid.d b/test/unittest/printf/tst.ustack25_pid.d
> new file mode 100644
> index 00000000..9ae3a7b8
> --- /dev/null
> +++ b/test/unittest/printf/tst.ustack25_pid.d
> @@ -0,0 +1,21 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +/* @@trigger: ustack-tst-basic */
> +
> +#pragma D option quiet
> +
> +pid$target:a.out:myfunc_z:entry
> +{
> +	printf("%k", ustack(25));
> +	exit(0);
> +}
> +
> +ERROR
> +{
> +	exit(1);
> +}
> diff --git a/test/unittest/printf/tst.ustack25_pid.r b/test/unittest/printf/tst.ustack25_pid.r

I still don't get this one.  Just as in v1 and v2, I get consistent 
FAILs here.  Does this pass for you?  So far as I can tell, there are 
issues with leading whitespace.  How about trimming the leading 
whitespace in the .r file?  And...

> new file mode 100644
> index 00000000..e7732fb8
> --- /dev/null
> +++ b/test/unittest/printf/tst.ustack25_pid.r
> @@ -0,0 +1,28 @@
> +
> +              ustack-tst-basic`myfunc_z
> +              ustack-tst-basic`myfunc_y+{ptr}
> +              ustack-tst-basic`myfunc_x+{ptr}
> +              ustack-tst-basic`myfunc_w+{ptr}
> +              ustack-tst-basic`myfunc_v+{ptr}
> +              ustack-tst-basic`myfunc_u+{ptr}
> +              ustack-tst-basic`myfunc_t+{ptr}
> +              ustack-tst-basic`myfunc_s+{ptr}
> +              ustack-tst-basic`myfunc_r+{ptr}
> +              ustack-tst-basic`myfunc_q+{ptr}
> +              ustack-tst-basic`myfunc_p+{ptr}
> +              ustack-tst-basic`myfunc_o+{ptr}
> +              ustack-tst-basic`myfunc_n+{ptr}
> +              ustack-tst-basic`myfunc_m+{ptr}
> +              ustack-tst-basic`myfunc_l+{ptr}
> +              ustack-tst-basic`myfunc_k+{ptr}
> +              ustack-tst-basic`myfunc_j+{ptr}
> +              ustack-tst-basic`myfunc_i+{ptr}
> +              ustack-tst-basic`myfunc_h+{ptr}
> +              ustack-tst-basic`myfunc_g+{ptr}
> +              ustack-tst-basic`myfunc_f+{ptr}
> +              ustack-tst-basic`myfunc_e+{ptr}
> +              ustack-tst-basic`myfunc_d+{ptr}
> +              ustack-tst-basic`myfunc_c+{ptr}
> +              ustack-tst-basic`myfunc_b+{ptr}
> +              ustack-tst-basic`myfunc_a+{ptr}
> +
> diff --git a/test/unittest/printf/tst.ustack25_pid.r.p b/test/unittest/printf/tst.ustack25_pid.r.p
> new file mode 100755
> index 00000000..c0110e00
> --- /dev/null
> +++ b/test/unittest/printf/tst.ustack25_pid.r.p
> @@ -0,0 +1,37 @@
> +#!/bin/bash
> +
> +# A pid entry probe places a uprobe on the first instruction of a function.
> +# Unfortunately, this is so early in the function preamble that the function
> +# frame pointer has not yet been established and the actual caller of the
> +# traced function is missed.
> +#
> +# In Linux 6.11, x86-specific heuristics are introduced to fix this problem.
> +# See commit cfa7f3d
> +# ("perf,x86: avoid missing caller address in stack traces captured in uprobe")
> +# for both a description of the problem and an explanation of the heuristics.
> +#
> +# Add post processing to these test results to allow for both cases:
> +# caller frame is missing or not missing.
> +
> +missing_caller=1
> +if [ $(uname -m) == "x86_64" ]; then
> +        read MAJOR MINOR <<< `uname -r | grep -Eo '^[0-9]+\.[0-9]+' | tr '.' ' '`
> +
> +        if [ $MAJOR -ge 6 ]; then
> +                if [ $MAJOR -gt 6 -o $MINOR -ge 11 ]; then
> +                        missing_caller=0
> +                fi
> +        fi
> +fi
> +
> +if [ $missing_caller -eq 1 ]; then
> +        # Add the missing caller function after the current function.
> +        awk '{ print }
> +             /myfunc_z/ { print "              ustack-tst-basic`myfunc_y+{ptr}" }'
> +else
> +        # The .r results file has an extra frame at the end in case
> +        # the caller frame is missing and the 25-frame limit goes
> +        # "too far."  If the caller is not missing, fake that extra frame.
> +        awk '{ print }
> +             /myfunc_b/ { print "              ustack-tst-basic`myfunc_a+{ptr}" }'

... trimming the leading space in both of those print statements.

> +fi