[DTrace-devel] [PATCH] Fix stack offsets for clause functions

Eugene Loh eugene.loh at oracle.com
Mon Mar 7 21:52:20 UTC 2022


Reviewed-by: Eugene Loh <eugene.loh at oracle.com>
with comments...

On 3/4/22 4:00 PM, Kris Van Hees via DTrace-devel wrote:
> Clause functions were reserving space on the stack for the DTrace
> context even though they only a pointer to the DTrace context that is
Is a verb missing?
> stored on the stack of the trampoline program.  This wasted 64 bytes
> (12.5% of the total available stack).
Better: "(out of 512 bytes total)".  The 512 is more recognizable and 
the large relative waste is just as apparent.
> This patch also provides some reworking of how reference is made to
> members of dctx on the stack.  Both trampolines and functions now use
> [reg + offset].
>
> Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> ---
>   libdtrace/dt_cg.c                        | 122 +++++++++++------------
>   libdtrace/dt_dctx.h                      |  60 +++++++----
>   libdtrace/dt_prov_dtrace.c               |  16 +--
>   test/unittest/codegen/tst.stack_layout.r |  22 ++--
>   4 files changed, 118 insertions(+), 102 deletions(-)
>
> diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> index 64089da5..4713f033 100644
> --- a/libdtrace/dt_cg.c
> +++ b/libdtrace/dt_cg.c
> @@ -42,6 +42,7 @@ static void dt_cg_node(dt_node_t *, dt_irlist_t *, dt_regset_t *);
>    *
>    *	%r7 contains a pointer to dctx->mst
>    *	%r8 contains a pointer to dctx->ctx
> + *	%r9 contains a pointer to dctx
>    */

The comment block says:
      * The caller can depend on the following registers being set when this
      * function returns:
      *      %r7 contains a pointer to dctx->mst
      *      %r8 contains a pointer to dctx->ctx
      *      %r9 contains a pointer to dctx
Which caller and function?  How about specifying "the caller of 
dt_cg_tramp_prologue_act()".
That makes it easier for the reader to rule out the interpretation that 
we're talking about
when the clause function returns to the trampoline.

>   void
>   dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
> @@ -67,20 +68,25 @@ dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
>   	 *	uintptr_t	rc;
>   	 *	char		*buf, *mem;
>   	 *
> -	 *				//     (%r8 = pointer to BPF context)
> +	 *				// mov %r9, %fp
> +	 *				// add %r9, -DCTX_SIZE
> +	 *				//     (%r9 = pointer to DTrace context)
>   	 *				// mov %r8, %r1
> -	 *	dctx.ctx = ctx;		// stdw [%fp + DCTX_FP(DCTX_CTX)], %r8
> +	 *				//     (%r8 = pointer to BPF context)
> +	 *	dctx.ctx = ctx;		// stdw [%r9 + DCTX_CTX], %r8
>   	 */
> +	emit(dlp,  BPF_MOV_REG(BPF_REG_9, BPF_REG_FP));
> +	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_9, -DCTX_SIZE));
>   	emit(dlp,  BPF_MOV_REG(BPF_REG_8, BPF_REG_1));
> -	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_FP, DCTX_FP(DCTX_CTX), BPF_REG_8));
> +	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_9, DCTX_CTX, BPF_REG_8));
>   
>   	/*
> -	 *	key = DT_STATE_ACTIVITY;// stw [%fp + DCTX_FP(DCTX_ACT)],
> +	 *	key = DT_STATE_ACTIVITY;// stw [%r9 + DCTX_ACT],
>   	 *				//		DT_STATE_ACTIVITY
>   	 *	rc = bpf_map_lookup_elem(&state, &key);
>   	 *				// lddw %r1, &state
> -	 *				// mov %r2, %fp
> -	 *				// add %r2, DCTX_FP(DCTX_ACT)
> +	 *				// mov %r2, %r9
> +	 *				// add %r2, DCTX_ACT
>   	 *				// call bpf_map_lookup_elem
>   	 *				//     (%r1 ... %r5 clobbered)
>   	 *				//     (%r0 = map value)
> @@ -89,24 +95,24 @@ dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
>   	 *	if (*rc != act)		// ldw %r1, [%r0 + 0]
>   	 *		goto exit;	// jne %r1, act, lbl_exit
>   	 *
> -	 *	dctx.act = rc;		// stdw [%fp + DCTX_FP(DCTX_ACT)], %r0
> +	 *	dctx.act = rc;		// stdw [%r9 + DCTX_ACT], %r0
>   	 */
> -	emit(dlp,  BPF_STORE_IMM(BPF_W, BPF_REG_FP, DCTX_FP(DCTX_ACT), DT_STATE_ACTIVITY));
> +	emit(dlp,  BPF_STORE_IMM(BPF_W, BPF_REG_9, DCTX_ACT, DT_STATE_ACTIVITY));
>   	dt_cg_xsetx(dlp, state, DT_LBL_NONE, BPF_REG_1, state->di_id);
> -	emit(dlp,  BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> -	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DCTX_FP(DCTX_ACT)));
> +	emit(dlp,  BPF_MOV_REG(BPF_REG_2, BPF_REG_9));
> +	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DCTX_ACT));
>   	emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
>   	emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_exit));
>   	emit(dlp,  BPF_LOAD(BPF_W, BPF_REG_1, BPF_REG_0, 0));
>   	emit(dlp,  BPF_BRANCH_IMM(BPF_JNE, BPF_REG_1, act, lbl_exit));
> -	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_FP, DCTX_FP(DCTX_ACT), BPF_REG_0));
> +	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_9, DCTX_ACT, BPF_REG_0));
>   
>   	/*
> -	 *	key = 0;		// stw [%fp + DCTX_FP(DCTX_MST)], 0
> +	 *	key = 0;		// stw [%r9 + DCTX_MST], 0
>   	 *	rc = bpf_map_lookup_elem(&mem, &key);
>   	 *				// lddw %r1, &mem
> -	 *				// mov %r2, %fp
> -	 *				// add %r2, DCTX_FP(DCTX_MST)
> +	 *				// mov %r2, %r9
> +	 *				// add %r2, DCTX_MST
>   	 *				// call bpf_map_lookup_elem
>   	 *				//     (%r1 ... %r5 clobbered)
>   	 *				//     (%r0 = 'mem' BPF map value)
> @@ -115,19 +121,19 @@ dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
>   	 *				//     (%r0 = map value)
>   	 *				//     (%r7 = pointer to dt_mstate_t)
>   	 *				// mov %r7, %r0
> -	 *	dctx.mst = rc;		// stdw [%fp + DCTX_FP(DCTX_MST)], %r7
> +	 *	dctx.mst = rc;		// stdw [%r9 + DCTX_MST], %r7
>   	 *	dctx.mst->prid = PRID;	// stw [%r7 + DMST_PRID], PRID
>   	 *	dctx.mst->syscall_errno = 0;
>   	 *				// stw [%r7 + DMST_ERRNO], 0
>   	 */
> -	emit(dlp,  BPF_STORE_IMM(BPF_W, BPF_REG_FP, DCTX_FP(DCTX_MST), 0));
> +	emit(dlp,  BPF_STORE_IMM(BPF_W, BPF_REG_9, DCTX_MST, 0));
>   	dt_cg_xsetx(dlp, mem, DT_LBL_NONE, BPF_REG_1, mem->di_id);
> -	emit(dlp,  BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> -	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DCTX_FP(DCTX_MST)));
> +	emit(dlp,  BPF_MOV_REG(BPF_REG_2, BPF_REG_9));
> +	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DCTX_MST));
>   	emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
>   	emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_exit));
>   	emit(dlp,  BPF_MOV_REG(BPF_REG_7, BPF_REG_0));
> -	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_FP, DCTX_FP(DCTX_MST), BPF_REG_7));
> +	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_9, DCTX_MST, BPF_REG_7));
>   	emite(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_7, DMST_PRID, -1), prid);
>   	emit(dlp,  BPF_STORE_IMM(BPF_W, BPF_REG_7, DMST_ERRNO, 0));
>   
> @@ -139,20 +145,20 @@ dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
>   	 *				// stdw [%r0 + 0], 0
>   	 *	buf += 8;		// add %r0, 8
>   	 *				//     (%r0 = pointer to buffer space)
> -	 *	dctx.buf = buf;		// stdw [%fp + DCTX_FP(DCTX_BUF)], %r0
> +	 *	dctx.buf = buf;		// stdw [%r9 + DCTX_BUF], %r0
>   	 */
>   	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, roundup(sizeof(dt_mstate_t), 8)));
>   	emit(dlp,  BPF_STORE_IMM(BPF_DW, BPF_REG_0, 0, 0));
>   	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8));
> -	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_FP, DCTX_FP(DCTX_BUF), BPF_REG_0));
> +	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_9, DCTX_BUF, BPF_REG_0));
>   
>   	/*
>   	 *	mem = buf + roundup(dtp->dt_maxreclen, 8);
>   	 *				// add %r0, roundup(dtp->dt_maxreclen,
> -	 *	dctx.mem = mem;		// stdw [%fp + DCTX_FP(DCTX_BUF)], %r0
> +	 *	dctx.mem = mem;		// stdw [%r9 + DCTX_MEM], %r0
>   	 */
>   	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, roundup(dtp->dt_maxreclen, 8)));
> -	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_FP, DCTX_FP(DCTX_MEM), BPF_REG_0));
> +	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_9, DCTX_MEM, BPF_REG_0));
>   
>   	/*
>   	 * Store -1 to the strtok internal-state offset to indicate
> @@ -164,34 +170,34 @@ dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
>   	 * Store pointer to BPF map "name" in the DTrace context field "fld" at
>   	 * "offset".
>   	 *
> -	 *	key = 0;		// stw [%fp + DCTX_FP(offset)], 0
> +	 *	key = 0;		// stw [%r9 + offset], 0
>   	 *	rc = bpf_map_lookup_elem(&name, &key);
>   	 *				// lddw %r1, &name
> -	 *				// mov %r2, %fp
> -	 *				// add %r2, DCTX_FP(offset)
> +	 *				// mov %r2, %r9
> +	 *				// add %r2, offset
>   	 *				// call bpf_map_lookup_elem
>   	 *				//     (%r1 ... %r5 clobbered)
>   	 *				//     (%r0 = name BPF map value)
>   	 *	if (rc == 0)		// jeq %r0, 0, lbl_exit
>   	 *		goto exit;
>   	 *				//     (%r0 = pointer to map value)
> -	 *	dctx.fld = rc;		// stdw [%fp + DCTX_FP(offset)], %r0
> +	 *	dctx.fld = rc;		// stdw [%r9 + offset], %r0
>   	 */
>   #define DT_CG_STORE_MAP_PTR(name, offset) \
>   	do { \
>   		dt_ident_t *idp = dt_dlib_get_map(dtp, name); \
>   		\
>   		assert(idp != NULL); \
> -		emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_FP, DCTX_FP(offset), 0)); \
> +		emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_9, (offset), 0)); \
>   		\
>   		dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id); \
> -		emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP)); \
> -		emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DCTX_FP(offset))); \
> +		emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_9)); \
> +		emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, (offset))); \
>   		emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem)); \
>   		\
>   		emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_exit)); \
>   		\
> -		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_FP, DCTX_FP(offset), BPF_REG_0)); \
> +		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_9, (offset), BPF_REG_0)); \
>   	} while(0)
>   
>   	DT_CG_STORE_MAP_PTR("strtab", DCTX_STRTAB);
> @@ -301,51 +307,46 @@ dt_cg_tramp_copy_args_from_regs(dt_pcb_t *pcb, int rp)
It seems to me that the dt_cg_tramp_copy_args_from_regs() refactoring 
should be its own patch preceding this one.  There was never any need 
for the function to be using the BPF stack in the first place.
>   	 *
>   	 *	for (i = PT_REGS_ARGC;
>   	 *	     i < ARRAY_SIZE(((dt_mstate_t *)0)->argv); i++) {
> -	 *		uint64_t	val, tmp;
>   	 *		int		rc;
>   	 *		uint64_t	*sp;
>   	 *
>   	 *		sp = (uint64_t *)(((dt_pt_regs *)rp)->sp;
> -	 *		rc = bpf_probe_read(&tmp, sizeof(tmp),
> +	 *		rc = bpf_probe_read(dctx->mst->argv[i],
> +	 *				    sizeof(uint64_t),
>   	 *				    &sp[i - PT_REGS_ARGC +
>   	 *					    PT_REGS_ARGSTKBASE]);
> -	 *				// mov %r1, %fp
> -	 *				// add %r1, DT_STK_SPILL(0)
> +	 *				// mov %r1, %r7
> +	 *				// add %r1, DMST_ARG(i)
>   	 *				// mov %r2, sizeof(uint64_t)
>   	 *				// lddw %r3, [%rp + PT_REGS_SP]
>   	 *				// add %r3, (i - PT_REGS_ARGC +
>   	 *						 PT_REGS_ARGSTKBASE) *
>   	 *					    sizeof(uint64_t)
>   	 *				// call bpf_probe_read
> -	 *		val = 0;
> -	 *				// mov %r1, 0
>   	 *		if (rc != 0)
> -	 *			goto store:
> -	 *				// jne %r0, 0, lbl_store
> -	 *		val = tmp;
> -	 *				// lddw %r1, [%rp + DT_STK_SPILL(0)]
> +	 *			goto ok:
s/ok/lbl_ok/
> +	 *				// jeq %r0, 0, lbl_ok
> +	 *		dctx->mst->argv[i] = 0;
> +	 *				// stdw [%r7 + DMST_ARG(i)], 0
>   	 *
> -	 *	store:
> -	 *		dctx->mst->argv[i] = val;
> -	 *				// store:
> -	 *				// stdw [%r7 + DMST_ARG(i)], %r1
> +	 *	lbl_ok:
> +	 *				// lbl_ok:
Put "lbl_ok:" and "// lbl_ok:" on the same line.
>   	 *	}
>   	 *
>   	 */
Actually, the whole comment block is more verbose yet less readable than 
the actually assembly code.  Just drop the comment block or pare it down 
substantially.
>   	for (i = PT_REGS_ARGC; i < ARRAY_SIZE(((dt_mstate_t *)0)->argv); i++) {
> -		uint_t	lbl_store = dt_irlist_label(dlp);
> +		uint_t	lbl_ok = dt_irlist_label(dlp);
>   
> -		emit(dlp,  BPF_MOV_REG(BPF_REG_1, BPF_REG_FP));
> -		emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, DT_STK_SPILL(0)));
> +		emit(dlp,  BPF_MOV_REG(BPF_REG_1, BPF_REG_7));
> +		emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, DMST_ARG(i)));
>   		emit(dlp,  BPF_MOV_IMM(BPF_REG_2, sizeof(uint64_t)));
>   		emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_3, rp, PT_REGS_SP));
>   		emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, (i - PT_REGS_ARGC + PT_REGS_ARGSTKBASE) * sizeof(uint64_t)));
>   		emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_probe_read));
> -		emit(dlp,  BPF_MOV_IMM(BPF_REG_1, 0));
> -		emit(dlp,  BPF_BRANCH_IMM(BPF_JNE, BPF_REG_0, 0, lbl_store));
> -		emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_FP, DT_STK_SPILL(0)));
> -		emitl(dlp, lbl_store,
> -			   BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(i), BPF_REG_1));
> +		emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_ok));
> +		emit(dlp,  BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(i), 0));
> +		emitl(dlp, lbl_ok,
> +			   BPF_NOP());
>   	}
>   }
>   
> @@ -361,22 +362,19 @@ dt_cg_call_clause(dtrace_hdl_t *dtp, dt_ident_t *idp, dt_clause_arg_t *arg)
>   	dt_irlist_t	*dlp = arg->dlp;
>   
>   	/*
> -	 *	if (*dctx.act != act)	// ldw %r0, [%fp +
> -	 *				//	     DCTX_FP(DCTX_ACT)]
> +	 *	if (*dctx.act != act)	// ldw %r0, [%r9 + DCTX_ACT]
>   	 *		goto exit;	// ldw %r0, [%r0 + 0]
>   	 *				// jne %r0, act, lbl_exit
>   	 */
> -	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_FP, DCTX_FP(DCTX_ACT)));
> +	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_9, DCTX_ACT));
>   	emit(dlp,  BPF_LOAD(BPF_W, BPF_REG_0, BPF_REG_0, 0));
>   	emit(dlp,  BPF_BRANCH_IMM(BPF_JNE, BPF_REG_0, arg->act, arg->lbl_exit));
>   
>   	/*
> -	 *	dt_clause(dctx);	// mov %r1, %fp
> -	 *				// add %r1, DCTX_FP(0)
> +	 *	dt_clause(dctx);	// mov %r1, %r9
>   	 *				// call clause
>   	 */
> -	emit(dlp,  BPF_MOV_REG(BPF_REG_1, BPF_REG_FP));
> -	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, DCTX_FP(0)));
> +	emit(dlp,  BPF_MOV_REG(BPF_REG_1, BPF_REG_9));
>   	emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
>   
>   	return 0;
> @@ -424,11 +422,11 @@ dt_cg_tramp_epilogue_advance(dt_pcb_t *pcb, dt_activity_t act)
>   	dt_cg_tramp_call_clauses(pcb, pcb->pcb_probe, act);
>   
>   	/*
> -	 *	(*dctx.act)++;		// lddw %r0, [%fp + DCTX_FP(DCTX_ACT)]
> +	 *	(*dctx.act)++;		// lddw %r0, [%r9 + DCTX_ACT)]
>   	 *				// mov %r1, 1
>   	 *				// xadd [%r0 + 0], %r1
>   	 */
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_FP, DCTX_FP(DCTX_ACT)));
> +	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_9, DCTX_ACT));
>   	emit(dlp, BPF_MOV_IMM(BPF_REG_1, 1));
>   	emit(dlp, BPF_XADD_REG(BPF_W, BPF_REG_0, 0, BPF_REG_1));
>   
> diff --git a/libdtrace/dt_dctx.h b/libdtrace/dt_dctx.h
> index 76d48647..7d38dfd8 100644
> --- a/libdtrace/dt_dctx.h
> +++ b/libdtrace/dt_dctx.h
> @@ -29,6 +29,16 @@ typedef struct dt_mstate {
>   	uint64_t	argv[10];	/* Probe arguments */
>   } dt_mstate_t;
>   
> +#define DMST_EPID	offsetof(dt_mstate_t, epid)
> +#define DMST_PRID	offsetof(dt_mstate_t, prid)
> +#define DMST_CLID	offsetof(dt_mstate_t, clid)
> +#define DMST_TAG	offsetof(dt_mstate_t, tag)
> +#define DMST_ERRNO	offsetof(dt_mstate_t, syscall_errno)
> +#define DMST_FAULT	offsetof(dt_mstate_t, fault)
> +#define DMST_TSTAMP	offsetof(dt_mstate_t, tstamp)
> +#define DMST_REGS	offsetof(dt_mstate_t, regs)
> +#define DMST_ARG(n)	offsetof(dt_mstate_t, argv[n])
> +
>   /*
>    * The DTrace context.
>    */
> @@ -53,6 +63,7 @@ typedef struct dt_dctx {
>   #define DCTX_AGG	offsetof(dt_dctx_t, agg)
>   #define DCTX_GVARS	offsetof(dt_dctx_t, gvars)
>   #define DCTX_LVARS	offsetof(dt_dctx_t, lvars)
> +
>   #define DCTX_SIZE	((int16_t)sizeof(dt_dctx_t))
>   
>   /*
> @@ -89,22 +100,6 @@ typedef struct dt_dctx {
>    */
>   #define DMEM_SIZE(dtp)	(DMEM_STRTOK(dtp) + DMEM_STRTOK_SZ(dtp))
>   
> -/*
> - * Macro to determine the (negative) offset from the frame pointer (%fp) for
> - * the given offset in dt_dctx_t.
> - */
> -#define DCTX_FP(off)	(DT_STK_DCTX + (int16_t)(off))
> -
> -#define DMST_EPID	offsetof(dt_mstate_t, epid)
> -#define DMST_PRID	offsetof(dt_mstate_t, prid)
> -#define DMST_CLID	offsetof(dt_mstate_t, clid)
> -#define DMST_TAG	offsetof(dt_mstate_t, tag)
> -#define DMST_ERRNO	offsetof(dt_mstate_t, syscall_errno)
> -#define DMST_FAULT	offsetof(dt_mstate_t, fault)
> -#define DMST_TSTAMP	offsetof(dt_mstate_t, tstamp)
> -#define DMST_REGS	offsetof(dt_mstate_t, regs)
> -#define DMST_ARG(n)	offsetof(dt_mstate_t, argv[n])
> -
>   /*
>    * DTrace BPF programs can use all BPF registers except for the the %fp (frame
>    * pointer) register and the highest numbered register (currently %r9) that is
> @@ -112,6 +107,32 @@ typedef struct dt_dctx {
>    */
>   #define DT_STK_NREGS		(MAX_BPF_REG - 2)
>   
> +/*
> + * The stack layout for BPF programs that are generated as trampolines for
> + * D clauses.
> + *
> + * Note: The BPF frame pointer points to the address of the first byte past the
> + *       end of the stack.  8-byte values are properly aligned at offsets -8,
> + *       -16, -24, -32, etc. -- that is, negative multiples of sizeof(uint64_t).
The Note confuses me;  how about dropping it?  The comment "first byte 
past the end of the stack" sounds like the FP is pointing to some 
address that is 7 mod 8 -- that is, some unaligned address. Anyhow, to 
me the Note contributes only confusion.
> + *
> + *                       +----------------+
> + *                 SP(n) |                |
> + *                       +----------------+
> + *                       |      ...       |
> + *                       +----------------+
> + *                 SP(1) |                |
> + *                       +----------------+
> + *       SP_BASE = SP(0) |                |
> + *                       +----------------+
> + *                  DCTX | DTrace Context |
> + *                       +----------------+
> + */
The figure resembles that for clause functions.  So one gets the 
impression that there are a fixed number of SP(n) slots, even if we do 
not know what "n" is.  I assume it should really be open-ended, in which 
case the figure might be clearer like this:

+ *                       +----------------+
+ *                       |      ...       |
+ *                       +----------------+
+ *                 SP(2) |                |
+ *                       +----------------+
+ *                 SP(1) |                |
+ *                       +----------------+
+ *                 SP(0) |                |
+ *                       +----------------+
+ *                  DCTX | DTrace Context |
+ *                       +----------------+


> +#define DT_STK_BASE		((int16_t)0)
> +#define DT_STK_SLOT_SZ		((int16_t)sizeof(uint64_t))
> +
> +#define DT_TRAMP_SP_BASE	(DT_STK_BASE - DCTX_SIZE - DT_STK_SLOT_SZ)
> +#define DT_TRAMP_SP(n)		(DT_TRAMP_SP_BASE - (n) * DT_STK_SLOT_SZ)
> +
The DT_STK_BASE and DT_STK_SLOT_SZ definitions should appear before the 
trampoline comment block since they do not relate specifically to 
trampolines;  the trampoline comment block should be adjacent to the 
#define statements they describe.

I do not understand what DT_STK_BASE is other than 0.  How about 
eliminating DT_STK_BASE?

And how about eliminating DT_TRAMP_SP_BASE, which is used only in the 
succeeding line, which would be clearer as:
#define DT_TRAMP_SP(n) (DT_STK_BASE - DCTX_SIZE - ((n)+1) * DT_STK_SLOT_SZ)
This would also mean eliminating "SP_BASE" from the figure. Similarly it 
would make sense to eliminate DT_STK_SPILL_BASE.
>   /*
>    * The stack layout for functions that implement a D clause is encoded with the
>    * following constants.
> @@ -131,13 +152,10 @@ typedef struct dt_dctx {
>    *                       +----------------+
>    * SPILL_BASE = SPILL(0) | %r0            |
>    *                       +----------------+
> - *                  DCTX | DTrace Context |
> + *                  DCTX | Ptr to dctx    |
>    *                       +----------------+
>    */

Higher up in the file, we have:
         /*
          * DTrace BPF programs can use all BPF registers except for the 
the %fp (frame
          * pointer) register and the highest numbered register 
(currently %r9) that is
          * used to store the base pointer for the trace output record.
          */
         #define DT_STK_NREGS            (MAX_BPF_REG - 2)
But that is not how trampolines and bpf/* functions use %r9.  Also, the 
highest register is %r10.  So I suggest rewriting this as:
         /*
          * DTrace clause functions can spill any BPF registers except 
for the the %fp
          * (frame pointer) register and the next highest register (used 
to store the
          * base pointer for the trace output record).
          */
Also, this comment and definition should be moved closer to the section 
that discusses stack layout for clause functions.

> -#define DT_STK_BASE		((int16_t)0)
> -#define DT_STK_SLOT_SZ		((int16_t)sizeof(uint64_t))
> -
> -#define DT_STK_DCTX		(DT_STK_BASE - DCTX_SIZE)
> +#define DT_STK_DCTX		(DT_STK_BASE - DT_STK_SLOT_SZ)
>   #define DT_STK_SPILL_BASE	(DT_STK_DCTX - DT_STK_SLOT_SZ)
>   #define DT_STK_SPILL(n)		(DT_STK_SPILL_BASE - (n) * DT_STK_SLOT_SZ)
>   
> diff --git a/libdtrace/dt_prov_dtrace.c b/libdtrace/dt_prov_dtrace.c
> index c8a55475..d959a829 100644
> --- a/libdtrace/dt_prov_dtrace.c
> +++ b/libdtrace/dt_prov_dtrace.c
> @@ -128,30 +128,30 @@ static void trampoline(dt_pcb_t *pcb)
>   
>   	/*
>   	 *     key = DT_STATE_(BEGANON|ENDEDON);
> -	 *				// stw [%fp + DT_STK_SPILL(0)],
> +	 *				// stw [%fp + DT_TRAMP_SP(0)],
>   	 *				//	DT_STATE_(BEGANON|ENDEDON)
>   	 *     val = bpf_get_smp_processor_id();
>   	 *				// call bpf_get_smp_processor_id
> -	 *				// stw [%fp + DT_STK_SPILL(1)], %r0
> +	 *				// stw [%fp + DT_TRAMP_SP(1)], %r0
>   	 *     bpf_map_update_elem(state, &key, &val, BPF_ANY);
>   	 *				// lddw %r1, &state
>   	 *				// mov %r2, %fp
> -	 *				// add %r2, DT_STK_SPILL(0)
> +	 *				// add %r2, DT_TRAMP_SP(0)
>   	 *				// mov %r3, %fp
> -	 *				// add %r3, DT_STK_SPILL(1)
> +	 *				// add %r3, DT_TRAMP_SP(1)
>   	 *				// mov %r4, BPF_ANY
>   	 *				// call bpf_map_update_elem
>   	 */
>   	dt_ident_t	*state = dt_dlib_get_map(pcb->pcb_hdl, "state");
>   
> -	emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_FP, DT_STK_SPILL(0), key));
> +	emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_FP, DT_TRAMP_SP(0), key));
>   	emit(dlp, BPF_CALL_HELPER(BPF_FUNC_get_smp_processor_id));
> -	emit(dlp, BPF_STORE(BPF_W, BPF_REG_FP, DT_STK_SPILL(1), BPF_REG_0));
> +	emit(dlp, BPF_STORE(BPF_W, BPF_REG_FP, DT_TRAMP_SP(1), BPF_REG_0));
>   	dt_cg_xsetx(dlp, state, DT_LBL_NONE, BPF_REG_1, state->di_id);
>   	emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> -	emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_STK_SPILL(0)));
> +	emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP(0)));
>   	emit(dlp, BPF_MOV_REG(BPF_REG_3, BPF_REG_FP));
> -	emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, DT_STK_SPILL(1)));
> +	emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, DT_TRAMP_SP(1)));
>   	emit(dlp, BPF_MOV_IMM(BPF_REG_4, BPF_ANY));
>   	emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_update_elem));
>   
> diff --git a/test/unittest/codegen/tst.stack_layout.r b/test/unittest/codegen/tst.stack_layout.r
> index 7c02de0d..af2a4aad 100644
> --- a/test/unittest/codegen/tst.stack_layout.r
> +++ b/test/unittest/codegen/tst.stack_layout.r
> @@ -1,12 +1,12 @@
>   Base:          0
> -dctx:        -72
> -%r0:         -80
> -%r1:         -88
> -%r2:         -96
> -%r3:        -104
> -%r4:        -112
> -%r5:        -120
> -%r6:        -128
> -%r7:        -136
> -%r8:        -144
> -scratch:    -145 .. -512
> +dctx:         -8
> +%r0:         -16
> +%r1:         -24
> +%r2:         -32
> +%r3:         -40
> +%r4:         -48
> +%r5:         -56
> +%r6:         -64
> +%r7:         -72
> +%r8:         -80
> +scratch:     -81 .. -512



More information about the DTrace-devel mailing list