[DTrace-devel] [PATCH 11/16] Unify common argument retrieval for providers

Eugene Loh eugene.loh at oracle.com
Tue Mar 23 16:06:10 PDT 2021


What tests cover this patch?  In particular, the patch adds some new 
elements that could not have been tested before.

I am confused about how the highest args are being filled in.  In 
dt_cg_tramp_copy_args_from_regs(), they are copied in from the stack.  
The dtrace provider used to clear those args out.  The fbt provider 
still does.  So, might it not make sense to supply 
dt_cg_tramp_copy_args_from_regs() with another argument that indicates 
whether to fill those higher args in from the stack, to zero them out, 
or not touch them at all?

Other comments below.

On 3/19/21 12:54 AM, Kris Van Hees wrote:
> Multiple providers retrieve the probe arguments from the register set
> and from the stack.  Function dt_cg_tramp_copy_args_from_regs() can be

So, how about naming it dt_cg_tramp_fill_args()?  It is shorter and more 
inclusive (since the function does not simply copy from regs).

> called to do this in a trampoline.  The trampoline prologue will now
> set up %r7 as a pointer to the mstate and %r8 as a pointer to the BPF
> context.
>
> Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> ---
>   libdtrace/dt_cg.c           | 128 ++++++++++++++++++++++++++++++++----
>   libdtrace/dt_cg.h           |   1 +
>   libdtrace/dt_prov_dtrace.c  |  57 ++--------------
>   libdtrace/dt_prov_fbt.c     |  40 +----------
>   libdtrace/dt_prov_profile.c |  11 +---
>   libdtrace/dt_prov_sdt.c     |   7 +-
>   libdtrace/dt_prov_syscall.c |   7 +-
>   libdtrace/dt_pt_regs.h      |  12 ++++
>   8 files changed, 144 insertions(+), 119 deletions(-)
>
> diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> index 9ffc5834..35691a26 100644
> --- a/libdtrace/dt_cg.c
> +++ b/libdtrace/dt_cg.c
> @@ -36,8 +36,11 @@ static void dt_cg_node(dt_node_t *, dt_irlist_t *, dt_regset_t *);
>    *
>    * The trampoline prologue will populate the dt_dctx_t structure on the stack.
>    *
> - * The caller should NOT depend on any register values that exist at the end of
> - * the trampoline prologue.
> + * The caller can depend on the following registers being set when this
> + * function returns:
> + *
> + *	%r7 contains a pointer to dctx->mst
> + *	%r8 contains a pointer to dctx->ctx
>    */
>   void
>   dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
> @@ -60,17 +63,20 @@ dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
>   	 *	uintptr_t	rc;
>   	 *	char		*buf;
>   	 *
> -	 *	dctx.ctx = ctx;		// stdw [%fp + DCTX_FP(DCTX_CTX)], %r1
> +	 *				//     (%r8 = pointer to BPF context)
> +	 *				// mov %r8, %r1
> +	 *	dctx.ctx = ctx;		// stdw [%fp + DCTX_FP(DCTX_CTX)], %r8
>   	 */
> -	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_FP, DCTX_FP(DCTX_CTX), BPF_REG_1));
> +	emit(dlp, BPF_MOV_REG(BPF_REG_8, BPF_REG_1));
> +	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_FP, DCTX_FP(DCTX_CTX), BPF_REG_8));
>   
>   	/*
> -	 *	key = DT_STATE_ACTIVITY;// stw [%fp + DCTX_FP(DCTX_MST)],
> +	 *	key = DT_STATE_ACTIVITY;// stw [%fp + DCTX_FP(DCTX_ACT)],
>   	 *				//		DT_STATE_ACTIVITY
>   	 *	rc = bpf_map_lookup_elem(&state, &key);
>   	 *				// lddw %r1, &state
>   	 *				// mov %r2, %fp
> -	 *				// add %r2, DCTX_FP(DCTX_MST)
> +	 *				// add %r2, DCTX_FP(DCTX_ACT)
>   	 *				// call bpf_map_lookup_elem
>   	 *				//     (%r1 ... %r5 clobbered)
>   	 *				//     (%r0 = map value)
> @@ -81,10 +87,10 @@ dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
>   	 *
>   	 *	dctx.act = rc;		// stdw [%fp + DCTX_FP(DCTX_ACT)], %r0
>   	 */
> -	emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_FP, DCTX_FP(DCTX_MST), DT_STATE_ACTIVITY));
> +	emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_FP, DCTX_FP(DCTX_ACT), DT_STATE_ACTIVITY));
>   	dt_cg_xsetx(dlp, state, DT_LBL_NONE, BPF_REG_1, state->di_id);
>   	emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> -	emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DCTX_FP(DCTX_MST)));
> +	emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DCTX_FP(DCTX_ACT)));
>   	emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
>   	emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_exit));
>   	emit(dlp, BPF_LOAD(BPF_W, BPF_REG_1, BPF_REG_0, 0));
> @@ -102,8 +108,10 @@ dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
>   	 *				//     (%r0 = 'mem' BPF map value)
>   	 *	if (rc == 0)		// jeq %r0, 0, lbl_exit
>   	 *		goto exit;
> -	 *				//     (%r0 = pointer to dt_mstate_t)
> -	 *	dctx.mst = rc;		// stdw [%fp + DCTX_FP(DCTX_MST)], %r0
> +	 *				//     (%r0 = map value)
> +	 *				//     (%r7 = pointer to dt_mstate_t)
> +	 *				// mov %r7, %r0
> +	 *	dctx.mst = rc;		// stdw [%fp + DCTX_FP(DCTX_MST)], %r7
>   	 */
>   	emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_FP, DCTX_FP(DCTX_MST), 0));
>   	dt_cg_xsetx(dlp, mem, DT_LBL_NONE, BPF_REG_1, mem->di_id);
> @@ -111,7 +119,8 @@ dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
>   	emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DCTX_FP(DCTX_MST)));
>   	emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
>   	emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_exit));
> -	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_FP, DCTX_FP(DCTX_MST), BPF_REG_0));
> +	emit(dlp, BPF_MOV_REG(BPF_REG_7, BPF_REG_0));
> +	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_FP, DCTX_FP(DCTX_MST), BPF_REG_7));
>   
>   	/*
>   	 *	buf = rc + roundup(sizeof(dt_mstate_t), 8);
> @@ -161,6 +170,103 @@ dt_cg_tramp_prologue(dt_pcb_t *pcb)
>   	dt_cg_tramp_prologue_act(pcb, DT_ACTIVITY_ACTIVE);
>   }
>   
> +/*
> + * Copy arguments from a dt_pt_regs structure referenced by the 'rp' argument.
> + *
> + * The caller must ensure that %r7 and %r8 contain the values set by the
> + * dt_cg_tramp_prologue*() functions.
> + */

This comment doesn't make complete sense, since the function actually 
does not explicitly depend on %r8.  It relies instead on argument rp.  
So, do you want to eliminate the rp argument and use %r8 instead?  
Another option is to pass in not only %r8 but also %r7, making clearer 
what is going on (though making cloudier the assumptions about which 
regs are being used where).

In any case, referring to rp as a dt_pt_regs is confusing since this 
pointer is otherwise referred to as a BPF context.  I understand how 
this pointer can be understood to point both to a BPF context and to a 
dt_pt_regs, but one might as well just be consistent how one talks about 
these things.

> +void
> +dt_cg_tramp_copy_args_from_regs(dt_pcb_t *pcb, int rp)
> +{
> +	dt_irlist_t	*dlp = &pcb->pcb_ir;
> +	int		i;
> +	uint_t		lbl_store = dt_irlist_label(dlp);
> +
> +	/*
> +	 *	for (i = 0; i < PT_REGS_ARGC; i++)
> +	 *		dctx->mst->argv[i] = PT_REGS_PARAMi((dt_pt_regs *)rp);


Would make more sense if PARAMi -> ARGi.


> +	 *				// lddw %r0, [%rp + PT_REGS_ARGi]
> +	 *				// stdw [%r7 + DMST_ARG(i)], %r0
> +	 */
> +	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, rp, PT_REGS_ARG0));
> +	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
> +	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, rp, PT_REGS_ARG1));
> +	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_0));
> +	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, rp, PT_REGS_ARG2));
> +	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(2), BPF_REG_0));
> +	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, rp, PT_REGS_ARG3));
> +	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(3), BPF_REG_0));
> +	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, rp, PT_REGS_ARG4));
> +	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(4), BPF_REG_0));
> +	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, rp, PT_REGS_ARG5));
> +	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(5), BPF_REG_0));
> +#ifdef PT_REGS_ARG6
> +	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, rp, PT_REGS_ARG6));
> +	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(6), BPF_REG_0));
> +#endif
> +#ifdef PT_REGS_ARG7
> +	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, rp, PT_REGS_ARG7));
> +	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(7), BPF_REG_0));
> +#endif
> +
> +	/*
> +	 * Generate code to read the remainder of the arguments from the stack
> +	 * (if possible).  We (ab)use the %r0 spill slot on the stack to read
> +	 * values using bpf_probe_read() because we know that it cannot be in
> +	 * use at this point.
> +	 *
> +	 *	for (i = PT_REGS_ARGC;
> +	 *	     i < ARRAY_SIZE(((dt_mstate_t *)0)->argv); i++) {
> +	 *		uint64_t	val, tmp;
> +	 *		int		rc;
> +	 *		uint64_t	*sp;
> +	 *
> +	 *		sp = (uint64_t *)(((dt_pt_regs *)rp)->sp;
> +	 *		rc = bpf_probe_read(&tmp, sizeof(tmp),
> +	 *				    &sp[i - PT_REGS_ARGC +
> +	 *					    PT_REGS_ARGSTKBASE]);
> +	 *				// mov %r1, %fp
> +	 *				// add %r1, DT_STK_SPILL(0)
> +	 *				// mov %r2, sizeof(uint64_t)
> +	 *				// lddw %r3, [%rp + PT_REGS_SP]
> +	 *				// mov %r0, i - PT_REGS_ARGC +
> +	 *						PT_REGS_ARGSTKBASE
> +	 *				// mul %r0, sizeof(uint64_t)
> +	 *				// add %r3, %r0
> +	 *				// call bpf_probe_read
> +	 *		val = 0;
> +	 *				// mov %r1, 0
> +	 *		if (rc != 0)
> +	 *			goto store:
> +	 *				// jne %r0, 0, lbl_store
> +	 *		val = tmp;
> +	 *				// lddw %r1, [%rp + DT_STK_SPILL(0)]
> +	 *
> +	 *	store:
> +	 *		dctx->mst->argv[i] = val;
> +	 *				// store:
> +	 *				// stdw [%r7 + DMST_ARG(i)], %r1
> +	 *	}
> +	 *
> +	 */
> +	for (i = PT_REGS_ARGC; i < ARRAY_SIZE(((dt_mstate_t *)0)->argv); i++) {
> +		emit(dlp,  BPF_MOV_REG(BPF_REG_1, BPF_REG_FP));
> +		emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, DT_STK_SPILL(0)));
> +		emit(dlp,  BPF_MOV_IMM(BPF_REG_2, sizeof(uint64_t)));
> +		emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_3, rp, PT_REGS_SP));
> +		emit(dlp,  BPF_MOV_IMM(BPF_REG_0, i - PT_REGS_ARGC + PT_REGS_ARGSTKBASE));
> +		emit(dlp,  BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, sizeof(uint64_t)));
> +		emit(dlp,  BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_0));
> +		emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_probe_read));
> +		emit(dlp,  BPF_MOV_IMM(BPF_REG_1, 1234));
> +		emit(dlp,  BPF_BRANCH_IMM(BPF_JNE, BPF_REG_0, 0, lbl_store));
> +		emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_FP, DT_STK_SPILL(0)));
> +		emitl(dlp, lbl_store,
> +			   BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(i), BPF_REG_1));
> +	}
> +}
> +

As you note elsewhere, the 1234 is wrong.

You also have these instructions:
         BPF_MOV_IMM(BPF_REG_0, i - PT_REGS_ARGC + PT_REGS_ARGSTKBASE);
         BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, sizeof(uint64_t));
         BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_0);
How about
         BPF_ALU64_IMM(BPF_ADD, BPF_REG_3,
                 (i - PT_REGS_ARGC + PT_REGS_ARGSTKBASE) * 
sizeof(uint64_t));
to save two instructions inside this loop.  The expression could even be 
simplified a little by redefining PT_REGS_ARGSTKBASE since it is 
currently only ever used in the combination PT_REGS_ARGSTKBASE - 
PT_REGS_ARGC anyhow.

You also use the lbl_store many times (inside the loop), even though 
only one lbl_store is generated (at the top of the function).  The 
result is that they all branch to the end, not filling a lot of the 
args, whether with 0s, 1234s, or anything else.  I think the 
"lbl_store=dt_irlist_label()" probably needs to go inside the loop.

>   typedef struct {
>   	dt_irlist_t	*dlp;
>   	dt_activity_t	act;
> diff --git a/libdtrace/dt_cg.h b/libdtrace/dt_cg.h
> index be430c28..96ce56f8 100644
> --- a/libdtrace/dt_cg.h
> +++ b/libdtrace/dt_cg.h
> @@ -22,6 +22,7 @@ extern void dt_cg_xsetx(dt_irlist_t *, dt_ident_t *, uint_t, int, uint64_t);
>   extern dt_irnode_t *dt_cg_node_alloc(uint_t, struct bpf_insn);
>   extern void dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act);
>   extern void dt_cg_tramp_prologue(dt_pcb_t *pcb);
> +extern void dt_cg_tramp_copy_args_from_regs(dt_pcb_t *pcb, int rp);
>   extern void dt_cg_tramp_call_clauses(dt_pcb_t *pcb, const dt_probe_t *prp,
>   				     dt_activity_t act);
>   extern void dt_cg_tramp_return(dt_pcb_t *pcb);
> diff --git a/libdtrace/dt_prov_dtrace.c b/libdtrace/dt_prov_dtrace.c
> index 55090e53..fe0b8d86 100644
> --- a/libdtrace/dt_prov_dtrace.c
> +++ b/libdtrace/dt_prov_dtrace.c
> @@ -78,7 +78,6 @@ static int populate(dtrace_hdl_t *dtp)
>    */
>   static void trampoline(dt_pcb_t *pcb)
>   {
> -	int		i;
>   	dt_irlist_t	*dlp = &pcb->pcb_ir;
>   	dt_activity_t	act = DT_ACTIVITY_ACTIVE;
>   	int		adv_act;
> @@ -124,6 +123,12 @@ static void trampoline(dt_pcb_t *pcb)
>   
>   	dt_cg_tramp_prologue_act(pcb, act);
>   
> +	/*
> +	 * After the dt_cg_tramp_prologue_act() call, we have:
> +	 *				//     (%r7 = dctx->mst)
> +	 *				//     (%r8 = dctx->ctx)
> +	 */
> +
>   	if (key) {
>   		/*
>   		 *     key = DT_STATE_(BEGANON|ENDEDON);
> @@ -155,17 +160,6 @@ static void trampoline(dt_pcb_t *pcb)
>   		emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_update_elem));
>   	}
>   
> -	/*
> -	 * We cannot assume anything about the state of any registers so set up
> -	 * the ones we need:
> -	 *				//     (%r7 = dctx->mst)
> -	 *				// lddw %r7, [%fp + DCTX_FP(DCTX_MST)]
> -	 *				//     (%r8 = dctx->ctx)
> -	 *				// lddw %r8, [%fp + DCTX_FP(DCTX_CTX)]
> -	 */
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_7, BPF_REG_FP, DCTX_FP(DCTX_MST)));
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_8, BPF_REG_FP, DCTX_FP(DCTX_CTX)));
> -
>   #if 0
>   	/*
>   	 *     dctx->mst->regs = *(dt_pt_regs *)dctx->ctx;
> @@ -181,44 +175,7 @@ static void trampoline(dt_pcb_t *pcb)
>   	}
>   #endif
>   
> -	/*
> -	 *	dctx->mst->argv[0] = PT_REGS_PARAM1((dt_pt_regs *)dctx->ctx);
> -	 *				// lddw %r0, [%r8 + PT_REGS_ARG0]
> -	 *				// stdw [%r7 + DMST_ARG(0)], %r0
> -	 *	dctx->mst->argv[1] = PT_REGS_PARAM2((dt_pt_regs *)dctx->ctx);
> -	 *				// lddw %r0, [%r8 + PT_REGS_ARG1]
> -	 *				// stdw [%r7 + DMST_ARG(1)], %r0
> -	 *	dctx->mst->argv[2] = PT_REGS_PARAM3((dt_pt_regs *)dctx->ctx);
> -	 *				// lddw %r0, [%r8 + PT_REGS_ARG2]
> -	 *				// stdw [%r7 + DMST_ARG(2)], %r0
> -	 *	dctx->mst->argv[3] = PT_REGS_PARAM4((dt_pt_regs *)dctx->ctx);
> -	 *				// lddw %r0, [%r8 + PT_REGS_ARG3]
> -	 *				// stdw [%r7 + DMST_ARG(3)], %r0
> -	 *	dctx->mst->argv[4] = PT_REGS_PARAM5((dt_pt_regs *)dctx->ctx);
> -	 *				// lddw %r0, [%r8 + PT_REGS_ARG4]
> -	 *				// stdw [%r7 + DMST_ARG(4)], %r0
> -	 *	dctx->mst->argv[5] = PT_REGS_PARAM6((dt_pt_regs *)dctx->ctx);
> -	 *				// lddw %r0, [%r8 + PT_REGS_ARG5]
> -	 *				// stdw [%r7 + DMST_ARG(5)], %r0
> -	 */
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_8, PT_REGS_ARG0));
> -	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_8, PT_REGS_ARG1));
> -	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_0));
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_8, PT_REGS_ARG2));
> -	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(2), BPF_REG_0));
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_8, PT_REGS_ARG3));
> -	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(3), BPF_REG_0));
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_8, PT_REGS_ARG4));
> -	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(4), BPF_REG_0));
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_8, PT_REGS_ARG5));
> -	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(5), BPF_REG_0));
> -
> -	/*
> -	 *     (we clear dctx->mst->argv[6] and on)
> -	 */
> -	for (i = 6; i < ARRAY_SIZE(((dt_mstate_t *)0)->argv); i++)
> -		emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(i), 0));
> +	dt_cg_tramp_copy_args_from_regs(pcb, BPF_REG_8);
>   
>   	if (adv_act)
>   		dt_cg_tramp_epilogue_advance(pcb, act);
> diff --git a/libdtrace/dt_prov_fbt.c b/libdtrace/dt_prov_fbt.c
> index 6db41767..c344aa4c 100644
> --- a/libdtrace/dt_prov_fbt.c
> +++ b/libdtrace/dt_prov_fbt.c
> @@ -166,15 +166,10 @@ static void trampoline(dt_pcb_t *pcb)
>   	dt_cg_tramp_prologue(pcb);
>   
>   	/*
> -	 * We cannot assume anything about the state of any registers so set up
> -	 * the ones we need:
> +	 * After the dt_cg_tramp_prologue() call, we have:
>   	 *				//     (%r7 = dctx->mst)
> -	 *				// lddw %r7, [%fp + DCTX_FP(DCTX_MST)]
>   	 *				//     (%r8 = dctx->ctx)
> -	 *				// lddw %r8, [%fp + DCTX_FP(DCTX_CTX)]
>   	 */
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_7, BPF_REG_FP, DCTX_FP(DCTX_MST)));
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_8, BPF_REG_FP, DCTX_FP(DCTX_CTX)));
>   
>   #if 0
>   	/*
> @@ -186,38 +181,7 @@ static void trampoline(dt_pcb_t *pcb)
>   	}
>   #endif
>   
> -	/*
> -	 *	dctx->mst->argv[0] = PT_REGS_PARAM1((dt_pt_regs *)dctx->ctx);
> -	 *				// lddw %r0, [%r8 + PT_REGS_ARG0]
> -	 *				// stdw [%r7 + DMST_ARG(0)], %r0
> -	 *	dctx->mst->argv[1] = PT_REGS_PARAM2((dt_pt_regs *)dctx->ctx);
> -	 *				// lddw %r0, [%r8 + PT_REGS_ARG1]
> -	 *				// stdw [%r7 + DMST_ARG(1)], %r0
> -	 *	dctx->mst->argv[2] = PT_REGS_PARAM3((dt_pt_regs *)dctx->ctx);
> -	 *				// lddw %r0, [%r8 + PT_REGS_ARG2]
> -	 *				// stdw [%r7 + DMST_ARG(2)], %r0
> -	 *	dctx->mst->argv[3] = PT_REGS_PARAM4((dt_pt_regs *)dctx->ctx);
> -	 *				// lddw %r0, [%r8 + PT_REGS_ARG3]
> -	 *				// stdw [%r7 + DMST_ARG(3)], %r0
> -	 *	dctx->mst->argv[4] = PT_REGS_PARAM5((dt_pt_regs *)dctx->ctx);
> -	 *				// lddw %r0, [%r8 + PT_REGS_ARG4]
> -	 *				// stdw [%r7 + DMST_ARG(4)], %r0
> -	 *	dctx->mst->argv[5] = PT_REGS_PARAM6((dt_pt_regs *)dctx->ctx);
> -	 *				// lddw %r0, [%r8 + PT_REGS_ARG5]
> -	 *				// stdw [%r7 + DMST_ARG(5)], %r0
> -	 */
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_8, PT_REGS_ARG0));
> -	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_8, PT_REGS_ARG1));
> -	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_0));
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_8, PT_REGS_ARG2));
> -	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(2), BPF_REG_0));
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_8, PT_REGS_ARG3));
> -	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(3), BPF_REG_0));
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_8, PT_REGS_ARG4));
> -	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(4), BPF_REG_0));
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_8, PT_REGS_ARG5));
> -	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(5), BPF_REG_0));
> +	dt_cg_tramp_copy_args_from_regs(pcb, BPF_REG_8);
>   
>   	/*
>   	 *     (we clear dctx->mst->argv[6] and on)
> diff --git a/libdtrace/dt_prov_profile.c b/libdtrace/dt_prov_profile.c
> index f7512387..313c8c9c 100644
> --- a/libdtrace/dt_prov_profile.c
> +++ b/libdtrace/dt_prov_profile.c
> @@ -221,15 +221,10 @@ static void trampoline(dt_pcb_t *pcb)
>   	dt_cg_tramp_prologue(pcb);
>   
>   	/*
> -	 * We cannot assume anything about the state of any registers so set up
> -	 * the ones we need:
> -	 *                              //     (%r7 = dctx->mst)
> -	 *                              // lddw %r7, [%fp + DCTX_FP(DCTX_MST)]
> -	 *                              //     (%r8 = dctx->ctx)
> -	 *                              // lddw %r8, [%fp + DCTX_FP(DCTX_CTX)]
> +	 * After the dt_cg_tramp_prologue() call, we have:
> +	 *				//     (%r7 = dctx->mst)
> +	 *				//     (%r8 = dctx->ctx)
>   	 */
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_7, BPF_REG_FP, DCTX_FP(DCTX_MST)));
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_8, BPF_REG_FP, DCTX_FP(DCTX_CTX)));
>   
>   #if 0
>   	/*
> diff --git a/libdtrace/dt_prov_sdt.c b/libdtrace/dt_prov_sdt.c
> index bd8a201b..cf62d01a 100644
> --- a/libdtrace/dt_prov_sdt.c
> +++ b/libdtrace/dt_prov_sdt.c
> @@ -146,15 +146,10 @@ static void trampoline(dt_pcb_t *pcb)
>   	dt_cg_tramp_prologue(pcb);
>   
>   	/*
> -	 * We cannot assume anything about the state of any registers so set up
> -	 * the ones we need:
> +	 * After the dt_cg_tramp_prologue() call, we have:
>   	 *				//     (%r7 = dctx->mst)
> -	 *				// lddw %r7, [%fp + DCTX_FP(DCTX_MST)]
>   	 *				//     (%r8 = dctx->ctx)
> -	 *				// lddw %r8, [%fp + DCTX_FP(DCTX_CTX)]
>   	 */
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_7, BPF_REG_FP, DCTX_FP(DCTX_MST)));
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_8, BPF_REG_FP, DCTX_FP(DCTX_CTX)));
>   
>   #if 0
>   	/*
> diff --git a/libdtrace/dt_prov_syscall.c b/libdtrace/dt_prov_syscall.c
> index 5b142733..15099b59 100644
> --- a/libdtrace/dt_prov_syscall.c
> +++ b/libdtrace/dt_prov_syscall.c
> @@ -151,15 +151,10 @@ static void trampoline(dt_pcb_t *pcb)
>   	dt_cg_tramp_prologue(pcb);
>   
>   	/*
> -	 * We cannot assume anything about the state of any registers so set up
> -	 * the ones we need:
> +	 * After the dt_cg_tramp_prologue() call, we have:
>   	 *				//     (%r7 = dctx->mst)
> -	 *				// lddw %r7, [%fp + DCTX_FP(DCTX_MST)]
>   	 *				//     (%r8 = dctx->ctx)
> -	 *				// lddw %r8, [%fp + DCTX_FP(DCTX_CTX)]
>   	 */
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_7, BPF_REG_FP, DCTX_FP(DCTX_MST)));
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_8, BPF_REG_FP, DCTX_FP(DCTX_CTX)));
>   
>   #if 0
>   	/*
> diff --git a/libdtrace/dt_pt_regs.h b/libdtrace/dt_pt_regs.h
> index 72e065c1..0bbb1be2 100644
> --- a/libdtrace/dt_pt_regs.h
> +++ b/libdtrace/dt_pt_regs.h
> @@ -26,7 +26,10 @@ typedef struct pt_regs		dt_pt_regs;
>   # define PT_REGS_ARG3		offsetof(dt_pt_regs, rcx)
>   # define PT_REGS_ARG4		offsetof(dt_pt_regs, r8)
>   # define PT_REGS_ARG5		offsetof(dt_pt_regs, r9)
> +# define PT_REGS_ARGC		6
> +# define PT_REGS_ARGSTKBASE	1
>   # define PT_REGS_IP		offsetof(dt_pt_regs, rip)
> +# define PT_REGS_SP		offsetof(dt_pt_regs, rsp)
>   
>   # define PT_REGS_BPF_ARG0(r)	((r)->rdi)
>   # define PT_REGS_BPF_ARG1(r)	((r)->rsi)
> @@ -35,6 +38,7 @@ typedef struct pt_regs		dt_pt_regs;
>   # define PT_REGS_BPF_ARG4(r)	((r)->r8)
>   # define PT_REGS_BPF_ARG5(r)	((r)->r9)
>   # define PT_REGS_BPF_IP(r)	((r)->rip)
> +# define PT_REGS_BPF_SP(r)	((r)->rsp)
>   #elif defined(__aarch64__)
>   typedef struct user_pt_regs	dt_pt_regs;
>   # define PT_REGS_ARG0		offsetof(dt_pt_regs, regs[0])
> @@ -43,7 +47,12 @@ typedef struct user_pt_regs	dt_pt_regs;
>   # define PT_REGS_ARG3		offsetof(dt_pt_regs, regs[3])
>   # define PT_REGS_ARG4		offsetof(dt_pt_regs, regs[4])
>   # define PT_REGS_ARG5		offsetof(dt_pt_regs, regs[5])
> +# define PT_REGS_ARG6		offsetof(dt_pt_regs, regs[6])
> +# define PT_REGS_ARG7		offsetof(dt_pt_regs, regs[7])
> +# define PT_REGS_ARGC		8
> +# define PT_REGS_ARGSTKBASE	2
>   # define PT_REGS_IP		offsetof(dt_pt_regs, pc)
> +# define PT_REGS_SP		offsetof(dt_pt_regs, sp)
>   
>   # define PT_REGS_BPF_ARG0(r)	((r)->regs[0])
>   # define PT_REGS_BPF_ARG1(r)	((r)->regs[1])
> @@ -51,7 +60,10 @@ typedef struct user_pt_regs	dt_pt_regs;
>   # define PT_REGS_BPF_ARG3(r)	((r)->regs[3])
>   # define PT_REGS_BPF_ARG4(r)	((r)->regs[4])
>   # define PT_REGS_BPF_ARG5(r)	((r)->regs[5])
> +# define PT_REGS_BPF_ARG5(r)	((r)->regs[6])
> +# define PT_REGS_BPF_ARG5(r)	((r)->regs[7])
>   # define PT_REGS_BPF_IP(r)	((r)->pc)
> +# define PT_REGS_BPF_SP(r)	((r)->sp)
>   #else
>   # error ISA not supported
>   #endif

Does libdtrace/dt_pt_regs.h need a refreshed year on the Copyright notice?

I see:
      # define PT_REGS_BPF_ARG5(r)       ((r)->regs[5])
     +# define PT_REGS_BPF_ARG5(r)       ((r)->regs[6])
     +# define PT_REGS_BPF_ARG5(r)       ((r)->regs[7])
Should they not be ARG6 and ARG7

Actually, just remove all (that is, both new and pre-existing) 
PT_REGS_BPF_* definitions since they are unused.

How about getting rid of typedef dt_pt_regs:

   - It's the regs in the BPF context and does not specifically
     have to do with DTrace.  Hence, the dt_* prefix is misleading.

   - In dt_pt_regs.h, it's only used to define the PT_REGS_* macros.

   - Function dt_cg_tramp_copy_args_from_regs() need not refer to
     dt_pt_regs since it only uses the "DTrace-agnostic" PT_REGS_* macros.

   - The "#if 0" dead code in dt_prov_*.c that refers to dt_pt_regs
     should be removed.  Those files also still have
     "include dt_pt_regs.h", which can also be removed.  Oops, two flies
     in the ointment:

       - dt_prov_profile.c still uses PT_REGS_IP

       - dt_prov_syscall.c has to figure out SCD_ARG()

Remove "include dt_pt_regs.h" from dt_impl.h.

What's the future of "include dt_pt_regs.h" and "dt_pt_regs regs" in 
dt_dctx.h?  Can regs and DMST_REGS be removed?



More information about the DTrace-devel mailing list