[DTrace-devel] [PATCH] Fix stack offsets for clause functions
Eugene Loh
eugene.loh at oracle.com
Mon Mar 7 21:52:20 UTC 2022
Reviewed-by: Eugene Loh <eugene.loh at oracle.com>
with comments...
On 3/4/22 4:00 PM, Kris Van Hees via DTrace-devel wrote:
> Clause functions were reserving space on the stack for the DTrace
> context even though they only a pointer to the DTrace context that is
Is a verb missing?
> stored on the stack of the trampoline program. This wasted 64 bytes
> (12.5% of the total available stack).
Better: "(out of 512 bytes total)". The 512 is more recognizable and
the large relative waste is just as apparent.
> This patch also provides some reworking of how reference is made to
> members of dctx on the stack. Both trampolines and functions now use
> [reg + offset].
>
> Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> ---
> libdtrace/dt_cg.c | 122 +++++++++++------------
> libdtrace/dt_dctx.h | 60 +++++++----
> libdtrace/dt_prov_dtrace.c | 16 +--
> test/unittest/codegen/tst.stack_layout.r | 22 ++--
> 4 files changed, 118 insertions(+), 102 deletions(-)
>
> diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> index 64089da5..4713f033 100644
> --- a/libdtrace/dt_cg.c
> +++ b/libdtrace/dt_cg.c
> @@ -42,6 +42,7 @@ static void dt_cg_node(dt_node_t *, dt_irlist_t *, dt_regset_t *);
> *
> * %r7 contains a pointer to dctx->mst
> * %r8 contains a pointer to dctx->ctx
> + * %r9 contains a pointer to dctx
> */
The comment block says:
* The caller can depend on the following registers being set when this
* function returns:
* %r7 contains a pointer to dctx->mst
* %r8 contains a pointer to dctx->ctx
* %r9 contains a pointer to dctx
Which caller and function? How about specifying "the caller of
dt_cg_tramp_prologue_act()".
That makes it easier for the reader to rule out the interpretation that
we're talking about
when the clause function returns to the trampoline.
> void
> dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
> @@ -67,20 +68,25 @@ dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
> * uintptr_t rc;
> * char *buf, *mem;
> *
> - * // (%r8 = pointer to BPF context)
> + * // mov %r9, %fp
> + * // add %r9, -DCTX_SIZE
> + * // (%r9 = pointer to DTrace context)
> * // mov %r8, %r1
> - * dctx.ctx = ctx; // stdw [%fp + DCTX_FP(DCTX_CTX)], %r8
> + * // (%r8 = pointer to BPF context)
> + * dctx.ctx = ctx; // stdw [%r9 + DCTX_CTX], %r8
> */
> + emit(dlp, BPF_MOV_REG(BPF_REG_9, BPF_REG_FP));
> + emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_9, -DCTX_SIZE));
> emit(dlp, BPF_MOV_REG(BPF_REG_8, BPF_REG_1));
> - emit(dlp, BPF_STORE(BPF_DW, BPF_REG_FP, DCTX_FP(DCTX_CTX), BPF_REG_8));
> + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_9, DCTX_CTX, BPF_REG_8));
>
> /*
> - * key = DT_STATE_ACTIVITY;// stw [%fp + DCTX_FP(DCTX_ACT)],
> + * key = DT_STATE_ACTIVITY;// stw [%r9 + DCTX_ACT],
> * // DT_STATE_ACTIVITY
> * rc = bpf_map_lookup_elem(&state, &key);
> * // lddw %r1, &state
> - * // mov %r2, %fp
> - * // add %r2, DCTX_FP(DCTX_ACT)
> + * // mov %r2, %r9
> + * // add %r2, DCTX_ACT
> * // call bpf_map_lookup_elem
> * // (%r1 ... %r5 clobbered)
> * // (%r0 = map value)
> @@ -89,24 +95,24 @@ dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
> * if (*rc != act) // ldw %r1, [%r0 + 0]
> * goto exit; // jne %r1, act, lbl_exit
> *
> - * dctx.act = rc; // stdw [%fp + DCTX_FP(DCTX_ACT)], %r0
> + * dctx.act = rc; // stdw [%r9 + DCTX_ACT], %r0
> */
> - emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_FP, DCTX_FP(DCTX_ACT), DT_STATE_ACTIVITY));
> + emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_9, DCTX_ACT, DT_STATE_ACTIVITY));
> dt_cg_xsetx(dlp, state, DT_LBL_NONE, BPF_REG_1, state->di_id);
> - emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> - emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DCTX_FP(DCTX_ACT)));
> + emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_9));
> + emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DCTX_ACT));
> emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_exit));
> emit(dlp, BPF_LOAD(BPF_W, BPF_REG_1, BPF_REG_0, 0));
> emit(dlp, BPF_BRANCH_IMM(BPF_JNE, BPF_REG_1, act, lbl_exit));
> - emit(dlp, BPF_STORE(BPF_DW, BPF_REG_FP, DCTX_FP(DCTX_ACT), BPF_REG_0));
> + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_9, DCTX_ACT, BPF_REG_0));
>
> /*
> - * key = 0; // stw [%fp + DCTX_FP(DCTX_MST)], 0
> + * key = 0; // stw [%r9 + DCTX_MST], 0
> * rc = bpf_map_lookup_elem(&mem, &key);
> * // lddw %r1, &mem
> - * // mov %r2, %fp
> - * // add %r2, DCTX_FP(DCTX_MST)
> + * // mov %r2, %r9
> + * // add %r2, DCTX_MST
> * // call bpf_map_lookup_elem
> * // (%r1 ... %r5 clobbered)
> * // (%r0 = 'mem' BPF map value)
> @@ -115,19 +121,19 @@ dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
> * // (%r0 = map value)
> * // (%r7 = pointer to dt_mstate_t)
> * // mov %r7, %r0
> - * dctx.mst = rc; // stdw [%fp + DCTX_FP(DCTX_MST)], %r7
> + * dctx.mst = rc; // stdw [%r9 + DCTX_MST], %r7
> * dctx.mst->prid = PRID; // stw [%r7 + DMST_PRID], PRID
> * dctx.mst->syscall_errno = 0;
> * // stw [%r7 + DMST_ERRNO], 0
> */
> - emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_FP, DCTX_FP(DCTX_MST), 0));
> + emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_9, DCTX_MST, 0));
> dt_cg_xsetx(dlp, mem, DT_LBL_NONE, BPF_REG_1, mem->di_id);
> - emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> - emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DCTX_FP(DCTX_MST)));
> + emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_9));
> + emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DCTX_MST));
> emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_exit));
> emit(dlp, BPF_MOV_REG(BPF_REG_7, BPF_REG_0));
> - emit(dlp, BPF_STORE(BPF_DW, BPF_REG_FP, DCTX_FP(DCTX_MST), BPF_REG_7));
> + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_9, DCTX_MST, BPF_REG_7));
> emite(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_7, DMST_PRID, -1), prid);
> emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_7, DMST_ERRNO, 0));
>
> @@ -139,20 +145,20 @@ dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
> * // stdw [%r0 + 0], 0
> * buf += 8; // add %r0, 8
> * // (%r0 = pointer to buffer space)
> - * dctx.buf = buf; // stdw [%fp + DCTX_FP(DCTX_BUF)], %r0
> + * dctx.buf = buf; // stdw [%r9 + DCTX_BUF], %r0
> */
> emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, roundup(sizeof(dt_mstate_t), 8)));
> emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_0, 0, 0));
> emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8));
> - emit(dlp, BPF_STORE(BPF_DW, BPF_REG_FP, DCTX_FP(DCTX_BUF), BPF_REG_0));
> + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_9, DCTX_BUF, BPF_REG_0));
>
> /*
> * mem = buf + roundup(dtp->dt_maxreclen, 8);
> * // add %r0, roundup(dtp->dt_maxreclen,
> - * dctx.mem = mem; // stdw [%fp + DCTX_FP(DCTX_BUF)], %r0
> + * dctx.mem = mem; // stdw [%r9 + DCTX_MEM], %r0
> */
> emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, roundup(dtp->dt_maxreclen, 8)));
> - emit(dlp, BPF_STORE(BPF_DW, BPF_REG_FP, DCTX_FP(DCTX_MEM), BPF_REG_0));
> + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_9, DCTX_MEM, BPF_REG_0));
>
> /*
> * Store -1 to the strtok internal-state offset to indicate
> @@ -164,34 +170,34 @@ dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
> * Store pointer to BPF map "name" in the DTrace context field "fld" at
> * "offset".
> *
> - * key = 0; // stw [%fp + DCTX_FP(offset)], 0
> + * key = 0; // stw [%r9 + offset], 0
> * rc = bpf_map_lookup_elem(&name, &key);
> * // lddw %r1, &name
> - * // mov %r2, %fp
> - * // add %r2, DCTX_FP(offset)
> + * // mov %r2, %r9
> + * // add %r2, offset
> * // call bpf_map_lookup_elem
> * // (%r1 ... %r5 clobbered)
> * // (%r0 = name BPF map value)
> * if (rc == 0) // jeq %r0, 0, lbl_exit
> * goto exit;
> * // (%r0 = pointer to map value)
> - * dctx.fld = rc; // stdw [%fp + DCTX_FP(offset)], %r0
> + * dctx.fld = rc; // stdw [%r9 + offset], %r0
> */
> #define DT_CG_STORE_MAP_PTR(name, offset) \
> do { \
> dt_ident_t *idp = dt_dlib_get_map(dtp, name); \
> \
> assert(idp != NULL); \
> - emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_FP, DCTX_FP(offset), 0)); \
> + emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_9, (offset), 0)); \
> \
> dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id); \
> - emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP)); \
> - emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DCTX_FP(offset))); \
> + emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_9)); \
> + emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, (offset))); \
> emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem)); \
> \
> emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_exit)); \
> \
> - emit(dlp, BPF_STORE(BPF_DW, BPF_REG_FP, DCTX_FP(offset), BPF_REG_0)); \
> + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_9, (offset), BPF_REG_0)); \
> } while(0)
>
> DT_CG_STORE_MAP_PTR("strtab", DCTX_STRTAB);
> @@ -301,51 +307,46 @@ dt_cg_tramp_copy_args_from_regs(dt_pcb_t *pcb, int rp)
It seems to me that the dt_cg_tramp_copy_args_from_regs() refactoring
should be its own patch preceding this one. There was never any need
for the function to be using the BPF stack in the first place.
> *
> * for (i = PT_REGS_ARGC;
> * i < ARRAY_SIZE(((dt_mstate_t *)0)->argv); i++) {
> - * uint64_t val, tmp;
> * int rc;
> * uint64_t *sp;
> *
> * sp = (uint64_t *)(((dt_pt_regs *)rp)->sp;
> - * rc = bpf_probe_read(&tmp, sizeof(tmp),
> + * rc = bpf_probe_read(dctx->mst->argv[i],
> + * sizeof(uint64_t),
> * &sp[i - PT_REGS_ARGC +
> * PT_REGS_ARGSTKBASE]);
> - * // mov %r1, %fp
> - * // add %r1, DT_STK_SPILL(0)
> + * // mov %r1, %r7
> + * // add %r1, DMST_ARG(i)
> * // mov %r2, sizeof(uint64_t)
> * // lddw %r3, [%rp + PT_REGS_SP]
> * // add %r3, (i - PT_REGS_ARGC +
> * PT_REGS_ARGSTKBASE) *
> * sizeof(uint64_t)
> * // call bpf_probe_read
> - * val = 0;
> - * // mov %r1, 0
> * if (rc != 0)
> - * goto store:
> - * // jne %r0, 0, lbl_store
> - * val = tmp;
> - * // lddw %r1, [%rp + DT_STK_SPILL(0)]
> + * goto ok:
s/ok/lbl_ok/
> + * // jeq %r0, 0, lbl_ok
> + * dctx->mst->argv[i] = 0;
> + * // stdw [%r7 + DMST_ARG(i)], 0
> *
> - * store:
> - * dctx->mst->argv[i] = val;
> - * // store:
> - * // stdw [%r7 + DMST_ARG(i)], %r1
> + * lbl_ok:
> + * // lbl_ok:
Put "lbl_ok:" and "// lbl_ok:" on the same line.
> * }
> *
> */
Actually, the whole comment block is more verbose yet less readable than
the actually assembly code. Just drop the comment block or pare it down
substantially.
> for (i = PT_REGS_ARGC; i < ARRAY_SIZE(((dt_mstate_t *)0)->argv); i++) {
> - uint_t lbl_store = dt_irlist_label(dlp);
> + uint_t lbl_ok = dt_irlist_label(dlp);
>
> - emit(dlp, BPF_MOV_REG(BPF_REG_1, BPF_REG_FP));
> - emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, DT_STK_SPILL(0)));
> + emit(dlp, BPF_MOV_REG(BPF_REG_1, BPF_REG_7));
> + emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, DMST_ARG(i)));
> emit(dlp, BPF_MOV_IMM(BPF_REG_2, sizeof(uint64_t)));
> emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, rp, PT_REGS_SP));
> emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, (i - PT_REGS_ARGC + PT_REGS_ARGSTKBASE) * sizeof(uint64_t)));
> emit(dlp, BPF_CALL_HELPER(BPF_FUNC_probe_read));
> - emit(dlp, BPF_MOV_IMM(BPF_REG_1, 0));
> - emit(dlp, BPF_BRANCH_IMM(BPF_JNE, BPF_REG_0, 0, lbl_store));
> - emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_FP, DT_STK_SPILL(0)));
> - emitl(dlp, lbl_store,
> - BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(i), BPF_REG_1));
> + emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_ok));
> + emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(i), 0));
> + emitl(dlp, lbl_ok,
> + BPF_NOP());
> }
> }
>
> @@ -361,22 +362,19 @@ dt_cg_call_clause(dtrace_hdl_t *dtp, dt_ident_t *idp, dt_clause_arg_t *arg)
> dt_irlist_t *dlp = arg->dlp;
>
> /*
> - * if (*dctx.act != act) // ldw %r0, [%fp +
> - * // DCTX_FP(DCTX_ACT)]
> + * if (*dctx.act != act) // ldw %r0, [%r9 + DCTX_ACT]
> * goto exit; // ldw %r0, [%r0 + 0]
> * // jne %r0, act, lbl_exit
> */
> - emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_FP, DCTX_FP(DCTX_ACT)));
> + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_9, DCTX_ACT));
> emit(dlp, BPF_LOAD(BPF_W, BPF_REG_0, BPF_REG_0, 0));
> emit(dlp, BPF_BRANCH_IMM(BPF_JNE, BPF_REG_0, arg->act, arg->lbl_exit));
>
> /*
> - * dt_clause(dctx); // mov %r1, %fp
> - * // add %r1, DCTX_FP(0)
> + * dt_clause(dctx); // mov %r1, %r9
> * // call clause
> */
> - emit(dlp, BPF_MOV_REG(BPF_REG_1, BPF_REG_FP));
> - emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, DCTX_FP(0)));
> + emit(dlp, BPF_MOV_REG(BPF_REG_1, BPF_REG_9));
> emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
>
> return 0;
> @@ -424,11 +422,11 @@ dt_cg_tramp_epilogue_advance(dt_pcb_t *pcb, dt_activity_t act)
> dt_cg_tramp_call_clauses(pcb, pcb->pcb_probe, act);
>
> /*
> - * (*dctx.act)++; // lddw %r0, [%fp + DCTX_FP(DCTX_ACT)]
> + * (*dctx.act)++; // lddw %r0, [%r9 + DCTX_ACT)]
> * // mov %r1, 1
> * // xadd [%r0 + 0], %r1
> */
> - emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_FP, DCTX_FP(DCTX_ACT)));
> + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_9, DCTX_ACT));
> emit(dlp, BPF_MOV_IMM(BPF_REG_1, 1));
> emit(dlp, BPF_XADD_REG(BPF_W, BPF_REG_0, 0, BPF_REG_1));
>
> diff --git a/libdtrace/dt_dctx.h b/libdtrace/dt_dctx.h
> index 76d48647..7d38dfd8 100644
> --- a/libdtrace/dt_dctx.h
> +++ b/libdtrace/dt_dctx.h
> @@ -29,6 +29,16 @@ typedef struct dt_mstate {
> uint64_t argv[10]; /* Probe arguments */
> } dt_mstate_t;
>
> +#define DMST_EPID offsetof(dt_mstate_t, epid)
> +#define DMST_PRID offsetof(dt_mstate_t, prid)
> +#define DMST_CLID offsetof(dt_mstate_t, clid)
> +#define DMST_TAG offsetof(dt_mstate_t, tag)
> +#define DMST_ERRNO offsetof(dt_mstate_t, syscall_errno)
> +#define DMST_FAULT offsetof(dt_mstate_t, fault)
> +#define DMST_TSTAMP offsetof(dt_mstate_t, tstamp)
> +#define DMST_REGS offsetof(dt_mstate_t, regs)
> +#define DMST_ARG(n) offsetof(dt_mstate_t, argv[n])
> +
> /*
> * The DTrace context.
> */
> @@ -53,6 +63,7 @@ typedef struct dt_dctx {
> #define DCTX_AGG offsetof(dt_dctx_t, agg)
> #define DCTX_GVARS offsetof(dt_dctx_t, gvars)
> #define DCTX_LVARS offsetof(dt_dctx_t, lvars)
> +
> #define DCTX_SIZE ((int16_t)sizeof(dt_dctx_t))
>
> /*
> @@ -89,22 +100,6 @@ typedef struct dt_dctx {
> */
> #define DMEM_SIZE(dtp) (DMEM_STRTOK(dtp) + DMEM_STRTOK_SZ(dtp))
>
> -/*
> - * Macro to determine the (negative) offset from the frame pointer (%fp) for
> - * the given offset in dt_dctx_t.
> - */
> -#define DCTX_FP(off) (DT_STK_DCTX + (int16_t)(off))
> -
> -#define DMST_EPID offsetof(dt_mstate_t, epid)
> -#define DMST_PRID offsetof(dt_mstate_t, prid)
> -#define DMST_CLID offsetof(dt_mstate_t, clid)
> -#define DMST_TAG offsetof(dt_mstate_t, tag)
> -#define DMST_ERRNO offsetof(dt_mstate_t, syscall_errno)
> -#define DMST_FAULT offsetof(dt_mstate_t, fault)
> -#define DMST_TSTAMP offsetof(dt_mstate_t, tstamp)
> -#define DMST_REGS offsetof(dt_mstate_t, regs)
> -#define DMST_ARG(n) offsetof(dt_mstate_t, argv[n])
> -
> /*
> * DTrace BPF programs can use all BPF registers except for the the %fp (frame
> * pointer) register and the highest numbered register (currently %r9) that is
> @@ -112,6 +107,32 @@ typedef struct dt_dctx {
> */
> #define DT_STK_NREGS (MAX_BPF_REG - 2)
>
> +/*
> + * The stack layout for BPF programs that are generated as trampolines for
> + * D clauses.
> + *
> + * Note: The BPF frame pointer points to the address of the first byte past the
> + * end of the stack. 8-byte values are properly aligned at offsets -8,
> + * -16, -24, -32, etc. -- that is, negative multiples of sizeof(uint64_t).
The Note confuses me; how about dropping it? The comment "first byte
past the end of the stack" sounds like the FP is pointing to some
address that is 7 mod 8 -- that is, some unaligned address. Anyhow, to
me the Note contributes only confusion.
> + *
> + * +----------------+
> + * SP(n) | |
> + * +----------------+
> + * | ... |
> + * +----------------+
> + * SP(1) | |
> + * +----------------+
> + * SP_BASE = SP(0) | |
> + * +----------------+
> + * DCTX | DTrace Context |
> + * +----------------+
> + */
The figure resembles that for clause functions. So one gets the
impression that there are a fixed number of SP(n) slots, even if we do
not know what "n" is. I assume it should really be open-ended, in which
case the figure might be clearer like this:
+ * +----------------+
+ * | ... |
+ * +----------------+
+ * SP(2) | |
+ * +----------------+
+ * SP(1) | |
+ * +----------------+
+ * SP(0) | |
+ * +----------------+
+ * DCTX | DTrace Context |
+ * +----------------+
> +#define DT_STK_BASE ((int16_t)0)
> +#define DT_STK_SLOT_SZ ((int16_t)sizeof(uint64_t))
> +
> +#define DT_TRAMP_SP_BASE (DT_STK_BASE - DCTX_SIZE - DT_STK_SLOT_SZ)
> +#define DT_TRAMP_SP(n) (DT_TRAMP_SP_BASE - (n) * DT_STK_SLOT_SZ)
> +
The DT_STK_BASE and DT_STK_SLOT_SZ definitions should appear before the
trampoline comment block since they do not relate specifically to
trampolines; the trampoline comment block should be adjacent to the
#define statements they describe.
I do not understand what DT_STK_BASE is other than 0. How about
eliminating DT_STK_BASE?
And how about eliminating DT_TRAMP_SP_BASE, which is used only in the
succeeding line, which would be clearer as:
#define DT_TRAMP_SP(n) (DT_STK_BASE - DCTX_SIZE - ((n)+1) * DT_STK_SLOT_SZ)
This would also mean eliminating "SP_BASE" from the figure. Similarly it
would make sense to eliminate DT_STK_SPILL_BASE.
> /*
> * The stack layout for functions that implement a D clause is encoded with the
> * following constants.
> @@ -131,13 +152,10 @@ typedef struct dt_dctx {
> * +----------------+
> * SPILL_BASE = SPILL(0) | %r0 |
> * +----------------+
> - * DCTX | DTrace Context |
> + * DCTX | Ptr to dctx |
> * +----------------+
> */
Higher up in the file, we have:
/*
* DTrace BPF programs can use all BPF registers except for the
the %fp (frame
* pointer) register and the highest numbered register
(currently %r9) that is
* used to store the base pointer for the trace output record.
*/
#define DT_STK_NREGS (MAX_BPF_REG - 2)
But that is not how trampolines and bpf/* functions use %r9. Also, the
highest register is %r10. So I suggest rewriting this as:
/*
* DTrace clause functions can spill any BPF registers except
for the the %fp
* (frame pointer) register and the next highest register (used
to store the
* base pointer for the trace output record).
*/
Also, this comment and definition should be moved closer to the section
that discusses stack layout for clause functions.
> -#define DT_STK_BASE ((int16_t)0)
> -#define DT_STK_SLOT_SZ ((int16_t)sizeof(uint64_t))
> -
> -#define DT_STK_DCTX (DT_STK_BASE - DCTX_SIZE)
> +#define DT_STK_DCTX (DT_STK_BASE - DT_STK_SLOT_SZ)
> #define DT_STK_SPILL_BASE (DT_STK_DCTX - DT_STK_SLOT_SZ)
> #define DT_STK_SPILL(n) (DT_STK_SPILL_BASE - (n) * DT_STK_SLOT_SZ)
>
> diff --git a/libdtrace/dt_prov_dtrace.c b/libdtrace/dt_prov_dtrace.c
> index c8a55475..d959a829 100644
> --- a/libdtrace/dt_prov_dtrace.c
> +++ b/libdtrace/dt_prov_dtrace.c
> @@ -128,30 +128,30 @@ static void trampoline(dt_pcb_t *pcb)
>
> /*
> * key = DT_STATE_(BEGANON|ENDEDON);
> - * // stw [%fp + DT_STK_SPILL(0)],
> + * // stw [%fp + DT_TRAMP_SP(0)],
> * // DT_STATE_(BEGANON|ENDEDON)
> * val = bpf_get_smp_processor_id();
> * // call bpf_get_smp_processor_id
> - * // stw [%fp + DT_STK_SPILL(1)], %r0
> + * // stw [%fp + DT_TRAMP_SP(1)], %r0
> * bpf_map_update_elem(state, &key, &val, BPF_ANY);
> * // lddw %r1, &state
> * // mov %r2, %fp
> - * // add %r2, DT_STK_SPILL(0)
> + * // add %r2, DT_TRAMP_SP(0)
> * // mov %r3, %fp
> - * // add %r3, DT_STK_SPILL(1)
> + * // add %r3, DT_TRAMP_SP(1)
> * // mov %r4, BPF_ANY
> * // call bpf_map_update_elem
> */
> dt_ident_t *state = dt_dlib_get_map(pcb->pcb_hdl, "state");
>
> - emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_FP, DT_STK_SPILL(0), key));
> + emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_FP, DT_TRAMP_SP(0), key));
> emit(dlp, BPF_CALL_HELPER(BPF_FUNC_get_smp_processor_id));
> - emit(dlp, BPF_STORE(BPF_W, BPF_REG_FP, DT_STK_SPILL(1), BPF_REG_0));
> + emit(dlp, BPF_STORE(BPF_W, BPF_REG_FP, DT_TRAMP_SP(1), BPF_REG_0));
> dt_cg_xsetx(dlp, state, DT_LBL_NONE, BPF_REG_1, state->di_id);
> emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> - emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_STK_SPILL(0)));
> + emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP(0)));
> emit(dlp, BPF_MOV_REG(BPF_REG_3, BPF_REG_FP));
> - emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, DT_STK_SPILL(1)));
> + emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, DT_TRAMP_SP(1)));
> emit(dlp, BPF_MOV_IMM(BPF_REG_4, BPF_ANY));
> emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_update_elem));
>
> diff --git a/test/unittest/codegen/tst.stack_layout.r b/test/unittest/codegen/tst.stack_layout.r
> index 7c02de0d..af2a4aad 100644
> --- a/test/unittest/codegen/tst.stack_layout.r
> +++ b/test/unittest/codegen/tst.stack_layout.r
> @@ -1,12 +1,12 @@
> Base: 0
> -dctx: -72
> -%r0: -80
> -%r1: -88
> -%r2: -96
> -%r3: -104
> -%r4: -112
> -%r5: -120
> -%r6: -128
> -%r7: -136
> -%r8: -144
> -scratch: -145 .. -512
> +dctx: -8
> +%r0: -16
> +%r1: -24
> +%r2: -32
> +%r3: -40
> +%r4: -48
> +%r5: -56
> +%r6: -64
> +%r7: -72
> +%r8: -80
> +scratch: -81 .. -512
More information about the DTrace-devel
mailing list