[DTrace-devel] [PATCH 5/6] Fix disassembly for global and local variables
Kris Van Hees
kris.van.hees at oracle.com
Wed Mar 31 22:44:09 PDT 2021
Comments below...
On Fri, Mar 19, 2021 at 01:45:33PM -0400, eugene.loh at oracle.com wrote:
> From: Eugene Loh <eugene.loh at oracle.com>
>
> Recent patches have changed how global and local variables are
> accessed. This means that the disassembler, which relies on
> recognizing BPF instructions to add annotations, must be updated.
>
> Add a "struct dtrace_difv" field dtdv_offset so that variables
> can be looked up by offset rather than ID.
>
> Rename dt_dis_varname() to dt_dis_varname_id(). Add a by-offset
> lookup function dt_dis_varname_off(). Eliminate dt_dis_lvarname().
>
> Add code to recognize access of global and local variables from
> the BPF instruction sequence:
> insn code dst src offset imm
> ---- ---- --- --- ----------- --------
> -1: ld dst %fp DT_STK_DCTX 00000000
> 0: ld dst dst DCTX_*VARS 00000000
> followed by one of
> +1: ld dst dst var_offset 00000000
> +1: st dst src var_offset 00000000
> +1: add dst 0 0 var_offset
> the last case being for access by-reference.
>
> That is, if we have a load instruction, we see if it might be
> "instruction 0". If so, we check the preceding "instruction -1".
> If we still match the pattern, we examine the following "instruction +1"
> and extract the var_offset to look up the variable name. Since the
> instruction we are annotating is always a load, we can consolidate
> all annotation of global and local arrays in the disassembler to
> dt_dis_load().
Why are you annotating the instruction that loads the BPF map value pointer
from the dctx? That is always going to be the same for every variable of a
particular kind (global or local, and in the future also TLS). The one that
we really want to annotate is the instruction that loads from or stores to a
particular variable.
> Add a test, checking global and local scalars and structs.
>
> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
> ---
> include/dtrace/dif.h | 1 +
> libdtrace/dt_as.c | 1 +
> libdtrace/dt_dis.c | 130 ++++++++++++++++-----------
> test/unittest/dif/tst.DisVarNames.r | 23 +++++
> test/unittest/dif/tst.DisVarNames.sh | 42 +++++++++
> 5 files changed, 145 insertions(+), 52 deletions(-)
> create mode 100644 test/unittest/dif/tst.DisVarNames.r
> create mode 100755 test/unittest/dif/tst.DisVarNames.sh
>
> diff --git a/include/dtrace/dif.h b/include/dtrace/dif.h
> index ad4d2f3a..191aa73c 100644
> --- a/include/dtrace/dif.h
> +++ b/include/dtrace/dif.h
> @@ -50,6 +50,7 @@ typedef struct dtrace_diftype {
> typedef struct dtrace_difv {
> uint32_t dtdv_name;
> uint32_t dtdv_id;
> + uint32_t dtdv_offset;
> uint8_t dtdv_kind;
> uint8_t dtdv_scope;
> uint32_t dtdv_insn_from;
> diff --git a/libdtrace/dt_as.c b/libdtrace/dt_as.c
> index 786fc22c..f648f21a 100644
> --- a/libdtrace/dt_as.c
> +++ b/libdtrace/dt_as.c
> @@ -92,6 +92,7 @@ dt_copyvar(dt_idhash_t *dhp, dt_ident_t *idp, dtrace_hdl_t *dtp)
>
> dvp->dtdv_name = (uint_t)stroff;
> dvp->dtdv_id = idp->di_id;
> + dvp->dtdv_offset = idp->di_offset;
> dvp->dtdv_flags = 0;
>
> switch (idp->di_kind) {
> diff --git a/libdtrace/dt_dis.c b/libdtrace/dt_dis.c
> index d4f4ad67..1a589bca 100644
> --- a/libdtrace/dt_dis.c
> +++ b/libdtrace/dt_dis.c
> @@ -45,7 +45,7 @@ reg(int r)
> }
>
> static const char *
> -dt_dis_varname(const dtrace_difo_t *dp, uint_t id, uint_t scope, uint_t addr)
> +dt_dis_varname_id(const dtrace_difo_t *dp, uint_t id, uint_t scope, uint_t addr)
> {
> const dtrace_difv_t *dvp = dp->dtdo_vartab;
> uint_t i;
> @@ -63,20 +63,18 @@ dt_dis_varname(const dtrace_difo_t *dp, uint_t id, uint_t scope, uint_t addr)
> return NULL;
> }
>
> -static char *
> -dt_dis_lvarname(const dtrace_difo_t *dp, int reg, int var, uint_t addr,
> - char *buf, int len)
> +static const char *
> +dt_dis_varname_off(const dtrace_difo_t *dp, uint_t off, uint_t scope, uint_t addr)
> {
> - if (reg == BPF_REG_FP) {
> - var = -1; /* FIXME: need to find variable offset */
> - if (var != -1) {
> - const char *vname;
> -
> - vname = dt_dis_varname(dp, var, DIFV_SCOPE_LOCAL, addr);
> - if (vname) {
> - snprintf(buf, len, "this->%s", vname);
> - return buf;
> - }
> + uint_t i;
> +
> + for (i = 0; i < dp->dtdo_varlen; i++) {
> + const dtrace_difv_t *dvp = &dp->dtdo_vartab[i];
> + if (dvp->dtdv_offset == off && dvp->dtdv_scope == scope &&
> + dvp->dtdv_insn_from <= addr && addr <= dvp->dtdv_insn_to) {
> + if (dvp->dtdv_name < dp->dtdo_strlen)
> + return dp->dtdo_strtab + dvp->dtdv_name;
> + break;
> }
> }
>
> @@ -150,19 +148,68 @@ static uint_t
> dt_dis_load(const dtrace_difo_t *dp, const char *name, uint_t addr,
> const struct bpf_insn *in, const char *rname, FILE *fp)
> {
> - char buf[256];
> - const char *vname;
> int n;
> + __u8 ldcode = BPF_LDX | BPF_MEM | BPF_DW;
> + __u8 stcode = BPF_STX | BPF_MEM | BPF_DW;
> + __u8 addcode = BPF_ALU64 | BPF_ADD | BPF_K;
> + int dst = in->dst_reg;
> + int scope;
>
> n = fprintf(fp, "%-4s %s, [%s%+d]", name, reg(in->dst_reg),
> reg(in->src_reg), in->off);
>
> - vname = dt_dis_lvarname(dp, in->src_reg, in->off, addr, buf,
> - sizeof(buf));
> - if (vname)
> - fprintf(fp, "%*s! %s\n", DT_DIS_PAD(n), "", vname);
> + /*
> + * Check if we are loading either the gvar or lvar BPF map. If so,
> + * we want to report the name of the variable it is looking up.
> + * The sequence of instructions we are looking for is:
> + * insn code dst src offset imm
> + * -1: ld dst %fp DT_STK_DCTX 00000000
> + * 0: ld dst dst DCTX_*VARS 00000000
> + * followed by one of
> + * +1: ld dst dst var_offset 00000000
> + * +1: st dst src var_offset 00000000
> + * +1: add dst 0 0 var_offset
> + * the last case being for access by-reference.
> + */
> + if (in->off == DCTX_GVARS)
> + scope = DIFV_SCOPE_GLOBAL;
> + else if (in->off == DCTX_LVARS)
> + scope = DIFV_SCOPE_LOCAL;
> else
> - fprintf(fp, "\n");
> + scope = -1;
> + if (in[ 0].code == ldcode &&
> + in[ 0].src_reg == dst &&
> + scope >= 0 &&
> + in[ 0].imm == 0 &&
> + in[-1].code == ldcode &&
As far as I can see you never validate that addr > 0 and therefore you do not
guaramtee that in[-1] is actually safe. You need to ensure that addr > 0.
But as mentioned above, you are annotating the wrong instruction.
> + in[-1].dst_reg == dst &&
> + in[-1].src_reg == BPF_REG_FP &&
> + in[-1].off == DT_STK_DCTX &&
> + in[-1].imm == 0 &&
> + in[+1].dst_reg == dst) {
> + int var_offset = -1;
> + const char *vname = NULL;
> +
> + if (in[+1].code == ldcode &&
> + in[+1].src_reg == dst &&
> + in[+1].imm == 0)
> + var_offset = in[+1].off;
> + else if (in[+1].code == stcode &&
> + in[+1].src_reg != dst &&
> + in[+1].imm == 0)
> + var_offset = in[+1].off;
> + else if (in[+1].code == addcode &&
> + in[+1].src_reg == 0 &&
> + in[+1].off == 0)
> + var_offset = in[+1].imm;
> +
> + if (var_offset >= 0)
> + vname = dt_dis_varname_off(dp, var_offset, scope, addr);
> + if (vname)
> + fprintf(fp, "%*s! %s%s", DT_DIS_PAD(n), "",
> + scope == DIFV_SCOPE_LOCAL ? "this->" : "", vname);
> + }
> + fprintf(fp, "\n");
>
> return 0;
> }
> @@ -193,20 +240,9 @@ static uint_t
> dt_dis_store(const dtrace_difo_t *dp, const char *name, uint_t addr,
> const struct bpf_insn *in, const char *rname, FILE *fp)
> {
> - char buf[256];
> - const char *vname;
> - int n;
> -
> - n = fprintf(fp, "%-4s [%s%+d], %s", name, reg(in->dst_reg), in->off,
> + fprintf(fp, "%-4s [%s%+d], %s\n", name, reg(in->dst_reg), in->off,
> reg(in->src_reg));
>
> - vname = dt_dis_lvarname(dp, in->dst_reg, in->off, addr, buf,
> - sizeof(buf));
> - if (vname)
> - fprintf(fp, "%*s! %s\n", DT_DIS_PAD(n), "", vname);
> - else
> - fprintf(fp, "\n");
> -
> return 0;
> }
>
> @@ -215,24 +251,14 @@ static uint_t
> dt_dis_store_imm(const dtrace_difo_t *dp, const char *name, uint_t addr,
> const struct bpf_insn *in, const char *rname, FILE *fp)
> {
> - char buf[256];
> - const char *vname;
> int n;
>
> n = fprintf(fp, "%-4s [%s%+d], %d", name, reg(in->dst_reg), in->off,
> in->imm);
>
> - vname = dt_dis_lvarname(dp, in->dst_reg, in->off, addr, buf,
> - sizeof(buf));
> - if (vname) {
> - if (rname)
> - fprintf(fp, "%*s! %s = %s\n", DT_DIS_PAD(n), "",
> - vname, rname);
> - else
> - fprintf(fp, "%*s! %s\n", DT_DIS_PAD(n), "", vname);
> - } else if (rname) {
> + if (rname)
> fprintf(fp, "%*s! = %s\n", DT_DIS_PAD(n), "", rname);
> - } else
> + else
> fprintf(fp, "\n");
>
> return 0;
> @@ -252,7 +278,7 @@ dt_dis_bpf_args(const dtrace_difo_t *dp, const char *fn,
> */
> in--;
> snprintf(buf, len, "%s",
> - dt_dis_varname(dp, in->imm, DIFV_SCOPE_GLOBAL, addr));
> + dt_dis_varname_id(dp, in->imm, DIFV_SCOPE_GLOBAL, addr));
> return buf;
> } else if (strcmp(fn, "dt_get_tvar") == 0 ||
> strcmp(fn, "dt_set_tvar") == 0) {
> @@ -263,7 +289,7 @@ dt_dis_bpf_args(const dtrace_difo_t *dp, const char *fn,
> */
> in--;
> snprintf(buf, len, "self->%s",
> - dt_dis_varname(dp, in->imm + DIF_VAR_OTHER_UBASE,
> + dt_dis_varname_id(dp, in->imm + DIF_VAR_OTHER_UBASE,
> DIFV_SCOPE_THREAD, addr));
> return buf;
> } else if (strcmp(fn, "dt_get_string") == 0) {
> @@ -656,8 +682,8 @@ dt_dis_difo(const dtrace_difo_t *dp, FILE *fp, const dt_ident_t *idp,
> }
>
> if (dp->dtdo_varlen != 0) {
> - fprintf(fp, "\n%-16s %-4s %-3s %-3s %-11s %-4s %s\n",
> - "NAME", "ID", "KND", "SCP", "RANGE", "FLAG", "TYPE");
> + fprintf(fp, "\n%-16s %-4s %-6s %-3s %-3s %-11s %-4s %s\n",
> + "NAME", "ID", "OFFSET", "KND", "SCP", "RANGE", "FLAG", "TYPE");
> }
>
> for (i = 0; i < dp->dtdo_varlen; i++) {
> @@ -705,9 +731,9 @@ dt_dis_difo(const dtrace_difo_t *dp, FILE *fp, const dt_ident_t *idp,
> if (v->dtdv_flags & DIFV_F_MOD)
> strcat(flags, "/w");
>
> - fprintf(fp, "%-16s %-4x %-3s %-3s %-11s %-4s %s\n",
> - &dp->dtdo_strtab[v->dtdv_name], v->dtdv_id, kind,
> - scope, range, flags + 1,
> + fprintf(fp, "%-16s %-4x %-6x %-3s %-3s %-11s %-4s %s\n",
> + &dp->dtdo_strtab[v->dtdv_name], v->dtdv_id,
> + v->dtdv_offset, kind, scope, range, flags + 1,
> dt_dis_typestr(&v->dtdv_type, type, sizeof(type)));
> }
>
> diff --git a/test/unittest/dif/tst.DisVarNames.r b/test/unittest/dif/tst.DisVarNames.r
> new file mode 100644
> index 00000000..f2766716
> --- /dev/null
> +++ b/test/unittest/dif/tst.DisVarNames.r
> @@ -0,0 +1,23 @@
> +Z
> +Y
> +X
> +A
> +A
> +A
> +A
> +B
> +A
> +C
> +Z
> +this->z
> +this->y
> +this->x
> +C
> +this->c
> +this->b
> +this->a
> +this->b
> +C
> +this->x
> +Y
> +this->z
> diff --git a/test/unittest/dif/tst.DisVarNames.sh b/test/unittest/dif/tst.DisVarNames.sh
> new file mode 100755
> index 00000000..ae13823e
> --- /dev/null
> +++ b/test/unittest/dif/tst.DisVarNames.sh
> @@ -0,0 +1,42 @@
> +#!/bin/bash
> +#
> +# Oracle Linux DTrace.
> +# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
> +# Licensed under the Universal Permissive License v 1.0 as shown at
> +# http://oss.oracle.com/licenses/upl.
> +
> +# ASSERTION: That dtrace -S recognizes global and local variable names.
> +
> +dtrace=$1
> +tmpout=$tmpdir/dis-var-names.out
> +
> +$dtrace $dt_flags -Sen '
> +struct foo {
> + int foo_i, foo_j, foo_k;
> +} A, B, C;
> +this struct foo a, b, c;
> +BEGIN {
> + X = Y = Z = 1;
> + A.foo_i = A.foo_j = A.foo_k = 1;
> + B = A;
> + C = A;
> + this->x = this->y = this->z = Z;
> + this->a = this->b = this->c = C;
> + trace(this->b.foo_j);
> + trace(C.foo_k);
> + trace(this->x);
> + trace(Y);
> + trace(this->z);
> +}' |& awk '
> +/ 00000000 lddw %r.*A/ { print "A" }
> +/ 00000000 lddw %r.*B/ { print "B" }
> +/ 00000000 lddw %r.*C/ { print "C" }
> +/ 00000000 lddw %r.*X/ { print "X" }
> +/ 00000000 lddw %r.*Y/ { print "Y" }
> +/ 00000000 lddw %r.*Z/ { print "Z" }
> +/ 00000000 lddw %r.*this->a/ { print "this->a" }
> +/ 00000000 lddw %r.*this->b/ { print "this->b" }
> +/ 00000000 lddw %r.*this->c/ { print "this->c" }
> +/ 00000000 lddw %r.*this->x/ { print "this->x" }
> +/ 00000000 lddw %r.*this->y/ { print "this->y" }
> +/ 00000000 lddw %r.*this->z/ { print "this->z" }'
> --
> 2.18.4
>
>
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel
More information about the DTrace-devel
mailing list