[DTrace-devel] [PATCH] Add disassembler annotations for string constants
Eugene Loh
eugene.loh at oracle.com
Fri Feb 25 18:47:28 UTC 2022
Reviewed-by: Eugene Loh <eugene.loh at oracle.com>
with comments below:
On 2/24/22 11:49 PM, Kris Van Hees via DTrace-devel wrote:
> Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> ---
> libdtrace/dt_dis.c | 51 ++++++++++++++++--------
> test/unittest/disasm/tst.ann-strconst.r | 1 +
> test/unittest/disasm/tst.ann-strconst.sh | 26 ++++++++++++
> 3 files changed, 61 insertions(+), 17 deletions(-)
> create mode 100644 test/unittest/disasm/tst.ann-strconst.r
> create mode 100755 test/unittest/disasm/tst.ann-strconst.sh
If a new, nearly empty test directory is being created, then it would be
appropriate to bring closely related tests into the same new directory.
E.g.,
variables/tvar/tst.dis-ann.sh
dtrace-util/tst.DisOption.sh
dif/tst.DisSubrNames.sh
dif/tst.DisVarNames.sh
>
> diff --git a/libdtrace/dt_dis.c b/libdtrace/dt_dis.c
> index efdaf012..f92cfdcf 100644
> --- a/libdtrace/dt_dis.c
> +++ b/libdtrace/dt_dis.c
> @@ -82,9 +82,12 @@ dt_dis_varname_off(const dtrace_difo_t *dp, uint_t off, uint_t scope, uint_t add
> }
>
> /*
> - * Check if we are loading either the gvar or lvar BPF map. If so,
> - * we want to report the name of the variable it is looking up.
> - * The sequence of instructions we are looking for is:
> + * Check if we are loading from the gvar, lvar, or strtab BPF map.
> + *
> + * If we are loading a gvar or lvar, we want to report the variable name.
> + * If we are loading a string constant, we want to report its value.
> + *
> + * For variables, the sequence of instructions we are looking for is:
> * insn code dst src offset imm
> * -2: ld dst %fp DT_STK_DCTX 00000000
> * -1: ld dst dst DCTX_*VARS 00000000
> @@ -96,15 +99,22 @@ dt_dis_varname_off(const dtrace_difo_t *dp, uint_t off, uint_t scope, uint_t add
> * - load by value
> * - store by value
> * - access by reference
> + *
> + * For string constants, the sequence of instructions we are looking for is:
> + * insn code dst src offset imm
> + * -2: ld dst %fp DT_STK_DCTX 00000000
> + * -1: ld dst dst DCTX_STRTAB 00000000
> + * 0: add dst 0 0 var_offset
> + * where instruction 0 is the current instruction.
> */
> static void
> -dt_dis_varname(const dtrace_difo_t *dp, const struct bpf_insn *in, uint_t addr,
> +dt_dis_refname(const dtrace_difo_t *dp, const struct bpf_insn *in, uint_t addr,
> int n, FILE *fp)
> {
> __u8 ldcode = BPF_LDX | BPF_MEM | BPF_DW;
> __u8 addcode = BPF_ALU64 | BPF_ADD | BPF_K;
> - int dst, scope, var_offset = -1;
> - const char *vname;
> + int dst, scope = -1, var_offset = -1;
> + const char *name;
I get that vname is now an outdated name, but "name" doesn't seem right
either. You just said that in the strtab case you're going to print a
value. So maybe s/vname/label/ or s/vname/string/?
>
> /* make sure in[-2] and in[-1] exist */
> if (addr < 2)
> @@ -117,7 +127,7 @@ dt_dis_varname(const dtrace_difo_t *dp, const struct bpf_insn *in, uint_t addr,
> scope = DIFV_SCOPE_GLOBAL;
> else if (in[-1].off == DCTX_LVARS)
> scope = DIFV_SCOPE_LOCAL;
> - else
> + else if (in[-1].off != DCTX_STRTAB)
> goto out;
>
> /* check preceding instructions */
> @@ -134,8 +144,16 @@ dt_dis_varname(const dtrace_difo_t *dp, const struct bpf_insn *in, uint_t addr,
> /* check the current instruction and read var_offset */
> if (in->dst_reg != dst)
> goto out;
> - if (BPF_CLASS(in->code) == BPF_LDX && BPF_MODE(in->code) == BPF_MEM &&
> - in->src_reg == dst && in->imm == 0)
> + if (in[-1].off == DCTX_STRTAB &&
> + in->code == addcode && in->src_reg == 0 && in->off == 0) {
> + name = dt_difo_getstr(dp, in->imm);
> + if (name != NULL)
> + fprintf(fp, "%*s! \"%s\"", DT_DIS_PAD(n), "", name);
> +
> + goto out;
> + } else if (BPF_CLASS(in->code) == BPF_LDX &&
> + BPF_MODE(in->code) == BPF_MEM &&
> + in->src_reg == dst && in->imm == 0)
> var_offset = in->off;
> else if (BPF_CLASS(in->code) == BPF_STX &&
> BPF_MODE(in->code) == BPF_MEM &&
This is probably okay, but the logic would be tighter like this:
if (in[-1].off == DCTX_STRTAB) {
if (in->code == addcode && in->src_reg == 0 && in->off == 0) {
name = dt_difo_getstr(dp, in->imm);
if (name != NULL)
fprintf(fp, "%*s! \"%s\"", DT_DIS_PAD(n), "", name);
}
goto out;
} else ... // var cases
> @@ -147,11 +165,10 @@ dt_dis_varname(const dtrace_difo_t *dp, const struct bpf_insn *in, uint_t addr,
> goto out;
>
> /* print name */
> - vname = dt_dis_varname_off(dp, var_offset, scope, addr);
> - if (vname == NULL)
> - goto out;
> - fprintf(fp, "%*s! %s%s", DT_DIS_PAD(n), "",
> - scope == DIFV_SCOPE_LOCAL ? "this->" : "", vname);
> + name = dt_dis_varname_off(dp, var_offset, scope, addr);
> + if (name != NULL)
> + fprintf(fp, "%*s! %s%s", DT_DIS_PAD(n), "",
> + scope == DIFV_SCOPE_LOCAL ? "this->" : "", name);
>
> out:
> fprintf(fp, "\n");
> @@ -192,7 +209,7 @@ dt_dis_op2imm(const dtrace_difo_t *dp, const char *name, uint_t addr,
> int n;
>
> n = fprintf(fp, "%-4s %s, %d", name, reg(in->dst_reg), in->imm);
> - dt_dis_varname(dp, in, addr, n, fp);
> + dt_dis_refname(dp, in, addr, n, fp);
>
> return 0;
> }
> @@ -233,7 +250,7 @@ dt_dis_load(const dtrace_difo_t *dp, const char *name, uint_t addr,
> n = fprintf(fp, "%-4s %s, [%s%+d]", name, reg(in->dst_reg),
> reg(in->src_reg), in->off);
>
> - dt_dis_varname(dp, in, addr, n, fp);
> + dt_dis_refname(dp, in, addr, n, fp);
>
> return 0;
> }
> @@ -269,7 +286,7 @@ dt_dis_store(const dtrace_difo_t *dp, const char *name, uint_t addr,
> n = fprintf(fp, "%-4s [%s%+d], %s", name, reg(in->dst_reg), in->off,
> reg(in->src_reg));
>
> - dt_dis_varname(dp, in, addr, n, fp);
> + dt_dis_refname(dp, in, addr, n, fp);
>
> return 0;
> }
> diff --git a/test/unittest/disasm/tst.ann-strconst.r b/test/unittest/disasm/tst.ann-strconst.r
> new file mode 100644
> index 00000000..49273e33
> --- /dev/null
> +++ b/test/unittest/disasm/tst.ann-strconst.r
> @@ -0,0 +1 @@
> +07 X 0 0000 XXXXXXXX add %rX, D ! "strconst"
> diff --git a/test/unittest/disasm/tst.ann-strconst.sh b/test/unittest/disasm/tst.ann-strconst.sh
> new file mode 100755
> index 00000000..28b54948
> --- /dev/null
> +++ b/test/unittest/disasm/tst.ann-strconst.sh
> @@ -0,0 +1,26 @@
> +#!/bin/bash
> +#
> +# Oracle Linux DTrace.
> +# Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
> +# Licensed under the Universal Permissive License v 1.0 as shown at
> +# http://oss.oracle.com/licenses/upl.
> +#
> +
> +dtrace=$1
> +
> +$dtrace $dt_flags -Sen '
> +BEGIN
> +{
> + var = "strconst";
> + exit(0);
> +}
> +' 2>&1 | \
> + awk '/ ! "strconst"/ {
> + sub(/^[^:]+: /, "");
> + sub(/^07 [0-9a-f] /, "07 X ");
[0-9a-f]? There are not that many BPF registers. And r9 and r10 are
off limits. So how about [0-8]?
> + sub(/[0-9a-f]{8} add/, "XXXXXXXX add");
> + sub(/%r[0-9], [0-9]+ +/, "%rX, D ");
> + print;
> + }'
> +
> +exit $?
Actually, I think this check can be more succinct, more readable, and
more robust by using a single regex. The sequence of substitutions is
changing parts of the line to a "standard" form, but those substitutions
are unnecessary. All we want to know in the end is if the sought line
is found or not. E.g.:
awk '/^[^:]+: 07 [0-8] 0 0000 [0-9a-f]{8} add +%r[0-9],
[0-9]+ + ! "strconst"/ {print "SUCCESS"}'
Or /^[0-9]+ [0-9]+: 07 etc./. Or use awk exit codes and forgo the .r file.
More information about the DTrace-devel
mailing list