[DTrace-devel] [PATCH] Add disassembler annotations for string constants

Eugene Loh eugene.loh at oracle.com
Fri Feb 25 18:47:28 UTC 2022


Reviewed-by: Eugene Loh <eugene.loh at oracle.com>
with comments below:

On 2/24/22 11:49 PM, Kris Van Hees via DTrace-devel wrote:
> Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> ---
>   libdtrace/dt_dis.c                       | 51 ++++++++++++++++--------
>   test/unittest/disasm/tst.ann-strconst.r  |  1 +
>   test/unittest/disasm/tst.ann-strconst.sh | 26 ++++++++++++
>   3 files changed, 61 insertions(+), 17 deletions(-)
>   create mode 100644 test/unittest/disasm/tst.ann-strconst.r
>   create mode 100755 test/unittest/disasm/tst.ann-strconst.sh
If a new, nearly empty test directory is being created, then it would be 
appropriate to bring closely related tests into the same new directory.  
E.g.,
variables/tvar/tst.dis-ann.sh
dtrace-util/tst.DisOption.sh
dif/tst.DisSubrNames.sh
dif/tst.DisVarNames.sh

>
> diff --git a/libdtrace/dt_dis.c b/libdtrace/dt_dis.c
> index efdaf012..f92cfdcf 100644
> --- a/libdtrace/dt_dis.c
> +++ b/libdtrace/dt_dis.c
> @@ -82,9 +82,12 @@ dt_dis_varname_off(const dtrace_difo_t *dp, uint_t off, uint_t scope, uint_t add
>   }
>   
>   /*
> - * Check if we are loading either the gvar or lvar BPF map.  If so,
> - * we want to report the name of the variable it is looking up.
> - * The sequence of instructions we are looking for is:
> + * Check if we are loading from the gvar, lvar, or strtab BPF map.
> + *
> + * If we are loading a gvar or lvar, we want to report the variable name.
> + * If we are loading a string constant, we want to report its value.
> + *
> + * For variables, the sequence of instructions we are looking for is:
>    *         insn   code  dst  src    offset        imm
>    *          -2:    ld   dst  %fp  DT_STK_DCTX  00000000
>    *          -1:    ld   dst  dst  DCTX_*VARS   00000000
> @@ -96,15 +99,22 @@ dt_dis_varname_off(const dtrace_difo_t *dp, uint_t off, uint_t scope, uint_t add
>    *   - load by value
>    *   - store by value
>    *   - access by reference
> + *
> + * For string constants, the sequence of instructions we are looking for is:
> + *         insn   code  dst  src    offset        imm
> + *          -2:    ld   dst  %fp  DT_STK_DCTX  00000000
> + *          -1:    ld   dst  dst  DCTX_STRTAB  00000000
> + *           0:    add  dst    0     0         var_offset
> + * where instruction 0 is the current instruction.
>    */
>   static void
> -dt_dis_varname(const dtrace_difo_t *dp, const struct bpf_insn *in, uint_t addr,
> +dt_dis_refname(const dtrace_difo_t *dp, const struct bpf_insn *in, uint_t addr,
>   	       int n, FILE *fp)
>   {
>   	__u8		ldcode = BPF_LDX | BPF_MEM | BPF_DW;
>   	__u8		addcode = BPF_ALU64 | BPF_ADD | BPF_K;
> -	int		dst, scope, var_offset = -1;
> -	const char	*vname;
> +	int		dst, scope = -1, var_offset = -1;
> +	const char	*name;
I get that vname is now an outdated name, but "name" doesn't seem right 
either.  You just said that in the strtab case you're going to print a 
value.  So maybe s/vname/label/ or s/vname/string/?
>   
>   	/* make sure in[-2] and in[-1] exist */
>   	if (addr < 2)
> @@ -117,7 +127,7 @@ dt_dis_varname(const dtrace_difo_t *dp, const struct bpf_insn *in, uint_t addr,
>   		scope = DIFV_SCOPE_GLOBAL;
>   	else if (in[-1].off == DCTX_LVARS)
>   		scope = DIFV_SCOPE_LOCAL;
> -	else
> +	else if (in[-1].off != DCTX_STRTAB)
>   		goto out;
>   
>   	/* check preceding instructions */
> @@ -134,8 +144,16 @@ dt_dis_varname(const dtrace_difo_t *dp, const struct bpf_insn *in, uint_t addr,
>   	/* check the current instruction and read var_offset */
>   	if (in->dst_reg != dst)
>   		goto out;
> -	if (BPF_CLASS(in->code) == BPF_LDX && BPF_MODE(in->code) == BPF_MEM &&
> -	    in->src_reg == dst && in->imm == 0)
> +	if (in[-1].off == DCTX_STRTAB &&
> +	    in->code == addcode && in->src_reg == 0 && in->off == 0) {
> +		name = dt_difo_getstr(dp, in->imm);
> +		if (name != NULL)
> +			fprintf(fp, "%*s! \"%s\"", DT_DIS_PAD(n), "", name);
> +
> +		goto out;
> +	} else if (BPF_CLASS(in->code) == BPF_LDX &&
> +		   BPF_MODE(in->code) == BPF_MEM &&
> +		   in->src_reg == dst && in->imm == 0)
>   		var_offset = in->off;
>   	else if (BPF_CLASS(in->code) == BPF_STX &&
>   		 BPF_MODE(in->code) == BPF_MEM &&
This is probably okay, but the logic would be tighter like this:
         if (in[-1].off == DCTX_STRTAB) {
             if (in->code == addcode && in->src_reg == 0 && in->off == 0) {
                 name = dt_difo_getstr(dp, in->imm);
                 if (name != NULL)
                     fprintf(fp, "%*s! \"%s\"", DT_DIS_PAD(n), "", name);
             }
             goto out;
         } else ... // var cases
> @@ -147,11 +165,10 @@ dt_dis_varname(const dtrace_difo_t *dp, const struct bpf_insn *in, uint_t addr,
>   		goto out;
>   
>   	/* print name */
> -	vname = dt_dis_varname_off(dp, var_offset, scope, addr);
> -	if (vname == NULL)
> -		goto out;
> -	fprintf(fp, "%*s! %s%s", DT_DIS_PAD(n), "",
> -		scope == DIFV_SCOPE_LOCAL ? "this->" : "", vname);
> +	name = dt_dis_varname_off(dp, var_offset, scope, addr);
> +	if (name != NULL)
> +		fprintf(fp, "%*s! %s%s", DT_DIS_PAD(n), "",
> +			scope == DIFV_SCOPE_LOCAL ? "this->" : "", name);
>   
>   out:
>   	fprintf(fp, "\n");
> @@ -192,7 +209,7 @@ dt_dis_op2imm(const dtrace_difo_t *dp, const char *name, uint_t addr,
>   	int		n;
>   
>   	n = fprintf(fp, "%-4s %s, %d", name, reg(in->dst_reg), in->imm);
> -	dt_dis_varname(dp, in, addr, n, fp);
> +	dt_dis_refname(dp, in, addr, n, fp);
>   
>   	return 0;
>   }
> @@ -233,7 +250,7 @@ dt_dis_load(const dtrace_difo_t *dp, const char *name, uint_t addr,
>   	n = fprintf(fp, "%-4s %s, [%s%+d]", name, reg(in->dst_reg),
>   		    reg(in->src_reg), in->off);
>   
> -	dt_dis_varname(dp, in, addr, n, fp);
> +	dt_dis_refname(dp, in, addr, n, fp);
>   
>   	return 0;
>   }
> @@ -269,7 +286,7 @@ dt_dis_store(const dtrace_difo_t *dp, const char *name, uint_t addr,
>   	n = fprintf(fp, "%-4s [%s%+d], %s", name, reg(in->dst_reg), in->off,
>   		    reg(in->src_reg));
>   
> -	dt_dis_varname(dp, in, addr, n, fp);
> +	dt_dis_refname(dp, in, addr, n, fp);
>   
>   	return 0;
>   }
> diff --git a/test/unittest/disasm/tst.ann-strconst.r b/test/unittest/disasm/tst.ann-strconst.r
> new file mode 100644
> index 00000000..49273e33
> --- /dev/null
> +++ b/test/unittest/disasm/tst.ann-strconst.r
> @@ -0,0 +1 @@
> +07 X 0 0000 XXXXXXXX    add  %rX, D ! "strconst"
> diff --git a/test/unittest/disasm/tst.ann-strconst.sh b/test/unittest/disasm/tst.ann-strconst.sh
> new file mode 100755
> index 00000000..28b54948
> --- /dev/null
> +++ b/test/unittest/disasm/tst.ann-strconst.sh
> @@ -0,0 +1,26 @@
> +#!/bin/bash
> +#
> +# Oracle Linux DTrace.
> +# Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
> +# Licensed under the Universal Permissive License v 1.0 as shown at
> +# http://oss.oracle.com/licenses/upl.
> +#
> +
> +dtrace=$1
> +
> +$dtrace $dt_flags -Sen '
> +BEGIN
> +{
> +	var = "strconst";
> +	exit(0);
> +}
> +' 2>&1 | \
> +	awk '/ ! "strconst"/ {
> +		sub(/^[^:]+: /, "");
> +		sub(/^07 [0-9a-f] /, "07 X ");
[0-9a-f]?  There are not that many BPF registers.  And r9 and r10 are 
off limits.  So how about [0-8]?
> +		sub(/[0-9a-f]{8}    add/, "XXXXXXXX    add");
> +		sub(/%r[0-9], [0-9]+ +/, "%rX, D ");
> +		print;
> +	     }'
> +
> +exit $?
Actually, I think this check can be more succinct, more readable, and 
more robust by using a single regex.  The sequence of substitutions is 
changing parts of the line to a "standard" form, but those substitutions 
are unnecessary.  All we want to know in the end is if the sought line 
is found or not.  E.g.:
         awk '/^[^:]+: 07 [0-8] 0 0000 [0-9a-f]{8}    add +%r[0-9], 
[0-9]+ + ! "strconst"/ {print "SUCCESS"}'
Or /^[0-9]+ [0-9]+: 07 etc./.  Or use awk exit codes and forgo the .r file.



More information about the DTrace-devel mailing list