[DTrace-devel] [PATCH] Add BPF verifier log post-processor scripts
Eugene Loh
eugene.loh at oracle.com
Fri Aug 20 21:30:49 PDT 2021
I assume these are purely developer tools? I'm confused about what your
position is on adding such tools to the repo.
Plus, they're stand-alone scripts, in contrast to something like the
-xdisasm=0 stuff, which would best be part of the source code: kept out
of tree, it is harder for a developer to keep patching as the source code
evolves, and it is harder to ask a user to send diagnostic output in the
case such output is expected to help diagnose a remote problem.
The patch commit message should explain how to use these tools or,
better yet, there should be usage information or documentation in the
tools themselves. Some comments in the code would also be nice. The
input to each tool is what? It is generated and specified how? (I
think I know the answers now, but no one should have to do such
detective work.)
Clearer variable names would be nice, especially in the absence of any
comments. E.g., in isolateFunc, instead of saddr and eaddr, call them
start and end or something.
In isolateFunc, why is a fn[] array kept when only one name is of
interest? (Or maybe I read the code wrong.)
procVerifierLog changes disassembly syntax. Why? I suppose that's a
rhetorical question; apparently it's a matter of personal preference
that I simply do not share.
procVerifierLog splits things onto multiple lines without making things
more readable or reducing redundant output. The script I use to
postprocess BPF verifier output splits stuff into two columns. The left
column has assembly instructions, basically one per line, making it look
like a normal assembly listing. The right column has the changed state,
annotating the line with the value (or range or tnum or whatever) that
the BPF verifier is using. So assembly and state are clearly separated
and the volume of output is reduced tremendously by pulling out a lot of
redundant output.
On 8/20/21 2:47 AM, Kris Van Hees wrote:
> Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> ---
> cmd/isolateFunc | 89 +++++++++++++
> cmd/procVerifierLog | 316 ++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 405 insertions(+)
> create mode 100755 cmd/isolateFunc
> create mode 100755 cmd/procVerifierLog
>
> diff --git a/cmd/isolateFunc b/cmd/isolateFunc
> new file mode 100755
> index 00000000..8afec455
> --- /dev/null
> +++ b/cmd/isolateFunc
> @@ -0,0 +1,89 @@
> +#!/usr/bin/awk -f
> +
> +BEGIN {
> + if ((!fname && !faddr) || (fname && faddr)) {
> + print "Syntax: isolateFunc -vfname=<NAME>";
> + print " isolateFunc -vfaddr=<ADDR>";
> + exit(1);
> + }
> +
> + saddr = -1;
> + eaddr = -1;
> +}
> +
> +$1 != "BPF:" {
> + next;
> +}
> +
> +$2 ~ /^func#[0-9]/ {
> + addr = int(substr($3, 2));
> + if (NF >= 4)
> + fn[addr] = substr($4, 2, length($4) - 2);
> + else
> + fn[addr] = $2;
> +
> + if (fname == fn[addr] || (NF >= 4 && fname == $2)) {
> + saddr = addr;
> + eaddr = -1;
> + } else if (addr <= faddr) {
> + saddr = addr;
> + eaddr = -1;
> + } else if (eaddr == -1)
> + eaddr = addr;
> +
> + next;
> +}
> +
> +$2 !~ /^func#[0-9]/ && eaddr == -1 {
> + eaddr = 1000000;
> +}
> +
> +int($2) == 0 {
> + if (saddr == -1) {
> + printf "ERROR: function '%s' not found.\n", fname;
> + exit(1);
> + }
> +}
> +
> +$2 ~ /^(R[0-9]|fp)/ {
> + rv[rc++] = $0;
> + next;
> +}
> +
> +int($2) == saddr {
> + printf "Function <%s> [%d-%d]\n", fn[saddr], saddr, eaddr;
> +}
> +
> +int($2) >= saddr && int($2) < eaddr {
> + pc = int($2)
> + if (pc < ppc)
> + print "BPF: ----------";
> + ppc = pc;
> +
> + for (i = 0; i < rc; i++)
> + print rv[i];
> + print;
> +}
> +
> +$2 ~ /^----------/ {
> + next;
> +}
> +
> +/returning from/ && int($8) >= saddr && int($8) < eaddr {
> + print "BPF:";
> + print;
> + getline;
> + print;
> + next;
> +}
> +
> +/returning from/ {
> + getline;
> + next;
> +}
> +
> +$2 ~ /^[0-9]+:/ {
> + delete rv;
> + rc = 0;
> + next;
> +}
> diff --git a/cmd/procVerifierLog b/cmd/procVerifierLog
> new file mode 100755
> index 00000000..a7f1d544
> --- /dev/null
> +++ b/cmd/procVerifierLog
> @@ -0,0 +1,316 @@
> +#!/usr/bin/gawk -f
> +
> +BEGIN {
> + state = 0;
> +
> + arith["+="] = "add";
> + arith["-="] = "sub";
> + arith["*="] = "mul";
> + arith["/="] = "div";
> + arith["|="] = "or";
> + arith["&="] = "and";
> + arith["<<="] = "lsh";
> + arith[">>="] = "rsh";
> + arith["%="] = "mod";
> + arith["^="] = "xor";
> + arith["s>>="] = "arsh";
> +
> + cond["=="] = "jeq";
> + cond["!="] = "jne";
> + cond[">="] = "jge";
> + cond["<="] = "jle";
> + cond[">"] = "jgt";
> + cond["<"] = "jlt";
> + cond["s>="] = "jsge";
> + cond["s<="] = "jsle";
> + cond["s>"] = "jsgt";
> + cond["s<"] = "jslt";
> + cond["&"] = "jset";
> +
> + ld[64] = "lddw";
> + ld[32] = "ldw";
> + ld[16] = "ldh";
> + ld[8] = "ldb";
> +
> + st[64] = "stdw";
> + st[32] = "stw";
> + st[16] = "sth";
> + st[8] = "stb";
> +}
> +
> +state == 0 && /^Disassembly of final program/ {
> + pn = $NF;
> + sub(/:$/, "", pn);
> + fa[0] = pn;
> + state = 1;
> +}
> +
> +state == 1 && $1 == "BPF" && $2 == "OFFSET" {
> + state = 2;
> + print;
> + while (getline == 1) {
> + if (NF == 0)
> + break;
> +
> + print;
> +
> + if ($1 == "R_BPF_INSN_DISP32" && $4 ~ /^dt_/)
> + fa[int($3)] = $4;
> + }
> +
> + asorti(fa, na)
> +
> + delete na;
> +}
> +
> +/^BPF: (frame[0-9]+|R[0-9]+)/ {
> + next;
> +}
> +
> +/^BPF: func#[0-9]+ @[0-9]+$/ {
> + i = int(substr($3, 2));
> + if (i in fa) {
> + fn[i] = fa[i];
> + printf "BPF: %-8s %8s (%s)\n", $2, $3, fn[i];
> + } else {
> + fn[i] = $2;
> + printf "BPF: %-8s %8s\n", $2, $3;
> + }
> + next;
> +}
> +
> +# Find a function name
> +function fname(addr, fn, i) {
> + fn = "???";
> + for (i in fa) {
> + if (int(addr) < int(i))
> + break;
> +
> + fn = fa[i];
> + }
> + return fn;
> +}
> +
> +/^BPF: returning from callee:$/ {
> + $4 = fname(pc);
> + callfrom = $0;
> +
> + getline;
> + if (match($0, / R0/) > 0) {
> + callretv = substr($0, RSTART + 1);
> + sub(/ (R|fp-)[0-9].*/, "", callretv);
> + } else
> + callretv = "R0=???";
> + next;
> +}
> +
> +/^BPF: to caller at [0-9]+:$/ {
> + pc = int($5);
> + $3 = fname(pc);
> + $1 = "";
> + if (!callfrom)
> + callfrom = "BPF: returning";
> + print "BPF: ----------";
> + print callfrom $0;
> + printf "BPF: %s\n", callretv;
> + next;
> +}
> +
> +/^BPF: [0-9]+: \(/ {
> + state = 3;
> + pc = int($2);
> + opc = $3;
> + $1 = $2 = $3 = "";
> + sub(/^ +/, "");
> +
> + if (pc in fn)
> + ann = "<" fn[pc] ">";
> + else
> + ann = 0;
> +
> + # The BPF verifier is reporting on an alternative branch or is trying
> + # an alternative speculative value for something. Either way, we went
> + # back to earlier code, so indicate a control flow boundary and reset
> + # the regstate so we report the initial register state for this
> + # section of instructions.
> + if (pc < ppc) {
> + print "BPF: ----------";
> + regstate = 0;
> + }
> +
> + ppc = pc;
> +
> + callfrom = 0;
> + callretv = 0;
> +}
> +
> +/^BPF: [0-9]+: (frame[0-9]: )?R[0-9]+/ {
> + pc = int($2);
> + $1 = $2 = "";
> + sub(/^ +/, "");
> +
> + if ($0 != regstate) {
> + regstate = $0;
> +
> + if (match($0, /^frame[0-9]: /) > 0) {
> + printf "BPF: %s\n", substr($0, 1, RLENGTH - 1);
> + $0 = substr($0, RSTART + RLENGTH);
> + }
> +
> + while (match($0, / R[0-9]+/) > 0) {
> + printf "BPF: %s\n", substr($0, 1, RSTART);
> + $0 = substr($0, RSTART + 1);
> + }
> +
> + if (match($0, / fp-[0-9]+/) > 0) {
> + printf "BPF: %s\n", substr($0, 1, RSTART);
> + $0 = substr($0, RSTART + 1);
> + }
> +
> + printf "BPF: %s\n", $0;
> + }
> + next;
> +}
> +
> +state != 3 {
> + print;
> + next;
> +}
> +
> +# Print an instruction
> +function emit(ins, args, ann, s, n) {
> + gsub(/%r10/, "%fp", args);
> + s = sprintf("BPF: % 5d: %4.4s %-4.4s %s", pc, opc, ins, args);
> + if (ann) {
> + n = length(s);
> + if (n > 64)
> + n = 0;
> + else
> + n = 64 - n;
> + printf "%s%*s! %s\n", s, n, "", ann;
> + } else
> + print s;
> +}
> +
> +# Register-to-register assignment
> +/[rw][0-9]+ = [rw][0-9]+/ {
> + emit("mov", "%" $1 ", %" $3, ann);
> + next;
> +}
> +
> +# Negate Register value
> +/[rw][0-9]+ = -[rw][0-9]+/ {
> + emit("neg", "%" $1, ann);
> + next;
> +}
> +
> +# Immediate-to-register assignment
> +/^r[0-9]+ = [-0-9]+/ {
> + emit("mov", "%" $1 ", " $3, ann);
> + next;
> +}
> +
> +# 64-bit value assignment
> +/^r[0-9] = 0x[0-9a-f]+/ {
> + emit("lddw", "%" $1 ", " $3, ann);
> + next;
> +}
> +
> +# Load from register
> +/^r[0-9]+ = \*\(u[0-9]+ \*\)\(r[0-9]+ [-+][0-9]+\)/ {
> + sz = int(substr($3, 4));
> + sub(/\)/, "]", $5);
> + emit(ld[sz], "%" $1 ", [%" substr($4, 4) $5, ann);
> + next;
> +}
> +
> +# Store from register
> +/^\*\(u[0-9]+ \*\)\(r[0-9]+ [-+][0-9]+\) = r[0-9]+/ {
> + sz = int(substr($1, 4));
> + sub(/\)/, "]", $3);
> + emit(st[sz], "[%" substr($2, 4) $3 ", %" $5, ann);
> + next;
> +}
> +
> +# Store from immediate
> +/^\*\(u[0-9]+ \*\)\(r[0-9]+ [-+][0-9]+\) = [^r]/ {
> + sz = int(substr($1, 4));
> + sub(/\)/, "]", $3);
> + emit(st[sz], "[%" substr($2, 4) $3 ", " $5, ann);
> + next;
> +}
> +
> +# Arithmetic with register
> +/^[rw][0-9] [^=]+= [rw][0-9]/ {
> + emit(arith[$2], "%" $1 ", %" $3, ann);
> + next;
> +}
> +
> +# Arithmetic with immediate value
> +/^[rw][0-9] [^=]+= -?[0-9]+/ {
> + emit(arith[$2], "%" $1 ", " $3, ann);
> + next;
> +}
> +
> +# BPF helper call
> +/^call bpf/ {
> + sub(/#.*$/, "", $2);
> + emit("call", $2, ann);
> + next;
> +}
> +
> +# Function call
> +/^call pc[-+]/ {
> + off = int(substr($2, 3));
> + addr = pc + 1 + off;
> + if (addr in fn)
> + nm = fn[addr];
> + else
> + nm = $2;
> +
> + if (ann)
> + emit("call", nm, ann " -> " addr);
> + else
> + emit("call", nm, "-> " addr);
> + next;
> +}
> +
> +# Return
> +/^exit/ {
> + emit("exit", "", ann);
> + next;
> +}
> +
> +# Conditional branch
> +/^if r[0-9]+ / {
> + off = int(substr($6, 3));
> + addr = pc + 1 + off;
> + t_branch[branchc] = addr;
> + f_branch[branchc] = pc + 1;
> + branchc++;
> +
> + if ($4 ~ /^r/)
> + $4 = "%" $4;
> +
> + if (ann)
> + emit(cond[$3], "%" $2 ", " $4 ", " off, ann " -> " addr);
> + else
> + emit(cond[$3], "%" $2 ", " $4 ", " off, "-> " addr);
> + next;
> +}
> +
> +# Jump
> +/^goto / {
> + off = int(substr($2, 3));
> + addr = pc + 1 + off;
> +
> + if (ann)
> + emit("ja", off, ann " -> " addr);
> + else
> + emit("ja", off, "-> " addr);
> + next;
> +}
> +
> +{
> + print;
> +}
> --
> 2.33.0
>
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel
More information about the DTrace-devel
mailing list