[DTrace-devel] [PATCH] Add BPF verifier log post-processor scripts

Eugene Loh eugene.loh at oracle.com
Fri Aug 20 21:30:49 PDT 2021


I assume these are purely developer tools?  I'm confused about what your 
position is on adding such tools to the repo.

Plus, they're stand-alone scripts, in contrast to something like the 
-xdisasm=0 stuff, which would best be part of the source code: kept 
outside the tree, it is harder for a developer to keep patching as the 
source code evolves -- and it is harder to ask a user to send diagnostic 
output in cases where such output is expected to help diagnose a remote 
problem.

The patch commit message should explain how to use these tools or, 
better yet, there should be usage information or documentation in the 
tools themselves.  Some comments in the code would also be nice.  The 
input to each tool is what?  It is generated and specified how?  (I 
think I know the answers now, but no one should have to do such 
detective work.)

Clearer variable names would be nice, especially in the absence of any 
comments.  E.g., in isolateFunc, instead of saddr and eaddr, call them 
start and end or something.

In isolateFunc, why is a fn[] array kept when only one name is of 
interest?  (Or maybe I read the code wrong.)

procVerifierLog changes disassembly syntax.  Why?  I suppose that's a 
rhetorical question;  apparently it's a matter of personal preference 
that I simply do not share.

procVerifierLog splits things onto multiple lines without making things 
more readable or reducing redundant output.  The script I use to 
postprocess BPF verifier output splits stuff into two columns.  The left 
column has assembly instructions, basically one per line, making it look 
like a normal assembly listing.  The right column has the changed state, 
annotating the line with the value (or range or tnum or whatever) that 
the BPF verifier is using.  So assembly and state are clearly separated 
and the volume of output is reduced tremendously by pulling out a lot of 
redundant output.

On 8/20/21 2:47 AM, Kris Van Hees wrote:

> Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> ---
>   cmd/isolateFunc     |  89 +++++++++++++
>   cmd/procVerifierLog | 316 ++++++++++++++++++++++++++++++++++++++++++++
>   2 files changed, 405 insertions(+)
>   create mode 100755 cmd/isolateFunc
>   create mode 100755 cmd/procVerifierLog
>
> diff --git a/cmd/isolateFunc b/cmd/isolateFunc
> new file mode 100755
> index 00000000..8afec455
> --- /dev/null
> +++ b/cmd/isolateFunc
> @@ -0,0 +1,89 @@
> +#!/usr/bin/awk -f
> +
> +BEGIN {
> +	if ((!fname && !faddr) || (fname && faddr)) {
> +		print "Syntax: isolateFunc -vfname=<NAME>";
> +		print "        isolateFunc -vfaddr=<ADDR>";
> +		exit(1);
> +	}
> +
> +	saddr = -1;
> +	eaddr = -1;
> +}
> +
> +$1 != "BPF:" {
> +	next;
> +}
> +
> +$2 ~ /^func#[0-9]/ {
> +	addr = int(substr($3, 2));
> +	if (NF >= 4)
> +		fn[addr] = substr($4, 2, length($4) - 2);
> +	else
> +		fn[addr] = $2;
> +
> +	if (fname == fn[addr] || (NF >= 4 && fname == $2)) {
> +		saddr = addr;
> +		eaddr = -1;
> +	} else if (addr <= faddr) {
> +		saddr = addr;
> +		eaddr = -1;
> +	} else if (eaddr == -1)
> +		eaddr = addr;
> +
> +	next;
> +}
> +
> +$2 !~ /^func#[0-9]/ && eaddr == -1 {
> +	eaddr = 1000000;
> +}
> +
> +int($2) == 0 {
> +	if (saddr == -1) {
> +		printf "ERROR: function  '%s' not found.\n", fname;
> +		exit(1);
> +	}
> +}
> +
> +$2 ~ /^(R[0-9]|fp)/ {
> +	rv[rc++] = $0;
> +	next;
> +}
> +
> +int($2) == saddr {
> +	printf "Function <%s> [%d-%d]\n", fn[saddr], saddr, eaddr;
> +}
> +
> +int($2) >= saddr && int($2) < eaddr {
> +	pc = int($2)
> +	if (pc < ppc)
> +		print "BPF: ----------";
> +	ppc = pc;
> +
> +	for (i = 0; i < rc; i++)
> +		print rv[i];
> +	print;
> +}
> +
> +$2 ~ /^----------/ {
> +	next;
> +}
> +
> +/returning from/  && int($8) >= saddr && int($8) < eaddr {
> +	print "BPF:";
> +	print;
> +	getline;
> +	print;
> +	next;
> +}
> +
> +/returning from/  {
> +	getline;
> +	next;
> +}
> +
> +$2 ~ /^[0-9]+:/ {
> +	delete rv;
> +	rc = 0;
> +	next;
> +}
> diff --git a/cmd/procVerifierLog b/cmd/procVerifierLog
> new file mode 100755
> index 00000000..a7f1d544
> --- /dev/null
> +++ b/cmd/procVerifierLog
> @@ -0,0 +1,316 @@
> +#!/usr/bin/gawk -f
> +
> +BEGIN {
> +	state = 0;
> +
> +	arith["+="] = "add";
> +	arith["-="] = "sub";
> +	arith["*="] = "mul";
> +	arith["/="] = "div";
> +	arith["|="] = "or";
> +	arith["&="] = "and";
> +	arith["<<="] = "lsh";
> +	arith[">>="] = "rsh";
> +	arith["%="] = "mod";
> +	arith["^="] = "xor";
> +	arith["s>>="] = "arsh";
> +
> +	cond["=="] = "jeq";
> +	cond["!="] = "jne";
> +	cond[">="] = "jge";
> +	cond["<="] = "jle";
> +	cond[">"] = "jgt";
> +	cond["<"] = "jlt";
> +	cond["s>="] = "jsge";
> +	cond["s<="] = "jsle";
> +	cond["s>"] = "jsgt";
> +	cond["s<"] = "jslt";
> +	cond["&"] = "jset";
> +
> +	ld[64] = "lddw";
> +	ld[32] = "ldw";
> +	ld[16] = "ldh";
> +	ld[8] = "ldb";
> +
> +	st[64] = "stdw";
> +	st[32] = "stw";
> +	st[16] = "sth";
> +	st[8] = "stb";
> +}
> +
> +state == 0 && /^Disassembly of final program/ {
> +	pn = $NF;
> +	sub(/:$/, "", pn);
> +	fa[0] = pn;
> +	state = 1;
> +}
> +
> +state == 1 && $1 == "BPF" && $2 == "OFFSET" {
> +	state = 2;
> +	print;
> +	while (getline == 1) {
> +		if (NF == 0)
> +			break;
> +		
> +		print;
> +
> +		if ($1 == "R_BPF_INSN_DISP32" && $4 ~ /^dt_/)
> +			fa[int($3)] = $4;
> +	}
> +
> +	asorti(fa, na)
> +
> +	delete na;
> +}
> +
> +/^BPF:  (frame[0-9]+|R[0-9]+)/ {
> +	next;
> +}
> +
> +/^BPF: func#[0-9]+ @[0-9]+$/ {
> +	i = int(substr($3, 2));
> +	if (i in fa) {
> +		fn[i] = fa[i];
> +		printf "BPF: %-8s %8s (%s)\n", $2, $3, fn[i];
> +	} else {
> +		fn[i] = $2;
> +		printf "BPF: %-8s %8s\n", $2, $3;
> +	}
> +	next;
> +}
> +
> +# Find a function name
> +function fname(addr, fn, i) {
> +	fn = "???";
> +	for (i in fa) {
> +		if (int(addr) < int(i))
> +			break;
> +
> +		fn = fa[i];
> +	}
> +	return fn;
> +}
> +
> +/^BPF: returning from callee:$/ {
> +	$4 = fname(pc);
> +	callfrom = $0;
> +
> +	getline;
> +	if (match($0, / R0/) > 0) {
> +		callretv = substr($0, RSTART + 1);
> +		sub(/ (R|fp-)[0-9].*/, "", callretv);
> +	} else
> +		callretv = "R0=???";
> +	next;
> +}
> +
> +/^BPF: to caller at [0-9]+:$/ {
> +	pc = int($5);
> +	$3 = fname(pc);
> +	$1 = "";
> +	if (!callfrom)
> +		callfrom = "BPF: returning";
> +	print "BPF: ----------";
> +	print callfrom $0;
> +	printf "BPF:    %s\n", callretv;
> +	next;
> +}
> +
> +/^BPF: [0-9]+: \(/ {
> +	state = 3;
> +	pc = int($2);
> +	opc = $3;
> +	$1 = $2 = $3 = "";
> +	sub(/^ +/, "");
> +
> +	if (pc in fn)
> +		ann = "<" fn[pc] ">";
> +	else
> +		ann = 0;
> +
> +	# The BPF verifier is reporting on an alternative branch or is trying
> +	# an alternative speculative value for something.  Either way, we went
> +	# back to earlier code, so indicate a control flow boundary and reset
> +	# the regstate so we report the initial register state for this
> +	# section of instructions.
> +	if (pc < ppc) {
> +		print "BPF: ----------";
> +		regstate = 0;
> +	}
> +
> +	ppc = pc;
> +
> +	callfrom = 0;
> +	callretv = 0;
> +}
> +
> +/^BPF: [0-9]+: (frame[0-9]: )?R[0-9]+/ {
> +	pc = int($2);
> +	$1 = $2 = "";
> +	sub(/^ +/, "");
> +
> +	if ($0 != regstate) {
> +		regstate = $0;
> +
> +		if (match($0, /^frame[0-9]: /) > 0) {
> +			printf "BPF:      %s\n", substr($0, 1, RLENGTH - 1);
> +			$0 = substr($0, RSTART + RLENGTH);
> +		}
> +
> +		while (match($0, / R[0-9]+/) > 0) {
> +			printf "BPF:        %s\n", substr($0, 1, RSTART);
> +			$0 = substr($0, RSTART + 1);
> +		}
> +
> +		if (match($0, / fp-[0-9]+/) > 0) {
> +			printf "BPF:        %s\n", substr($0, 1, RSTART);
> +			$0 = substr($0, RSTART + 1);
> +		}
> +
> +		printf "BPF:        %s\n", $0;
> +	}
> +	next;
> +}
> +
> +state != 3 {
> +	print;
> +	next;
> +}
> +
> +# Print an instruction
> +function emit(ins, args, ann, s, n) {
> +	gsub(/%r10/, "%fp", args);
> +	s = sprintf("BPF: % 5d: %4.4s %-4.4s %s", pc, opc, ins, args);
> +	if (ann) {
> +		n = length(s);
> +		if (n > 64)
> +			n = 0;
> +		else
> +			n = 64 - n;
> +		printf "%s%*s! %s\n", s, n, "", ann;
> +	} else
> +		print s;
> +}
> +
> +# Register-to-register assignment
> +/[rw][0-9]+ = [rw][0-9]+/ {
> +	emit("mov", "%" $1 ", %" $3, ann);
> +	next;
> +}
> +
> +# Negate Register value
> +/[rw][0-9]+ = -[rw][0-9]+/ {
> +	emit("neg", "%" $1, ann);
> +	next;
> +}
> +
> +# Immediate-to-register assignment
> +/^r[0-9]+ = [-0-9]+/ {
> +	emit("mov", "%" $1 ", " $3, ann);
> +	next;
> +}
> +
> +# 64-bit value assignment
> +/^r[0-9] = 0x[0-9a-f]+/ {
> +	emit("lddw", "%" $1 ", " $3, ann);
> +	next;
> +}
> +
> +# Load from register
> +/^r[0-9]+ = \*\(u[0-9]+ \*\)\(r[0-9]+ [-+][0-9]+\)/ {
> +	sz = int(substr($3, 4));
> +	sub(/\)/, "]", $5);
> +	emit(ld[sz], "%" $1 ", [%" substr($4, 4) $5, ann);
> +	next;
> +}
> +
> +# Store from register
> +/^\*\(u[0-9]+ \*\)\(r[0-9]+ [-+][0-9]+\) = r[0-9]+/ {
> +	sz = int(substr($1, 4));
> +	sub(/\)/, "]", $3);
> +	emit(st[sz], "[%" substr($2, 4) $3 ", %" $5, ann);
> +	next;
> +}
> +
> +# Store from immediate
> +/^\*\(u[0-9]+ \*\)\(r[0-9]+ [-+][0-9]+\) = [^r]/ {
> +	sz = int(substr($1, 4));
> +	sub(/\)/, "]", $3);
> +	emit(st[sz], "[%" substr($2, 4) $3 ", " $5, ann);
> +	next;
> +}
> +
> +# Arithmetic with register
> +/^[rw][0-9] [^=]+= [rw][0-9]/ {
> +	emit(arith[$2], "%" $1 ", %" $3, ann);
> +	next;
> +}
> +
> +# Arithmetic with immediate value
> +/^[rw][0-9] [^=]+= -?[0-9]+/ {
> +	emit(arith[$2], "%" $1 ", " $3, ann);
> +	next;
> +}
> +
> +# BPF helper call
> +/^call bpf/ {
> +	sub(/#.*$/, "", $2);
> +	emit("call", $2, ann);
> +	next;
> +}
> +
> +# Function call
> +/^call pc[-+]/ {
> +	off = int(substr($2, 3));
> +	addr = pc + 1 + off;
> +	if (addr in fn)
> +		nm = fn[addr];
> +	else
> +		nm = $2;
> +
> +	if (ann)
> +		emit("call", nm, ann " -> " addr);
> +	else
> +		emit("call", nm, "-> " addr);
> +	next;
> +}
> +
> +# Return
> +/^exit/ {
> +	emit("exit", "", ann);
> +	next;
> +}
> +
> +# Conditional branch
> +/^if r[0-9]+ / {
> +	off = int(substr($6, 3));
> +	addr = pc + 1 + off;
> +	t_branch[branchc] = addr;
> +	f_branch[branchc] = pc + 1;
> +	branchc++;
> +
> +	if ($4 ~ /^r/)
> +		$4 = "%" $4;
> +
> +	if (ann)
> +		emit(cond[$3], "%" $2 ", " $4 ", " off, ann " -> " addr);
> +	else
> +		emit(cond[$3], "%" $2 ", " $4 ", " off, "-> " addr);
> +	next;
> +}
> +
> +# Jump
> +/^goto / {
> +	off = int(substr($2, 3));
> +	addr = pc + 1 + off;
> +
> +	if (ann)
> +		emit("ja", off, ann " -> " addr);
> +	else
> +		emit("ja", off, "-> " addr);
> +	next;
> +}
> +
> +{
> +	print;
> +}
> -- 
> 2.33.0
>
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel



More information about the DTrace-devel mailing list