[DTrace-devel] [PATCH v3] dtrace: BPF program load for '...' failed: No space left on device

eugene.loh at oracle.com eugene.loh at oracle.com
Fri Jan 15 09:41:16 PST 2021


From: Eugene Loh <eugene.loh at oracle.com>

A D script that produces BPF code with many code paths can result
in 16 Mbytes of BPF verifier log output, ending with the error
message quoted in the subject line ("No space left on device").
The log itself says nothing about why the BPF load failed.
The actual problem is that the 16-Mbyte log buffer we supplied
is too small to hold the verifier's output.

If no log buffer is supplied, this problem is not encountered.

Change DTrace's BPF program load to use no log buffer at first.
If the load fails, then retry with a log buffer.  The load should
fail again; if this second failure is not ENOSPC, we can simply
report the log and be done.  If the second failure is ENOSPC, the
log is incomplete, so do not report it; instead, inform the user of
the problem and of the action that can be taken to increase the
buffer size.

Provide a new DTrace option to control the log buffer size.

Add tests for this fix.  Specifically, the aggregation function
quantize() can be used, since it must quantize a value into one
of 127 different bins.  The algorithm used has many code paths
and thereby exercises the BPF verifier well.

https://github.com/oracle/dtrace-utils/issues/6
Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
 include/dtrace/options_defines.h             |   5 +-
 libdtrace/dt_bpf.c                           |  44 ++++---
 libdtrace/dt_options.c                       |   3 +-
 test/unittest/misc/tst.bpflogsize-cmdline.sh | 114 +++++++++++++++++++
 test/unittest/misc/tst.bpflogsize-pragma.sh  | 114 +++++++++++++++++++
 5 files changed, 262 insertions(+), 18 deletions(-)
 create mode 100755 test/unittest/misc/tst.bpflogsize-cmdline.sh
 create mode 100755 test/unittest/misc/tst.bpflogsize-pragma.sh

diff --git a/include/dtrace/options_defines.h b/include/dtrace/options_defines.h
index ed4a7c15..18ea641f 100644
--- a/include/dtrace/options_defines.h
+++ b/include/dtrace/options_defines.h
@@ -2,7 +2,7 @@
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  *
- * Copyright (c) 2009, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -57,7 +57,8 @@
 #define	DTRACEOPT_QUIETRESIZE	27      /* quieten buffer-resize messages */
 #define	DTRACEOPT_NORESOLVE	28      /* prevent resolution of symbols */
 #define	DTRACEOPT_PCAPSIZE	29	/* number of bytes to be captured */
-#define	DTRACEOPT_MAX		30      /* number of options */
+#define	DTRACEOPT_BPFLOGSIZE	30	/* BPF verifier log, max # bytes */
+#define	DTRACEOPT_MAX		31      /* number of options */
 
 #define	DTRACEOPT_UNSET		(dtrace_optval_t)-2	/* unset option */
 
diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
index c6102f15..ec3d0963 100644
--- a/libdtrace/dt_bpf.c
+++ b/libdtrace/dt_bpf.c
@@ -1,6 +1,6 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -323,9 +323,11 @@ dt_bpf_load_prog(dtrace_hdl_t *dtp, const dt_probe_t *prp,
 		 const dtrace_difo_t *dp)
 {
 	struct bpf_load_program_attr	attr;
-	int				logsz = BPF_LOG_BUF_SIZE;
+	size_t				logsz;
 	char				*log;
 	int				rc;
+	const dtrace_probedesc_t	*pdp = prp->desc;
+	char				*p, *q;
 
 	/*
 	 * Check whether there are any probe-specific relocations to be
@@ -340,27 +342,39 @@ dt_bpf_load_prog(dtrace_hdl_t *dtp, const dt_probe_t *prp,
 
 	memset(&attr, 0, sizeof(struct bpf_load_program_attr));
 
-	log = dt_zalloc(dtp, logsz);
-	assert(log != NULL);
-
 	attr.prog_type = prp->prov->impl->prog_type;
 	attr.name = NULL;
 	attr.insns = dp->dtdo_buf;
 	attr.insns_cnt = dp->dtdo_len;
 	attr.license = BPF_CG_LICENSE;
-	attr.log_level = 4 | 2 | 1;
 
+	rc = bpf_load_program_xattr(&attr, NULL, 0);
+	if (rc >= 0)
+		return rc;
+
+	/* if failure, note error and rerun with logging */
+	dt_bpf_error(dtp, "BPF program load for '%s:%s:%s:%s' failed: %s\n",
+			  pdp->prv, pdp->mod, pdp->fun, pdp->prb,
+			  strerror(errno));
+	if (dtp->dt_options[DTRACEOPT_BPFLOGSIZE] != DTRACEOPT_UNSET)
+		logsz = dtp->dt_options[DTRACEOPT_BPFLOGSIZE];
+	else
+		logsz = BPF_LOG_BUF_SIZE;
+	attr.log_level = 4 | 2 | 1;
+	log = dt_zalloc(dtp, logsz);
+	assert(log != NULL);
 	rc = bpf_load_program_xattr(&attr, log, logsz);
-	if (rc < 0) {
-		const dtrace_probedesc_t	*pdp = prp->desc;
-		char				*p, *q;
 
-		rc = dt_bpf_error(dtp,
-				  "BPF program load for '%s:%s:%s:%s' failed: "
-				  "%s\n",
-				  pdp->prv, pdp->mod, pdp->fun, pdp->prb,
-				  strerror(errno));
+	/* since it failed once, it should fail again */
+	assert(rc < 0);
 
+	/* check whether we have an incomplete BPF log */
+	if (errno == ENOSPC) {
+		fprintf(stderr,
+		    "BPF verifier log is incomplete and is not reported.\n"
+		    "Set DTrace option 'bpflogsize' to some greater size for more output.\n"
+		    "(Current size is %ld.)\n", logsz);
+	} else {
 		/*
 		 * If there is BPF verifier output, print it with a "BPF: "
 		 * prefix so it is easier to distinguish.
@@ -377,7 +391,7 @@ dt_bpf_load_prog(dtrace_hdl_t *dtp, const dt_probe_t *prp,
 
 	dt_free(dtp, log);
 
-	return rc;
+	return -1;
 }
 
 int
diff --git a/libdtrace/dt_options.c b/libdtrace/dt_options.c
index 53c9507a..af6caecf 100644
--- a/libdtrace/dt_options.c
+++ b/libdtrace/dt_options.c
@@ -1,6 +1,6 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -1093,6 +1093,7 @@ static const dt_option_t _dtrace_ctoptions[] = {
  */
 static const dt_option_t _dtrace_rtoptions[] = {
 	{ "aggsize", dt_opt_size, DTRACEOPT_AGGSIZE },
+	{ "bpflogsize", dt_opt_size, DTRACEOPT_BPFLOGSIZE },
 	{ "bufsize", dt_opt_size, DTRACEOPT_BUFSIZE },
 	{ "bufpolicy", dt_opt_bufpolicy, DTRACEOPT_BUFPOLICY },
 	{ "bufresize", dt_opt_bufresize, DTRACEOPT_BUFRESIZE },
diff --git a/test/unittest/misc/tst.bpflogsize-cmdline.sh b/test/unittest/misc/tst.bpflogsize-cmdline.sh
new file mode 100755
index 00000000..6932ed74
--- /dev/null
+++ b/test/unittest/misc/tst.bpflogsize-cmdline.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+#
+# @@timeout: 120
+
+dtrace=$1
+
+DIRNAME="$tmpdir/misc-BPFlog-cmdline.$$.$RANDOM"
+mkdir -p $DIRNAME
+cd $DIRNAME
+
+# form a D script that will not run with defaults and will require a large BPF log file
+
+cat > D.d << EOF
+#pragma D option quiet
+BEGIN
+{
+	x = 1;
+EOF
+
+for x in `seq 100`; do
+	echo "	@ = quantize(x); @ = quantize(x); @ = quantize(x); @ = quantize(x);" >> D.d
+done
+
+cat >> D.d << EOF
+	exit(0);
+}
+EOF
+
+# form the expected output when the BPF log size is too small
+# ("nn" represents the size that appears in the actual output)
+
+cat > D.out << EOF
+BPF verifier log is incomplete and is not reported.
+Set DTrace option 'bpflogsize' to some greater size for more output.
+(Current size is nn.)
+dtrace: could not enable tracing: BPF program load for 'dtrace:::BEGIN' failed: Argument list too long
+EOF
+
+# try the script with increasing BPF log size (starting with default)
+
+startedtoosmall=0
+cursiz=$((16 * 1024 * 1024 - 1))
+option=""
+while [ $cursiz -lt $((2 * 1024 * 1024 * 1024)) ]; do
+
+	# dtrace should not pass
+	$dtrace $option -s D.d >& tmp.out
+	if [ $? -eq 0 ]; then
+		echo unexpected pass
+		cat D.d
+		cat tmp.out
+		exit 1
+	fi
+
+	# usually, it will fail because the BPF log size is too small
+	if [ `sed s/$cursiz/nn/ tmp.out | diff - D.out | wc -l` -eq 0 ]; then
+		echo okay: $cursiz is too small
+
+		# confirm that we started too small
+		startedtoosmall=1
+
+		# so bump the size up and try again
+		cursiz=$((2 * $cursiz + 1))
+		option="-xbpflogsize=$cursiz"
+		continue
+	fi
+
+	# confirm that we started too small
+	if [ $startedtoosmall -eq 0 ]; then
+		echo the smallest sizes we try should be too small
+		exit 1
+	fi
+
+	# otherwise, it should fail with a huge dump
+	actsiz=`cat tmp.out | wc -c`
+	status=0
+	if [ $actsiz -lt $(($cursiz / 2)) ]; then
+		echo ERROR: BPF output should have fit in smaller buffer
+		status=1
+	fi
+	if [ $actsiz -gt $cursiz ]; then
+		echo ERROR: BPF output is larger than user-specified limit
+		status=1
+	fi
+	if [ `grep -cv '^BPF: ' tmp.out` -ne 1 ]; then
+		echo ERROR: expected only one non-\"BPF:\" output line
+		status=1
+	fi
+	if ! grep -q '^BPF: BPF program is too large. Processed .* insn' tmp.out; then
+		echo ERROR: BPF error message is missing
+		status=1
+	fi
+	if ! grep -q '^dtrace: could not enable tracing: BPF program load for .* failed: Argument list too long' tmp.out; then
+		echo ERROR: dtrace error message is missing
+		status=1
+	fi
+	if [ $status -ne 0 ]; then
+		head -10 tmp.out
+		echo "..."
+		tail -10 tmp.out
+	else
+		echo SUCCESS: test failed with expected error
+	fi
+	exit $status
+done
+
+echo "ERROR: BPF log size, currently $cursiz, has gotten unexpectedly large"
+exit 1
+
diff --git a/test/unittest/misc/tst.bpflogsize-pragma.sh b/test/unittest/misc/tst.bpflogsize-pragma.sh
new file mode 100755
index 00000000..6e3ba585
--- /dev/null
+++ b/test/unittest/misc/tst.bpflogsize-pragma.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+#
+# @@timeout: 120
+
+dtrace=$1
+
+DIRNAME="$tmpdir/misc-BPFlog-pragma.$$.$RANDOM"
+mkdir -p $DIRNAME
+cd $DIRNAME
+
+# form a D script that will not run with defaults and will require a large BPF log file
+
+cat > D.d << EOF
+#pragma D option quiet
+/* #pragma D option bpflogsize=nnnn */
+BEGIN
+{
+	x = 1;
+EOF
+
+for x in `seq 100`; do
+	echo "	@ = quantize(x); @ = quantize(x); @ = quantize(x); @ = quantize(x);" >> D.d
+done
+
+cat >> D.d << EOF
+	exit(0);
+}
+EOF
+
+# form the expected output when the BPF log size is too small
+# ("nn" represents the size that appears in the actual output)
+
+cat > D.out << EOF
+BPF verifier log is incomplete and is not reported.
+Set DTrace option 'bpflogsize' to some greater size for more output.
+(Current size is nn.)
+dtrace: could not enable tracing: BPF program load for 'dtrace:::BEGIN' failed: Argument list too long
+EOF
+
+# try the script with increasing BPF log size (starting with default)
+
+startedtoosmall=0
+cursiz=$((16 * 1024 * 1024 - 1))
+while [ $cursiz -lt $((2 * 1024 * 1024 * 1024)) ]; do
+
+	# dtrace should not pass
+	$dtrace -s D.d >& tmp.out
+	if [ $? -eq 0 ]; then
+		echo unexpected pass
+		cat D.d
+		cat tmp.out
+		exit 1
+	fi
+
+	# usually, it will fail because the BPF log size is too small
+	if [ `sed s/$cursiz/nn/ tmp.out | diff - D.out | wc -l` -eq 0 ]; then
+		echo okay: $cursiz is too small
+
+		# confirm that we started too small
+		startedtoosmall=1
+
+		# so bump the size up and try again
+		cursiz=$((2 * $cursiz + 1))
+		sed -i 's:^.*bpflogsize.*$:#pragma D option bpflogsize='$cursiz':' D.d
+		continue
+	fi
+
+	# confirm that we started too small
+	if [ $startedtoosmall -eq 0 ]; then
+		echo the smallest sizes we try should be too small
+		exit 1
+	fi
+
+	# otherwise, it should fail with a huge dump
+	actsiz=`cat tmp.out | wc -c`
+	status=0
+	if [ $actsiz -lt $(($cursiz / 2)) ]; then
+		echo ERROR: BPF output should have fit in smaller buffer
+		status=1
+	fi
+	if [ $actsiz -gt $cursiz ]; then
+		echo ERROR: BPF output is larger than user-specified limit
+		status=1
+	fi
+	if [ `grep -cv '^BPF: ' tmp.out` -ne 1 ]; then
+		echo ERROR: expected only one non-\"BPF:\" output line
+		status=1
+	fi
+	if ! grep -q '^BPF: BPF program is too large. Processed .* insn' tmp.out; then
+		echo ERROR: BPF error message is missing
+		status=1
+	fi
+	if ! grep -q '^dtrace: could not enable tracing: BPF program load for .* failed: Argument list too long' tmp.out; then
+		echo ERROR: dtrace error message is missing
+		status=1
+	fi
+	if [ $status -ne 0 ]; then
+		head -10 tmp.out
+		echo "..."
+		tail -10 tmp.out
+	else
+		echo SUCCESS: test failed with expected error
+	fi
+	exit $status
+done
+
+echo "ERROR: BPF log size, currently $cursiz, has gotten unexpectedly large"
+exit 1
+
-- 
2.18.4




More information about the DTrace-devel mailing list