[DTrace-devel] [PATCH 10/61] Locked-memory limit

eugene.loh at oracle.com eugene.loh at oracle.com
Fri Jul 8 14:44:54 UTC 2022


From: Eugene Loh <eugene.loh at oracle.com>

Locked memory is needed for BPF operations such as creating maps and
loading programs.  Therefore, dt_vopen() sets RLIMIT_MEMLOCK to 32 Mbytes,
an amount that seems sufficient for many tests in our suite.  On the other
hand, the value is somewhat arbitrary, excessive for many D scripts yet
insufficient for others, including some of our tests.  Further, it is
silently modifying a resource limit.

Remove dt_vopen()'s silent modification of the locked-memory limit.

Explicitly set "ulimit -l" in runtest.sh to accommodate the tests in our
suite.

While users can similarly set "ulimit -l" explicitly, it would be more
convenient if there were alternative mechanisms, especially when using
"sudo dtrace".  Therefore, add a D option lockmemkb.  Like "ulimit -l", it
accepts the value "unlimited" for users who simply want to ignore any limit.

Add some more verbose error messages that explain that encountering
EPERM during BPF map creation or BPF program load may be solved by
adjusting the locked-memory limit.

With UEKR7, the locked-memory limit seems to be ignored for root.

Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
 include/dtrace/options_defines.h            |  3 +-
 libdtrace/dt_bpf.c                          | 39 +++++++++++++++++----
 libdtrace/dt_open.c                         | 13 -------
 libdtrace/dt_options.c                      | 15 ++++++++
 libdtrace/dt_work.c                         | 13 ++++++-
 runtest.sh                                  |  4 +++
 test/unittest/misc/tst.lockmemkb-cmdline.r  | 16 +++++++++
 test/unittest/misc/tst.lockmemkb-cmdline.sh | 18 ++++++++++
 test/unittest/misc/tst.lockmemkb-cmdline.x  | 16 +++++++++
 test/unittest/misc/tst.lockmemkb-envvar.r   |  1 +
 test/unittest/misc/tst.lockmemkb-envvar.sh  | 18 ++++++++++
 test/unittest/misc/tst.lockmemkb-envvar.x   |  1 +
 test/unittest/misc/tst.lockmemkb-pragma.r   |  1 +
 test/unittest/misc/tst.lockmemkb-pragma.sh  | 28 +++++++++++++++
 test/unittest/misc/tst.lockmemkb-pragma.x   |  1 +
 test/unittest/misc/tst.lockmemkb-x.r        |  1 +
 test/unittest/misc/tst.lockmemkb-x.sh       | 18 ++++++++++
 test/unittest/misc/tst.lockmemkb-x.x        |  1 +
 18 files changed, 185 insertions(+), 22 deletions(-)
 create mode 100644 test/unittest/misc/tst.lockmemkb-cmdline.r
 create mode 100755 test/unittest/misc/tst.lockmemkb-cmdline.sh
 create mode 100755 test/unittest/misc/tst.lockmemkb-cmdline.x
 create mode 120000 test/unittest/misc/tst.lockmemkb-envvar.r
 create mode 100755 test/unittest/misc/tst.lockmemkb-envvar.sh
 create mode 120000 test/unittest/misc/tst.lockmemkb-envvar.x
 create mode 120000 test/unittest/misc/tst.lockmemkb-pragma.r
 create mode 100755 test/unittest/misc/tst.lockmemkb-pragma.sh
 create mode 120000 test/unittest/misc/tst.lockmemkb-pragma.x
 create mode 120000 test/unittest/misc/tst.lockmemkb-x.r
 create mode 100755 test/unittest/misc/tst.lockmemkb-x.sh
 create mode 120000 test/unittest/misc/tst.lockmemkb-x.x

diff --git a/include/dtrace/options_defines.h b/include/dtrace/options_defines.h
index 5ecd6285..e9aa444f 100644
--- a/include/dtrace/options_defines.h
+++ b/include/dtrace/options_defines.h
@@ -61,7 +61,8 @@
 #define	DTRACEOPT_MAXFRAMES	31	/* maximum number of stack frames */
 #define	DTRACEOPT_BPFLOG	32	/* always output BPF verifier log */
 #define	DTRACEOPT_SCRATCHSIZE	33	/* max scratch size permitted */
-#define	DTRACEOPT_MAX		34	/* number of options */
+#define	DTRACEOPT_LOCKMEMKB	34	/* max locked memory (kilobytes) */
+#define	DTRACEOPT_MAX		35	/* number of options */
 
 #define	DTRACEOPT_UNSET		(dtrace_optval_t)-2	/* unset option */
 
diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
index e68bf561..02a160fe 100644
--- a/libdtrace/dt_bpf.c
+++ b/libdtrace/dt_bpf.c
@@ -58,6 +58,20 @@ dt_bpf_error(dtrace_hdl_t *dtp, const char *fmt, ...)
 	return dt_set_errno(dtp, EDT_BPF);
 }
 
+static int
+dt_bpf_lockmem_error(dtrace_hdl_t *dtp, const char *msg)
+{
+	return dt_bpf_error(dtp, "%s:\n"
+			    "\tIt is possible that the locked-memory limit is too low.\n"
+			    "\tYou can try a higher limit with 'ulimit -l n'.\n"
+			    "\tOr, if you use 'sudo dtrace', try 'sudo dtrace -xlockmemkb=n'.\n"
+			    "\tHere, n is a number of kilobytes or the string 'unlimited'.\n"
+			    "\tTo see the locked-memory usage of DTrace jobs on a system,\n"
+			    "\tcheck 'memlock' fields in 'sudo bpftool map' and\n"
+			    "\t'sudo bpftool prog' output while the DTrace jobs are running.\n"
+			    , msg);
+}
+
 /*
  * Load the value for the given key in the map referenced by the given fd.
  */
@@ -113,9 +127,15 @@ create_gmap(dtrace_hdl_t *dtp, const char *name, enum bpf_map_type type,
 	dt_dprintf("Creating BPF map '%s' (ksz %u, vsz %u, sz %d)\n",
 		   name, ksz, vsz, size);
 	fd = bpf_create_map_name(type, name, ksz, vsz, size, 0);
-	if (fd < 0)
-		return dt_bpf_error(dtp, "failed to create BPF map '%s': %s\n",
-				    name, strerror(errno));
+	if (fd < 0) {
+		char msg[64];
+
+		snprintf(msg, sizeof(msg),
+			 "failed to create BPF map '%s'", name);
+		if (errno == EPERM)
+			return dt_bpf_lockmem_error(dtp, msg);
+		return dt_bpf_error(dtp, "%s: %s\n", msg, strerror(errno));
+	}
 
 	dt_dprintf("BPF map '%s' is FD %d (ksz %u, vsz %u, sz %d)\n",
 		   name, fd, ksz, vsz, size);
@@ -464,10 +484,15 @@ dt_bpf_load_prog(dtrace_hdl_t *dtp, const dt_probe_t *prp,
 	assert(log != NULL);
 	rc = bpf_load_program_xattr(&attr, log, logsz);
 	if (rc < 0) {
-		dt_bpf_error(dtp,
-			     "BPF program load for '%s:%s:%s:%s' failed: %s\n",
-			     pdp->prv, pdp->mod, pdp->fun, pdp->prb,
-			     strerror(origerrno ? origerrno : errno));
+		char msg[64];
+
+		snprintf(msg, sizeof(msg),
+			 "BPF program load for '%s:%s:%s:%s' failed",
+		         pdp->prv, pdp->mod, pdp->fun, pdp->prb);
+		if (errno == EPERM)
+			return dt_bpf_lockmem_error(dtp, msg);
+		dt_bpf_error(dtp, "%s: %s\n", msg,
+		     strerror(origerrno ? origerrno : errno));
 
 		/* check whether we have an incomplete BPF log */
 		if (errno == ENOSPC) {
diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
index 316ef4d0..4ba44ab3 100644
--- a/libdtrace/dt_open.c
+++ b/libdtrace/dt_open.c
@@ -707,19 +707,6 @@ dt_vopen(int version, int flags, int *errp,
 		setrlimit(RLIMIT_NOFILE, &rl);
 	}
 
-	/*
-	 * Also, raise the limit on size that can be locked into memory,
-	 * which is needed for BPF operations.
-	 */
-	if (getrlimit(RLIMIT_MEMLOCK, &rl) == 0) {
-		rlim_t lim = 32 * 1024 * 1024;
-
-		if (rl.rlim_cur < lim) {
-			rl.rlim_cur = rl.rlim_max = lim;
-			setrlimit(RLIMIT_MEMLOCK, &rl);
-		}
-	}
-
 	if ((dtp = malloc(sizeof(dtrace_hdl_t))) == NULL)
 		return set_open_errno(dtp, errp, EDT_NOMEM);
 
diff --git a/libdtrace/dt_options.c b/libdtrace/dt_options.c
index 5d3ff2ae..dfdac451 100644
--- a/libdtrace/dt_options.c
+++ b/libdtrace/dt_options.c
@@ -642,6 +642,20 @@ dt_opt_version(dtrace_hdl_t *dtp, const char *arg, uintptr_t option)
 	return dt_reduce(dtp, v);
 }
 
+static int
+dt_opt_lockmemkb(dtrace_hdl_t *dtp, const char *arg, uintptr_t option)
+{
+	if (arg == NULL)
+		return dt_set_errno(dtp, EDT_BADOPTVAL);
+
+	if (strcmp(arg, "unlimited") == 0)
+		dtp->dt_options[option] = RLIM_INFINITY;
+	else
+		dtp->dt_options[option] = atoll(arg);
+
+	return 0;
+}
+
 static int
 dt_opt_runtime(dtrace_hdl_t *dtp, const char *arg, uintptr_t option)
 {
@@ -1120,6 +1134,7 @@ static const dt_option_t _dtrace_rtoptions[] = {
 	{ "grabanon", dt_opt_runtime, DTRACEOPT_GRABANON },
 	{ "jstackframes", dt_opt_runtime, DTRACEOPT_JSTACKFRAMES },
 	{ "jstackstrsize", dt_opt_size, DTRACEOPT_JSTACKSTRSIZE },
+	{ "lockmemkb", dt_opt_lockmemkb, DTRACEOPT_LOCKMEMKB },
 	{ "maxframes", dt_opt_runtime, DTRACEOPT_MAXFRAMES },
 	{ "nspec", dt_opt_runtime, DTRACEOPT_NSPEC },
 	{ "pcapsize", dt_opt_pcapsize, DTRACEOPT_PCAPSIZE },
diff --git a/libdtrace/dt_work.c b/libdtrace/dt_work.c
index ff2a2458..a9d9f1e8 100644
--- a/libdtrace/dt_work.c
+++ b/libdtrace/dt_work.c
@@ -1,6 +1,6 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2022, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -55,10 +55,21 @@ dtrace_go(dtrace_hdl_t *dtp, uint_t cflags)
 	size_t			size;
 	int			err;
 	struct epoll_event	ev;
+	dtrace_optval_t		lockmemkb = DTRACEOPT_UNSET;
+	struct rlimit		rl;
 
 	if (dtp->dt_active)
 		return dt_set_errno(dtp, EINVAL);
 
+	/*
+	 * Set the locked-memory limit if so directed by the user.
+	 */
+        if (dtrace_getopt(dtp, "lockmemkb", &lockmemkb) == 0 &&
+            lockmemkb != DTRACEOPT_UNSET) {
+                rl.rlim_cur = rl.rlim_max = lockmemkb * 1024;
+                setrlimit(RLIMIT_MEMLOCK, &rl);
+        }
+
 	/*
 	 * Create the global BPF maps.  This is done only once regardless of
 	 * how many programs there are.
diff --git a/runtest.sh b/runtest.sh
index 1495285d..af1f67d3 100755
--- a/runtest.sh
+++ b/runtest.sh
@@ -312,6 +312,10 @@ logdir=$(find_next_numeric_dir test/log)
 LOGFILE=$logdir/runtest.log
 SUMFILE=$logdir/runtest.sum
 
+# Set a locked-memory limit that should be big enough for the test suite.
+# NOTE(review): "ulimit -l" takes kilobytes, so this is 128 Gbytes; confirm.
+ulimit -l $((128 * 1024 * 1024))
+
 # If running as root, remember and turn off core_pattern, and set the
 # coredumpsize to a biggish value.
 
diff --git a/test/unittest/misc/tst.lockmemkb-cmdline.r b/test/unittest/misc/tst.lockmemkb-cmdline.r
new file mode 100644
index 00000000..f8c0fbd5
--- /dev/null
+++ b/test/unittest/misc/tst.lockmemkb-cmdline.r
@@ -0,0 +1,16 @@
+1
+
+             1234
+0
+
+             1234
+0
+-- @@stderr --
+dtrace: could not enable tracing: failed to create BPF map 'state':
+	It is possible that the locked-memory limit is too low.
+	You can try a higher limit with 'ulimit -l n'.
+	Or, if you use 'sudo dtrace', try 'sudo dtrace -xlockmemkb=n'.
+	Here, n is a number of kilobytes or the string 'unlimited'.
+	To see the locked-memory usage of DTrace jobs on a system,
+	check 'memlock' fields in 'sudo bpftool map' and
+	'sudo bpftool prog' output while the DTrace jobs are running.
diff --git a/test/unittest/misc/tst.lockmemkb-cmdline.sh b/test/unittest/misc/tst.lockmemkb-cmdline.sh
new file mode 100755
index 00000000..f7303ec1
--- /dev/null
+++ b/test/unittest/misc/tst.lockmemkb-cmdline.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+#
+
+dtrace=$1
+
+for val in 1 16384 unlimited; do
+	ulimit -l $val
+
+	$dtrace -qn 'BEGIN { @ = avg(1234); exit(0); }'
+	echo $?
+done
+
+exit 0
diff --git a/test/unittest/misc/tst.lockmemkb-cmdline.x b/test/unittest/misc/tst.lockmemkb-cmdline.x
new file mode 100755
index 00000000..5f27acb6
--- /dev/null
+++ b/test/unittest/misc/tst.lockmemkb-cmdline.x
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+read MAJOR MINOR <<< `uname -r | grep -Eo '^[0-9]+\.[0-9]+' | tr '.' ' '`
+
+if [ $MAJOR -lt 5 ]; then
+        exit 0
+fi
+if [ $MAJOR -eq 5 -a $MINOR -lt 15 ]; then
+        exit 0
+fi
+
+# Somehow, UEKR6 (5.4.17) has problems with the locked-memory limit,
+# but UEKR7 (5.15.0) does not
+
+echo "no locked-memory limit on newer kernels?"
+exit 1
diff --git a/test/unittest/misc/tst.lockmemkb-envvar.r b/test/unittest/misc/tst.lockmemkb-envvar.r
new file mode 120000
index 00000000..ca002833
--- /dev/null
+++ b/test/unittest/misc/tst.lockmemkb-envvar.r
@@ -0,0 +1 @@
+tst.lockmemkb-cmdline.r
\ No newline at end of file
diff --git a/test/unittest/misc/tst.lockmemkb-envvar.sh b/test/unittest/misc/tst.lockmemkb-envvar.sh
new file mode 100755
index 00000000..dd008c0e
--- /dev/null
+++ b/test/unittest/misc/tst.lockmemkb-envvar.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+#
+
+dtrace=$1
+
+ulimit -l 1
+
+for val in 1 16384 unlimited; do
+	DTRACE_OPT_LOCKMEMKB=$val $dtrace -qn 'BEGIN { @ = avg(1234); exit(0); }'
+	echo $?
+done
+
+exit 0
diff --git a/test/unittest/misc/tst.lockmemkb-envvar.x b/test/unittest/misc/tst.lockmemkb-envvar.x
new file mode 120000
index 00000000..d132129b
--- /dev/null
+++ b/test/unittest/misc/tst.lockmemkb-envvar.x
@@ -0,0 +1 @@
+tst.lockmemkb-cmdline.x
\ No newline at end of file
diff --git a/test/unittest/misc/tst.lockmemkb-pragma.r b/test/unittest/misc/tst.lockmemkb-pragma.r
new file mode 120000
index 00000000..ca002833
--- /dev/null
+++ b/test/unittest/misc/tst.lockmemkb-pragma.r
@@ -0,0 +1 @@
+tst.lockmemkb-cmdline.r
\ No newline at end of file
diff --git a/test/unittest/misc/tst.lockmemkb-pragma.sh b/test/unittest/misc/tst.lockmemkb-pragma.sh
new file mode 100755
index 00000000..543b0332
--- /dev/null
+++ b/test/unittest/misc/tst.lockmemkb-pragma.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+#
+
+dtrace=$1
+
+ulimit -l 1
+
+for val in 1 16384 unlimited; do
+	$dtrace -qs /dev/stdin << EOF
+		#pragma D option lockmemkb=$val
+
+		BEGIN
+		{
+			@ = avg(1234);
+			exit(0);
+		}
+EOF
+	echo $?
+done
+
+exit 0
+
+
diff --git a/test/unittest/misc/tst.lockmemkb-pragma.x b/test/unittest/misc/tst.lockmemkb-pragma.x
new file mode 120000
index 00000000..d132129b
--- /dev/null
+++ b/test/unittest/misc/tst.lockmemkb-pragma.x
@@ -0,0 +1 @@
+tst.lockmemkb-cmdline.x
\ No newline at end of file
diff --git a/test/unittest/misc/tst.lockmemkb-x.r b/test/unittest/misc/tst.lockmemkb-x.r
new file mode 120000
index 00000000..ca002833
--- /dev/null
+++ b/test/unittest/misc/tst.lockmemkb-x.r
@@ -0,0 +1 @@
+tst.lockmemkb-cmdline.r
\ No newline at end of file
diff --git a/test/unittest/misc/tst.lockmemkb-x.sh b/test/unittest/misc/tst.lockmemkb-x.sh
new file mode 100755
index 00000000..48557a9b
--- /dev/null
+++ b/test/unittest/misc/tst.lockmemkb-x.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+#
+
+dtrace=$1
+
+ulimit -l 1
+
+for val in 1 16384 unlimited; do
+	$dtrace -xlockmemkb=$val -qn 'BEGIN { @ = avg(1234); exit(0); }'
+	echo $?
+done
+
+exit 0
diff --git a/test/unittest/misc/tst.lockmemkb-x.x b/test/unittest/misc/tst.lockmemkb-x.x
new file mode 120000
index 00000000..d132129b
--- /dev/null
+++ b/test/unittest/misc/tst.lockmemkb-x.x
@@ -0,0 +1 @@
+tst.lockmemkb-cmdline.x
\ No newline at end of file
-- 
2.18.4




More information about the DTrace-devel mailing list