[DTrace-devel] [PATCH v2 2/2] Fix copy from user space when accessing high-number pid entry args

eugene.loh at oracle.com eugene.loh at oracle.com
Thu Apr 21 01:07:33 UTC 2022


From: Eugene Loh <eugene.loh at oracle.com>

The pid-provider "entry" probes must access function arguments -- at
first from registers and, if there are many args, then also from the
stack.  We copied these stack locations using the BPF probe_read()
helper function.  Early versions of this function, however, were only
reliable for copying from kernel space.  Apparently, this issue was
irrelevant on x86_64, but caused probe_read() to return an error
code on aarch64 when copying from user space.

With Linux 5.4-rc6, 6ae08ae3dea2 "bpf: Add probe_read_{user, kernel} and
probe_read_{user, kernel}_str helpers" adds a probe_read_user() helper
function.  Unfortunately, the function seems not to be available in UEK6
(5.4.17).

Introduce a dt_bpf_func_id_in_range(), which checks if a function ID
is in-range for the installed kernel.

Change dt_cg_tramp_copy_args_from_regs() to use probe_read_user()
if available but probe_read() otherwise.

Change test pid/tst.args1.d to XFAIL on aarch64 for Linux kernels
before 5.5.  Technically, Linux 5.4 should be fine, but we have
seen the aforementioned problems on UEK6.

Incidentally, with Linux 5.7-rc6, 0ebeea8ca8a4 "bpf: Restrict
bpf_probe_read{, str}() only to archs where they work" turns
bpf_probe_read() off altogether on archs that have overlapping
kernel and user address ranges.  This does not affect x86, arm64,
or arm.

And with Linux 5.8-rc1, 8d92db5c04d1 "bpf: rework the compat kernel
probe handling", on archs with non-overlapping ranges (including
x86, arm64, and arm), bpf_probe_read() automatically chooses between
bpf_probe_read_user() and bpf_probe_read_kernel().  This current patch
becomes unnecessary, but it remains the proper thing to do.

Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
 libdtrace/dt_bpf.c                    | 57 +++++++++++++++++++++++++++
 libdtrace/dt_bpf.h                    |  1 +
 libdtrace/dt_cg.c                     | 21 ++++++++--
 test/unittest/pid/tst.args1.aarch64.x | 16 ++++++++
 4 files changed, 92 insertions(+), 3 deletions(-)
 create mode 100755 test/unittest/pid/tst.args1.aarch64.x

diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
index 7a966474..8511ac96 100644
--- a/libdtrace/dt_bpf.c
+++ b/libdtrace/dt_bpf.c
@@ -565,6 +565,63 @@ dt_bpf_reloc_error_prog(dtrace_hdl_t *dtp, dtrace_difo_t *dp)
 	dp->dtdo_brelen -= rp - nrp;
 }
 
+/*
+ * Check if func_id is in range (0 <= func_id < __BPF_FUNC_MAX_ID)
+ * for the loaded kernel.  We try to load a trivial BPF program that
+ * only calls a helper function;  the BPF verifier will report the
+ * "invalid func " message for out-of-range func IDs.
+ */
+int
+dt_bpf_func_id_in_range(int func_id)
+{
+	struct bpf_load_program_attr	attr;
+	struct bpf_insn			insns[2];
+	size_t				logsz = BPF_LOG_BUF_SIZE;
+	char				*log;
+	int				rc = 0, fd;
+	char				*p;
+	char				*invfunc = "invalid func ";
+
+	/* prepare to load a BPF program */
+	memset(&attr, 0, sizeof(struct bpf_load_program_attr));
+	attr.prog_type = BPF_PROG_TYPE_KPROBE;
+	attr.insns = insns;
+	attr.insns_cnt = sizeof(insns) / sizeof(struct bpf_insn);
+	attr.license = BPF_CG_LICENSE;
+	attr.log_level = 1;
+
+	/* here is our trivial program */
+	memset(insns, 0, sizeof(insns));
+	insns[0].code = BPF_CALL | BPF_K | BPF_JMP;	/* call func_id */
+	insns[0].imm = func_id;
+	insns[1].code = BPF_EXIT | BPF_K | BPF_JMP;	/* exit */
+
+	log = malloc(logsz);
+	assert(log != NULL);
+
+	/* try to load the program */
+	fd = bpf_load_program_xattr(&attr, log, logsz);
+
+	/* success: we're done */
+	if (fd >= 0) {
+		rc = 1;
+		close(fd);
+		goto out;
+	}
+
+	/* "invalid func " means func_id is not in range */
+	for (p = log; p && *p; p = strchr(p, '\n') + 1)
+		if (strncmp(p, invfunc, strlen(invfunc)) == 0)
+			goto out;
+
+	/* otherwise, func_id is okay for our purposes */
+	rc = 1;
+
+out:
+	free(log);
+	return rc;
+}
+
 int
 dt_bpf_load_progs(dtrace_hdl_t *dtp, uint_t cflags)
 {
diff --git a/libdtrace/dt_bpf.h b/libdtrace/dt_bpf.h
index f7745956..b372a933 100644
--- a/libdtrace/dt_bpf.h
+++ b/libdtrace/dt_bpf.h
@@ -39,6 +39,7 @@ extern int dt_bpf_gmap_create(struct dtrace_hdl *);
 extern int dt_bpf_map_lookup(int fd, const void *key, void *val);
 extern int dt_bpf_map_update(int fd, const void *key, const void *val);
 extern int dt_bpf_map_delete(int fd, const void *key);
+extern int dt_bpf_func_id_in_range(int func_id);
 extern int dt_bpf_load_progs(struct dtrace_hdl *, uint_t);
 
 #ifdef	__cplusplus
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index d1505667..2bd08dd2 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -275,7 +275,7 @@ void
 dt_cg_tramp_copy_args_from_regs(dt_pcb_t *pcb, int rp)
 {
 	dt_irlist_t	*dlp = &pcb->pcb_ir;
-	int		i;
+	int		i, bpf_probe_read_func_id;
 
 	/*
 	 *	for (i = 0; i < PT_REGS_ARGC; i++)
@@ -304,6 +304,21 @@ dt_cg_tramp_copy_args_from_regs(dt_pcb_t *pcb, int rp)
 	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(7), BPF_REG_0));
 #endif
 
+	/*
+	 * Provisionally set the read function ID to BPF_FUNC_probe_read_user.
+	 * Since that function might not be defined in the installed linux/bpf.h,
+	 * we hardwire the value in.
+	 */
+	bpf_probe_read_func_id = 112;
+
+	/* Pick the BPF probe_read helper function to use.  Since we
+	 * will be reading from user space, try to use bpf_probe_read_user().
+	 * Unfortunately, it is not available until Linux 5.4... and yet not in
+	 * UEK6.  We try our best but fall back to bpf_probe_read() if necessary.
+	 */
+	if (! dt_bpf_func_id_in_range(bpf_probe_read_func_id))
+		bpf_probe_read_func_id = BPF_FUNC_probe_read;
+
 	/*
 	 * Generate code to read the remainder of the arguments from the stack
 	 * (if possible).  We (ab)use the %r0 spill slot on the stack to read
@@ -316,7 +331,7 @@ dt_cg_tramp_copy_args_from_regs(dt_pcb_t *pcb, int rp)
 	 *		uint64_t	*sp;
 	 *
 	 *		sp = (uint64_t *)(((dt_pt_regs *)rp)->sp;
-	 *		rc = bpf_probe_read(dctx->mst->argv[i],
+	 *		rc = bpf_probe_read[_user](dctx->mst->argv[i],
 	 *				    sizeof(uint64_t),
 	 *				    &sp[i - PT_REGS_ARGC +
 	 *					    PT_REGS_ARGSTKBASE]);
@@ -346,7 +361,7 @@ dt_cg_tramp_copy_args_from_regs(dt_pcb_t *pcb, int rp)
 		emit(dlp,  BPF_MOV_IMM(BPF_REG_2, sizeof(uint64_t)));
 		emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_3, rp, PT_REGS_SP));
 		emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, (i - PT_REGS_ARGC + PT_REGS_ARGSTKBASE) * sizeof(uint64_t)));
-		emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_probe_read));
+		emit(dlp,  BPF_CALL_HELPER(bpf_probe_read_func_id));
 		emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_ok));
 		emit(dlp,  BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(i), 0));
 		emitl(dlp, lbl_ok,
diff --git a/test/unittest/pid/tst.args1.aarch64.x b/test/unittest/pid/tst.args1.aarch64.x
new file mode 100755
index 00000000..8e529fcc
--- /dev/null
+++ b/test/unittest/pid/tst.args1.aarch64.x
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+read MAJOR MINOR <<< `uname -r | grep -Eo '^[0-9]+\.[0-9]+' | tr '.' ' '`
+
+if [ $MAJOR -gt 5 ]; then
+        exit 0
+fi
+if [ $MAJOR -eq 5 -a $MINOR -gt 4 ]; then
+	exit 0
+fi
+
+# Technically, Linux 5.4 should also work,
+# but there seem to be problems with UEK6.
+
+echo "pid entry probes have bad arg8/arg9 on pre-5.5 kernels on ARM"
+exit 1
-- 
2.18.4




More information about the DTrace-devel mailing list