[DTrace-devel] [PATCH 2/2] Fix copy from user space when accessing high-number pid entry args
eugene.loh at oracle.com
eugene.loh at oracle.com
Fri Apr 15 18:53:51 UTC 2022
From: Eugene Loh <eugene.loh at oracle.com>
The pid-provider "entry" probes must access function arguments -- at
first from registers and, if there are many args, then also from the
stack. We copied these stack locations using the BPF probe_read()
helper function. Early versions of this function, however, were only
reliable for copying from kernel space. Apparently, this issue was
irrelevant on x86_64, but caused probe_read() to return an error
code on aarch64 when copying from user space.
With Linux 5.4-rc6, 6ae08ae3dea2 "bpf: Add probe_read_{user, kernel} and
probe_read_{user, kernel}_str helpers" adds a probe_read_user() helper
function. Unfortunately, the function seems not to be available in UEK6
(5.4.17), even when it appears in the installed /usr/include/linux/bpf.h
header.
Introduce a dt_bpf_func_max_id, which ideally will be __BPF_FUNC_MAX_ID
from bpf.h but is lowered empirically if necessary.
Change dt_cg_tramp_copy_args_from_regs() to use probe_read_user()
if available but probe_read() otherwise.
Change test pid/tst.args1.d to XFAIL on aarch64 for Linux kernels
before 5.5. Technically, Linux 5.4 should be fine, but we have
seen the aforementioned problems on UEK6.
Incidentally, with Linux 5.7-rc6, 0ebeea8ca8a4 "bpf: Restrict
bpf_probe_read{, str}() only to archs where they work" turns
bpf_probe_read() off altogether on archs that have overlapping
kernel and user address ranges. This does not affect x86, arm64,
or arm.
And with Linux 5.8-rc1, 8d92db5c04d1 "bpf: rework the compat kernel
probe handling", on archs with non-overlapping ranges (including
x86, arm64, and arm), bpf_probe_read() automatically chooses between
bpf_probe_read_user() and bpf_probe_read_kernel(). This current patch
becomes unnecessary, but it remains the proper thing to do.
Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
libdtrace/dt_bpf.c | 72 +++++++++++++++++++++++++++
libdtrace/dt_bpf.h | 1 +
libdtrace/dt_cg.c | 19 +++++--
libdtrace/dt_impl.h | 1 +
libdtrace/dt_open.c | 2 +
test/unittest/pid/tst.args1.aarch64.x | 16 ++++++
6 files changed, 108 insertions(+), 3 deletions(-)
create mode 100755 test/unittest/pid/tst.args1.aarch64.x
diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
index ade09b15..f4ab9b4c 100644
--- a/libdtrace/dt_bpf.c
+++ b/libdtrace/dt_bpf.c
@@ -552,6 +552,78 @@ dt_bpf_reloc_error_prog(dtrace_hdl_t *dtp, dtrace_difo_t *dp)
dp->dtdo_brelen -= rp - nrp;
}
+/*
+ * Set dt_bpf_func_max_id. While it should be __BPF_FUNC_MAX_ID from
+ * /usr/include/linux/bpf.h, we have seen cases where the kernel was
+ * apparently built with a smaller value than what is seen in the installed
+ * header file. So we try to load a trivial BPF program that calls a helper
+ * function; we look for the "invalid func " message (emitted by the BPF
+ * verifier for out-of-range func IDs) to go away to find out what
+ * __BPF_FUNC_MAX_ID value the kernel is actually using.
+ */
+int
+dt_bpf_func_max_id_set(dtrace_hdl_t *dtp)
+{
+ struct bpf_load_program_attr attr;
+ struct bpf_insn insns[2];
+ size_t logsz = BPF_LOG_BUF_SIZE;
+ char *log;
+ int rc = 0, func_id;
+ char *p;
+ char *invfunc = "invalid func ";
+
+ /* prepare to load a BPF program */
+ memset(&attr, 0, sizeof(struct bpf_load_program_attr));
+ attr.prog_type = BPF_PROG_TYPE_KPROBE;
+ attr.insns = insns;
+ attr.insns_cnt = sizeof(insns) / sizeof(struct bpf_insn);
+ attr.license = BPF_CG_LICENSE;
+ attr.log_level = 1;
+
+ /* here is our trivial program, missing only the func id */
+ memset(insns, 0, sizeof(insns));
+ insns[0].code = BPF_CALL | BPF_K | BPF_JMP; /* call */
+ insns[1].code = BPF_EXIT | BPF_K | BPF_JMP; /* exit */
+
+ log = dt_zalloc(dtp, logsz);
+ assert(log != NULL);
+
+ for (func_id = __BPF_FUNC_MAX_ID - 1; func_id >= 0; func_id--) {
+ int fd;
+
+ /* set the func id and try to load the program */
+ insns[0].imm = func_id;
+ fd = bpf_load_program_xattr(&attr, log, logsz);
+
+ /* success: we're done */
+ if (fd >= 0) {
+ close(fd);
+ dtp->dt_bpf_func_max_id = func_id + 1;
+ goto out;
+ }
+
+ /* "invalid func " means func_id is still too high */
+ for (p = log; p && *p; p = strchr(p, '\n') + 1)
+ if (strncmp(p, invfunc, strlen(invfunc)) == 0)
+ goto still_too_high;
+
+ /* otherwise, func_id is okay for our purposes */
+ dtp->dt_bpf_func_max_id = func_id + 1;
+ goto out;
+
+still_too_high:
+ continue;
+ }
+
+ /* something is wrong if we always saw "invalid func " */
+ dtp->dt_bpf_func_max_id = 0;
+ rc = 1;
+
+out:
+ dt_free(dtp, log);
+ return rc;
+}
+
int
dt_bpf_load_progs(dtrace_hdl_t *dtp, uint_t cflags)
{
diff --git a/libdtrace/dt_bpf.h b/libdtrace/dt_bpf.h
index f7745956..e96f37b0 100644
--- a/libdtrace/dt_bpf.h
+++ b/libdtrace/dt_bpf.h
@@ -39,6 +39,7 @@ extern int dt_bpf_gmap_create(struct dtrace_hdl *);
extern int dt_bpf_map_lookup(int fd, const void *key, void *val);
extern int dt_bpf_map_update(int fd, const void *key, const void *val);
extern int dt_bpf_map_delete(int fd, const void *key);
+extern int dt_bpf_func_max_id_set(struct dtrace_hdl *dtp);
extern int dt_bpf_load_progs(struct dtrace_hdl *, uint_t);
#ifdef __cplusplus
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index a149c575..ea12a8f3 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -270,7 +270,7 @@ void
dt_cg_tramp_copy_args_from_regs(dt_pcb_t *pcb, int rp)
{
dt_irlist_t *dlp = &pcb->pcb_ir;
- int i;
+ int i, bpf_probe_read_func_id;
/*
* for (i = 0; i < PT_REGS_ARGC; i++)
@@ -299,6 +299,19 @@ dt_cg_tramp_copy_args_from_regs(dt_pcb_t *pcb, int rp)
emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(7), BPF_REG_0));
#endif
+ /* Pick the BPF probe_read helper function to use. Since we
+ * will be reading from user space, try to use bpf_probe_read_user().
+ * Unfortunately, it is not available until Linux 5.4... and yet not
+ * in UEK6. In fact, even if the function is found in the installed
+ * bpf.h header file, the kernel might still not recognize it. So,
+ * we try our best but fall back to bpf_probe_read() if necessary.
+ */
+ bpf_probe_read_func_id = BPF_FUNC_probe_read;
+#ifdef BPF_FUNC_probe_read_user
+ if (BPF_FUNC_probe_read_user < dtp->dt_bpf_func_max_id)
+ bpf_probe_read_func_id = BPF_FUNC_probe_read_user;
+#endif
+
/*
* Generate code to read the remainder of the arguments from the stack
* (if possible). We (ab)use the %r0 spill slot on the stack to read
@@ -311,7 +324,7 @@ dt_cg_tramp_copy_args_from_regs(dt_pcb_t *pcb, int rp)
* uint64_t *sp;
*
* sp = (uint64_t *)(((dt_pt_regs *)rp)->sp;
- * rc = bpf_probe_read(dctx->mst->argv[i],
+ * rc = bpf_probe_read[_user](dctx->mst->argv[i],
* sizeof(uint64_t),
* &sp[i - PT_REGS_ARGC +
* PT_REGS_ARGSTKBASE]);
@@ -341,7 +354,7 @@ dt_cg_tramp_copy_args_from_regs(dt_pcb_t *pcb, int rp)
emit(dlp, BPF_MOV_IMM(BPF_REG_2, sizeof(uint64_t)));
emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, rp, PT_REGS_SP));
emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, (i - PT_REGS_ARGC + PT_REGS_ARGSTKBASE) * sizeof(uint64_t)));
- emit(dlp, BPF_CALL_HELPER(BPF_FUNC_probe_read));
+ emit(dlp, BPF_CALL_HELPER(bpf_probe_read_func_id));
emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_ok));
emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(i), 0));
emitl(dlp, lbl_ok,
diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
index 747815ac..ebe1dfc9 100644
--- a/libdtrace/dt_impl.h
+++ b/libdtrace/dt_impl.h
@@ -416,6 +416,7 @@ struct dtrace_hdl {
dt_list_t dt_lib_dep_sorted; /* dependency sorted library list */
dt_global_pcap_t dt_pcap; /* global tshark/pcap state */
char *dt_freopen_filename; /* filename for freopen() action */
+ int dt_bpf_func_max_id; /* exclusive upper bound for BPF helper funcs */
};
/*
diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
index 55277291..7cd25ca9 100644
--- a/libdtrace/dt_open.c
+++ b/libdtrace/dt_open.c
@@ -38,6 +38,7 @@
#include <dt_probe.h>
#include <dt_dis.h>
#include <dt_peb.h>
+#include <dt_bpf.h>
const dt_version_t _dtrace_versions[] = {
DT_VERS_1_0, /* D API 1.0.0 (PSARC 2001/466) Solaris 10 FCS */
@@ -753,6 +754,7 @@ dt_vopen(int version, int flags, int *errp,
pthread_mutex_init(&dtp->dt_sprintf_lock, NULL);
dt_dof_init(dtp);
uname(&dtp->dt_uts);
+ dt_bpf_func_max_id_set(dtp);
/*
* The default module path is derived in part from the utsname release
diff --git a/test/unittest/pid/tst.args1.aarch64.x b/test/unittest/pid/tst.args1.aarch64.x
new file mode 100755
index 00000000..f3d82eea
--- /dev/null
+++ b/test/unittest/pid/tst.args1.aarch64.x
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+read MAJOR MINOR <<< `uname -r | grep -Eo '^[0-9]+\.[0-9]+' | tr '.' ' '`
+
+if [ $MAJOR -gt 5 ]; then
+ exit 0
+fi
+if [ $MAJOR -eq 5 -a $MINOR -gt 4 ]; then
+ exit 0
+fi
+
+# Technically, Linux 5.4 should also work,
+# but there seem to be problems with UEK6.
+
+echo "pid entry probes have bad arg8/arg9 on pre-5.5 kernels on ARM"
+exit 1
--
2.27.0
More information about the DTrace-devel
mailing list