[DTrace-devel] [PATCH 02/61] Fix copy from user space when accessing high-number pid entry args
Kris Van Hees
kris.van.hees at oracle.com
Thu Jul 28 12:18:22 UTC 2022
Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>
... although I modified it to make use of my 'BPF helper mapping' patch that
provides similar functionality in terms of being able to determine which BPF
helper to use for probe_read[_(user|kernel)].
... queued to go into dev after my patch series
On Fri, Jul 08, 2022 at 10:44:46AM -0400, eugene.loh--- via DTrace-devel wrote:
> From: Eugene Loh <eugene.loh at oracle.com>
>
> The pid-provider "entry" probes must access function arguments -- at
> first from registers and, if there are many args, then also from the
> stack. We copied these stack locations using the BPF probe_read()
> helper function. Early versions of this function, however, were only
> reliable for copying from kernel space. Apparently, this issue was
> irrelevant on x86_64, but caused probe_read() to return an error
> code on aarch64 when copying from user space.
>
> With Linux 5.4-rc6, 6ae08ae3dea2 "bpf: Add probe_read_{user, kernel} and
> probe_read_{user, kernel}_str helpers" adds a probe_read_user() helper
> function. Unfortunately, the function seems not to be available in UEK6
> (5.4.17).
>
> Introduce a dt_bpf_func_id_in_range(), which checks if a function ID
> is in-range for the installed kernel.
>
> Change dt_cg_tramp_copy_args_from_regs() to use probe_read_user()
> if available but probe_read() otherwise.
>
> Change test pid/tst.args1.d to XFAIL on aarch64 for Linux kernels
> before 5.5. Technically, Linux 5.4 should be fine, but we have
> seen the aforementioned problems on UEK6.
>
> Incidentally, with Linux 5.7-rc6, 0ebeea8ca8a4 "bpf: Restrict
> bpf_probe_read{, str}() only to archs where they work" turns
> bpf_probe_read() off altogether on archs that have overlapping
> kernel and user address ranges. This does not affect x86, arm64,
> or arm.
>
> And with Linux 5.8-rc1, 8d92db5c04d1 "bpf: rework the compat kernel
> probe handling", on archs with non-overlapping ranges (including
> x86, arm64, and arm), bpf_probe_read() automatically chooses between
> bpf_probe_read_user() and bpf_probe_read_kernel(). This current patch
> becomes unnecessary, but it remains the proper thing to do.
>
> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
> ---
> libdtrace/dt_bpf.c | 57 +++++++++++++++++++++++++++
> libdtrace/dt_bpf.h | 1 +
> libdtrace/dt_cg.c | 21 ++++++++--
> test/unittest/pid/tst.args1.aarch64.x | 16 ++++++++
> 4 files changed, 92 insertions(+), 3 deletions(-)
> create mode 100755 test/unittest/pid/tst.args1.aarch64.x
>
> diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
> index 31003eb6..e68bf561 100644
> --- a/libdtrace/dt_bpf.c
> +++ b/libdtrace/dt_bpf.c
> @@ -535,6 +535,63 @@ dt_bpf_reloc_error_prog(dtrace_hdl_t *dtp, dtrace_difo_t *dp)
> dp->dtdo_brelen -= rp - nrp;
> }
>
> +/*
> + * Check if func_id is in range (0 <= func_id < __BPF_FUNC_MAX_ID)
> + * for the loaded kernel. We try to load a trivial BPF program that
> + * only calls a helper function; the BPF verifier will report the
> + * "invalid func " message for out-of-range func IDs.
> + */
> +int
> +dt_bpf_func_id_in_range(int func_id)
> +{
> + struct bpf_load_program_attr attr;
> + struct bpf_insn insns[2];
> + size_t logsz = BPF_LOG_BUF_SIZE;
> + char *log;
> + int rc = 0, fd;
> + char *p;
> + char *invfunc = "invalid func ";
> +
> + /* prepare to load a BPF program */
> + memset(&attr, 0, sizeof(struct bpf_load_program_attr));
> + attr.prog_type = BPF_PROG_TYPE_KPROBE;
> + attr.insns = insns;
> + attr.insns_cnt = sizeof(insns) / sizeof(struct bpf_insn);
> + attr.license = BPF_CG_LICENSE;
> + attr.log_level = 1;
> +
> + /* here is our trivial program */
> + memset(insns, 0, sizeof(insns));
> + insns[0].code = BPF_CALL | BPF_K | BPF_JMP; /* call func_id */
> + insns[0].imm = func_id;
> + insns[1].code = BPF_EXIT | BPF_K | BPF_JMP; /* exit */
> +
> + log = malloc(logsz);
> + assert(log != NULL);
> +
> + /* try to load the program */
> + fd = bpf_load_program_xattr(&attr, log, logsz);
> +
> + /* success: we're done */
> + if (fd >= 0) {
> + rc = 1;
> + close(fd);
> + goto out;
> + }
> +
> + /* "invalid func " means func_id is not in range */
> + for (p = log; p && *p; p = strchr(p, '\n') + 1)
> + if (strncmp(p, invfunc, strlen(invfunc)) == 0)
> + goto out;
> +
> + /* otherwise, func_id is okay for our purposes */
> + rc = 1;
> +
> +out:
> + free(log);
> + return rc;
> +}
> +
> int
> dt_bpf_load_progs(dtrace_hdl_t *dtp, uint_t cflags)
> {
> diff --git a/libdtrace/dt_bpf.h b/libdtrace/dt_bpf.h
> index 713c446c..12b9fce3 100644
> --- a/libdtrace/dt_bpf.h
> +++ b/libdtrace/dt_bpf.h
> @@ -45,6 +45,7 @@ extern int dt_bpf_gmap_create(struct dtrace_hdl *);
> extern int dt_bpf_map_lookup(int fd, const void *key, void *val);
> extern int dt_bpf_map_update(int fd, const void *key, const void *val);
> extern int dt_bpf_map_delete(int fd, const void *key);
> +extern int dt_bpf_func_id_in_range(int func_id);
> extern int dt_bpf_load_progs(struct dtrace_hdl *, uint_t);
>
> #ifdef __cplusplus
> diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> index 823fe8e3..58f0ddd1 100644
> --- a/libdtrace/dt_cg.c
> +++ b/libdtrace/dt_cg.c
> @@ -275,7 +275,7 @@ void
> dt_cg_tramp_copy_args_from_regs(dt_pcb_t *pcb, int rp)
> {
> dt_irlist_t *dlp = &pcb->pcb_ir;
> - int i;
> + int i, bpf_probe_read_func_id;
>
> /*
> * for (i = 0; i < PT_REGS_ARGC; i++)
> @@ -304,6 +304,21 @@ dt_cg_tramp_copy_args_from_regs(dt_pcb_t *pcb, int rp)
> emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(7), BPF_REG_0));
> #endif
>
> + /*
> + * Provisionally set the read function ID to BPF_FUNC_probe_read_user.
> + * Since that function might not be defined in the installed linux/bpf.h,
> + * we hardwire the value in.
> + */
> + bpf_probe_read_func_id = 112;
> +
> + /* Pick the BPF probe_read helper function to use. Since we
> + * will be reading from user space, try to use bpf_probe_read_user().
> + * Unfortunately, it is not available until Linux 5.4... and yet not in
> + * UEK6. We try our best but fall back to bpf_probe_read() if necessary.
> + */
> + if (! dt_bpf_func_id_in_range(bpf_probe_read_func_id))
> + bpf_probe_read_func_id = BPF_FUNC_probe_read;
> +
> /*
> * Generate code to read the remainder of the arguments from the stack
> * (if possible). We (ab)use the %r0 spill slot on the stack to read
> @@ -316,7 +331,7 @@ dt_cg_tramp_copy_args_from_regs(dt_pcb_t *pcb, int rp)
> * uint64_t *sp;
> *
> * sp = (uint64_t *)(((dt_pt_regs *)rp)->sp;
> - * rc = bpf_probe_read(dctx->mst->argv[i],
> + * rc = bpf_probe_read[_user](dctx->mst->argv[i],
> * sizeof(uint64_t),
> * &sp[i - PT_REGS_ARGC +
> * PT_REGS_ARGSTKBASE]);
> @@ -346,7 +361,7 @@ dt_cg_tramp_copy_args_from_regs(dt_pcb_t *pcb, int rp)
> emit(dlp, BPF_MOV_IMM(BPF_REG_2, sizeof(uint64_t)));
> emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, rp, PT_REGS_SP));
> emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, (i - PT_REGS_ARGC + PT_REGS_ARGSTKBASE) * sizeof(uint64_t)));
> - emit(dlp, BPF_CALL_HELPER(BPF_FUNC_probe_read));
> + emit(dlp, BPF_CALL_HELPER(bpf_probe_read_func_id));
> emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_ok));
> emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(i), 0));
> emitl(dlp, lbl_ok,
> diff --git a/test/unittest/pid/tst.args1.aarch64.x b/test/unittest/pid/tst.args1.aarch64.x
> new file mode 100755
> index 00000000..8e529fcc
> --- /dev/null
> +++ b/test/unittest/pid/tst.args1.aarch64.x
> @@ -0,0 +1,16 @@
> +#!/bin/sh
> +
> +read MAJOR MINOR <<< `uname -r | grep -Eo '^[0-9]+\.[0-9]+' | tr '.' ' '`
> +
> +if [ $MAJOR -gt 5 ]; then
> + exit 0
> +fi
> +if [ $MAJOR -eq 5 -a $MINOR -gt 4 ]; then
> + exit 0
> +fi
> +
> +# Technically, Linux 5.4 should also work,
> +# but there seem to be problems with UEK6.
> +
> +echo "pid entry probes have bad arg8/arg9 on pre-5.5 kernels on ARM"
> +exit 1
> --
> 2.18.4
>
>
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel
More information about the DTrace-devel
mailing list