[DTrace-devel] [PATCH 2/2] Fix copy from user space when accessing high-number pid entry args

Kris Van Hees kris.van.hees at oracle.com
Wed Apr 20 02:53:37 UTC 2022


I think I would rather see logic that tests for a specific helper id rather
than try to determine the highest supported function id.  All that is really
needed it knowing whether the running kernel supports a specific BPF helper.

On Fri, Apr 15, 2022 at 02:53:51PM -0400, eugene.loh--- via DTrace-devel wrote:
> From: Eugene Loh <eugene.loh at oracle.com>
> 
> The pid-provider "entry" probes must access function arguments -- at
> first from registers and, if there are many args, then also from the
> stack.  We copied these stack locations using the BPF probe_read()
> helper function.  Early versions of this function, however, were only
> reliable for copying from kernel space.  Apparently, this issue was
> irrelevant on x86_64, but caused probe_read() to return an error
> code on aarch64 when copying from user space.
> 
> With Linux 5.4-rc6, 6ae08ae3dea2 "bpf: Add probe_read_{user, kernel} and
> probe_read_{user, kernel}_str helpers" adds a probe_read_user() helper
> function.  Unfortunately, the function seems not to be available in UEK6
> (5.4.17), even when it appears in the installed /usr/include/linux/bpf.h
> header.
> 
> Introduce a dt_bpf_func_max_id, which ideally will be __BPF_FUNC_MAX_ID
> from bpf.h but is lowered empirically if necessary.
> 
> Change dt_cg_tramp_copy_args_from_regs() to use probe_read_user()
> if available but probe_read() otherwise.
> 
> Change test pid/tst.args1.d to XFAIL on aarch64 for Linux kernels
> before 5.5.  Technically, Linux 5.4 should be fine, but we have
> seen the aforementioned problems on UEK6.
> 
> Incidentally, with Linux 5.7-rc6, 0ebeea8ca8a4 "bpf: Restrict
> bpf_probe_read{, str}() only to archs where they work" turns
> bpf_probe_read() off altogether on archs that have overlapping
> kernel and user address ranges.  This does not affect x86, arm64,
> or arm.
> 
> And with Linux 5.8-rc1, 8d92db5c04d1 "bpf: rework the compat kernel
> probe handling", on archs with non-overlapping ranges (including
> x86, arm64, and arm), bpf_probe_read() automatically chooses between
> bpf_probe_read_user() and bpf_probe_read_kernel().  This current patch
> becomes unnecessary, but it remains the proper thing to do.
> 
> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
> ---
>  libdtrace/dt_bpf.c                    | 72 +++++++++++++++++++++++++++
>  libdtrace/dt_bpf.h                    |  1 +
>  libdtrace/dt_cg.c                     | 19 +++++--
>  libdtrace/dt_impl.h                   |  1 +
>  libdtrace/dt_open.c                   |  2 +
>  test/unittest/pid/tst.args1.aarch64.x | 16 ++++++
>  6 files changed, 108 insertions(+), 3 deletions(-)
>  create mode 100755 test/unittest/pid/tst.args1.aarch64.x
> 
> diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
> index ade09b15..f4ab9b4c 100644
> --- a/libdtrace/dt_bpf.c
> +++ b/libdtrace/dt_bpf.c
> @@ -552,6 +552,78 @@ dt_bpf_reloc_error_prog(dtrace_hdl_t *dtp, dtrace_difo_t *dp)
>  	dp->dtdo_brelen -= rp - nrp;
>  }
>  
> +/*
> + * Set dt_bpf_func_max_id.  While it should be __BPF_FUNC_MAX_ID from
> + * /usr/include/linux/bpf.h, we have seen cases where the kernel was
> + * apparently built with a smaller value than what is seen in the installed
> + * header file.  So we try to load a trivial BPF program that calls a helper
> + * function;  we look for the "invalid func " message (emitted by the BPF
> + * verifier for out-of-range func IDs) to go away to find out what
> + * __BPF_FUNC_MAX_ID value the kernel is actually using.
> + */
> +int
> +dt_bpf_func_max_id_set(dtrace_hdl_t *dtp)
> +{
> +	struct bpf_load_program_attr	attr;
> +	struct bpf_insn			insns[2];
> +	size_t				logsz = BPF_LOG_BUF_SIZE;
> +	char				*log;
> +	int				rc = 0, func_id;
> +	char				*p;
> +	char				*invfunc = "invalid func ";
> +
> +	/* prepare to load a BPF program */
> +	memset(&attr, 0, sizeof(struct bpf_load_program_attr));
> +	attr.prog_type = BPF_PROG_TYPE_KPROBE;
> +	attr.insns = insns;
> +	attr.insns_cnt = sizeof(insns) / sizeof(struct bpf_insn);
> +	attr.license = BPF_CG_LICENSE;
> +	attr.log_level = 1;
> +
> +	/* here is our trivial program, missing only the func id */
> +	memset(insns, 0, sizeof(insns));
> +	insns[0].code = BPF_CALL | BPF_K | BPF_JMP;	/* call */
> +	insns[1].code = BPF_EXIT | BPF_K | BPF_JMP;	/* exit */
> +
> +	log = dt_zalloc(dtp, logsz);
> +	assert(log != NULL);
> +
> +	for (func_id = __BPF_FUNC_MAX_ID - 1; func_id >= 0; func_id--) {
> +		int fd;
> +
> +		/* set the func id and try to load the program */
> +		insns[0].imm = func_id;
> +		fd = bpf_load_program_xattr(&attr, log, logsz);
> +
> +		/* success: we're done */
> +		if (fd >= 0) {
> +			close(fd);
> +			dtp->dt_bpf_func_max_id = func_id + 1;
> +			goto out;
> +		}
> +
> +		/* "invalid func " means func_id is still too high */
> +		for (p = log; p && *p; p = strchr(p, '\n') + 1)
> +			if (strncmp(p, invfunc, strlen(invfunc)) == 0)
> +				goto still_too_high;
> +
> +		/* otherwise, func_id is okay for our purposes */
> +		dtp->dt_bpf_func_max_id = func_id + 1;
> +		goto out;
> +
> +still_too_high:
> +		continue;
> +	}
> +
> +	/* something is wrong if we always saw "invalid func " */
> +	dtp->dt_bpf_func_max_id = 0;
> +	rc = 1;
> +
> +out:
> +	dt_free(dtp, log);
> +	return rc;
> +}
> +
>  int
>  dt_bpf_load_progs(dtrace_hdl_t *dtp, uint_t cflags)
>  {
> diff --git a/libdtrace/dt_bpf.h b/libdtrace/dt_bpf.h
> index f7745956..e96f37b0 100644
> --- a/libdtrace/dt_bpf.h
> +++ b/libdtrace/dt_bpf.h
> @@ -39,6 +39,7 @@ extern int dt_bpf_gmap_create(struct dtrace_hdl *);
>  extern int dt_bpf_map_lookup(int fd, const void *key, void *val);
>  extern int dt_bpf_map_update(int fd, const void *key, const void *val);
>  extern int dt_bpf_map_delete(int fd, const void *key);
> +extern int dt_bpf_func_max_id_set(struct dtrace_hdl *dtp);
>  extern int dt_bpf_load_progs(struct dtrace_hdl *, uint_t);
>  
>  #ifdef	__cplusplus
> diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> index a149c575..ea12a8f3 100644
> --- a/libdtrace/dt_cg.c
> +++ b/libdtrace/dt_cg.c
> @@ -270,7 +270,7 @@ void
>  dt_cg_tramp_copy_args_from_regs(dt_pcb_t *pcb, int rp)
>  {
>  	dt_irlist_t	*dlp = &pcb->pcb_ir;
> -	int		i;
> +	int		i, bpf_probe_read_func_id;
>  
>  	/*
>  	 *	for (i = 0; i < PT_REGS_ARGC; i++)
> @@ -299,6 +299,19 @@ dt_cg_tramp_copy_args_from_regs(dt_pcb_t *pcb, int rp)
>  	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(7), BPF_REG_0));
>  #endif
>  
> +	/* Pick the BPF probe_read helper function to use.  Since we
> +	 * will be reading from user space, try to use bpf_probe_read_user().
> +	 * Unfortunately, it is not available until Linux 5.4... and yet not
> +	 * in UEK6.  In fact, even if the function is found in the installed
> +	 * bpf.h header file, the kernel might still not recognize it.  So,
> +	 * we try our best but fall back to bpf_probe_read() if necessary.
> +	 */
> +	bpf_probe_read_func_id = BPF_FUNC_probe_read;
> +#ifdef BPF_FUNC_probe_read_user
> +	if (BPF_FUNC_probe_read_user < dtp->dt_bpf_func_max_id)
> +		bpf_probe_read_func_id = BPF_FUNC_probe_read_user;
> +#endif
> +
>  	/*
>  	 * Generate code to read the remainder of the arguments from the stack
>  	 * (if possible).  We (ab)use the %r0 spill slot on the stack to read
> @@ -311,7 +324,7 @@ dt_cg_tramp_copy_args_from_regs(dt_pcb_t *pcb, int rp)
>  	 *		uint64_t	*sp;
>  	 *
>  	 *		sp = (uint64_t *)(((dt_pt_regs *)rp)->sp;
> -	 *		rc = bpf_probe_read(dctx->mst->argv[i],
> +	 *		rc = bpf_probe_read[_user](dctx->mst->argv[i],
>  	 *				    sizeof(uint64_t),
>  	 *				    &sp[i - PT_REGS_ARGC +
>  	 *					    PT_REGS_ARGSTKBASE]);
> @@ -341,7 +354,7 @@ dt_cg_tramp_copy_args_from_regs(dt_pcb_t *pcb, int rp)
>  		emit(dlp,  BPF_MOV_IMM(BPF_REG_2, sizeof(uint64_t)));
>  		emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_3, rp, PT_REGS_SP));
>  		emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, (i - PT_REGS_ARGC + PT_REGS_ARGSTKBASE) * sizeof(uint64_t)));
> -		emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_probe_read));
> +		emit(dlp,  BPF_CALL_HELPER(bpf_probe_read_func_id));
>  		emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_ok));
>  		emit(dlp,  BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(i), 0));
>  		emitl(dlp, lbl_ok,
> diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
> index 747815ac..ebe1dfc9 100644
> --- a/libdtrace/dt_impl.h
> +++ b/libdtrace/dt_impl.h
> @@ -416,6 +416,7 @@ struct dtrace_hdl {
>  	dt_list_t dt_lib_dep_sorted;	/* dependency sorted library list */
>  	dt_global_pcap_t dt_pcap; /* global tshark/pcap state */
>  	char *dt_freopen_filename; /* filename for freopen() action */
> +	int dt_bpf_func_max_id;	/* exclusive upper bound for BPF helper funcs */
>  };
>  
>  /*
> diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
> index 55277291..7cd25ca9 100644
> --- a/libdtrace/dt_open.c
> +++ b/libdtrace/dt_open.c
> @@ -38,6 +38,7 @@
>  #include <dt_probe.h>
>  #include <dt_dis.h>
>  #include <dt_peb.h>
> +#include <dt_bpf.h>
>  
>  const dt_version_t _dtrace_versions[] = {
>  	DT_VERS_1_0,	/* D API 1.0.0 (PSARC 2001/466) Solaris 10 FCS */
> @@ -753,6 +754,7 @@ dt_vopen(int version, int flags, int *errp,
>  	pthread_mutex_init(&dtp->dt_sprintf_lock, NULL);
>  	dt_dof_init(dtp);
>  	uname(&dtp->dt_uts);
> +	dt_bpf_func_max_id_set(dtp);
>  
>  	/*
>  	 * The default module path is derived in part from the utsname release
> diff --git a/test/unittest/pid/tst.args1.aarch64.x b/test/unittest/pid/tst.args1.aarch64.x
> new file mode 100755
> index 00000000..f3d82eea
> --- /dev/null
> +++ b/test/unittest/pid/tst.args1.aarch64.x
> @@ -0,0 +1,16 @@
> +#!/bin/sh
> +
> +read MAJOR MINOR <<< `uname -r | grep -Eo '^[0-9]+\.[0-9]+' | tr '.' ' '`
> +
> +if [ $MAJOR -gt 5 ]; then
> +        exit 0
> +fi
> +if [ $MAJOR -eq 5 -a $MINOR -gt 4 ]; then
> +	exit 0
> +fi
> +
> +# Technically, Linux 5.4 should also work,
> +# but there seem to be problems with UEK6.
> +
> +echo "pid entry probes have bad arg8/arg9 on pre-5.5 kernels on ARM"
> +exit 1
> -- 
> 2.27.0
> 
> 
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel



More information about the DTrace-devel mailing list