[DTrace-devel] [PATCH v3 05/12] Add support for the stack() action

Kris Van Hees kris.van.hees at oracle.com
Fri Jun 11 20:44:43 PDT 2021


Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>

... with minor change listed below

On Fri, Jun 11, 2021 at 08:28:28PM -0400, eugene.loh at oracle.com wrote:
> From: Eugene Loh <eugene.loh at oracle.com>
> 
> Implement the stack() action using the bpf_get_stack() helper
> function.  This implementation most closely resembles the legacy
> DTrace implementation.  Someday it may make sense to switch over
> to the bpf_get_stackid() instead, which would allow consolidation
> of like stacks.
> 
> The max stack size can be controlled by the kernel parameter
> perf_event_max_stack, and we would like to allow users to increase
> that limit.  We will eventually need to know this limit in various
> places.  So, add a dtp->dt_options[DTRACEOPT_MAXFRAMES] value to
> store that value.  Since the value is stored as an option, it can
> further be reduced below the kernel value by the user.
> 
> Change the stack() testing.  Specifically, replace the default
> test since BEGIN no longer has a kernel stack.
> 
> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
> ---
>  include/dtrace/options_defines.h             |  3 +-
>  libdtrace/dt_cg.c                            | 51 ++++++++++++++++----
>  libdtrace/dt_consume.c                       | 20 ++++++--
>  libdtrace/dt_impl.h                          |  1 +
>  libdtrace/dt_open.c                          |  8 +++
>  libdtrace/dt_options.c                       |  1 +
>  test/unittest/stack/tst.default.d            | 21 --------
>  test/unittest/stack/tst.default.r            |  1 -
>  test/unittest/stack/tst.default.r.p          |  5 --
>  test/unittest/stack/tst.stack3_fbt.aarch64.r | 11 +++++
>  test/unittest/stack/tst.stack3_fbt.d         | 25 ++++++++++
>  test/unittest/stack/tst.stack3_fbt.x86_64.r  | 11 +++++
>  test/unittest/stack/tst.stack_fbt.aarch64.r  | 14 ++++++
>  test/unittest/stack/tst.stack_fbt.d          | 25 ++++++++++
>  test/unittest/stack/tst.stack_fbt.x86_64.r   | 13 +++++
>  15 files changed, 168 insertions(+), 42 deletions(-)
>  delete mode 100644 test/unittest/stack/tst.default.d
>  delete mode 100644 test/unittest/stack/tst.default.r
>  delete mode 100755 test/unittest/stack/tst.default.r.p
>  create mode 100644 test/unittest/stack/tst.stack3_fbt.aarch64.r
>  create mode 100644 test/unittest/stack/tst.stack3_fbt.d
>  create mode 100644 test/unittest/stack/tst.stack3_fbt.x86_64.r
>  create mode 100644 test/unittest/stack/tst.stack_fbt.aarch64.r
>  create mode 100644 test/unittest/stack/tst.stack_fbt.d
>  create mode 100644 test/unittest/stack/tst.stack_fbt.x86_64.r
> 
> diff --git a/include/dtrace/options_defines.h b/include/dtrace/options_defines.h
> index 18ea641f..bda4f739 100644
> --- a/include/dtrace/options_defines.h
> +++ b/include/dtrace/options_defines.h
> @@ -58,7 +58,8 @@
>  #define	DTRACEOPT_NORESOLVE	28      /* prevent resolution of symbols */
>  #define	DTRACEOPT_PCAPSIZE	29	/* number of bytes to be captured */
>  #define	DTRACEOPT_BPFLOGSIZE	30	/* BPF verifier log, max # bytes */
> -#define	DTRACEOPT_MAX		31      /* number of options */
> +#define	DTRACEOPT_MAXFRAMES	31	/* maximum number of stack frames */
> +#define	DTRACEOPT_MAX		32      /* number of options */
>  
>  #define	DTRACEOPT_UNSET		(dtrace_optval_t)-2	/* unset option */
>  
> diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> index f7168ffe..a36fbd1c 100644
> --- a/libdtrace/dt_cg.c
> +++ b/libdtrace/dt_cg.c
> @@ -1333,21 +1333,54 @@ dt_cg_act_speculate(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
>  static void
>  dt_cg_act_stack(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
>  {
> -	dt_node_t *arg = dnp->dn_args;
> -#ifdef FIXME
> -	uint32_t nframes = 0;
> -#endif
> +	dtrace_hdl_t	*dtp = pcb->pcb_hdl;
> +	dt_irlist_t	*dlp = &pcb->pcb_ir;
> +	dt_regset_t	*drp = pcb->pcb_regs;
> +	dt_node_t	*arg = dnp->dn_args;
> +	int		nframes = dtp->dt_options[DTRACEOPT_STACKFRAMES];
> +	int		skip = 0;
> +	uint_t		off;
> +	uint_t		lbl_valid = dt_irlist_label(dlp);
> +
> +	if (nframes == DTRACEOPT_UNSET)
> +		nframes = _dtrace_stackframes;
>  
>  	if (arg != NULL) {
> -		if (!dt_node_is_posconst(arg)) {
> +		if (!dt_node_is_posconst(arg))
>  			dnerror(arg, D_STACK_SIZE, "stack( ) argument #1 must "
>  				"be a non-zero positive integer constant\n");
> -		}
>  
> -#ifdef FIXME
> -		nframes = (uint32_t)arg->dn_value;
> -#endif
> +		nframes = arg->dn_value;
>  	}
> +
> +	/*
> +	 * If more frames are requested than allowed, silently reduce nframes.
> +	 */
> +	if (nframes > dtp->dt_options[DTRACEOPT_MAXFRAMES])
> +		nframes = dtp->dt_options[DTRACEOPT_MAXFRAMES];
> +
> +	/* Reserve space in the output buffer. */
> +	off = dt_rec_add(dtp, dt_cg_fill_gap, DTRACEACT_STACK,
> +			 sizeof(uint64_t) * nframes, sizeof(uint64_t),
> +			 NULL, nframes);
> +
> +	/* Now call bpf_get_stack(ctx, buf, size, flags). */
> +	if (dt_regset_xalloc_args(drp) == -1)
> +		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> +	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_FP, DT_STK_DCTX));
> +	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_1, DCTX_CTX));
> +	emit(dlp,  BPF_MOV_REG(BPF_REG_2, BPF_REG_9));
> +	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, off));
> +	emit(dlp,  BPF_MOV_IMM(BPF_REG_3, sizeof(uint64_t) * nframes));
> +	emit(dlp,  BPF_MOV_IMM(BPF_REG_4, skip & BPF_F_SKIP_FIELD_MASK));
> +	dt_regset_xalloc(drp, BPF_REG_0);
> +	emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_get_stack));
> +	dt_regset_free_args(drp);
> +	emit(dlp,  BPF_BRANCH_IMM(BPF_JSGE, BPF_REG_0, 0, lbl_valid));
> +	dt_cg_probe_error(pcb, -1, DTRACEFLT_BADSTACK, 0);
> +	emitl(dlp, lbl_valid,
> +		   BPF_NOP());
> +	dt_regset_free(drp, BPF_REG_0);
>  }
>  
>  static void
> diff --git a/libdtrace/dt_consume.c b/libdtrace/dt_consume.c
> index f39156df..755e645e 100644
> --- a/libdtrace/dt_consume.c
> +++ b/libdtrace/dt_consume.c
> @@ -2018,15 +2018,18 @@ dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, char *buf,
>  		for (i = 0; i < pdat->dtpda_ddesc->dtdd_nrecs; i++) {
>  			int			n;
>  			dtrace_recdesc_t	*rec;
> +			dtrace_actkind_t	act;
>  			int (*func)(dtrace_hdl_t *, FILE *, void *,
>  			    const dtrace_probedata_t *,
>  			    const dtrace_recdesc_t *, uint_t,
>  			    const void *buf, size_t);
> +			caddr_t			recdata;
>  
>  			rec = &pdat->dtpda_ddesc->dtdd_recs[i];
> -			pdat->dtpda_data = data + rec->dtrd_offset;
> +			act = rec->dtrd_action;
> +			pdat->dtpda_data = recdata = data + rec->dtrd_offset;
>  
> -			if (rec->dtrd_action == DTRACEACT_LIBACT) {
> +			if (act == DTRACEACT_LIBACT) {
>  				switch (rec->dtrd_arg) {
>  				case DT_ACT_DENORMALIZE:
>  					if (dt_normalize(dtp, data, rec) != 0)
> @@ -2057,7 +2060,15 @@ dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, char *buf,
>  			if (rval != DTRACE_CONSUME_THIS)
>  				return dt_set_errno(dtp, EDT_BADRVAL);
>  
> -			switch (rec->dtrd_action) {
> +			switch (act) {
> +			case DTRACEACT_STACK: {
> +				int depth = rec->dtrd_arg;
> +
> +				if (dt_print_stack(dtp, fp, NULL, recdata,
> +				    depth, rec->dtrd_size / depth) < 0)
> +					return -1;
> +				continue;
> +			}
>  			case DTRACEACT_PRINTF:
>  				func = dtrace_fprintf;
>  				break;
> @@ -2092,8 +2103,7 @@ dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, char *buf,
>  				continue;
>  			}
>  
> -			n = dt_print_trace(dtp, fp, rec, pdat->dtpda_data,
> -					   quiet);
> +			n = dt_print_trace(dtp, fp, rec, recdata, quiet);
>  			if (n < 0)
>  				return DTRACE_WORKSTATUS_ERROR;
>  		}
> diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
> index c2d32863..ac99cf52 100644
> --- a/libdtrace/dt_impl.h
> +++ b/libdtrace/dt_impl.h
> @@ -792,6 +792,7 @@ extern uint_t _dtrace_pidlrulim;	/* number of proc handles to cache */
>  extern size_t _dtrace_bufsize;		/* default dt_buf_create() size */
>  extern int _dtrace_argmax;		/* default maximum probe arguments */
>  extern int _dtrace_debug_assert;	/* turn on expensive assertions */
> +extern int _dtrace_stackframes;		/* default number of stack frames */
>  
>  extern const char *_dtrace_moddir;	/* default kernel module directory */
>  
> diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
> index f2b86f7c..ccefa5b5 100644
> --- a/libdtrace/dt_open.c
> +++ b/libdtrace/dt_open.c
> @@ -612,6 +612,7 @@ uint_t _dtrace_pidbuckets = 64; /* default number of pid hash buckets */
>  uint_t _dtrace_pidlrulim = 8;	/* default number of pid handles to cache */
>  size_t _dtrace_bufsize = 512;	/* default dt_buf_create() size */
>  int _dtrace_argmax = 32;	/* default maximum number of probe arguments */
> +int _dtrace_stackframes = 20;	/* default number of stack frames */
>  
>  const char *const _dtrace_version = DT_VERS_STRING; /* API version string */
>  const char *const _libdtrace_vcs_version = DT_GIT_VERSION; /* Build version string */
> @@ -645,6 +646,7 @@ dt_vopen(int version, int flags, int *errp,
>  	int i, err;
>  	char modpath[PATH_MAX];
>  	struct rlimit rl;
> +	FILE *fd;
>  
>  	const dt_intrinsic_t *dinp;
>  	const dt_typedef_t *dtyp;
> @@ -786,6 +788,12 @@ dt_vopen(int version, int flags, int *errp,
>  	 */
>  	dtp->dt_options[DTRACEOPT_STRSIZE] = 256;
>  
> +	/* Set the default value of maxframes. */
> +	fd = fopen("/proc/sys/kernel/perf_event_max_stack", "r");
> +	assert(fd);
> +	assert(fscanf(fd, "%lu", &dtp->dt_options[DTRACEOPT_MAXFRAMES]) == 1);

Using an assert with a side effect is asking for trouble.  Two options here:
you can either report an error and terminate, or you can set the option to some
reasonable default limit.  The second option seems fair.  If the default you
choose is higher than the system limit (that we somehow failed to read), then
bpf_get_stack() will still only report as many frames as the system limit
allows so that is safe.

I cannot imagine why we might not be able to read the system limit, but strange
things do happen at times.

> +	fclose(fd);
> +
>  	dtp->dt_cpp_argv[0] = (char *)strbasename(dtp->dt_cpp_path);
>  
>  	snprintf(isadef, sizeof(isadef), "-D__SUNW_D_%u",
> diff --git a/libdtrace/dt_options.c b/libdtrace/dt_options.c
> index 9f5337f3..a8dd5829 100644
> --- a/libdtrace/dt_options.c
> +++ b/libdtrace/dt_options.c
> @@ -1104,6 +1104,7 @@ static const dt_option_t _dtrace_rtoptions[] = {
>  	{ "grabanon", dt_opt_runtime, DTRACEOPT_GRABANON },
>  	{ "jstackframes", dt_opt_runtime, DTRACEOPT_JSTACKFRAMES },
>  	{ "jstackstrsize", dt_opt_size, DTRACEOPT_JSTACKSTRSIZE },
> +	{ "maxframes", dt_opt_runtime, DTRACEOPT_MAXFRAMES },
>  	{ "nspec", dt_opt_runtime, DTRACEOPT_NSPEC },
>  	{ "pcapsize", dt_opt_pcapsize, DTRACEOPT_PCAPSIZE },
>  	{ "specsize", dt_opt_size, DTRACEOPT_SPECSIZE },
> diff --git a/test/unittest/stack/tst.default.d b/test/unittest/stack/tst.default.d
> deleted file mode 100644
> index 2e50ec39..00000000
> --- a/test/unittest/stack/tst.default.d
> +++ /dev/null
> @@ -1,21 +0,0 @@
> -/*
> - * Oracle Linux DTrace.
> - * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
> - * Licensed under the Universal Permissive License v 1.0 as shown at
> - * http://oss.oracle.com/licenses/upl.
> - */
> -/* @@xfail: dtv2 */
> -
> -/*
> - * ASSERTION:
> - *   Test the stack action with the default stack depth.
> - *
> - * SECTION: Output Formatting/printf()
> - *
> - */
> -
> -BEGIN
> -{
> -	stack();
> -	exit(0);
> -}
> diff --git a/test/unittest/stack/tst.default.r b/test/unittest/stack/tst.default.r
> deleted file mode 100644
> index e3a4e4c3..00000000
> --- a/test/unittest/stack/tst.default.r
> +++ /dev/null
> @@ -1 +0,0 @@
> -              dtrace`dtrace_ioctl+{ptr}
> diff --git a/test/unittest/stack/tst.default.r.p b/test/unittest/stack/tst.default.r.p
> deleted file mode 100755
> index 281c025f..00000000
> --- a/test/unittest/stack/tst.default.r.p
> +++ /dev/null
> @@ -1,5 +0,0 @@
> -#!/bin/sed -nf
> -
> -# Eliminate all lines other than dtrace`ioctl.
> -
> -/dtrace`dtrace_ioctl/p
> diff --git a/test/unittest/stack/tst.stack3_fbt.aarch64.r b/test/unittest/stack/tst.stack3_fbt.aarch64.r
> new file mode 100644
> index 00000000..5c8bfaed
> --- /dev/null
> +++ b/test/unittest/stack/tst.stack3_fbt.aarch64.r
> @@ -0,0 +1,11 @@
> +                   FUNCTION:NAME
> +                          :BEGIN 
> +               __vfs_write:entry 
> +              vmlinux`__vfs_write
> +              vmlinux`ksys_write+{ptr}
> +              vmlinux`__arm64_sys_write+{ptr}
> +
> +
> +-- @@stderr --
> +dtrace: script 'test/unittest/stack/tst.stack3_fbt.d' matched 2 probes
> +dtrace: allowing destructive actions
> diff --git a/test/unittest/stack/tst.stack3_fbt.d b/test/unittest/stack/tst.stack3_fbt.d
> new file mode 100644
> index 00000000..1a2eaf58
> --- /dev/null
> +++ b/test/unittest/stack/tst.stack3_fbt.d
> @@ -0,0 +1,25 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +/*
> + * ASSERTION: Test the stack action with depth 3.
> + *
> + * SECTION: Output Formatting/printf()
> + */
> +
> +#pragma D option destructive
> +
> +BEGIN
> +{
> +	system("echo write something > /dev/null");
> +}
> +
> +fbt::__vfs_write:entry
> +{
> +	stack(3);
> +	exit(0);
> +}
> diff --git a/test/unittest/stack/tst.stack3_fbt.x86_64.r b/test/unittest/stack/tst.stack3_fbt.x86_64.r
> new file mode 100644
> index 00000000..f24c8cba
> --- /dev/null
> +++ b/test/unittest/stack/tst.stack3_fbt.x86_64.r
> @@ -0,0 +1,11 @@
> +                   FUNCTION:NAME
> +                          :BEGIN 
> +               __vfs_write:entry 
> +              vmlinux`__vfs_write+{ptr}
> +              vmlinux`ksys_write+{ptr}
> +              vmlinux`__x64_sys_write+{ptr}
> +
> +
> +-- @@stderr --
> +dtrace: script 'test/unittest/stack/tst.stack3_fbt.d' matched 2 probes
> +dtrace: allowing destructive actions
> diff --git a/test/unittest/stack/tst.stack_fbt.aarch64.r b/test/unittest/stack/tst.stack_fbt.aarch64.r
> new file mode 100644
> index 00000000..3a2896c4
> --- /dev/null
> +++ b/test/unittest/stack/tst.stack_fbt.aarch64.r
> @@ -0,0 +1,14 @@
> +                   FUNCTION:NAME
> +                          :BEGIN 
> +               __vfs_write:entry 
> +              vmlinux`__vfs_write
> +              vmlinux`ksys_write+{ptr}
> +              vmlinux`__arm64_sys_write+{ptr}
> +              vmlinux`el0_svc_common+{ptr}
> +              vmlinux`el0_svc_handler+{ptr}
> +              vmlinux`el0_svc+{ptr}
> +
> +
> +-- @@stderr --
> +dtrace: script 'test/unittest/stack/tst.stack_fbt.d' matched 2 probes
> +dtrace: allowing destructive actions
> diff --git a/test/unittest/stack/tst.stack_fbt.d b/test/unittest/stack/tst.stack_fbt.d
> new file mode 100644
> index 00000000..27db2116
> --- /dev/null
> +++ b/test/unittest/stack/tst.stack_fbt.d
> @@ -0,0 +1,25 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +/*
> + * ASSERTION: Test the stack action with the default stack depth.
> + *
> + * SECTION: Output Formatting/printf()
> + */
> +
> +#pragma D option destructive
> +
> +BEGIN
> +{
> +	system("echo write something > /dev/null");
> +}
> +
> +fbt::__vfs_write:entry
> +{
> +	stack();
> +	exit(0);
> +}
> diff --git a/test/unittest/stack/tst.stack_fbt.x86_64.r b/test/unittest/stack/tst.stack_fbt.x86_64.r
> new file mode 100644
> index 00000000..792ce80a
> --- /dev/null
> +++ b/test/unittest/stack/tst.stack_fbt.x86_64.r
> @@ -0,0 +1,13 @@
> +                   FUNCTION:NAME
> +                          :BEGIN 
> +               __vfs_write:entry 
> +              vmlinux`__vfs_write+{ptr}
> +              vmlinux`ksys_write+{ptr}
> +              vmlinux`__x64_sys_write+{ptr}
> +              vmlinux`do_syscall_64+{ptr}
> +              vmlinux`entry_SYSCALL_64+{ptr}
> +
> +
> +-- @@stderr --
> +dtrace: script 'test/unittest/stack/tst.stack_fbt.d' matched 2 probes
> +dtrace: allowing destructive actions
> -- 
> 2.18.4
> 
> 
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel



More information about the DTrace-devel mailing list