[DTrace-devel] [PATCH 08/12] Add support for built-in variable stackdepth

Kris Van Hees kris.van.hees at oracle.com
Tue Jun 8 23:07:38 PDT 2021


Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>

... with changes as listed below.

On Fri, May 28, 2021 at 02:35:12PM -0400, eugene.loh at oracle.com wrote:
> From: Eugene Loh <eugene.loh at oracle.com>
> 
> Add space to the "mem" BPF map where we can write a stack of maximum
> depth.  (This approach should be replaced eventually with a more general
> mechanism for using scratch space.)
> 
> Since precompiled BPF code needs to verify the use of the new buffer, we
> we make its offset and size available to get_bvar() via STKOFF and STKSIZ.
> 
> For DIF_VAR_STACKDEPTH, add code to write the stack and return its length.
> 
> Fix test/unittest/stackdepth/tst.value.d:
> 
>   *)  Use a probe that has a kernel stack.
> 
>   *)  Change the postprocessing to check properly whether a line is empty.
> 
>   *)  Remove xfail.
> 
> Consolidate stackdepth tests with other built-in-variable tests:
> 
>   *)  stackdepth/tst.default.d was already simply a subset of
>       variables/bvar/tst.stackdepth.d and so can be deleted.
> 
>   *)  stackdepth/tst.value.* is moved to variables/bvar/tst.stackdepth2.*
> 
> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
> ---
>  bpf/get_bvar.c                                | 25 +++++++++++++++++--
>  libdtrace/dt_bpf.c                            |  7 ++++--
>  libdtrace/dt_bpf.h                            |  2 ++
>  libdtrace/dt_cc.c                             |  6 +++++
>  libdtrace/dt_dlibs.c                          |  2 ++
>  test/unittest/stackdepth/tst.default.d        | 20 ---------------
>  .../bvar/tst.stackdepth2.d}                   | 12 ++++++++-
>  .../bvar/tst.stackdepth2.r}                   |  0
>  .../bvar/tst.stackdepth2.r.p}                 |  4 +--
>  9 files changed, 51 insertions(+), 27 deletions(-)
>  delete mode 100644 test/unittest/stackdepth/tst.default.d
>  rename test/unittest/{stackdepth/tst.value.d => variables/bvar/tst.stackdepth2.d} (80%)
>  rename test/unittest/{stackdepth/tst.value.r => variables/bvar/tst.stackdepth2.r} (100%)
>  rename test/unittest/{stackdepth/tst.value.r.p => variables/bvar/tst.stackdepth2.r.p} (100%)
> 
> diff --git a/bpf/get_bvar.c b/bpf/get_bvar.c
> index 198cb673..570334f9 100644
> --- a/bpf/get_bvar.c
> +++ b/bpf/get_bvar.c
> @@ -25,6 +25,8 @@ extern struct bpf_map_def probes;
>  extern struct bpf_map_def state;
>  
>  extern uint64_t STBSZ;
> +extern uint64_t STKOFF;
> +extern uint64_t STKSIZ;
>  
>  #define error(dctx, fault, illval) \
>  	({ \
> @@ -54,8 +56,27 @@ noinline uint64_t dt_get_bvar(dt_dctx_t *dctx, uint32_t id)
>  	case DIF_VAR_ARG9:
>  		return mst->argv[id - DIF_VAR_ARG0];
>  	case DIF_VAR_STACKDEPTH: {
> -		/* FIXME: no stack() yet */
> -		return 0;
> +		uint64_t stacksize = (uint64_t) &STKSIZ;

I'd rename this to be 'bufsize;, or just 'size'.  By the way, note that the
prototype for bpf_get_stack) actually specifies u32 as type for the buffer
size.

> +		uint64_t flags = 0 & BPF_F_SKIP_FIELD_MASK;
> +		char *buf = ((char *) dctx->buf) + ((uint64_t) &STKOFF);
> +		uint64_t copylen;

I'd rename this to be 'stacksize'.

> +
> +		copylen = bpf_get_stack(dctx->ctx, buf, stacksize, flags);
> +		if (copylen < 0)
> +			return error(dctx, DTRACEFLT_BADSTACK, 0 /* FIXME */);
> +
> +		/* FIXME: if we filled the buffer, was it too small? */
> +#if 0
> +		if (copylen == stacksize)
> +			return error(dctx, DTRACEFLT_BADSTACK, 0 /* FIXME */);
> +#endif

You cannot assume that a filled buffer means that the stack is larger than what
we can retrieve because it might just be exactly that size.  I'd favour simply
returning the value that was obtained, and leave it to the user to interpret.
There is no real 'right' solution here that will always work - but it seems
better to not cause a fault when we are not sure whether the stack is complete
or not.

> +
> +		/*
> +		 * While linux/bpf.h does not describe the meaning of
> +		 * bpf_get_stack()'s return value outside of its sign,
> +		 * it is presumably the length of the copied stack.
> +		 */
> +		return copylen / 8;

return stacksize / sizeof(uint64_t);

Interestingly, the (minimal) documentation for bpf_get_stack() never actually
specifies what the stack content looks like.  But we know it is an array of
program counter values and each of those is a uint64_t value.

>  	}
>  	case DIF_VAR_CALLER: {
>  		uint64_t flags = 0 & BPF_F_SKIP_FIELD_MASK;
> diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
> index 3ef0b262..a5bfafa6 100644
> --- a/libdtrace/dt_bpf.c
> +++ b/libdtrace/dt_bpf.c
> @@ -274,8 +274,11 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
>  
>  	if (create_gmap(dtp, "mem", BPF_MAP_TYPE_PERCPU_ARRAY,
>  			sizeof(uint32_t),
> -			roundup(sizeof(dt_mstate_t), 8) + 8 +
> -				roundup(dtp->dt_maxreclen, 8), 1) == -1)
> +			roundup(sizeof(dt_mstate_t), 8)
> +			  + 8

I would add the + 8 to the previous line.  It is special padding to ensure
that when we write the output buffer to the perf event ring buffer, we can
include a 4-byte padding ahead of the buffer to ensure proper alignment of
the data (due to the perf event header content).

> +			  + roundup(dtp->dt_maxreclen, 8)
> +			  + 8 * dtp->dt_maxframes,
> +			1) == -1)
>  		return -1;		/* dt_errno is set for us */
>  
>  	/*
> diff --git a/libdtrace/dt_bpf.h b/libdtrace/dt_bpf.h
> index 7fa9eed3..3afc95fa 100644
> --- a/libdtrace/dt_bpf.h
> +++ b/libdtrace/dt_bpf.h
> @@ -23,6 +23,8 @@ extern "C" {
>  #define DT_CONST_CLID	3
>  #define DT_CONST_ARGC	4
>  #define DT_CONST_STBSZ	5
> +#define DT_CONST_STKOFF	6
> +#define DT_CONST_STKSIZ	7
>  
>  extern int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
>  			   int group_fd, unsigned long flags);
> diff --git a/libdtrace/dt_cc.c b/libdtrace/dt_cc.c
> index a374a747..050a9bf6 100644
> --- a/libdtrace/dt_cc.c
> +++ b/libdtrace/dt_cc.c
> @@ -2327,6 +2327,12 @@ dt_link_construct(dtrace_hdl_t *dtp, const dt_probe_t *prp, dtrace_difo_t *dp,
>  			case DT_CONST_STBSZ:
>  				nrp->dofr_data = dtp->dt_strlen;
>  				continue;
> +			case DT_CONST_STKOFF:
> +				nrp->dofr_data = roundup(dtp->dt_maxreclen, 8);
> +				continue;
> +			case DT_CONST_STKSIZ:
> +				nrp->dofr_data = 8 * dtp->dt_maxframes;

sizeof(uint6_t) *

> +				continue;
>  			default:
>  				/* probe name -> value is probe id */
>  				if (strchr(idp->di_name, ':') != NULL)
> diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
> index fae58b1a..10886adb 100644
> --- a/libdtrace/dt_dlibs.c
> +++ b/libdtrace/dt_dlibs.c
> @@ -78,6 +78,8 @@ static const dt_ident_t		dt_bpf_symbols[] = {
>  	DT_BPF_SYMBOL_ID(CLID, DT_IDENT_SCALAR, DT_CONST_CLID),
>  	DT_BPF_SYMBOL_ID(ARGC, DT_IDENT_SCALAR, DT_CONST_ARGC),
>  	DT_BPF_SYMBOL_ID(STBSZ, DT_IDENT_SCALAR, DT_CONST_STBSZ),
> +	DT_BPF_SYMBOL_ID(STKOFF, DT_IDENT_SCALAR, DT_CONST_STKOFF),
> +	DT_BPF_SYMBOL_ID(STKSIZ, DT_IDENT_SCALAR, DT_CONST_STKSIZ),
>  	/* End-of-list marker */
>  	{ NULL, }
>  };
> diff --git a/test/unittest/stackdepth/tst.default.d b/test/unittest/stackdepth/tst.default.d
> deleted file mode 100644
> index 7267ccf1..00000000
> --- a/test/unittest/stackdepth/tst.default.d
> +++ /dev/null
> @@ -1,20 +0,0 @@
> -/*
> - * Oracle Linux DTrace.
> - * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
> - * Licensed under the Universal Permissive License v 1.0 as shown at
> - * http://oss.oracle.com/licenses/upl.
> - */
> -
> -/*
> - * ASSERTION:
> - *   Test the stackdepth variable.
> - *
> - * SECTION: Variables/Built-in Variables
> - *
> - */
> -
> -BEGIN
> -{
> -	trace(stackdepth);
> -	exit(0);
> -}
> diff --git a/test/unittest/stackdepth/tst.value.d b/test/unittest/variables/bvar/tst.stackdepth2.d
> similarity index 80%
> rename from test/unittest/stackdepth/tst.value.d
> rename to test/unittest/variables/bvar/tst.stackdepth2.d
> index 97cc54df..7e28a24b 100644
> --- a/test/unittest/stackdepth/tst.value.d
> +++ b/test/unittest/variables/bvar/tst.stackdepth2.d
> @@ -4,8 +4,8 @@
>   * Licensed under the Universal Permissive License v 1.0 as shown at
>   * http://oss.oracle.com/licenses/upl.
>   */
> -/* @@xfail: dtv2 */
>  
> +#pragma D option destructive
>  #pragma D option quiet
>  
>  /*
> @@ -17,6 +17,11 @@
>   */
>  
>  BEGIN
> +{
> +	system("echo write something > /dev/null");
> +}
> +
> +fbt::__vfs_write:entry
>  {
>  	printf("DEPTH %d\n", stackdepth);
>  	printf("TRACE BEGIN\n");
> @@ -24,3 +29,8 @@ BEGIN
>  	printf("TRACE END\n");
>  	exit(0);
>  }
> +
> +ERROR
> +{
> +	exit(1);
> +}
> diff --git a/test/unittest/stackdepth/tst.value.r b/test/unittest/variables/bvar/tst.stackdepth2.r
> similarity index 100%
> rename from test/unittest/stackdepth/tst.value.r
> rename to test/unittest/variables/bvar/tst.stackdepth2.r
> diff --git a/test/unittest/stackdepth/tst.value.r.p b/test/unittest/variables/bvar/tst.stackdepth2.r.p
> similarity index 100%
> rename from test/unittest/stackdepth/tst.value.r.p
> rename to test/unittest/variables/bvar/tst.stackdepth2.r.p
> index fa83eb0c..9b071181 100755
> --- a/test/unittest/stackdepth/tst.value.r.p
> +++ b/test/unittest/variables/bvar/tst.stackdepth2.r.p
> @@ -12,12 +12,12 @@
>  	getline;
>  	count = 0;
>  	while ($0 !~ /^TRACE END/) {
> +		if (NF)
> +			count++;
>  		if (getline != 1) {
>  			print "EOF or error while processing stack\n";
>  			exit 0;
>  		}
> -		if (NF)
> -			count++;
>  	}
>  }
>  
> -- 
> 2.18.4
> 
> 
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel



More information about the DTrace-devel mailing list