[DTrace-devel] [PATCH 6/7] Add support for built-in variable stackdepth

eugene.loh at oracle.com eugene.loh at oracle.com
Thu May 6 12:31:49 PDT 2021


From: Eugene Loh <eugene.loh at oracle.com>

Add space to the "mem" BPF map that is large enough to hold a stack
trace of maximum length (PERF_MAX_STACK_DEPTH frames of 8 bytes each).

For DIF_VAR_STACKDEPTH, add code that writes the stack into that space
and then counts the frames written to determine the stack depth.

Fix test/unittest/stackdepth/tst.value.d:

  *)  Use a probe that has a kernel stack.

  *)  Change the postprocessing to test whether the current line is
      empty before reading the next one, so that the "TRACE END" line
      itself is no longer counted.

  *)  Remove xfail.

Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
 bpf/get_bvar.c                         | 22 ++++++++++++++++++++--
 libdtrace/dt_bpf.c                     |  6 ++++--
 libdtrace/dt_cg.c                      |  5 +++--
 test/unittest/stackdepth/tst.value.d   |  7 ++++++-
 test/unittest/stackdepth/tst.value.r.p |  4 ++--
 5 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/bpf/get_bvar.c b/bpf/get_bvar.c
index 3116e1a8..5313c498 100644
--- a/bpf/get_bvar.c
+++ b/bpf/get_bvar.c
@@ -3,6 +3,7 @@
  * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
  */
 #include <linux/bpf.h>
+#include <linux/perf_event.h>
 #include <stddef.h>
 #include <stdint.h>
 #include <bpf-helpers.h>
@@ -49,8 +50,25 @@ noinline uint64_t dt_get_bvar(dt_dctx_t *dctx, uint32_t id)
 	case DIF_VAR_ARG9:
 		return mst->argv[id - DIF_VAR_ARG0];
 	case DIF_VAR_STACKDEPTH: {
-		/* FIXME: no stack() yet */
-		return 0;
+		uint64_t skip = 0;
+		/* FIXME: need to determine buf more robustly? */
+		uint64_t *buf = ((uint64_t *) &mst[1]) + 1;
+
+		/* FIXME: how should we handle error? */
+		if (bpf_get_stack(dctx->ctx, buf, 8 * PERF_MAX_STACK_DEPTH,
+				  skip & BPF_F_SKIP_FIELD_MASK) < 0)
+			return 0;
+
+		/*
+		 * One can try a logarithmic approach
+		 * if evidence emerges that this linear algorithm is too costly.
+		 */
+		for (skip = 0; skip < PERF_MAX_STACK_DEPTH; skip++)
+			if (buf[skip] == 0)
+				return skip;
+
+		/* FIXME: if we hit the PERF_MAX_STACK_DEPTH limit, indicate an error? */
+		return skip;
 	}
 	case DIF_VAR_CALLER: {
 		uint64_t skip = 1;
diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
index 1f31f845..fb85aaf2 100644
--- a/libdtrace/dt_bpf.c
+++ b/libdtrace/dt_bpf.c
@@ -247,8 +247,10 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
 
 	if (create_gmap(dtp, "mem", BPF_MAP_TYPE_PERCPU_ARRAY,
 			sizeof(uint32_t),
-			roundup(sizeof(dt_mstate_t), 8) + 8 +
-				roundup(dtp->dt_maxreclen, 8), 1) == -1)
+			roundup(sizeof(dt_mstate_t), 8) +
+			8 +
+			8 * PERF_MAX_STACK_DEPTH +
+			roundup(dtp->dt_maxreclen, 8), 1) == -1)
 		return -1;		/* dt_errno is set for us */
 
 	if (create_gmap(dtp, "strtab", BPF_MAP_TYPE_ARRAY,
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index cdf7a77f..3648abda 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -131,13 +131,14 @@ dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
 	 *						sizeof(dt_mstate_t), 8)
 	 *	*((uint64_t *)&buf[0]) = 0;
 	 *				// stdw [%r0 + 0], 0
-	 *	buf += 8;		// add %r0, 8
+	 *	buf += 8 + 8 * PERF_MAX_STACK_DEPTH;
+	 *				// add %r0, 8 + 8 * PERF_MAX_STACK_DEPTH
 	 *				//     (%r0 = pointer to buffer space)
 	 *	dctx.buf = buf;		// stdw [%fp + DCTX_FP(DCTX_BUF)], %r0
 	 */
 	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, roundup(sizeof(dt_mstate_t), 8)));
 	emit(dlp,  BPF_STORE_IMM(BPF_DW, BPF_REG_0, 0, 0));
-	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8));
+	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8 + 8 * PERF_MAX_STACK_DEPTH));
 	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_FP, DCTX_FP(DCTX_BUF), BPF_REG_0));
 
 	/*
diff --git a/test/unittest/stackdepth/tst.value.d b/test/unittest/stackdepth/tst.value.d
index 97cc54df..a9ffef4d 100644
--- a/test/unittest/stackdepth/tst.value.d
+++ b/test/unittest/stackdepth/tst.value.d
@@ -4,8 +4,8 @@
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
-/* @@xfail: dtv2 */
 
+#pragma D option destructive
 #pragma D option quiet
 
 /*
@@ -17,6 +17,11 @@
  */
 
 BEGIN
+{
+	system("echo write something > /dev/null");
+}
+
+fbt::__vfs_write:entry
 {
 	printf("DEPTH %d\n", stackdepth);
 	printf("TRACE BEGIN\n");
diff --git a/test/unittest/stackdepth/tst.value.r.p b/test/unittest/stackdepth/tst.value.r.p
index fa83eb0c..9b071181 100755
--- a/test/unittest/stackdepth/tst.value.r.p
+++ b/test/unittest/stackdepth/tst.value.r.p
@@ -12,12 +12,12 @@
 	getline;
 	count = 0;
 	while ($0 !~ /^TRACE END/) {
+		if (NF)
+			count++;
 		if (getline != 1) {
 			print "EOF or error while processing stack\n";
 			exit 0;
 		}
-		if (NF)
-			count++;
 	}
 }
 
-- 
2.18.4




More information about the DTrace-devel mailing list