[DTrace-devel] [PATCH 05/12] Add support for the stack() action

eugene.loh at oracle.com eugene.loh at oracle.com
Fri May 28 11:35:09 PDT 2021


From: Eugene Loh <eugene.loh at oracle.com>

Implement the stack() action using the bpf_get_stack() helper
function.  This implementation most closely resembles the legacy
DTrace implementation.  Someday it may make sense to switch over
to the bpf_get_stackid() instead, which would allow consolidation
of like stacks.

The max stack size can be controlled by the kernel parameter
perf_event_max_stack, and we would like to allow users to increase
that limit.  We will eventually need to know this limit in various
places.  So, add a dtp->dt_maxframes value.

Change the stack() testing.  Specifically, replace the default
test since BEGIN no longer has a kernel stack.

Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
 libdtrace/dt_cg.c                            | 69 +++++++++++++++++---
 libdtrace/dt_consume.c                       | 21 ++++--
 libdtrace/dt_impl.h                          |  1 +
 libdtrace/dt_open.c                          |  6 ++
 test/unittest/stack/tst.default.d            | 21 ------
 test/unittest/stack/tst.default.r            |  1 -
 test/unittest/stack/tst.default.r.p          |  5 --
 test/unittest/stack/tst.stack3_fbt.aarch64.r | 11 ++++
 test/unittest/stack/tst.stack3_fbt.d         | 25 +++++++
 test/unittest/stack/tst.stack3_fbt.x86_64.r  | 11 ++++
 test/unittest/stack/tst.stack_fbt.aarch64.r  | 14 ++++
 test/unittest/stack/tst.stack_fbt.d          | 25 +++++++
 test/unittest/stack/tst.stack_fbt.x86_64.r   | 13 ++++
 13 files changed, 182 insertions(+), 41 deletions(-)
 delete mode 100644 test/unittest/stack/tst.default.d
 delete mode 100644 test/unittest/stack/tst.default.r
 delete mode 100755 test/unittest/stack/tst.default.r.p
 create mode 100644 test/unittest/stack/tst.stack3_fbt.aarch64.r
 create mode 100644 test/unittest/stack/tst.stack3_fbt.d
 create mode 100644 test/unittest/stack/tst.stack3_fbt.x86_64.r
 create mode 100644 test/unittest/stack/tst.stack_fbt.aarch64.r
 create mode 100644 test/unittest/stack/tst.stack_fbt.d
 create mode 100644 test/unittest/stack/tst.stack_fbt.x86_64.r

diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index f7168ffe..e7715aec 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -1333,21 +1333,72 @@ dt_cg_act_speculate(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 static void
 dt_cg_act_stack(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
-	dt_node_t *arg = dnp->dn_args;
-#ifdef FIXME
-	uint32_t nframes = 0;
-#endif
+	dtrace_hdl_t	*dtp = pcb->pcb_hdl;
+	dt_irlist_t	*dlp = &pcb->pcb_ir;
+	dt_regset_t	*drp = pcb->pcb_regs;
+	dt_node_t	*arg = dnp->dn_args;
+	int		nframes = dtp->dt_options[DTRACEOPT_STACKFRAMES];
+	int		skip = 0;
+	uint_t		off;
+	uint_t		lbl_valid = dt_irlist_label(dlp);
+
+	/*
+	 * Legacy default was dtrace_stackframes_default,
+	 * in kernel file dtrace/dtrace_state.c.
+	 */
+	if (nframes == DTRACEOPT_UNSET)
+		nframes = 20;
 
 	if (arg != NULL) {
-		if (!dt_node_is_posconst(arg)) {
+		if (!dt_node_is_posconst(arg))
 			dnerror(arg, D_STACK_SIZE, "stack( ) argument #1 must "
 				"be a non-zero positive integer constant\n");
-		}
 
-#ifdef FIXME
-		nframes = (uint32_t)arg->dn_value;
-#endif
+		nframes = arg->dn_value;
 	}
+
+	/*
+	 * FIXME:
+	 * What should happen if more frames are requested than allowed?
+	 *   1.  Silently reduce nframes?
+	 *   2.  Report message and reduce nframes?
+	 *   3.  Report message and abort?
+	 * Option 1 is the easiest and what we do here.
+	 *
+	 * A message could explain that the limit can be raised:
+	 *         # sysctl kernel.perf_event_max_stack=<new value>
+	 * See bpf_get_stack() info in /usr/include/linux/bpf.h.
+	 *
+	 * Note that DTv1 allows much larger stacks but does not handle
+	 * the "too large" case very well:
+	 *         # dtrace -qn 'BEGIN {stack(1234567); exit(0)}'
+	 *         dtrace: 1 drop on CPU 2
+	 *         [...hang...]
+	 */
+	if (nframes > dtp->dt_maxframes)
+		nframes = dtp->dt_maxframes;
+
+	/* Figure out where we want to be in the output buffer. */
+	off = dt_rec_add(dtp, dt_cg_fill_gap, DTRACEACT_STACK,
+			 8 * nframes, 8, NULL, nframes);
+
+	/* Now call bpf_get_stack(ctx, buf, size, flags). */
+	if (dt_regset_xalloc_args(drp) == -1)
+		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_FP, DT_STK_DCTX));
+	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_1, DCTX_CTX));
+	emit(dlp,  BPF_MOV_REG(BPF_REG_2, BPF_REG_9));
+	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, off));
+	emit(dlp,  BPF_MOV_IMM(BPF_REG_3, 8 * nframes));
+	emit(dlp,  BPF_MOV_IMM(BPF_REG_4, skip & BPF_F_SKIP_FIELD_MASK));
+	dt_regset_xalloc(drp, BPF_REG_0);
+	emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_get_stack));
+	dt_regset_free_args(drp);
+	emit(dlp,  BPF_BRANCH_IMM(BPF_JSGE, BPF_REG_0, 0, lbl_valid));
+	dt_cg_probe_error(pcb, -1, DTRACEFLT_BADSTACK, 0 /* FIXME */);
+	emitl(dlp, lbl_valid,
+		   BPF_NOP());
+	dt_regset_free(drp, BPF_REG_0);
 }
 
 static void
diff --git a/libdtrace/dt_consume.c b/libdtrace/dt_consume.c
index f39156df..483f9f28 100644
--- a/libdtrace/dt_consume.c
+++ b/libdtrace/dt_consume.c
@@ -2018,15 +2018,18 @@ dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, char *buf,
 		for (i = 0; i < pdat->dtpda_ddesc->dtdd_nrecs; i++) {
 			int			n;
 			dtrace_recdesc_t	*rec;
+			dtrace_actkind_t	act;
 			int (*func)(dtrace_hdl_t *, FILE *, void *,
 			    const dtrace_probedata_t *,
 			    const dtrace_recdesc_t *, uint_t,
 			    const void *buf, size_t);
+			caddr_t			addr;
 
 			rec = &pdat->dtpda_ddesc->dtdd_recs[i];
-			pdat->dtpda_data = data + rec->dtrd_offset;
+			act = rec->dtrd_action;
+			pdat->dtpda_data = addr = data + rec->dtrd_offset;
 
-			if (rec->dtrd_action == DTRACEACT_LIBACT) {
+			if (act == DTRACEACT_LIBACT) {
 				switch (rec->dtrd_arg) {
 				case DT_ACT_DENORMALIZE:
 					if (dt_normalize(dtp, data, rec) != 0)
@@ -2057,7 +2060,16 @@ dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, char *buf,
 			if (rval != DTRACE_CONSUME_THIS)
 				return dt_set_errno(dtp, EDT_BADRVAL);
 
-			switch (rec->dtrd_action) {
+			if (act == DTRACEACT_STACK) {
+				int depth = rec->dtrd_arg;
+
+				if (dt_print_stack(dtp, fp, NULL, addr, depth,
+				    rec->dtrd_size / depth) < 0)
+					return -1;
+				continue;
+			}
+
+			switch (act) {
 			case DTRACEACT_PRINTF:
 				func = dtrace_fprintf;
 				break;
@@ -2092,8 +2104,7 @@ dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, char *buf,
 				continue;
 			}
 
-			n = dt_print_trace(dtp, fp, rec, pdat->dtpda_data,
-					   quiet);
+			n = dt_print_trace(dtp, fp, rec, addr, quiet);
 			if (n < 0)
 				return DTRACE_WORKSTATUS_ERROR;
 		}
diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
index c2d32863..240e29cc 100644
--- a/libdtrace/dt_impl.h
+++ b/libdtrace/dt_impl.h
@@ -380,6 +380,7 @@ struct dtrace_hdl {
 	dt_list_t dt_lib_dep_sorted;	/* dependency sorted library list */
 	dt_global_pcap_t dt_pcap; /* global tshark/pcap state */
 	char *dt_freopen_filename; /* filename for freopen() action */
+	uint_t dt_maxframes;	/* maximum stack depth */
 };
 
 /*
diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
index f2b86f7c..0323e68a 100644
--- a/libdtrace/dt_open.c
+++ b/libdtrace/dt_open.c
@@ -645,6 +645,7 @@ dt_vopen(int version, int flags, int *errp,
 	int i, err;
 	char modpath[PATH_MAX];
 	struct rlimit rl;
+	FILE *fd;
 
 	const dt_intrinsic_t *dinp;
 	const dt_typedef_t *dtyp;
@@ -756,6 +757,11 @@ dt_vopen(int version, int flags, int *errp,
 	dt_dof_init(dtp);
 	uname(&dtp->dt_uts);
 
+	fd = fopen("/proc/sys/kernel/perf_event_max_stack", "r");
+	assert(fd);
+	assert(fscanf(fd, "%u", &dtp->dt_maxframes) == 1);
+	fclose(fd);
+
 	/*
 	 * The default module path is derived in part from the utsname release
 	 * string.  So too is the path component which is added to .d file
diff --git a/test/unittest/stack/tst.default.d b/test/unittest/stack/tst.default.d
deleted file mode 100644
index 2e50ec39..00000000
--- a/test/unittest/stack/tst.default.d
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Oracle Linux DTrace.
- * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
- * Licensed under the Universal Permissive License v 1.0 as shown at
- * http://oss.oracle.com/licenses/upl.
- */
-/* @@xfail: dtv2 */
-
-/*
- * ASSERTION:
- *   Test the stack action with the default stack depth.
- *
- * SECTION: Output Formatting/printf()
- *
- */
-
-BEGIN
-{
-	stack();
-	exit(0);
-}
diff --git a/test/unittest/stack/tst.default.r b/test/unittest/stack/tst.default.r
deleted file mode 100644
index e3a4e4c3..00000000
--- a/test/unittest/stack/tst.default.r
+++ /dev/null
@@ -1 +0,0 @@
-              dtrace`dtrace_ioctl+{ptr}
diff --git a/test/unittest/stack/tst.default.r.p b/test/unittest/stack/tst.default.r.p
deleted file mode 100755
index 281c025f..00000000
--- a/test/unittest/stack/tst.default.r.p
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/bin/sed -nf
-
-# Eliminate all lines other than dtrace`ioctl.
-
-/dtrace`dtrace_ioctl/p
diff --git a/test/unittest/stack/tst.stack3_fbt.aarch64.r b/test/unittest/stack/tst.stack3_fbt.aarch64.r
new file mode 100644
index 00000000..5c8bfaed
--- /dev/null
+++ b/test/unittest/stack/tst.stack3_fbt.aarch64.r
@@ -0,0 +1,11 @@
+                   FUNCTION:NAME
+                          :BEGIN 
+               __vfs_write:entry 
+              vmlinux`__vfs_write
+              vmlinux`ksys_write+{ptr}
+              vmlinux`__arm64_sys_write+{ptr}
+
+
+-- @@stderr --
+dtrace: script 'test/unittest/stack/tst.stack3_fbt.d' matched 2 probes
+dtrace: allowing destructive actions
diff --git a/test/unittest/stack/tst.stack3_fbt.d b/test/unittest/stack/tst.stack3_fbt.d
new file mode 100644
index 00000000..1a2eaf58
--- /dev/null
+++ b/test/unittest/stack/tst.stack3_fbt.d
@@ -0,0 +1,25 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ */
+
+/*
+ * ASSERTION: Test the stack action with depth 3.
+ *
+ * SECTION: Output Formatting/printf()
+ */
+
+#pragma D option destructive
+
+BEGIN
+{
+	system("echo write something > /dev/null");
+}
+
+fbt::__vfs_write:entry
+{
+	stack(3);
+	exit(0);
+}
diff --git a/test/unittest/stack/tst.stack3_fbt.x86_64.r b/test/unittest/stack/tst.stack3_fbt.x86_64.r
new file mode 100644
index 00000000..f24c8cba
--- /dev/null
+++ b/test/unittest/stack/tst.stack3_fbt.x86_64.r
@@ -0,0 +1,11 @@
+                   FUNCTION:NAME
+                          :BEGIN 
+               __vfs_write:entry 
+              vmlinux`__vfs_write+{ptr}
+              vmlinux`ksys_write+{ptr}
+              vmlinux`__x64_sys_write+{ptr}
+
+
+-- @@stderr --
+dtrace: script 'test/unittest/stack/tst.stack3_fbt.d' matched 2 probes
+dtrace: allowing destructive actions
diff --git a/test/unittest/stack/tst.stack_fbt.aarch64.r b/test/unittest/stack/tst.stack_fbt.aarch64.r
new file mode 100644
index 00000000..3a2896c4
--- /dev/null
+++ b/test/unittest/stack/tst.stack_fbt.aarch64.r
@@ -0,0 +1,14 @@
+                   FUNCTION:NAME
+                          :BEGIN 
+               __vfs_write:entry 
+              vmlinux`__vfs_write
+              vmlinux`ksys_write+{ptr}
+              vmlinux`__arm64_sys_write+{ptr}
+              vmlinux`el0_svc_common+{ptr}
+              vmlinux`el0_svc_handler+{ptr}
+              vmlinux`el0_svc+{ptr}
+
+
+-- @@stderr --
+dtrace: script 'test/unittest/stack/tst.stack_fbt.d' matched 2 probes
+dtrace: allowing destructive actions
diff --git a/test/unittest/stack/tst.stack_fbt.d b/test/unittest/stack/tst.stack_fbt.d
new file mode 100644
index 00000000..27db2116
--- /dev/null
+++ b/test/unittest/stack/tst.stack_fbt.d
@@ -0,0 +1,25 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ */
+
+/*
+ * ASSERTION: Test the stack action with the default stack depth.
+ *
+ * SECTION: Output Formatting/printf()
+ */
+
+#pragma D option destructive
+
+BEGIN
+{
+	system("echo write something > /dev/null");
+}
+
+fbt::__vfs_write:entry
+{
+	stack();
+	exit(0);
+}
diff --git a/test/unittest/stack/tst.stack_fbt.x86_64.r b/test/unittest/stack/tst.stack_fbt.x86_64.r
new file mode 100644
index 00000000..792ce80a
--- /dev/null
+++ b/test/unittest/stack/tst.stack_fbt.x86_64.r
@@ -0,0 +1,13 @@
+                   FUNCTION:NAME
+                          :BEGIN 
+               __vfs_write:entry 
+              vmlinux`__vfs_write+{ptr}
+              vmlinux`ksys_write+{ptr}
+              vmlinux`__x64_sys_write+{ptr}
+              vmlinux`do_syscall_64+{ptr}
+              vmlinux`entry_SYSCALL_64+{ptr}
+
+
+-- @@stderr --
+dtrace: script 'test/unittest/stack/tst.stack_fbt.d' matched 2 probes
+dtrace: allowing destructive actions
-- 
2.18.4




More information about the DTrace-devel mailing list