[DTrace-devel] [PATCH 05/12] Add support for the stack() action
eugene.loh at oracle.com
eugene.loh at oracle.com
Fri May 28 11:35:09 PDT 2021
From: Eugene Loh <eugene.loh at oracle.com>
Implement the stack() action using the bpf_get_stack() helper
function. This implementation most closely resembles the legacy
DTrace implementation. Someday it may make sense to switch over
to bpf_get_stackid() instead, which would allow consolidation
of identical stacks.
The max stack size can be controlled by the kernel parameter
perf_event_max_stack, and we would like to allow users to increase
that limit. We will eventually need to know this limit in various
places. So, add a dtp->dt_maxframes value, which dt_vopen() reads
from /proc/sys/kernel/perf_event_max_stack.
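Change the stack() testing. Specifically, replace the default
test, which called stack() from the BEGIN probe, with fbt-based
tests (tst.stack_fbt.d and tst.stack3_fbt.d), since BEGIN no longer
has a kernel stack.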
Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
libdtrace/dt_cg.c | 69 +++++++++++++++++---
libdtrace/dt_consume.c | 21 ++++--
libdtrace/dt_impl.h | 1 +
libdtrace/dt_open.c | 6 ++
test/unittest/stack/tst.default.d | 21 ------
test/unittest/stack/tst.default.r | 1 -
test/unittest/stack/tst.default.r.p | 5 --
test/unittest/stack/tst.stack3_fbt.aarch64.r | 11 ++++
test/unittest/stack/tst.stack3_fbt.d | 25 +++++++
test/unittest/stack/tst.stack3_fbt.x86_64.r | 11 ++++
test/unittest/stack/tst.stack_fbt.aarch64.r | 14 ++++
test/unittest/stack/tst.stack_fbt.d | 25 +++++++
test/unittest/stack/tst.stack_fbt.x86_64.r | 13 ++++
13 files changed, 182 insertions(+), 41 deletions(-)
delete mode 100644 test/unittest/stack/tst.default.d
delete mode 100644 test/unittest/stack/tst.default.r
delete mode 100755 test/unittest/stack/tst.default.r.p
create mode 100644 test/unittest/stack/tst.stack3_fbt.aarch64.r
create mode 100644 test/unittest/stack/tst.stack3_fbt.d
create mode 100644 test/unittest/stack/tst.stack3_fbt.x86_64.r
create mode 100644 test/unittest/stack/tst.stack_fbt.aarch64.r
create mode 100644 test/unittest/stack/tst.stack_fbt.d
create mode 100644 test/unittest/stack/tst.stack_fbt.x86_64.r
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index f7168ffe..e7715aec 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -1333,21 +1333,72 @@ dt_cg_act_speculate(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
static void
dt_cg_act_stack(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
{
- dt_node_t *arg = dnp->dn_args;
-#ifdef FIXME
- uint32_t nframes = 0;
-#endif
+ dtrace_hdl_t *dtp = pcb->pcb_hdl;
+ dt_irlist_t *dlp = &pcb->pcb_ir;
+ dt_regset_t *drp = pcb->pcb_regs;
+ dt_node_t *arg = dnp->dn_args;
+ int nframes = dtp->dt_options[DTRACEOPT_STACKFRAMES];
+ int skip = 0;
+ uint_t off;
+ uint_t lbl_valid = dt_irlist_label(dlp);
+
+ /*
+ * Legacy default was dtrace_stackframes_default,
+ * in kernel file dtrace/dtrace_state.c.
+ */
+ if (nframes == DTRACEOPT_UNSET)
+ nframes = 20;
if (arg != NULL) {
- if (!dt_node_is_posconst(arg)) {
+ if (!dt_node_is_posconst(arg))
dnerror(arg, D_STACK_SIZE, "stack( ) argument #1 must "
"be a non-zero positive integer constant\n");
- }
-#ifdef FIXME
- nframes = (uint32_t)arg->dn_value;
-#endif
+ nframes = arg->dn_value;
}
+
+ /*
+ * FIXME:
+ * What should happen if more frames are requested than allowed?
+ * 1. Silently reduce nframes?
+ * 2. Report message and reduce nframes?
+ * 3. Report message and abort?
+ * Option 1 is the easiest and what we do here.
+ *
+ * A message could explain that the limit can be raised:
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ * See bpf_get_stack() info in /usr/include/linux/bpf.h.
+ *
+ * Note that DTv1 allows much larger stacks but does not handle
+ * the "too large" case very well:
+ * # dtrace -qn 'BEGIN {stack(1234567); exit(0)}'
+ * dtrace: 1 drop on CPU 2
+ * [...hang...]
+ */
+ if (nframes > dtp->dt_maxframes)
+ nframes = dtp->dt_maxframes;
+
+ /* Figure out where we want to be in the output buffer. */
+ off = dt_rec_add(dtp, dt_cg_fill_gap, DTRACEACT_STACK,
+ 8 * nframes, 8, NULL, nframes);
+
+ /* Now call bpf_get_stack(ctx, buf, size, flags). */
+ if (dt_regset_xalloc_args(drp) == -1)
+ longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_FP, DT_STK_DCTX));
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_1, DCTX_CTX));
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_9));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, off));
+ emit(dlp, BPF_MOV_IMM(BPF_REG_3, 8 * nframes));
+ emit(dlp, BPF_MOV_IMM(BPF_REG_4, skip & BPF_F_SKIP_FIELD_MASK));
+ dt_regset_xalloc(drp, BPF_REG_0);
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_get_stack));
+ dt_regset_free_args(drp);
+ emit(dlp, BPF_BRANCH_IMM(BPF_JSGE, BPF_REG_0, 0, lbl_valid));
+ dt_cg_probe_error(pcb, -1, DTRACEFLT_BADSTACK, 0 /* FIXME */);
+ emitl(dlp, lbl_valid,
+ BPF_NOP());
+ dt_regset_free(drp, BPF_REG_0);
}
static void
diff --git a/libdtrace/dt_consume.c b/libdtrace/dt_consume.c
index f39156df..483f9f28 100644
--- a/libdtrace/dt_consume.c
+++ b/libdtrace/dt_consume.c
@@ -2018,15 +2018,18 @@ dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, char *buf,
for (i = 0; i < pdat->dtpda_ddesc->dtdd_nrecs; i++) {
int n;
dtrace_recdesc_t *rec;
+ dtrace_actkind_t act;
int (*func)(dtrace_hdl_t *, FILE *, void *,
const dtrace_probedata_t *,
const dtrace_recdesc_t *, uint_t,
const void *buf, size_t);
+ caddr_t addr;
rec = &pdat->dtpda_ddesc->dtdd_recs[i];
- pdat->dtpda_data = data + rec->dtrd_offset;
+ act = rec->dtrd_action;
+ pdat->dtpda_data = addr = data + rec->dtrd_offset;
- if (rec->dtrd_action == DTRACEACT_LIBACT) {
+ if (act == DTRACEACT_LIBACT) {
switch (rec->dtrd_arg) {
case DT_ACT_DENORMALIZE:
if (dt_normalize(dtp, data, rec) != 0)
@@ -2057,7 +2060,16 @@ dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, char *buf,
if (rval != DTRACE_CONSUME_THIS)
return dt_set_errno(dtp, EDT_BADRVAL);
- switch (rec->dtrd_action) {
+ if (act == DTRACEACT_STACK) {
+ int depth = rec->dtrd_arg;
+
+ if (dt_print_stack(dtp, fp, NULL, addr, depth,
+ rec->dtrd_size / depth) < 0)
+ return -1;
+ continue;
+ }
+
+ switch (act) {
case DTRACEACT_PRINTF:
func = dtrace_fprintf;
break;
@@ -2092,8 +2104,7 @@ dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, char *buf,
continue;
}
- n = dt_print_trace(dtp, fp, rec, pdat->dtpda_data,
- quiet);
+ n = dt_print_trace(dtp, fp, rec, addr, quiet);
if (n < 0)
return DTRACE_WORKSTATUS_ERROR;
}
diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
index c2d32863..240e29cc 100644
--- a/libdtrace/dt_impl.h
+++ b/libdtrace/dt_impl.h
@@ -380,6 +380,7 @@ struct dtrace_hdl {
dt_list_t dt_lib_dep_sorted; /* dependency sorted library list */
dt_global_pcap_t dt_pcap; /* global tshark/pcap state */
char *dt_freopen_filename; /* filename for freopen() action */
+ uint_t dt_maxframes; /* maximum stack depth */
};
/*
diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
index f2b86f7c..0323e68a 100644
--- a/libdtrace/dt_open.c
+++ b/libdtrace/dt_open.c
@@ -645,6 +645,7 @@ dt_vopen(int version, int flags, int *errp,
int i, err;
char modpath[PATH_MAX];
struct rlimit rl;
+ FILE *fd;
const dt_intrinsic_t *dinp;
const dt_typedef_t *dtyp;
@@ -756,6 +757,11 @@ dt_vopen(int version, int flags, int *errp,
dt_dof_init(dtp);
uname(&dtp->dt_uts);
+ fd = fopen("/proc/sys/kernel/perf_event_max_stack", "r");
+ assert(fd);
+ assert(fscanf(fd, "%u", &dtp->dt_maxframes) == 1);
+ fclose(fd);
+
/*
* The default module path is derived in part from the utsname release
* string. So too is the path component which is added to .d file
diff --git a/test/unittest/stack/tst.default.d b/test/unittest/stack/tst.default.d
deleted file mode 100644
index 2e50ec39..00000000
--- a/test/unittest/stack/tst.default.d
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Oracle Linux DTrace.
- * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
- * Licensed under the Universal Permissive License v 1.0 as shown at
- * http://oss.oracle.com/licenses/upl.
- */
-/* @@xfail: dtv2 */
-
-/*
- * ASSERTION:
- * Test the stack action with the default stack depth.
- *
- * SECTION: Output Formatting/printf()
- *
- */
-
-BEGIN
-{
- stack();
- exit(0);
-}
diff --git a/test/unittest/stack/tst.default.r b/test/unittest/stack/tst.default.r
deleted file mode 100644
index e3a4e4c3..00000000
--- a/test/unittest/stack/tst.default.r
+++ /dev/null
@@ -1 +0,0 @@
- dtrace`dtrace_ioctl+{ptr}
diff --git a/test/unittest/stack/tst.default.r.p b/test/unittest/stack/tst.default.r.p
deleted file mode 100755
index 281c025f..00000000
--- a/test/unittest/stack/tst.default.r.p
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/bin/sed -nf
-
-# Eliminate all lines other than dtrace`ioctl.
-
-/dtrace`dtrace_ioctl/p
diff --git a/test/unittest/stack/tst.stack3_fbt.aarch64.r b/test/unittest/stack/tst.stack3_fbt.aarch64.r
new file mode 100644
index 00000000..5c8bfaed
--- /dev/null
+++ b/test/unittest/stack/tst.stack3_fbt.aarch64.r
@@ -0,0 +1,11 @@
+ FUNCTION:NAME
+ :BEGIN
+ __vfs_write:entry
+ vmlinux`__vfs_write
+ vmlinux`ksys_write+{ptr}
+ vmlinux`__arm64_sys_write+{ptr}
+
+
+-- @@stderr --
+dtrace: script 'test/unittest/stack/tst.stack3_fbt.d' matched 2 probes
+dtrace: allowing destructive actions
diff --git a/test/unittest/stack/tst.stack3_fbt.d b/test/unittest/stack/tst.stack3_fbt.d
new file mode 100644
index 00000000..1a2eaf58
--- /dev/null
+++ b/test/unittest/stack/tst.stack3_fbt.d
@@ -0,0 +1,25 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ */
+
+/*
+ * ASSERTION: Test the stack action with depth 3.
+ *
+ * SECTION: Output Formatting/printf()
+ */
+
+#pragma D option destructive
+
+BEGIN
+{
+ system("echo write something > /dev/null");
+}
+
+fbt::__vfs_write:entry
+{
+ stack(3);
+ exit(0);
+}
diff --git a/test/unittest/stack/tst.stack3_fbt.x86_64.r b/test/unittest/stack/tst.stack3_fbt.x86_64.r
new file mode 100644
index 00000000..f24c8cba
--- /dev/null
+++ b/test/unittest/stack/tst.stack3_fbt.x86_64.r
@@ -0,0 +1,11 @@
+ FUNCTION:NAME
+ :BEGIN
+ __vfs_write:entry
+ vmlinux`__vfs_write+{ptr}
+ vmlinux`ksys_write+{ptr}
+ vmlinux`__x64_sys_write+{ptr}
+
+
+-- @@stderr --
+dtrace: script 'test/unittest/stack/tst.stack3_fbt.d' matched 2 probes
+dtrace: allowing destructive actions
diff --git a/test/unittest/stack/tst.stack_fbt.aarch64.r b/test/unittest/stack/tst.stack_fbt.aarch64.r
new file mode 100644
index 00000000..3a2896c4
--- /dev/null
+++ b/test/unittest/stack/tst.stack_fbt.aarch64.r
@@ -0,0 +1,14 @@
+ FUNCTION:NAME
+ :BEGIN
+ __vfs_write:entry
+ vmlinux`__vfs_write
+ vmlinux`ksys_write+{ptr}
+ vmlinux`__arm64_sys_write+{ptr}
+ vmlinux`el0_svc_common+{ptr}
+ vmlinux`el0_svc_handler+{ptr}
+ vmlinux`el0_svc+{ptr}
+
+
+-- @@stderr --
+dtrace: script 'test/unittest/stack/tst.stack_fbt.d' matched 2 probes
+dtrace: allowing destructive actions
diff --git a/test/unittest/stack/tst.stack_fbt.d b/test/unittest/stack/tst.stack_fbt.d
new file mode 100644
index 00000000..27db2116
--- /dev/null
+++ b/test/unittest/stack/tst.stack_fbt.d
@@ -0,0 +1,25 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ */
+
+/*
+ * ASSERTION: Test the stack action with the default stack depth.
+ *
+ * SECTION: Output Formatting/printf()
+ */
+
+#pragma D option destructive
+
+BEGIN
+{
+ system("echo write something > /dev/null");
+}
+
+fbt::__vfs_write:entry
+{
+ stack();
+ exit(0);
+}
diff --git a/test/unittest/stack/tst.stack_fbt.x86_64.r b/test/unittest/stack/tst.stack_fbt.x86_64.r
new file mode 100644
index 00000000..792ce80a
--- /dev/null
+++ b/test/unittest/stack/tst.stack_fbt.x86_64.r
@@ -0,0 +1,13 @@
+ FUNCTION:NAME
+ :BEGIN
+ __vfs_write:entry
+ vmlinux`__vfs_write+{ptr}
+ vmlinux`ksys_write+{ptr}
+ vmlinux`__x64_sys_write+{ptr}
+ vmlinux`do_syscall_64+{ptr}
+ vmlinux`entry_SYSCALL_64+{ptr}
+
+
+-- @@stderr --
+dtrace: script 'test/unittest/stack/tst.stack_fbt.d' matched 2 probes
+dtrace: allowing destructive actions
--
2.18.4
More information about the DTrace-devel
mailing list