[DTrace-devel] [PATCH 2/7] Implement the sched provider
Kris Van Hees
kris.van.hees at oracle.com
Tue May 9 22:29:30 UTC 2023
Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
---
libdtrace/Build | 2 +
libdtrace/dt_open.c | 1 +
libdtrace/dt_prov_sched.c | 250 ++++++++++++++++++++++++++++++++++++++
libdtrace/dt_provider.h | 1 +
4 files changed, 254 insertions(+)
create mode 100644 libdtrace/dt_prov_sched.c
diff --git a/libdtrace/Build b/libdtrace/Build
index 483b2f02..299cb128 100644
--- a/libdtrace/Build
+++ b/libdtrace/Build
@@ -52,6 +52,7 @@ libdtrace-build_SOURCES = dt_aggregate.c \
dt_prov_proc.c \
dt_prov_profile.c \
dt_prov_rawtp.c \
+ dt_prov_sched.c \
dt_prov_sdt.c \
dt_prov_syscall.c \
dt_prov_uprobe.c \
@@ -95,6 +96,7 @@ dt_prov_fbt.c_CFLAGS := -Wno-pedantic
dt_prov_proc.c_CFLAGS := -Wno-pedantic
dt_prov_profile.c_CFLAGS := -Wno-pedantic
dt_prov_rawtp.c_CFLAGS := -Wno-pedantic
+dt_prov_sched.c_CFLAGS := -Wno-pedantic
dt_prov_sdt.c_CFLAGS := -Wno-pedantic
dt_prov_syscall.c_CFLAGS := -Wno-pedantic
dt_prov_uprobe.c_CFLAGS := -Wno-pedantic
diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
index 52e23fe5..04364807 100644
--- a/libdtrace/dt_open.c
+++ b/libdtrace/dt_open.c
@@ -70,6 +70,7 @@ static const dt_provimpl_t *dt_providers[] = {
&dt_proc,
&dt_profile,
&dt_rawtp,
+ &dt_sched,
&dt_sdt,
&dt_syscall,
&dt_uprobe,
diff --git a/libdtrace/dt_prov_sched.c b/libdtrace/dt_prov_sched.c
new file mode 100644
index 00000000..0308e212
--- /dev/null
+++ b/libdtrace/dt_prov_sched.c
@@ -0,0 +1,250 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ *
+ * The 'sched' SDT provider for DTrace-specific probes.
+ */
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <linux/bpf.h>
+#include <linux/perf_event.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <bpf_asm.h>
+
+#include "dt_dctx.h"
+#include "dt_cg.h"
+#include "dt_bpf.h"
+#include "dt_provider.h"
+#include "dt_probe.h"
+#include "dt_pt_regs.h"
+
+static const char prvname[] = "sched";
+static const char modname[] = "vmlinux";
+
+/*
+ * Implement sched probes as dependent probes on other probes. E.g. for the
+ * on-cpu probe, call dt_probe_add_dependent() to add the on-cpu probe to the
+ * underlying fbt::schedule_tail:entry probe. When the program for probe
+ * fbt::schedule_tail:entry is generated, loop through all dependent probes,
+ * and call the trampoline callback for each dependent probe to generate a
+ * pseudo-trampoline that converts the underlying probe into the dependent
+ * probe. Finally, code is generated to call each of the clauses of the
+ * dependent probe.
+ *
+ * The process of converting the main probe to the dependent probe needs to be
+ * done in a way that preserves the original probe data so that multiple
+ * dependent probes are possible (and each needs to be able to convert the
+ * main probe data into the proper dependent data using a pseudo-trampoline).
+ *
+ * The conversion needs to be able to implement predicate-style conditions that
+ * determine whether the dependent probe is to fire when the main probe does.
+ * The sched:::preempt and sched:::sleep probes are an example of dependent
+ * probes that need a conditional. If the condition fails, the main probe
+ * processing should move on to the next dependent probe (if any). If there
+ * are no more dependent probes, it will end up skipping to the exit label.
+ *
+ * Dependent probes need priorities for cases where two probes are dependent on
+ * the same underlying probe, but one needs to 'fire' before the other.
+ */
+
+typedef struct probe_arg {
+ const char *name; /* name of probe */
+ int argno; /* argument number */
+ dt_argdesc_t argdesc; /* argument description */
+} probe_arg_t;
+
+static probe_arg_t probe_args[] = {
+#if 0
+ { "change-pri", 0, { 0, "struct task_struct *", "lwpsinfo_t *" } },
+ { "change-pri", 1, { 0, "struct task_struct *", "psinfo_t *" } },
+ { "change-pri", 2, { 1, "int", } },
+ { "dequeue", 0, { 0, "struct task_struct *", "lwpsinfo_t *" } },
+ { "dequeue", 1, { 0, "struct task_struct *", "psinfo_t *" } },
+ { "dequeue", 2, { 1, "cpuinfo_t *", } },
+ { "dequeue", 3, { 2, "int", } },
+ { "enqueue", 0, { 0, "struct task_struct *", "lwpsinfo_t *" } },
+ { "enqueue", 1, { 0, "struct task_struct *", "psinfo_t *" } },
+ { "enqueue", 2, { 1, "cpuinfo_t *", } },
+ { "off-cpu", 0, { 0, "struct task_struct *", "lwpsinfo_t *" } },
+ { "off-cpu", 1, { 0, "struct task_struct *", "psinfo_t *" } },
+#endif
+ { "on-cpu", },
+ { "preempt", },
+ { "remain-cpu", },
+ { "sleep", },
+#if 0
+ { "surrender", 0, { 0, "struct task_struct *", "lwpsinfo_t *" } },
+ { "surrender", 1, { 0, "struct task_struct *", "psinfo_t *" } },
+ { "tick", 0, { 0, "struct task_struct *", "lwpsinfo_t *" } },
+ { "tick", 1, { 0, "struct task_struct *", "psinfo_t *" } },
+ { "wakeup", 0, { 0, "struct task_struct *", "lwpsinfo_t *" } },
+ { "wakeup", 1, { 0, "struct task_struct *", "psinfo_t *" } },
+#endif
+};
+
+static const dtrace_pattr_t pattr = {
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+};
+
+/*
+ * Provide all the "sched" SDT probes.
+ */
+static int populate(dtrace_hdl_t *dtp)
+{
+ dt_provider_t *prv;
+ int i;
+ int n = 0;
+
+ prv = dt_provider_create(dtp, prvname, &dt_sched, &pattr);
+ if (prv == NULL)
+ return 0;
+
+ /*
+ * Create "sched" probes based on the probe_args list. Since each
+ * probe will have at least one entry (with argno == 0), we can use
+ * those entries to identify the probe names.
+ */
+ for (i = 0; i < ARRAY_SIZE(probe_args); i++) {
+ probe_arg_t *arg = &probe_args[i];
+
+ if (arg->argno == 0 &&
+ dt_probe_insert(dtp, prv, prvname, modname, "", arg->name,
+ NULL))
+ n++;
+ }
+
+ return n;
+}
+
+static void enable(dtrace_hdl_t *dtp, dt_probe_t *prp)
+{
+ dt_probe_t *uprp = NULL;
+ dtrace_probedesc_t pd;
+
+ if (strcmp(prp->desc->prb, "on-cpu") == 0) {
+ pd.id = DTRACE_IDNONE;
+ pd.prv = "fbt";
+ pd.mod = "";
+ pd.fun = "schedule_tail";
+ pd.prb = "entry";
+
+ uprp = dt_probe_lookup(dtp, &pd);
+ assert(uprp != NULL);
+
+ dt_probe_add_dependent(dtp, uprp, prp);
+ dt_probe_enable(dtp, uprp);
+ } else if (strcmp(prp->desc->prb, "preempt") == 0 ||
+ strcmp(prp->desc->prb, "sleep") == 0) {
+ pd.id = DTRACE_IDNONE;
+ pd.prv = "sdt";
+ pd.mod = "sched";
+ pd.fun = "";
+ pd.prb = "sched_switch";
+
+ uprp = dt_probe_lookup(dtp, &pd);
+ assert(uprp != NULL);
+
+ dt_probe_add_dependent(dtp, uprp, prp);
+ dt_probe_enable(dtp, uprp);
+ }
+
+ /*
+ * Finally, ensure we're in the list of enablings as well.
+ * (This ensures that, among other things, the probes map
+ * gains entries for us.)
+ */
+ if (!dt_in_list(&dtp->dt_enablings, prp))
+ dt_list_append(&dtp->dt_enablings, prp);
+}
+
+/*
+ * Generate a BPF trampoline for an SDT probe.
+ *
+ * The trampoline function is called when an SDT probe triggers, and it must
+ * satisfy the following prototype:
+ *
+ * int dt_proc(void *data)
+ *
+ * The trampoline will populate a dt_dctx_t struct and then call the function
+ * that implements the compiled D clause. It returns the value that it gets
+ * back from that function.
+ */
+static void trampoline(dt_pcb_t *pcb, uint_t exitlbl)
+{
+ dtrace_hdl_t *dtp = pcb->pcb_hdl;
+ dt_irlist_t *dlp = &pcb->pcb_ir;
+ dt_probe_t *prp = pcb->pcb_probe;
+
+ if (strcmp(prp->desc->prb, "preempt") == 0) {
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(3)));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JLT, BPF_REG_0, 1 << 8, pcb->pcb_exitlbl));
+ } else if (strcmp(prp->desc->prb, "sleep") == 0) {
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(3)));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JGE, BPF_REG_0, 1 << 8, pcb->pcb_exitlbl));
+ }
+}
+
+static int probe_info(dtrace_hdl_t *dtp, const dt_probe_t *prp,
+ int *argcp, dt_argdesc_t **argvp)
+{
+ int i;
+ int pidx = -1;
+ int argc = 0;
+ dt_argdesc_t *argv = NULL;
+
+ for (i = 0; i < ARRAY_SIZE(probe_args); i++) {
+ probe_arg_t *arg = &probe_args[i];
+
+ if (strcmp(arg->name, prp->desc->prb) == 0) {
+ if (pidx == -1) {
+ pidx = i;
+
+ if (arg->argdesc.native == NULL)
+ break;
+ }
+
+ argc++;
+ }
+ }
+
+ if (argc == 0)
+ goto done;
+
+ argv = dt_zalloc(dtp, argc * sizeof(dt_argdesc_t));
+ if (!argv)
+ return -ENOMEM;
+
+ for (i = pidx; i < pidx + argc; i++) {
+ probe_arg_t *arg = &probe_args[i];
+
+ argv[arg->argno] = arg->argdesc;
+ }
+
+done:
+ *argcp = argc;
+ *argvp = argv;
+
+ return 0;
+}
+
+dt_provimpl_t dt_sched = {
+ .name = prvname,
+ .prog_type = BPF_PROG_TYPE_UNSPEC,
+ .populate = &populate,
+ .enable = &enable,
+ .trampoline = &trampoline,
+ .probe_info = &probe_info,
+};
diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
index da1b4e57..f3b21f55 100644
--- a/libdtrace/dt_provider.h
+++ b/libdtrace/dt_provider.h
@@ -84,6 +84,7 @@ extern dt_provimpl_t dt_fbt;
extern dt_provimpl_t dt_proc;
extern dt_provimpl_t dt_profile;
extern dt_provimpl_t dt_rawtp;
+extern dt_provimpl_t dt_sched;
extern dt_provimpl_t dt_sdt;
extern dt_provimpl_t dt_syscall;
extern dt_provimpl_t dt_uprobe;
--
2.40.1
More information about the DTrace-devel
mailing list