[DTrace-devel] [PATCH v3 1/2] Implement the sched provider (first part)

Kris Van Hees kris.van.hees at oracle.com
Wed May 24 06:07:58 UTC 2023


Partial implementation of the sched SDT provider.  The following probes
are implemented:

sched:::dequeue (arg1 is NULL)
sched:::enqueue (arg1 is NULL)
sched:::off-cpu
sched:::on-cpu (limited trigger locations)
sched:::surrender
sched:::tick
sched:::wakeup

Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
Reviewed-by: Nick Alcock <nick.alcock at oracle.com>
---
 libdtrace/Build                        |   2 +
 libdtrace/dt_open.c                    |   1 +
 libdtrace/dt_prov_sched.c              | 151 +++++++++++++++++++++++++
 libdtrace/dt_provider.h                |   1 +
 test/demo/sched/dtrace.d               |   1 -
 test/demo/sched/howlong.d              |   1 -
 test/demo/sched/qlen.d                 |   1 -
 test/demo/sched/qtime.d                |   1 -
 test/demo/sched/tick.d                 |   1 -
 test/demo/sched/ticktime.d             |   1 -
 test/demo/sched/where.d                |   1 -
 test/demo/sched/whoqueue.d             |   1 -
 test/demo/sched/whosteal.d             |   1 -
 test/unittest/buffering/tst.cputime.sh |   1 -
 14 files changed, 155 insertions(+), 10 deletions(-)
 create mode 100644 libdtrace/dt_prov_sched.c

diff --git a/libdtrace/Build b/libdtrace/Build
index cf18a043..89972c66 100644
--- a/libdtrace/Build
+++ b/libdtrace/Build
@@ -52,6 +52,7 @@ libdtrace-build_SOURCES = dt_aggregate.c \
 			  dt_prov_proc.c \
 			  dt_prov_profile.c \
 			  dt_prov_rawtp.c \
+			  dt_prov_sched.c \
 			  dt_prov_sdt.c \
 			  dt_prov_syscall.c \
 			  dt_prov_uprobe.c \
@@ -96,6 +97,7 @@ dt_prov_fbt.c_CFLAGS := -Wno-pedantic
 dt_prov_proc.c_CFLAGS := -Wno-pedantic
 dt_prov_profile.c_CFLAGS := -Wno-pedantic
 dt_prov_rawtp.c_CFLAGS := -Wno-pedantic
+dt_prov_sched.c_CFLAGS := -Wno-pedantic
 dt_prov_sdt.c_CFLAGS := -Wno-pedantic
 dt_prov_syscall.c_CFLAGS := -Wno-pedantic
 dt_prov_uprobe.c_CFLAGS := -Wno-pedantic
diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
index d13b859a..110d45bc 100644
--- a/libdtrace/dt_open.c
+++ b/libdtrace/dt_open.c
@@ -71,6 +71,7 @@ static const dt_provimpl_t *dt_providers[] = {
 	&dt_proc,
 	&dt_profile,
 	&dt_rawtp,
+	&dt_sched,
 	&dt_sdt,
 	&dt_syscall,
 	&dt_uprobe,
diff --git a/libdtrace/dt_prov_sched.c b/libdtrace/dt_prov_sched.c
new file mode 100644
index 00000000..8703c936
--- /dev/null
+++ b/libdtrace/dt_prov_sched.c
@@ -0,0 +1,151 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ *
+ * The 'sched' SDT provider for DTrace-specific probes.
+ */
+#include <assert.h>
+#include <errno.h>
+
+#include "dt_dctx.h"
+#include "dt_cg.h"
+#include "dt_provider_sdt.h"
+#include "dt_probe.h"
+
+static const char		prvname[] = "sched";
+static const char		modname[] = "vmlinux";
+
+static probe_dep_t	probes[] = { /* sched probe -> underlying probe spec */
+	{ "dequeue",
+	  DTRACE_PROBESPEC_NAME,	"fbt::dequeue_task_*:entry" },
+	{ "enqueue",
+	  DTRACE_PROBESPEC_NAME,	"fbt::enqueue_task_*:entry" },
+	{ "off-cpu",
+	  DTRACE_PROBESPEC_NAME,	"rawtp:sched::sched_switch" },
+	{ "on-cpu",
+	  DTRACE_PROBESPEC_NAME,	"fbt::schedule_tail:entry" },
+	{ "surrender",
+	  DTRACE_PROBESPEC_NAME,	"fbt::do_sched_yield:entry" },
+	{ "tick",
+	  DTRACE_PROBESPEC_NAME,	"fbt::scheduler_tick:entry" },
+	{ "wakeup",
+	  DTRACE_PROBESPEC_NAME,	"rawtp:sched::sched_wakeup" },
+	{ NULL, } /* NULL name: presumably the end-of-table marker */
+};
+
+static probe_arg_t probe_args[] = { /* per-probe argument descriptions */
+#if 0 /* change-pri has no entry in probes[] */
+	{ "change-pri", 0, { 0, 0, "struct task_struct *", "lwpsinfo_t *" } },
+	{ "change-pri", 1, { 0, 0, "struct task_struct *", "psinfo_t *" } },
+	{ "change-pri", 2, { 1, 0, "int", } },
+#endif
+	{ "dequeue", 0, { 0, 0, "struct task_struct *", "lwpsinfo_t *" } },
+	{ "dequeue", 1, { 0, 0, "struct task_struct *", "psinfo_t *" } },
+	{ "dequeue", 2, { 1, 0, "cpuinfo_t *", } },
+	{ "dequeue", 3, { 2, 0, "int", } }, /* NOTE(review): belongs to enqueue? */
+	{ "enqueue", 0, { 0, 0, "struct task_struct *", "lwpsinfo_t *" } },
+	{ "enqueue", 1, { 0, 0, "struct task_struct *", "psinfo_t *" } },
+	{ "enqueue", 2, { 1, 0, "cpuinfo_t *", } },
+	{ "off-cpu", 0, { 0, 0, "struct task_struct *", "lwpsinfo_t *" } },
+	{ "off-cpu", 1, { 0, 0, "struct task_struct *", "psinfo_t *" } },
+	{ "on-cpu", },
+#if 0 /* preempt, remain-cpu and sleep have no entry in probes[] */
+	{ "preempt", },
+	{ "remain-cpu", },
+	{ "sleep", },
+#endif
+	{ "surrender", 0, { 0, 0, "struct task_struct *", "lwpsinfo_t *" } },
+	{ "surrender", 1, { 0, 0, "struct task_struct *", "psinfo_t *" } },
+	{ "tick", 0, { 0, 0, "struct task_struct *", "lwpsinfo_t *" } },
+	{ "tick", 1, { 0, 0, "struct task_struct *", "psinfo_t *" } },
+	{ "wakeup", 0, { 0, 0, "struct task_struct *", "lwpsinfo_t *" } },
+	{ "wakeup", 1, { 0, 0, "struct task_struct *", "psinfo_t *" } },
+	{ NULL, } /* NULL name: presumably the end-of-table marker */
+};
+
+static const dtrace_pattr_t	pattr = { /* stability attributes */
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+};
+
+/*
+ * Provide all the "sched" SDT probes (delegates to dt_sdt_populate()).
+ */
+static int populate(dtrace_hdl_t *dtp)
+{
+	return dt_sdt_populate(dtp, prvname, modname, &dt_sched, &pattr,
+			       probe_args, probes);
+}
+
+/*
+ * Generate a BPF trampoline for an SDT probe.
+ *
+ * The trampoline function is called when an SDT probe triggers, and it must
+ * satisfy the following prototype:
+ *
+ *	int dt_sched(void *data)
+ *
+ * The trampoline will populate a dt_dctx_t struct and then call the function
+ * that implements the compiled D clause, returning the value it gets back.
+ * The code emitted here only sets up DMST_ARG() slots per probe; returns 0.
+ */
+static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
+{
+	dt_irlist_t	*dlp = &pcb->pcb_ir;	/* passed to every emit() */
+	dt_probe_t	*prp = pcb->pcb_probe;	/* probe the trampoline is for */
+
+	if (strcmp(prp->desc->prb, "dequeue") == 0) {
+		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(1)));
+		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
+		/*
+		 * arg0 is now the old arg1.  FIXME: arg1 should point to the
+		 * cpuinfo_t for the runqueue's CPU; for now it is set to 0.
+		 */
+		emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(1), 0));
+	} else if (strcmp(prp->desc->prb, "enqueue") == 0) {
+/*
+ * This is ugly but necessary...  enqueue_task() takes a flags argument and the
+ * ENQUEUE_HEAD flag is used to indicate that the task is to be placed at the
+ * head of the queue.  We need to be able to pass this special case as arg2
+ * in the enqueue probe, so arg2 is masked down to just the ENQUEUE_HEAD bit.
+ *
+ * The flag values are found in kernel/sched/sched.h which is not exposed
+ * outside the kernel source tree.
+ */
+#define ENQUEUE_HEAD	0x10	/* NOTE(review): confirm against target kernel */
+
+		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(1)));
+		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
+		/*
+		 * arg0 is now the old arg1.  FIXME: arg1 should point to the
+		 * cpuinfo_t for the runqueue's CPU; for now it is set to 0.
+		 */
+		emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(1), 0));
+		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(2)));
+		emit(dlp, BPF_ALU64_IMM(BPF_AND, BPF_REG_0, ENQUEUE_HEAD));
+		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(2), BPF_REG_0));
+	} else if (strcmp(prp->desc->prb, "off-cpu") == 0) { /* arg0 = arg2 */
+		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(2)));
+		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
+	} else if (strcmp(prp->desc->prb, "surrender") == 0 ||
+		   strcmp(prp->desc->prb, "tick") == 0) { /* arg0 = current task */
+		emit(dlp, BPF_CALL_HELPER(BPF_FUNC_get_current_task));
+		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
+	} /* any other probe name (e.g. on-cpu, wakeup): nothing is emitted */
+
+	return 0;
+}
+
+dt_provimpl_t	dt_sched = {	/* "sched" provider implementation */
+	.name		= prvname,
+	.prog_type	= BPF_PROG_TYPE_UNSPEC,
+	.populate	= &populate,
+	.enable		= &dt_sdt_enable,	/* shared SDT helper */
+	.trampoline	= &trampoline,
+	.probe_info	= &dt_sdt_probe_info,	/* shared SDT helper */
+};
diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
index 068c65a2..252a73d4 100644
--- a/libdtrace/dt_provider.h
+++ b/libdtrace/dt_provider.h
@@ -73,6 +73,7 @@ extern dt_provimpl_t dt_fbt;
 extern dt_provimpl_t dt_proc;
 extern dt_provimpl_t dt_profile;
 extern dt_provimpl_t dt_rawtp;
+extern dt_provimpl_t dt_sched;
 extern dt_provimpl_t dt_sdt;
 extern dt_provimpl_t dt_syscall;
 extern dt_provimpl_t dt_uprobe;
diff --git a/test/demo/sched/dtrace.d b/test/demo/sched/dtrace.d
index cedffb53..a1e9b4a8 100644
--- a/test/demo/sched/dtrace.d
+++ b/test/demo/sched/dtrace.d
@@ -4,7 +4,6 @@
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
-/* @@xfail: dtv2 */
 
 #pragma D option quiet
 
diff --git a/test/demo/sched/howlong.d b/test/demo/sched/howlong.d
index f68239e9..ace1b8fc 100644
--- a/test/demo/sched/howlong.d
+++ b/test/demo/sched/howlong.d
@@ -4,7 +4,6 @@
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
-/* @@xfail: dtv2 */
 
 sched:::off-cpu
 /curlwpsinfo->pr_state == SSLEEP/
diff --git a/test/demo/sched/qlen.d b/test/demo/sched/qlen.d
index 029cbbf6..95025bd6 100644
--- a/test/demo/sched/qlen.d
+++ b/test/demo/sched/qlen.d
@@ -4,7 +4,6 @@
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
-/* @@xfail: dtv2 */
 
 sched:::enqueue
 {
diff --git a/test/demo/sched/qtime.d b/test/demo/sched/qtime.d
index ba010f3e..344a88ea 100644
--- a/test/demo/sched/qtime.d
+++ b/test/demo/sched/qtime.d
@@ -4,7 +4,6 @@
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
-/* @@xfail: dtv2 */
 
 sched:::enqueue
 {
diff --git a/test/demo/sched/tick.d b/test/demo/sched/tick.d
index 54d18d70..57376f1f 100644
--- a/test/demo/sched/tick.d
+++ b/test/demo/sched/tick.d
@@ -4,7 +4,6 @@
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
-/* @@xfail: dtv2 */
 
 sched:::tick,
 sched:::enqueue
diff --git a/test/demo/sched/ticktime.d b/test/demo/sched/ticktime.d
index 3fd0bfbc..dca76dba 100644
--- a/test/demo/sched/ticktime.d
+++ b/test/demo/sched/ticktime.d
@@ -4,7 +4,6 @@
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
-/* @@xfail: dtv2 */
 
 uint64_t last[int];
 
diff --git a/test/demo/sched/where.d b/test/demo/sched/where.d
index 8845610d..906920d0 100644
--- a/test/demo/sched/where.d
+++ b/test/demo/sched/where.d
@@ -4,7 +4,6 @@
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
-/* @@xfail: dtv2 */
 
 sched:::on-cpu
 {
diff --git a/test/demo/sched/whoqueue.d b/test/demo/sched/whoqueue.d
index 3057600c..1eb3d1b4 100644
--- a/test/demo/sched/whoqueue.d
+++ b/test/demo/sched/whoqueue.d
@@ -4,7 +4,6 @@
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
-/* @@xfail: dtv2 */
 
 #pragma D option quiet
 #pragma D option nspec=4
diff --git a/test/demo/sched/whosteal.d b/test/demo/sched/whosteal.d
index 5ac091b3..5287dd99 100644
--- a/test/demo/sched/whosteal.d
+++ b/test/demo/sched/whosteal.d
@@ -4,7 +4,6 @@
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
-/* @@xfail: dtv2 */
 
 #pragma D option quiet
 
diff --git a/test/unittest/buffering/tst.cputime.sh b/test/unittest/buffering/tst.cputime.sh
index cde1fba0..6a420e8d 100755
--- a/test/unittest/buffering/tst.cputime.sh
+++ b/test/unittest/buffering/tst.cputime.sh
@@ -6,7 +6,6 @@
 # http://oss.oracle.com/licenses/upl.
 #
 # @@timeout: 12
-# @@xfail: dtv2
 
 script()
 {
-- 
2.40.1




More information about the DTrace-devel mailing list