[DTrace-devel] [PATCH v2 1/2] Implement the sched provider

Kris Van Hees kris.van.hees at oracle.com
Tue May 23 20:14:54 UTC 2023


On Tue, May 23, 2023 at 03:57:24PM -0400, Eugene Loh via DTrace-devel wrote:
> Still no tests in this patch?

Tests that need xfail removed are being collected - but I wanted to get the
code changes out first to be looked at for review.

> Should there be a comment that this is a partial implementation (not all
> probes)?

Yes, I'll list what is implemented in the commit msg.

> How do I map what I see in probes[] to the DTv1 DTRACE_SCHED()
> instrumentation points we used to have in kernel/sched/core.c? Or, if that
> is the wrong way to think about it, how about a few words about the change?

You cannot really map from one to the other.  In fact, other than the older
version being a source to look at during implementation, you shouldn't even
look at this as a change.  It is truly a new implementation of the sched SDT
probes.  It is more about functionality than doing the same thing or changing
anything.

I will (as suggested above) list that this is a limited implementation of the
provider.  Not all probes are provided, and those that are provided are not all
fully implemented yet.  It is a work in progress as we incrementally add to the set
of probes.

> On 5/22/23 18:51, Kris Van Hees via DTrace-devel wrote:
> > Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> > ---
> >   libdtrace/Build           |   2 +
> >   libdtrace/dt_open.c       |   1 +
> >   libdtrace/dt_prov_sched.c | 140 ++++++++++++++++++++++++++++++++++++++
> >   libdtrace/dt_provider.h   |   1 +
> >   4 files changed, 144 insertions(+)
> >   create mode 100644 libdtrace/dt_prov_sched.c
> > 
> > diff --git a/libdtrace/Build b/libdtrace/Build
> > index cf18a043..89972c66 100644
> > --- a/libdtrace/Build
> > +++ b/libdtrace/Build
> > @@ -52,6 +52,7 @@ libdtrace-build_SOURCES = dt_aggregate.c \
> >   			  dt_prov_proc.c \
> >   			  dt_prov_profile.c \
> >   			  dt_prov_rawtp.c \
> > +			  dt_prov_sched.c \
> >   			  dt_prov_sdt.c \
> >   			  dt_prov_syscall.c \
> >   			  dt_prov_uprobe.c \
> > @@ -96,6 +97,7 @@ dt_prov_fbt.c_CFLAGS := -Wno-pedantic
> >   dt_prov_proc.c_CFLAGS := -Wno-pedantic
> >   dt_prov_profile.c_CFLAGS := -Wno-pedantic
> >   dt_prov_rawtp.c_CFLAGS := -Wno-pedantic
> > +dt_prov_sched.c_CFLAGS := -Wno-pedantic
> >   dt_prov_sdt.c_CFLAGS := -Wno-pedantic
> >   dt_prov_syscall.c_CFLAGS := -Wno-pedantic
> >   dt_prov_uprobe.c_CFLAGS := -Wno-pedantic
> > diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
> > index d13b859a..110d45bc 100644
> > --- a/libdtrace/dt_open.c
> > +++ b/libdtrace/dt_open.c
> > @@ -71,6 +71,7 @@ static const dt_provimpl_t *dt_providers[] = {
> >   	&dt_proc,
> >   	&dt_profile,
> >   	&dt_rawtp,
> > +	&dt_sched,
> >   	&dt_sdt,
> >   	&dt_syscall,
> >   	&dt_uprobe,
> > diff --git a/libdtrace/dt_prov_sched.c b/libdtrace/dt_prov_sched.c
> > new file mode 100644
> > index 00000000..c6a09747
> > --- /dev/null
> > +++ b/libdtrace/dt_prov_sched.c
> > @@ -0,0 +1,140 @@
> > +/*
> > + * Oracle Linux DTrace.
> > + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> > + * Licensed under the Universal Permissive License v 1.0 as shown at
> > + * http://oss.oracle.com/licenses/upl.
> > + *
> > + * The 'sched' SDT provider for DTrace-specific probes.
> > + */
> > +#include <assert.h>
> > +#include <errno.h>
> > +
> > +#include "dt_dctx.h"
> > +#include "dt_cg.h"
> > +#include "dt_provider_sdt.h"
> > +#include "dt_probe.h"
> > +
> > +static const char		prvname[] = "sched";
> > +static const char		modname[] = "vmlinux";
> > +
> > +static probe_dep_t	probes[] = {
> 
> How about some #if 0 for probes we do not yet implement? (Admittedly, that's
> maybe a useless idea since we don't have much meaningful to put into the #if
> 0 other than the unimplemented probe's name.)

That is why none are listed like that :)  See the probe_args for that kind of
detail.

> > +	{ "dequeue",
> > +	  DTRACE_PROBESPEC_NAME,	"fbt::dequeue_task_*:entry" },
> > +	{ "enqueue",
> > +	  DTRACE_PROBESPEC_NAME,	"fbt::enqueue_task_*:entry" },
> > +	{ "off-cpu",
> > +	  DTRACE_PROBESPEC_NAME,	"rawtp:sched::sched_switch" },
> > +	{ "on-cpu",
> > +	  DTRACE_PROBESPEC_NAME,	"fbt::schedule_tail:entry" },
> > +	{ "surrender",
> > +	  DTRACE_PROBESPEC_NAME,	"fbt::do_sched_yield:entry" },
> > +	{ "tick",
> > +	  DTRACE_PROBESPEC_NAME,	"fbt::scheduler_tick:entry" },
> > +	{ "wakeup",
> > +	  DTRACE_PROBESPEC_NAME,	"rawtp:sched::sched_wakeup" },
> > +	{ NULL, }
> > +};
> > +
> > +static probe_arg_t probe_args[] = {
> > +#if 0
> > +	{ "change-pri", 0, { 0, 0, "struct task_struct *", "lwpsinfo_t *" } },
> > +	{ "change-pri", 1, { 0, 0, "struct task_struct *", "psinfo_t *" } },
> > +	{ "change-pri", 2, { 1, 0, "int", } },
> > +#endif
> > +	{ "dequeue", 0, { 0, 0, "struct task_struct *", "lwpsinfo_t *" } },
> > +	{ "dequeue", 1, { 0, 0, "struct task_struct *", "psinfo_t *" } },
> > +	{ "dequeue", 2, { 1, 0, "cpuinfo_t *", } },
> > +	{ "enqueue", 0, { 0, 0, "struct task_struct *", "lwpsinfo_t *" } },
> > +	{ "enqueue", 1, { 0, 0, "struct task_struct *", "psinfo_t *" } },
> > +	{ "enqueue", 2, { 1, 0, "cpuinfo_t *", } },
> > +	{ "dequeue", 3, { 2, 0, "int", } },
> > +	{ "off-cpu", 0, { 0, 0, "struct task_struct *", "lwpsinfo_t *" } },
> > +	{ "off-cpu", 1, { 0, 0, "struct task_struct *", "psinfo_t *" } },
> > +	{ "on-cpu", },
> > +#if 0
> > +	{ "preempt", },
> > +	{ "remain-cpu", },
> > +	{ "sleep", },
> > +#endif
> > +	{ "surrender", 0, { 0, 0, "struct task_struct *", "lwpsinfo_t *" } },
> > +	{ "surrender", 1, { 0, 0, "struct task_struct *", "psinfo_t *" } },
> > +	{ "tick", 0, { 0, 0, "struct task_struct *", "lwpsinfo_t *" } },
> > +	{ "tick", 1, { 0, 0, "struct task_struct *", "psinfo_t *" } },
> > +	{ "wakeup", 0, { 0, 0, "struct task_struct *", "lwpsinfo_t *" } },
> > +	{ "wakeup", 1, { 0, 0, "struct task_struct *", "psinfo_t *" } },
> > +	{ NULL, }
> > +};
> > +
> > +static const dtrace_pattr_t	pattr = {
> > +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
> > +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
> > +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
> > +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
> > +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
> > +};
> > +
> > +/*
> > + * Provide all the "sched" SDT probes.
> > + */
> > +static int populate(dtrace_hdl_t *dtp)
> > +{
> > +	return dt_sdt_populate(dtp, prvname, modname, &dt_sched, &pattr,				       probe_args, probes);
> > +}
> 
> Missing line feed?

Oops, yes.

> > +
> > +/*
> > + * Generate a BPF trampoline for a SDT probe.
> > + *
> > + * The trampoline function is called when a SDT probe triggers, and it must
> > + * satisfy the following prototype:
> > + *
> > + *	int dt_sched(void *data)
> > + *
> > + * The trampoline will populate a dt_dctx_t struct and then call the function
> > + * that implements the compiled D clause.  It returns the value that it gets
> > + * back from that function.
> > + */
> > +static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> > +{
> > +	dt_irlist_t	*dlp = &pcb->pcb_ir;
> > +	dt_probe_t	*prp = pcb->pcb_probe;
> > +
> > +	if (strcmp(prp->desc->prb, "dequeue") == 0) {
> > +		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(1)));
> > +		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
> > +		/*
> > +		 * FIXME: arg1 should be a pointer to cpuinfo_t for the CPU
> > +		 *	  associated with the runqueue.
> > +		 */
> > +		emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(1), 0));
> > +	} else if (strcmp(prp->desc->prb, "enqueue") == 0) {
> > +#define ENQUEUE_HEAD	0x10

As Nick suggested, I will document this.

> > +		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(1)));
> > +		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
> > +		/*
> > +		 * FIXME: arg1 should be a pointer to cpuinfo_t for the CPU
> > +		 *	  associated with the runqueue.
> > +		 */
> > +		emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(1), 0));
> > +		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(2)));
> > +		emit(dlp, BPF_ALU64_IMM(BPF_AND, BPF_REG_0, ENQUEUE_HEAD));
> > +		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(2), BPF_REG_0));
> > +	} else if (strcmp(prp->desc->prb, "off-cpu") == 0) {
> > +		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(2)));
> > +		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
> > +	} else if (strcmp(prp->desc->prb, "surrender") == 0 ||
> > +		   strcmp(prp->desc->prb, "tick") == 0) {
> > +		emit(dlp, BPF_CALL_HELPER(BPF_FUNC_get_current_task));
> > +		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +dt_provimpl_t	dt_sched = {
> > +	.name		= prvname,
> > +	.prog_type	= BPF_PROG_TYPE_UNSPEC,
> > +	.populate	= &populate,
> > +	.enable		= &dt_sdt_enable,
> > +	.trampoline	= &trampoline,
> > +	.probe_info	= &dt_sdt_probe_info,
> > +};
> > diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
> > index 068c65a2..252a73d4 100644
> > --- a/libdtrace/dt_provider.h
> > +++ b/libdtrace/dt_provider.h
> > @@ -73,6 +73,7 @@ extern dt_provimpl_t dt_fbt;
> >   extern dt_provimpl_t dt_proc;
> >   extern dt_provimpl_t dt_profile;
> >   extern dt_provimpl_t dt_rawtp;
> > +extern dt_provimpl_t dt_sched;
> >   extern dt_provimpl_t dt_sdt;
> >   extern dt_provimpl_t dt_syscall;
> >   extern dt_provimpl_t dt_uprobe;
> 
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel



More information about the DTrace-devel mailing list