[DTrace-devel] [PATCH v2 1/2] Implement the sched provider

Eugene Loh eugene.loh at oracle.com
Tue May 23 19:57:24 UTC 2023


Still no tests in this patch?

Should there be a comment noting that this is a partial implementation 
(i.e., that not all sched probes are provided yet)?

How do I map what I see in probes[] to the DTv1 DTRACE_SCHED() 
instrumentation points we used to have in kernel/sched/core.c? Or, if 
that is the wrong way to think about it, how about a few words about the 
change?

On 5/22/23 18:51, Kris Van Hees via DTrace-devel wrote:
> Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> ---
>   libdtrace/Build           |   2 +
>   libdtrace/dt_open.c       |   1 +
>   libdtrace/dt_prov_sched.c | 140 ++++++++++++++++++++++++++++++++++++++
>   libdtrace/dt_provider.h   |   1 +
>   4 files changed, 144 insertions(+)
>   create mode 100644 libdtrace/dt_prov_sched.c
>
> diff --git a/libdtrace/Build b/libdtrace/Build
> index cf18a043..89972c66 100644
> --- a/libdtrace/Build
> +++ b/libdtrace/Build
> @@ -52,6 +52,7 @@ libdtrace-build_SOURCES = dt_aggregate.c \
>   			  dt_prov_proc.c \
>   			  dt_prov_profile.c \
>   			  dt_prov_rawtp.c \
> +			  dt_prov_sched.c \
>   			  dt_prov_sdt.c \
>   			  dt_prov_syscall.c \
>   			  dt_prov_uprobe.c \
> @@ -96,6 +97,7 @@ dt_prov_fbt.c_CFLAGS := -Wno-pedantic
>   dt_prov_proc.c_CFLAGS := -Wno-pedantic
>   dt_prov_profile.c_CFLAGS := -Wno-pedantic
>   dt_prov_rawtp.c_CFLAGS := -Wno-pedantic
> +dt_prov_sched.c_CFLAGS := -Wno-pedantic
>   dt_prov_sdt.c_CFLAGS := -Wno-pedantic
>   dt_prov_syscall.c_CFLAGS := -Wno-pedantic
>   dt_prov_uprobe.c_CFLAGS := -Wno-pedantic
> diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
> index d13b859a..110d45bc 100644
> --- a/libdtrace/dt_open.c
> +++ b/libdtrace/dt_open.c
> @@ -71,6 +71,7 @@ static const dt_provimpl_t *dt_providers[] = {
>   	&dt_proc,
>   	&dt_profile,
>   	&dt_rawtp,
> +	&dt_sched,
>   	&dt_sdt,
>   	&dt_syscall,
>   	&dt_uprobe,
> diff --git a/libdtrace/dt_prov_sched.c b/libdtrace/dt_prov_sched.c
> new file mode 100644
> index 00000000..c6a09747
> --- /dev/null
> +++ b/libdtrace/dt_prov_sched.c
> @@ -0,0 +1,140 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + *
> + * The 'sched' SDT provider for DTrace-specific probes.
> + */
> +#include <assert.h>
> +#include <errno.h>
> +
> +#include "dt_dctx.h"
> +#include "dt_cg.h"
> +#include "dt_provider_sdt.h"
> +#include "dt_probe.h"
> +
> +static const char		prvname[] = "sched";	/* provider name (dt_sched.name) */
> +static const char		modname[] = "vmlinux";	/* module name given to dt_sdt_populate() */
> +
> +static probe_dep_t	probes[] = {	/* sched probe -> underlying probe spec */

How about some #if 0 for probes we do not yet implement? (Admittedly, 
that's maybe a useless idea since we don't have much meaningful to put 
into the #if 0 other than the unimplemented probe's name.)

> +	{ "dequeue",
> +	  DTRACE_PROBESPEC_NAME,	"fbt::dequeue_task_*:entry" },
> +	{ "enqueue",
> +	  DTRACE_PROBESPEC_NAME,	"fbt::enqueue_task_*:entry" },
> +	{ "off-cpu",
> +	  DTRACE_PROBESPEC_NAME,	"rawtp:sched::sched_switch" },
> +	{ "on-cpu",
> +	  DTRACE_PROBESPEC_NAME,	"fbt::schedule_tail:entry" },
> +	{ "surrender",
> +	  DTRACE_PROBESPEC_NAME,	"fbt::do_sched_yield:entry" },
> +	{ "tick",
> +	  DTRACE_PROBESPEC_NAME,	"fbt::scheduler_tick:entry" },
> +	{ "wakeup",
> +	  DTRACE_PROBESPEC_NAME,	"rawtp:sched::sched_wakeup" },
> +	{ NULL, }
> +};
> +
> +/*
> + * Argument descriptions for the sched probes, terminated by a NULL name.
> + *
> + * Each entry gives the probe name, the index of the probe argument being
> + * described, and then { source argument index, flags, native type, translated
> + * type }.  (Field meanings are inferred from how the table is used here;
> + * confirm against the probe_arg_t declaration in dt_provider_sdt.h.)
> + *
> + * Entries under #if 0 are for probes that have no entry in probes[] yet.
> + */
> +static probe_arg_t probe_args[] = {
> +#if 0
> +	{ "change-pri", 0, { 0, 0, "struct task_struct *", "lwpsinfo_t *" } },
> +	{ "change-pri", 1, { 0, 0, "struct task_struct *", "psinfo_t *" } },
> +	{ "change-pri", 2, { 1, 0, "int", } },
> +#endif
> +	{ "dequeue", 0, { 0, 0, "struct task_struct *", "lwpsinfo_t *" } },
> +	{ "dequeue", 1, { 0, 0, "struct task_struct *", "psinfo_t *" } },
> +	{ "dequeue", 2, { 1, 0, "cpuinfo_t *", } },
> +	{ "enqueue", 0, { 0, 0, "struct task_struct *", "lwpsinfo_t *" } },
> +	{ "enqueue", 1, { 0, 0, "struct task_struct *", "psinfo_t *" } },
> +	{ "enqueue", 2, { 1, 0, "cpuinfo_t *", } },
> +	/*
> +	 * The 4th argument belongs to enqueue, not dequeue: only the enqueue
> +	 * trampoline stores a value (flags & ENQUEUE_HEAD) in argument slot 2.
> +	 */
> +	{ "enqueue", 3, { 2, 0, "int", } },
> +	{ "off-cpu", 0, { 0, 0, "struct task_struct *", "lwpsinfo_t *" } },
> +	{ "off-cpu", 1, { 0, 0, "struct task_struct *", "psinfo_t *" } },
> +	{ "on-cpu", },
> +#if 0
> +	{ "preempt", },
> +	{ "remain-cpu", },
> +	{ "sleep", },
> +#endif
> +	{ "surrender", 0, { 0, 0, "struct task_struct *", "lwpsinfo_t *" } },
> +	{ "surrender", 1, { 0, 0, "struct task_struct *", "psinfo_t *" } },
> +	{ "tick", 0, { 0, 0, "struct task_struct *", "lwpsinfo_t *" } },
> +	{ "tick", 1, { 0, 0, "struct task_struct *", "psinfo_t *" } },
> +	{ "wakeup", 0, { 0, 0, "struct task_struct *", "lwpsinfo_t *" } },
> +	{ "wakeup", 1, { 0, 0, "struct task_struct *", "psinfo_t *" } },
> +	{ NULL, }
> +};
> +
> +static const dtrace_pattr_t	pattr = {	/* stability attributes handed to dt_sdt_populate() */
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
> +};
> +
> +/*
> + * Provide all the "sched" SDT probes.
> + *
> + * Delegates to dt_sdt_populate(), handing it the provider and module names,
> + * the dt_sched implementation, the stability attributes, and the probe_args[]
> + * and probes[] tables.  The return value of dt_sdt_populate() is passed
> + * through unchanged.
> + */
> +static int populate(dtrace_hdl_t *dtp)
> +{
> +	return dt_sdt_populate(dtp, prvname, modname, &dt_sched, &pattr,				       probe_args, probes);
> +}

Missing line feed after "&pattr,"? The continuation ("probe_args, probes);") ended up on the same line.

> +
> +/*
> + * Generate a BPF trampoline for a SDT probe.
> + *
> + * The trampoline function is called when a SDT probe triggers, and it must
> + * satisfy the following prototype:
> + *
> + *	int dt_sched(void *data)
> + *
> + * The trampoline will populate a dt_dctx_t struct and then call the function
> + * that implements the compiled D clause.  It returns the value that it gets
> + * back from that function.
> + *
> + * The instructions emitted by this function only rewrite the probe argument
> + * slots (DMST_ARG(n), addressed relative to %r7), based on the probe name:
> + *
> + *	dequeue		arg0 = arg1; arg1 = 0
> + *	enqueue		arg0 = arg1; arg1 = 0; arg2 = arg2 & ENQUEUE_HEAD
> + *	off-cpu		arg0 = arg2
> + *	surrender, tick	arg0 = value returned by the get_current_task helper
> + *
> + * No instructions are emitted for any other probe name (e.g. on-cpu, wakeup).
> + * %r0 is used as scratch register.  The exitlbl argument is not used, and the
> + * function always returns 0.
> + */
> +static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> +{
> +	dt_irlist_t	*dlp = &pcb->pcb_ir;
> +	dt_probe_t	*prp = pcb->pcb_probe;
> +
> +	if (strcmp(prp->desc->prb, "dequeue") == 0) {
> +		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(1)));
> +		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
> +		/*
> +		 * FIXME: arg1 should be a pointer to cpuinfo_t for the CPU
> +		 *	  associated with the runqueue.
> +		 */
> +		emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(1), 0));
> +	} else if (strcmp(prp->desc->prb, "enqueue") == 0) {
> +#define ENQUEUE_HEAD	0x10	/* flag bit kept in arg2; presumably mirrors the kernel's ENQUEUE_HEAD -- confirm */
> +		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(1)));
> +		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
> +		/*
> +		 * FIXME: arg1 should be a pointer to cpuinfo_t for the CPU
> +		 *	  associated with the runqueue.
> +		 */
> +		emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(1), 0));
> +		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(2)));
> +		emit(dlp, BPF_ALU64_IMM(BPF_AND, BPF_REG_0, ENQUEUE_HEAD));
> +		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(2), BPF_REG_0));
> +	} else if (strcmp(prp->desc->prb, "off-cpu") == 0) {
> +		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(2)));
> +		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
> +	} else if (strcmp(prp->desc->prb, "surrender") == 0 ||
> +		   strcmp(prp->desc->prb, "tick") == 0) {
> +		emit(dlp, BPF_CALL_HELPER(BPF_FUNC_get_current_task));
> +		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
> +	}
> +
> +	return 0;
> +}
> +
> +dt_provimpl_t	dt_sched = {			/* the sched provider implementation */
> +	.name		= prvname,
> +	.prog_type	= BPF_PROG_TYPE_UNSPEC,
> +	.populate	= &populate,
> +	.enable		= &dt_sdt_enable,	/* defined outside this file */
> +	.trampoline	= &trampoline,
> +	.probe_info	= &dt_sdt_probe_info,	/* defined outside this file */
> +};
> diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
> index 068c65a2..252a73d4 100644
> --- a/libdtrace/dt_provider.h
> +++ b/libdtrace/dt_provider.h
> @@ -73,6 +73,7 @@ extern dt_provimpl_t dt_fbt;
>   extern dt_provimpl_t dt_proc;
>   extern dt_provimpl_t dt_profile;
>   extern dt_provimpl_t dt_rawtp;
> +extern dt_provimpl_t dt_sched;
>   extern dt_provimpl_t dt_sdt;
>   extern dt_provimpl_t dt_syscall;
>   extern dt_provimpl_t dt_uprobe;



More information about the DTrace-devel mailing list