[DTrace-devel] [PATCH 2/7] Implement the sched provider

Eugene Loh eugene.loh at oracle.com
Fri May 12 16:03:26 UTC 2023


First of all, if this patch implements a new provider, where are the 
tests?  For example, there should be tests for "dtrace -l", for 
"dtrace -lv" (for typed args), and of course for actual probe firings.

Also, is this what the sched provider will look like, or shall we 
anticipate changes in the provider soon?

The patch mentions quite a few unimplemented probes.  Could it add 
comments on what the plans are for implementing those probes?

And...

On 5/9/23 18:29, Kris Van Hees via DTrace-devel wrote:
> diff --git a/libdtrace/dt_prov_sched.c b/libdtrace/dt_prov_sched.c
> @@ -0,0 +1,250 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + *
> + * The 'sched' SDT provider for DTrace specific probes.

Hyphenate "DTrace-specific".

> + */
> +
> +/*
> + * Provide all the "sched" SDT probes.
> + */

Okay, but the comment doesn't really say anything.

Also, is there an opportunity to share a lot of code for populate() with 
the lockstat and proc providers?

> +static int populate(dtrace_hdl_t *dtp)
> +{
> +	dt_provider_t	*prv;
> +	int		i;
> +	int		n = 0;
> +
> +	prv = dt_provider_create(dtp, prvname, &dt_sched, &pattr);
> +	if (prv == NULL)
> +		return 0;
> +
> +	/*
> +	 * Create "sched" probes based on the probe_args list.  Since each
> +	 * probe will have at least one entry (with argno == 0), we can use
> +	 * those entries to identify the probe names.
> +	 */
> +	for (i = 0; i < ARRAY_SIZE(probe_args); i++) {
> +		probe_arg_t	*arg = &probe_args[i];
> +
> +		if (arg->argno == 0 &&
> +		    dt_probe_insert(dtp, prv, prvname, modname, "", arg->name,
> +				    NULL))
> +			n++;
> +	}
> +
> +	return n;
> +}
> +
> +static void enable(dtrace_hdl_t *dtp, dt_probe_t *prp)

Is this what the enable() function will look like, or are some changes 
anticipated for it?  Any chance for much code sharing with the lockstat 
provider?

> +{
> +	dt_probe_t		*uprp = NULL;
> +	dtrace_probedesc_t	pd;
> +
> +	if (strcmp(prp->desc->prb, "on-cpu") == 0) {
> +		pd.id = DTRACE_IDNONE;
> +		pd.prv = "fbt";
> +		pd.mod = "";
> +		pd.fun = "schedule_tail";
> +		pd.prb = "entry";
> +
> +		uprp = dt_probe_lookup(dtp, &pd);
> +		assert(uprp != NULL);
> +
> +		dt_probe_add_dependent(dtp, uprp, prp);
> +		dt_probe_enable(dtp, uprp);
> +	} else if (strcmp(prp->desc->prb, "preempt") == 0 ||
> +		   strcmp(prp->desc->prb, "sleep") == 0) {
> +		pd.id = DTRACE_IDNONE;
> +		pd.prv = "sdt";
> +		pd.mod = "sched";
> +		pd.fun = "";
> +		pd.prb = "sched_switch";
> +
> +		uprp = dt_probe_lookup(dtp, &pd);
> +		assert(uprp != NULL);
> +
> +		dt_probe_add_dependent(dtp, uprp, prp);
> +		dt_probe_enable(dtp, uprp);
> +	}
> +
> +	/*
> +	 * Finally, ensure we're in the list of enablings as well.
> +	 * (This ensures that, among other things, the probes map
> +	 * gains entries for us.)
> +	 */
> +	if (!dt_in_list(&dtp->dt_enablings, prp))
> +		dt_list_append(&dtp->dt_enablings, prp);
> +}
> +
> +/*
> + * Generate a BPF trampoline for a SDT probe.
> + *
> + * The trampoline function is called when a SDT probe triggers, and it must
> + * satisfy the following prototype:
> + *
> + *	int dt_proc(void *data)
> + *
> + * The trampoline will populate a dt_dctx_t struct and then call the function
> + * that implements the compiled D clause.  It returns the value that it gets
> + * back from that function.
> + */
> +static void trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> +{
> +	dtrace_hdl_t	*dtp = pcb->pcb_hdl;

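Is dtp used?  It does not appear to be referenced anywhere in this 
function; if so, the declaration can be dropped.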

> +	dt_irlist_t	*dlp = &pcb->pcb_ir;
> +	dt_probe_t	*prp = pcb->pcb_probe;
> +
> +	if (strcmp(prp->desc->prb, "preempt") == 0) {
> +		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(3)));
> +                emit(dlp, BPF_BRANCH_IMM(BPF_JLT, BPF_REG_0, 1 << 8, pcb->pcb_exitlbl));
> +	} else if (strcmp(prp->desc->prb, "sleep") == 0) {
> +		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(3)));
> +                emit(dlp, BPF_BRANCH_IMM(BPF_JGE, BPF_REG_0, 1 << 8, pcb->pcb_exitlbl));
> +	}

Inconsistent use of spaces/tabs for indentation:  the two 
BPF_BRANCH_IMM lines are indented with spaces while the surrounding 
lines use tabs.

> +}
> +
> +static int probe_info(dtrace_hdl_t *dtp, const dt_probe_t *prp,
> +		      int *argcp, dt_argdesc_t **argvp)

Opportunity to share code for this function with lockstat and proc 
providers?

> +{
> +	int		i;
> +	int		pidx = -1;
> +	int		argc = 0;
> +	dt_argdesc_t	*argv = NULL;
> +
> +	for (i = 0; i < ARRAY_SIZE(probe_args); i++) {
> +		probe_arg_t	*arg = &probe_args[i];
> +
> +		if (strcmp(arg->name, prp->desc->prb) == 0) {
> +			if (pidx == -1) {
> +				pidx = i;
> +
> +				if (arg->argdesc.native == NULL)
> +					break;
> +			}
> +
> +			argc++;
> +		}
> +	}
> +
> +	if (argc == 0)
> +		goto done;
> +
> +	argv = dt_zalloc(dtp, argc * sizeof(dt_argdesc_t));
> +	if (!argv)
> +		return -ENOMEM;
> +
> +	for (i = pidx; i < pidx + argc; i++) {
> +		probe_arg_t	*arg = &probe_args[i];
> +
> +		argv[arg->argno] = arg->argdesc;
> +	}
> +
> +done:
> +	*argcp = argc;
> +	*argvp = argv;
> +
> +	return 0;
> +}
> +
> +dt_provimpl_t	dt_sched = {
> +	.name		= prvname,
> +	.prog_type	= BPF_PROG_TYPE_UNSPEC,
> +	.populate	= &populate,
> +	.enable		= &enable,
> +	.trampoline	= &trampoline,
> +	.probe_info	= &probe_info,
> +};
> diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
> index da1b4e57..f3b21f55 100644
> --- a/libdtrace/dt_provider.h
> +++ b/libdtrace/dt_provider.h
> @@ -84,6 +84,7 @@ extern dt_provimpl_t dt_fbt;
>   extern dt_provimpl_t dt_proc;
>   extern dt_provimpl_t dt_profile;
>   extern dt_provimpl_t dt_rawtp;
> +extern dt_provimpl_t dt_sched;
>   extern dt_provimpl_t dt_sdt;
>   extern dt_provimpl_t dt_syscall;
>   extern dt_provimpl_t dt_uprobe;



More information about the DTrace-devel mailing list