[DTrace-devel] [PATCH 1/8] proc: convert to use standard SDT provider implementation

Eugene Loh eugene.loh at oracle.com
Mon Mar 10 21:47:58 UTC 2025


Reviewed-by: Eugene Loh <eugene.loh at oracle.com>
albeit a few minor comments on the commit message.

On 3/7/25 16:33, Kris Van Hees via DTrace-devel wrote:
> The prov provider was the first SDT-based provider implememted in this

s/prov /proc /
s/implememted/implemented/

> version, and therefore handled the enabling of probes with custom code.

"this version" seems rather ambiguous.  version of what?
How about s/this version/this port of DTrace to eBPF/ or something?

> When the other SDT-based providers (sched, ...) were implemented, a
> generic SDT-framework was developed.
>
> The proc provider now uses the SDT-framework.

"Now" sometimes means "up until this patch."  How about using the Linux 
convention of describing changes "in imperative mood."  E.g., "Switch 
the proc provider over to the SDT framework."

> Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> ---
>   libdtrace/dt_prov_proc.c | 316 +++++----------------------------------
>   1 file changed, 36 insertions(+), 280 deletions(-)
>
> diff --git a/libdtrace/dt_prov_proc.c b/libdtrace/dt_prov_proc.c
> index 2e514860..15fde6c9 100644
> --- a/libdtrace/dt_prov_proc.c
> +++ b/libdtrace/dt_prov_proc.c
> @@ -8,77 +8,45 @@
>    */
>   #include <assert.h>
>   #include <errno.h>
> -#include <stdio.h>
> -#include <stdlib.h>
> -#include <string.h>
> -#include <unistd.h>
> -#include <sys/ioctl.h>
> -#include <linux/bpf.h>
> -#include <linux/perf_event.h>
> -#include <sys/stat.h>
> -#include <sys/types.h>
> -
> -#include <bpf_asm.h>
>   
>   #include "dt_dctx.h"
>   #include "dt_cg.h"
> +#include "dt_provider_sdt.h"
>   #include "dt_probe.h"
> -#include "dt_pt_regs.h"
>   
>   static const char		prvname[] = "proc";
>   static const char		modname[] = "vmlinux";
>   
> -/*
> - * The proc-provider probes make use of probes that are already provided by
> - * other providers.  As such, the proc probes are 'dependent probes' because
> - * they depend on underlying probes to get triggered and they also depend on
> - * argument data provided by the underlying probe to manufacture their own
> - * arguments.
> - *
> - * As a type of SDT probes, proc probes are defined with a signature (list of
> - * arguments - possibly empty) that may use translator support to provide the
> - * actual argument values.  Therefore, obtaining the value of arguments for
> - * a proc probe goes through two layers of processing:
> - *
> - *  (1) the arguments of the underlying probe are reworked to match the
> - *	expected layout of raw arguments for the proc probe
> - *  (2) an argument mapping table (and supporting translators) is used to get
> - *	the value of an arguument based on the raw variable data of the proc
> - *	probe
> - *
> - * To accomplish this, proc probes generate a trampoline that rewrites the
> - * arguments of the underlying probe.  (The dependent probe support code in the
> - * underlying probe saves the arguments of the underying probe in the mstate
> - * before executing the trampoline and clauses of the dependent probe, and it
> - * restores them afterwards in case there are multiple dependent probes.)
> - *
> - * Because proc probes dependent on an underlying probe that may be too generic
> - * (e.g. proc:::exec-success depending on syscall::execve*:return), the
> - * trampoline code can include a pre-condition (much like a predicate) that can
> - * bypass execution unless the condition is met (e.g. proc:::exec-success
> - * requires syscall::execve*:return's arg1 to be 0).
> - *
> - * FIXME:
> - * The dependent probe support should include a priority specification to drive
> - * the order in which dependent probes are added to the underlying probe.  This
> - * is needed to enforce specific probe firing semantics (e.g. proc:::start must
> - * always precede proc:::lwp-start).
> - */
> -
> -typedef struct probe_arg {
> -	const char	*name;			/* name of probe */
> -	int		argno;			/* argument number */
> -	dt_argdesc_t	argdesc;		/* argument description */
> -} probe_arg_t;
> +static probe_dep_t	probes[] = {
> +	{ "create",
> +	  DTRACE_PROBESPEC_NAME,	"rawtp:sched::sched_process_fork" },
> +	{ "exec",
> +	  DTRACE_PROBESPEC_NAME,	"syscall::execve*:entry" },
> +	{ "exec-failure",
> +	  DTRACE_PROBESPEC_NAME,	"syscall::execve*:return" },
> +	{ "exec-success",
> +	  DTRACE_PROBESPEC_NAME,	"syscall::execve*:return" },
> +	{ "exit",
> +	  DTRACE_PROBESPEC_NAME,	"rawtp:sched::sched_process_exit" },
> +	{ "lwp-create",
> +	  DTRACE_PROBESPEC_NAME,	"rawtp:sched::sched_process_fork" },
> +	{ "lwp-exit",
> +	  DTRACE_PROBESPEC_NAME,	"rawtp:sched::sched_process_exit" },
> +	{ "lwp-start",
> +	  DTRACE_PROBESPEC_NAME,	"fbt::schedule_tail:return" },
> +	{ "signal-clear",
> +	  DTRACE_PROBESPEC_NAME,	"syscall::rt_sigtimedwait:return" },
> +	{ "signal-discard",
> +	  DTRACE_PROBESPEC_NAME,	"rawtp:signal::signal_generate" },
> +	{ "signal-handle",
> +	  DTRACE_PROBESPEC_NAME,	"rawtp:signal::signal_deliver" },
> +	{ "signal-send",
> +	  DTRACE_PROBESPEC_NAME,	"fbt::complete_signal:entry" },
> +	{ "start",
> +	  DTRACE_PROBESPEC_NAME,	"fbt::schedule_tail:return" },
> +	{ NULL, }
> +};
>   
> -/*
> - * Probe signature specifications
> - *
> - * This table *must* group the arguments of probes.  I.e. the arguments of a
> - * given probe must be listed in consecutive records.
> - * A single probe entry that mentions only name of the probe indicates a probe
> - * that provides no arguments.
> - */
>   static probe_arg_t probe_args[] = {
>   	{ "create", 0, { 0, 0, "struct task_struct *", "psinfo_t *" } },
>   	{ "exec", 0, { 0, DT_NF_USERLAND, "string", } },
> @@ -100,6 +68,7 @@ static probe_arg_t probe_args[] = {
>   	{ "signal-send", 1, { 0, 0, "struct task_struct *", "psinfo_t *" } },
>   	{ "signal-send", 2, { 1, 0, "int", } },
>   	{ "start", },
> +	{ NULL, },
>   };
>   
>   static const dtrace_pattr_t	pattr = {
> @@ -115,173 +84,8 @@ static const dtrace_pattr_t	pattr = {
>    */
>   static int populate(dtrace_hdl_t *dtp)
>   {
> -	dt_provider_t	*prv;
> -	int		i;
> -	int		n = 0;
> -
> -	prv = dt_provider_create(dtp, prvname, &dt_proc, &pattr, NULL);
> -	if (prv == NULL)
> -		return -1;			/* errno already set */
> -
> -	/*
> -	 * Create "proc" probes based on the probe_args list.  Since each probe
> -	 * will have at least one entry (with argno == 0), we can use those
> -	 * entries to identify the probe names.
> -	 */
> -	for (i = 0; i < ARRAY_SIZE(probe_args); i++) {
> -		probe_arg_t	*arg = &probe_args[i];
> -
> -		if (arg->argno == 0 &&
> -		    dt_probe_insert(dtp, prv, prvname, modname, "", arg->name,
> -				    NULL))
> -			n++;
> -	}
> -
> -	return n;
> -}
> -
> -static void enable(dtrace_hdl_t *dtp, dt_probe_t *prp)
> -{
> -	dt_probe_t		*uprp = NULL;
> -	dtrace_probedesc_t	pd;
> -
> -	if (strcmp(prp->desc->prb, "create") == 0 ||
> -	    strcmp(prp->desc->prb, "lwp-create") == 0) {
> -		pd.id = DTRACE_IDNONE;
> -		pd.prv = "rawtp";
> -		pd.mod = "sched";
> -		pd.fun = "";
> -		pd.prb = "sched_process_fork";
> -
> -		uprp = dt_probe_lookup(dtp, &pd);
> -		assert(uprp != NULL);
> -
> -		dt_probe_add_dependent(dtp, uprp, prp);
> -		dt_probe_enable(dtp, uprp);
> -	} else if (strcmp(prp->desc->prb, "exec") == 0) {
> -		pd.id = DTRACE_IDNONE;
> -		pd.prv = "syscall";
> -		pd.mod = "";
> -		pd.fun = "execve";
> -		pd.prb = "entry";
> -
> -		uprp = dt_probe_lookup(dtp, &pd);
> -		assert(uprp != NULL);
> -
> -		dt_probe_add_dependent(dtp, uprp, prp);
> -		dt_probe_enable(dtp, uprp);
> -
> -		pd.fun = "execveat";
> -
> -		uprp = dt_probe_lookup(dtp, &pd);
> -		assert(uprp != NULL);
> -
> -		dt_probe_add_dependent(dtp, uprp, prp);
> -		dt_probe_enable(dtp, uprp);
> -	} else if (strcmp(prp->desc->prb, "exec-failure") == 0 ||
> -		   strcmp(prp->desc->prb, "exec-success") == 0) {
> -		pd.id = DTRACE_IDNONE;
> -		pd.prv = "syscall";
> -		pd.mod = "";
> -		pd.fun = "execve";
> -		pd.prb = "return";
> -
> -		uprp = dt_probe_lookup(dtp, &pd);
> -		assert(uprp != NULL);
> -
> -		dt_probe_add_dependent(dtp, uprp, prp);
> -		dt_probe_enable(dtp, uprp);
> -
> -		pd.fun = "execveat";
> -
> -		uprp = dt_probe_lookup(dtp, &pd);
> -		assert(uprp != NULL);
> -
> -		dt_probe_add_dependent(dtp, uprp, prp);
> -		dt_probe_enable(dtp, uprp);
> -	} else if (strcmp(prp->desc->prb, "exit") == 0 ||
> -		   strcmp(prp->desc->prb, "lwp-exit") == 0) {
> -		pd.id = DTRACE_IDNONE;
> -		pd.prv = "rawtp";
> -		pd.mod = "";
> -		pd.fun = "";
> -		pd.prb = "sched_process_exit";
> -
> -		uprp = dt_probe_lookup(dtp, &pd);
> -		assert(uprp != NULL);
> -
> -		dt_probe_add_dependent(dtp, uprp, prp);
> -		dt_probe_enable(dtp, uprp);
> -	} else if (strcmp(prp->desc->prb, "signal-clear") == 0) {
> -		pd.id = DTRACE_IDNONE;
> -		pd.prv = "syscall";
> -		pd.mod = "";
> -		pd.fun = "rt_sigtimedwait";
> -		pd.prb = "return";
> -
> -		uprp = dt_probe_lookup(dtp, &pd);
> -		assert(uprp != NULL);
> -
> -		dt_probe_add_dependent(dtp, uprp, prp);
> -		dt_probe_enable(dtp, uprp);
> -	} else if (strcmp(prp->desc->prb, "signal-discard") == 0) {
> -		pd.id = DTRACE_IDNONE;
> -		pd.prv = "rawtp";
> -		pd.mod = "signal";
> -		pd.fun = "";
> -		pd.prb = "signal_generate";
> -
> -		uprp = dt_probe_lookup(dtp, &pd);
> -		assert(uprp != NULL);
> -
> -		dt_probe_add_dependent(dtp, uprp, prp);
> -		dt_probe_enable(dtp, uprp);
> -	} else if (strcmp(prp->desc->prb, "signal-handle") == 0) {
> -		pd.id = DTRACE_IDNONE;
> -		pd.prv = "rawtp";
> -		pd.mod = "signal";
> -		pd.fun = "";
> -		pd.prb = "signal_deliver";
> -
> -		uprp = dt_probe_lookup(dtp, &pd);
> -		assert(uprp != NULL);
> -
> -		dt_probe_add_dependent(dtp, uprp, prp);
> -		dt_probe_enable(dtp, uprp);
> -	} else if (strcmp(prp->desc->prb, "signal-send") == 0) {
> -		pd.id = DTRACE_IDNONE;
> -		pd.prv = "fbt";
> -		pd.mod = "";
> -		pd.fun = "complete_signal";
> -		pd.prb = "entry";
> -
> -		uprp = dt_probe_lookup(dtp, &pd);
> -		assert(uprp != NULL);
> -
> -		dt_probe_add_dependent(dtp, uprp, prp);
> -		dt_probe_enable(dtp, uprp);
> -	} else if (strcmp(prp->desc->prb, "start") == 0 ||
> -		   strcmp(prp->desc->prb, "lwp-start") == 0) {
> -		pd.id = DTRACE_IDNONE;
> -		pd.prv = "fbt";
> -		pd.mod = "";
> -		pd.fun = "schedule_tail";
> -		pd.prb = "return";
> -
> -		uprp = dt_probe_lookup(dtp, &pd);
> -		assert(uprp != NULL);
> -
> -		dt_probe_add_dependent(dtp, uprp, prp);
> -		dt_probe_enable(dtp, uprp);
> -	}
> -
> -	/*
> -	 * Finally, ensure we're in the list of enablings as well.
> -	 * (This ensures that, among other things, the probes map
> -	 * gains entries for us.)
> -	 */
> -	if (!dt_in_list(&dtp->dt_enablings, prp))
> -		dt_list_append(&dtp->dt_enablings, prp);
> +	return dt_sdt_populate(dtp, prvname, modname, &dt_proc, &pattr,
> +			       probe_args, probes);
>   }
>   
>   /*
> @@ -434,61 +238,13 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
>   	return 0;
>   }
>   
> -static int probe_info(dtrace_hdl_t *dtp, const dt_probe_t *prp,
> -		      int *argcp, dt_argdesc_t **argvp)
> -{
> -	int		i;
> -	int		pidx = -1;
> -	int		argc = 0;
> -	dt_argdesc_t	*argv = NULL;
> -
> -	for (i = 0; i < ARRAY_SIZE(probe_args); i++) {
> -		probe_arg_t	*arg = &probe_args[i];
> -
> -		if (strcmp(arg->name, prp->desc->prb) == 0) {
> -			if (pidx == -1) {
> -				pidx = i;
> -
> -				if (arg->argdesc.native == NULL)
> -					break;
> -			}
> -
> -			argc++;
> -		}
> -	}
> -
> -	if (argc == 0)
> -		goto done;
> -
> -	argv = dt_zalloc(dtp, argc * sizeof(dt_argdesc_t));
> -	if (!argv)
> -		return -ENOMEM;
> -
> -	for (i = pidx; i < pidx + argc; i++) {
> -		probe_arg_t	*arg = &probe_args[i];
> -		dt_argdesc_t	*argd = &arg->argdesc;
> -		dt_argdesc_t	*parg = &argv[arg->argno];
> -
> -		*parg = *argd;
> -		if (argd->native)
> -			parg->native = strdup(argd->native);
> -		if (argd->xlate)
> -			parg->xlate = strdup(argd->xlate);
> -	}
> -
> -done:
> -	*argcp = argc;
> -	*argvp = argv;
> -
> -	return 0;
> -}
> -
>   dt_provimpl_t	dt_proc = {
>   	.name		= prvname,
>   	.prog_type	= BPF_PROG_TYPE_UNSPEC,
>   	.populate	= &populate,
> -	.enable		= &enable,
> +	.enable		= &dt_sdt_enable,
>   	.load_prog	= &dt_bpf_prog_load,
>   	.trampoline	= &trampoline,
> -	.probe_info	= &probe_info,
> +	.probe_info	= &dt_sdt_probe_info,
> +	.destroy	= &dt_sdt_destroy,
>   };



More information about the DTrace-devel mailing list