[DTrace-devel] [PATCH 6/8] fbt: performance improvements

Kris Van Hees kris.van.hees at oracle.com
Wed Mar 12 05:33:46 UTC 2025


On Wed, Mar 12, 2025 at 01:17:51AM -0400, Eugene Loh wrote:
> Sorry for the slow progress.  Anyhow, with this patch, I get failures on
> test/unittest/lockstat/tst.lockstat-summary.d on x86 UEK7 systems.  Stuff
> like:
> 
>         dtrace: could not enable tracing: BPF program load for
> 'fbt:vmlinux:native_queued_spin_lock_slowp: Invalid argument
> 
> Well, in dt_prov_lockstat.c, I see:
>         { "spin-spin", DTRACE_PROBESPEC_FUNC, "fbt::queued_spin_lock_*" },
>         { "spin-spin", DTRACE_PROBESPEC_FUNC,
> "fbt::native_queued_spin_lock_*" },
> 
> And on those problematic systems, I see:
>         $ sudo build/run-dtrace -lP fbt |& grep native_queued
>         98429        fbt           vmlinux native_queued_spin_lock_slowpath
> return
>         98428        fbt           vmlinux native_queued_spin_lock_slowpath
> entry
>          9433        fbt           vmlinux
> native_queued_spin_lock_slowpath.part.0 return
>          9432        fbt           vmlinux
> native_queued_spin_lock_slowpath.part.0 entry
> 
> In contrast, on UEK8, "dtrace -lP fbt" does not include the .part.0 probes.
> 
> Back on the UEK7 systems, if I modify dt_prov_lockstat.c like this:
>     -   { "spin-spin", DTRACE_PROBESPEC_FUNC,
> "fbt::native_queued_spin_lock_*" },
>     +   { "spin-spin", DTRACE_PROBESPEC_FUNC,
> "fbt::native_queued_spin_lock_slowpath" },
> the test passes.
> 
> Is that the right change to make?  If so, shall I submit a patch, or should
> it go with your patch series?

No change should be needed to the lockstat provider.  You uncovered a bug
in my patch - I'll fix it.

In short, while the provide() function filters out function names that have
a '.' in them for the pdp->fun specification string, it fails to do so for
the case where globbing requires us to loop over possible function names that
could match pdp->fun (or all function names if pdp->fun == "").  We need to
exclude names with "." in them there also because the FBT provider does not
allow probing of those synmbols.

I think I'll just defer that check to provide_probe() since I can do it in a
single place for all cases.

Incidentally, I also just noticed that dt_modsym_mark_traceable(dtp); is being
done too early.  We only really need that to be done once we get to looking at
function symbols.  I'll move it - that way we avoid marking function traceable
for probes that cannot be FBT probes because of probe name or module name.

I'll send out a v2 tomorrow with that fix.

> On 3/7/25 16:34, Kris Van Hees wrote:
> > Up until now, FBT probes were registered for every symbol that was
> > listed as traceable.  Most tracing session do not use most or even
> > any of these, and the process of registering them all was quite
> > slow.
> > 
> > Going forward, FBT probes are registered on demand.
> > 
> > If any FBT probes are to be registered, the first will incur the
> > cost of reading the entire list of traceable symbols.  Any further
> > FBT probe registration will be able to be satisfied based on that
> > initial processing.  The performance improvement is therefore quite
> > significant for tracing sessions that do not trigger any FBT probe
> > registration, and if FBT probes are used, the improvement is still
> > quite noticable because only the probes that are actually needed
> > get registered.
> > 
> > Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> > ---
> >   libdtrace/dt_module.c   |  78 +++++++++++++++
> >   libdtrace/dt_module.h   |   2 +
> >   libdtrace/dt_prov_fbt.c | 217 +++++++++++++++++++++++++++-------------
> >   3 files changed, 228 insertions(+), 69 deletions(-)
> > 
> > diff --git a/libdtrace/dt_module.c b/libdtrace/dt_module.c
> > index 2e915e2f..e7553a07 100644
> > --- a/libdtrace/dt_module.c
> > +++ b/libdtrace/dt_module.c
> > @@ -22,6 +22,7 @@
> >   #include <port.h>
> >   #include <zlib.h>
> > +#include <tracefs.h>
> >   #include <dt_kernel_module.h>
> >   #include <dt_module.h>
> > @@ -1044,6 +1045,83 @@ dt_kern_module_find_ctf(dtrace_hdl_t *dtp, dt_module_t *dmp)
> >   	}
> >   }
> > +#define PROBE_LIST		TRACEFS "available_filter_functions"
> > +
> > +/*
> > + * Determine which kernel functions are traceable and mark them.
> > + */
> > +void
> > +dt_modsym_mark_traceable(dtrace_hdl_t *dtp)
> > +{
> > +	FILE			*f;
> > +	char			*buf = NULL;
> > +	size_t			len = 0;
> > +
> > +	if (dt_symtab_traceable(dtp->dt_exec->dm_kernsyms))
> > +		return;
> > +
> > +	f = fopen(PROBE_LIST, "r");
> > +	if (f == NULL)
> > +		return;
> > +
> > +	while (getline(&buf, &len, f) >= 0) {
> > +		char			*p;
> > +		dt_symbol_t		*sym = NULL;
> > +
> > +		/*
> > +		 * Here buf is either "funcname\n" or "funcname [modname]\n".
> > +		 * The last line may not have a linefeed.
> > +		 */
> > +		p = strchr(buf, '\n');
> > +		if (p) {
> > +			*p = '\0';
> > +			if (p > buf && *(--p) == ']')
> > +				*p = '\0';
> > +		}
> > +
> > +		/*
> > +		 * Now buf is either "funcname" or "funcname [modname".  If
> > +		 * there is no module name provided, we will use the default.
> > +		 */
> > +		p = strchr(buf, ' ');
> > +		if (p) {
> > +			*p++ = '\0';
> > +			if (*p == '[')
> > +				p++;
> > +		}
> > +
> > +#define strstarts(var, x) (strncmp(var, x, strlen (x)) == 0)
> > +		/* Weed out __ftrace_invalid_address___* entries. */
> > +		if (strstarts(buf, "__ftrace_invalid_address__") ||
> > +		    strstarts(buf, "__probestub_") ||
> > +		    strstarts(buf, "__traceiter_"))
> > +			continue;
> > +#undef strstarts
> > +
> > +		/*
> > +		 * If we have a module name, look for the symbol in that
> > +		 * module.
> > +		 * If not, perform a general symbol lookup to find its first
> > +		 * instance.
> > +		 */
> > +		if (p) {
> > +			dt_module_t	*dmp = dt_module_lookup_by_name(dtp, p);
> > +
> > +			if (dmp)
> > +				sym = dt_module_symbol_by_name(dtp, dmp, buf);
> > +		} else
> > +			sym = dt_symbol_by_name(dtp, buf);
> > +
> > +		if (sym)
> > +			dt_symbol_set_traceable(sym);
> > +	}
> > +
> > +	free(buf);
> > +	fclose(f);
> > +
> > +	dt_symtab_set_traceable(dtp->dt_exec->dm_kernsyms);
> > +}
> > +
> >   /*
> >    * Symbol data can be collected in three ways:
> >    *  - kallmodsyms
> > diff --git a/libdtrace/dt_module.h b/libdtrace/dt_module.h
> > index 56df17a6..dd3ad17c 100644
> > --- a/libdtrace/dt_module.h
> > +++ b/libdtrace/dt_module.h
> > @@ -25,6 +25,8 @@ extern dt_ident_t *dt_module_extern(dtrace_hdl_t *, dt_module_t *,
> >   extern const char *dt_module_modelname(dt_module_t *);
> > +extern void dt_modsym_mark_traceable(dtrace_hdl_t *);
> > +
> >   #ifdef	__cplusplus
> >   }
> >   #endif
> > diff --git a/libdtrace/dt_prov_fbt.c b/libdtrace/dt_prov_fbt.c
> > index eef93879..d837e14d 100644
> > --- a/libdtrace/dt_prov_fbt.c
> > +++ b/libdtrace/dt_prov_fbt.c
> > @@ -41,10 +41,8 @@
> >   #include "dt_pt_regs.h"
> >   static const char		prvname[] = "fbt";
> > -static const char		modname[] = "vmlinux";
> >   #define KPROBE_EVENTS		TRACEFS "kprobe_events"
> > -#define PROBE_LIST		TRACEFS "available_filter_functions"
> >   #define FBT_GROUP_FMT		GROUP_FMT "_%s"
> >   #define FBT_GROUP_DATA		GROUP_DATA, prp->desc->prb
> > @@ -61,19 +59,11 @@ dt_provimpl_t			dt_fbt_fprobe;
> >   dt_provimpl_t			dt_fbt_kprobe;
> >   /*
> > - * Scan the PROBE_LIST file and add entry and return probes for every function
> > - * that is listed.
> > + * Create the fbt provider.
> >    */
> >   static int populate(dtrace_hdl_t *dtp)
> >   {
> >   	dt_provider_t		*prv;
> > -	FILE			*f;
> > -	char			*buf = NULL;
> > -	char			*p;
> > -	const char		*mod = modname;
> > -	size_t			n;
> > -	dtrace_syminfo_t	sip;
> > -	dtrace_probedesc_t	pd;
> >   	dt_fbt = BPF_HAS(dtp, BPF_FEAT_FENTRY) ? dt_fbt_fprobe : dt_fbt_kprobe;
> > @@ -81,79 +71,166 @@ static int populate(dtrace_hdl_t *dtp)
> >   	if (prv == NULL)
> >   		return -1;			/* errno already set */
> > -	f = fopen(PROBE_LIST, "r");
> > -	if (f == NULL)
> > +	return 0;
> > +}
> > +
> > +/* Create a probe (if it does not exist yet). */
> > +static int provide_probe(dtrace_hdl_t *dtp, const dtrace_probedesc_t *pdp)
> > +{
> > +	dt_provider_t	*prv = dt_provider_lookup(dtp, pdp->prv);
> > +
> > +	if (prv == NULL)
> > +		return 0;
> > +	if (dt_probe_lookup(dtp, pdp) != NULL)
> >   		return 0;
> > +	if (dt_tp_probe_insert(dtp, prv, pdp->prv, pdp->mod, pdp->fun, pdp->prb))
> > +		return 1;
> > -	while (getline(&buf, &n, f) >= 0) {
> > -		/*
> > -		 * Here buf is either "funcname\n" or "funcname [modname]\n".
> > -		 * The last line may not have a linefeed.
> > -		 */
> > -		p = strchr(buf, '\n');
> > -		if (p) {
> > -			*p = '\0';
> > -			if (p > buf && *(--p) == ']')
> > -				*p = '\0';
> > +	return 0;
> > +}
> > +
> > +/*
> > + * Try to provide probes for the given probe description.  The caller ensures
> > + * that the provider name in probe desxcription (if any) is a match for this
> > + * provider.  When this is called, we already know that this provider matches
> > + * the provider component of the probe specification.
> > + */
> > +#define FBT_ENTRY	1
> > +#define FBT_RETURN	2
> > +
> > +static int provide(dtrace_hdl_t *dtp, const dtrace_probedesc_t *pdp)
> > +{
> > +	int			n = 0;
> > +	int			prb = 0;
> > +	dt_module_t		*dmp = NULL;
> > +	dt_symbol_t		*sym = NULL;
> > +	dt_htab_next_t		*it = NULL;
> > +	dtrace_probedesc_t	pd;
> > +
> > +	dt_modsym_mark_traceable(dtp);
> > +
> > +	/*
> > +	 * Nothing to do if a probe name is specified and cannot match 'entry'
> > +	 * or 'return'.
> > +	 */
> > +	if (dt_gmatch("entry", pdp->prb))
> > +		prb |= FBT_ENTRY;
> > +	if (dt_gmatch("return", pdp->prb))
> > +		prb |= FBT_RETURN;
> > +	if (prb == 0)
> > +		return 0;
> > +
> > +	/* Synthetic function names are not supported for FBT. */
> > +	if (strchr(pdp->fun, '.'))
> > +		return 0;
> > +
> > +	/*
> > +	 * If we have an explicit module name, check it.  If not found, we can
> > +	 * ignore this request.
> > +	 */
> > +	if (pdp->mod[0] != '\0' && strchr(pdp->mod, '*') == NULL) {
> > +		dmp = dt_module_lookup_by_name(dtp, pdp->mod);
> > +		if (dmp == NULL)
> > +			return 0;
> > +	}
> > +
> > +	/*
> > +	 * If we have an explicit function name, we start with a basic symbol
> > +	 * name lookup.
> > +	 */
> > +	if (pdp->fun[0] != '\0' && strchr(pdp->fun, '*') == NULL) {
> > +		/* If we have a module, use it. */
> > +		if (dmp != NULL) {
> > +			sym = dt_module_symbol_by_name(dtp, dmp, pdp->fun);
> > +			if (sym == NULL)
> > +				return 0;
> > +			if (!dt_symbol_traceable(sym))
> > +				return 0;
> > +
> > +			pd.id = DTRACE_IDNONE;
> > +			pd.prv = pdp->prv;
> > +			pd.mod = dmp->dm_name;
> > +			pd.fun = pdp->fun;
> > +
> > +			if (prb & FBT_ENTRY) {
> > +				pd.prb = "entry";
> > +				n += provide_probe(dtp, &pd);
> > +			}
> > +			if (prb & FBT_RETURN) {
> > +				pd.prb = "return";
> > +				n += provide_probe(dtp, &pd);
> > +			}
> > +
> > +			return n;
> >   		}
> > -		/*
> > -		 * Now buf is either "funcname" or "funcname [modname".  If
> > -		 * there is no module name provided, we will use the default.
> > -		 */
> > -		p = strchr(buf, ' ');
> > -		if (p) {
> > -			*p++ = '\0';
> > -			if (*p == '[')
> > -				p++;
> > +		sym = dt_symbol_by_name(dtp, pdp->fun);
> > +		while (sym != NULL) {
> > +			const char	*mod = dt_symbol_module(sym)->dm_name;
> > +
> > +			if (dt_symbol_traceable(sym) &&
> > +			    dt_gmatch(mod, pdp->mod)) {
> > +				pd.id = DTRACE_IDNONE;
> > +				pd.prv = pdp->prv;
> > +				pd.mod = mod;
> > +				pd.fun = pdp->fun;
> > +
> > +				if (prb & FBT_ENTRY) {
> > +					pd.prb = "entry";
> > +					n += provide_probe(dtp, &pd);
> > +				}
> > +				if (prb & FBT_RETURN) {
> > +					pd.prb = "return";
> > +					n += provide_probe(dtp, &pd);
> > +				}
> > +
> > +			}
> > +			sym = dt_symbol_by_name_next(sym);
> >   		}
> > -		/* Weed out synthetic symbol names (that are invalid). */
> > -		if (strchr(buf, '.') != NULL)
> > +		return n;
> > +	}
> > +
> > +	/*
> > +	 * No explicit function name.  We need to go through all possible
> > +	 * symbol names and see if they match.
> > +	 */
> > +	while ((sym = dt_htab_next(dtp->dt_kernsyms, &it)) != NULL) {
> > +		dt_module_t	*smp;
> > +		const char	*fun;
> > +
> > +		/* Ensure the symbol can be traced. */
> > +		if (!dt_symbol_traceable(sym))
> >   			continue;
> > -#define strstarts(var, x) (strncmp(var, x, strlen (x)) == 0)
> > -		/* Weed out __ftrace_invalid_address___* entries. */
> > -		if (strstarts(buf, "__ftrace_invalid_address__") ||
> > -		    strstarts(buf, "__probestub_") ||
> > -		    strstarts(buf, "__traceiter_"))
> > +		/* Match the function name. */
> > +		fun = dt_symbol_name(sym);
> > +		if (!dt_gmatch(fun, pdp->fun))
> >   			continue;
> > -#undef strstarts
> > -		/*
> > -		 * If we did not see a module name, perform a symbol lookup to
> > -		 * try to determine the module name.
> > -		 */
> > -		if (!p) {
> > -			if (dtrace_lookup_by_name(dtp, DTRACE_OBJ_KMODS, buf,
> > -						  NULL, &sip) == 0)
> > -				mod = sip.object;
> > -		} else
> > -			mod = p;
> > +		/* Validate the module name. */
> > +		smp = dt_symbol_module(sym);
> > +		if (dmp) {
> > +			if (smp != dmp)
> > +				continue;
> > +		} else if (!dt_gmatch(smp->dm_name, pdp->mod))
> > +			continue;
> > -		/*
> > -		 * Due to the lack of module names in
> > -		 * TRACEFS/available_filter_functions, there are some duplicate
> > -		 * function names.  We need to make sure that we do not create
> > -		 * duplicate probes for these.
> > -		 */
> >   		pd.id = DTRACE_IDNONE;
> > -		pd.prv = prvname;
> > -		pd.mod = mod;
> > -		pd.fun = buf;
> > -		pd.prb = "entry";
> > -		if (dt_probe_lookup(dtp, &pd) != NULL)
> > -			continue;
> > +		pd.prv = pdp->prv;
> > +		pd.mod = smp->dm_name;
> > +		pd.fun = fun;
> > -		if (dt_tp_probe_insert(dtp, prv, prvname, mod, buf, "entry"))
> > -			n++;
> > -		if (dt_tp_probe_insert(dtp, prv, prvname, mod, buf, "return"))
> > -			n++;
> > +		if (prb & FBT_ENTRY) {
> > +			pd.prb = "entry";
> > +			n += provide_probe(dtp, &pd);
> > +		}
> > +		if (prb & FBT_RETURN) {
> > +			pd.prb = "return";
> > +			n += provide_probe(dtp, &pd);
> > +		}
> >   	}
> > -	free(buf);
> > -	fclose(f);
> > -
> >   	return n;
> >   }
> > @@ -447,6 +524,7 @@ dt_provimpl_t	dt_fbt_fprobe = {
> >   	.prog_type	= BPF_PROG_TYPE_TRACING,
> >   	.stack_skip	= 4,
> >   	.populate	= &populate,
> > +	.provide	= &provide,
> >   	.load_prog	= &fprobe_prog_load,
> >   	.trampoline	= &fprobe_trampoline,
> >   	.attach		= &dt_tp_probe_attach_raw,
> > @@ -459,6 +537,7 @@ dt_provimpl_t	dt_fbt_kprobe = {
> >   	.name		= prvname,
> >   	.prog_type	= BPF_PROG_TYPE_KPROBE,
> >   	.populate	= &populate,
> > +	.provide	= &provide,
> >   	.load_prog	= &dt_bpf_prog_load,
> >   	.trampoline	= &kprobe_trampoline,
> >   	.attach		= &kprobe_attach,



More information about the DTrace-devel mailing list