[DTrace-devel] [PATCH v3 dtrace 2/4] fbt: support "."-suffixed functions for kprobes

Alan Maguire alan.maguire at oracle.com
Thu Nov 28 09:58:38 UTC 2024


On 28/11/2024 02:22, Kris Van Hees wrote:
> I am still workimg through these patches (and writing some additional
> support patches).  But one annoying detail has come up...  kprobes do
> not provide argument type data whereas we do have that for fprobes.
> So, if there is a function (like find_bio_block) that can be probed
> with fprobe and there is also a find_bio_block.isra.0 optimized form
> that can be probed with probe, they *cannot* be represented by the
> same DTrace probe because one has known argument data and the other
> does not.  And it does not seem safe to assume that the optimized
> form will have the same signature as the regular function.
> 
> Even using the dependent probe mechanism won't help here because that
> is based on being able to convert the arguments of the OS level probe
> intp arguments for the dependent DTrace probe.  But in this case the
> OS level probe (kprobe on find_bio_block.isra.0) has no defined
> arguments (i.e. they are unknown - lack of prototype data) yet the
> dependent DTrace probe expects to be able to populate arguments.
> 
> I think that the only way to really get around this (at least until
> prototype information is made available for the optimized functions,
> if ever) is to have two distinct probes, and requiring the user to
> specify them explicitly, e.g.
> 
> 	fbt::find_bio_block:entry, fbt::find_bio_block.*:entry
> 
> Another option would be to introduce a fbt-opt provider so you can
> write:
> 
> 	fbt::find_bio_block:entry, fbt-opt::find_bio_block:entry
> 
> or even (although it might conflict with USDT probes):
> 
> 	fbt*::find_bio_block:entry
> 
> and the fbt-opt provider can place kprobes on any find_bio_block.*
> symbols and they all would get reported as the same
> fbt-opt::find_bio_block:entry probe firing.
> 
> Thoughts?
>

I'm sort of wondering if we go even further and introduce a separate
kprobe provider. As well as not providing argument types, kprobes also
have a feature fentry does not - we can probe at function+offset. So
conceptually they are a separate thing already really, and though we
fall back to kprobes to provide fbt support when fprobe isn't available,
it might be worth having an explicitly kprobe-based provider since we
can offer different functionality with it. If we had a separate provider
(which still hid the "."-suffixes as these are unstable) we could
support the sched tracing case stably (using
kprobe::finish_task_switch:return) while not getting in the way of
fprobe-based fbt, _and_ covering tracing for those cases that
fprobe-based fbt cannot reach.

With this approach nothing needs to change from the fentry-based fprobe
side; no program-dependent logic handling cases where an unsupported by
fprobe fbt probe is used etc.

> 
> On Wed, Oct 16, 2024 at 04:54:07PM +0100, Alan Maguire wrote:
>> gcc adds suffixes when it carries out optimizations, but often
>> these leave parameters to functions intact.  Many of these functions
>> (like finish_task_switch()) are important for tracing (and they
>> are present in available_filter_functions so are traceable) so it is
>> valuable to support probing them.  For kprobes, the probe name removes
>> the "." suffix, so store it as additional tp event data for kprobe
>> attach (attach time is the only time we need the full function name).
>>
>> fprobe does not support such functions, so the kprobe impl is always
>> used in such cases.
>>
>> Signed-off-by: Alan Maguire <alan.maguire at oracle.com>
>> ---
>>  libdtrace/dt_prov_fbt.c | 76 ++++++++++++++++++++++++++++++++---------
>>  1 file changed, 59 insertions(+), 17 deletions(-)
>>
>> diff --git a/libdtrace/dt_prov_fbt.c b/libdtrace/dt_prov_fbt.c
>> index 21f63ddf..06ea78ca 100644
>> --- a/libdtrace/dt_prov_fbt.c
>> +++ b/libdtrace/dt_prov_fbt.c
>> @@ -21,6 +21,7 @@
>>  #include <assert.h>
>>  #include <errno.h>
>>  #include <fcntl.h>
>> +#include <port.h>
>>  #include <stdio.h>
>>  #include <stdlib.h>
>>  #include <string.h>
>> @@ -63,8 +64,8 @@ static const dtrace_pattr_t	pattr = {
>>   */
>>  static int populate(dtrace_hdl_t *dtp)
>>  {
>> -	dt_provider_t		*prv;
>> -	dt_provimpl_t		*impl;
>> +	dt_provider_t		*default_prv, *kprobe_prv = NULL;
>> +	dt_provimpl_t		*default_impl;
>>  	FILE			*f;
>>  	char			*buf = NULL;
>>  	char			*p;
>> @@ -73,17 +74,25 @@ static int populate(dtrace_hdl_t *dtp)
>>  	dtrace_syminfo_t	sip;
>>  	dtrace_probedesc_t	pd;
>>  
>> -	impl = BPF_HAS(dtp, BPF_FEAT_FENTRY) ? &dt_fbt_fprobe : &dt_fbt_kprobe;
>> +	default_impl = BPF_HAS(dtp, BPF_FEAT_FENTRY) ? &dt_fbt_fprobe : &dt_fbt_kprobe;
>>  
>> -	prv = dt_provider_create(dtp, prvname, impl, &pattr, NULL);
>> -	if (prv == NULL)
>> +	default_prv = dt_provider_create(dtp, prvname, default_impl, &pattr, NULL);
>> +	if (default_prv == NULL)
>>  		return -1;			/* errno already set */
>> +	if (default_impl == &dt_fbt_kprobe)
>> +		kprobe_prv = default_prv;
>>  
>>  	f = fopen(PROBE_LIST, "r");
>>  	if (f == NULL)
>>  		return 0;
>>  
>>  	while (getline(&buf, &n, f) >= 0) {
>> +		dt_provimpl_t	*impl = default_impl;
>> +		dt_provider_t	*prv = default_prv;
>> +		char		fun[DTRACE_FUNCNAMELEN] = {};
>> +		void		*entry_data = NULL;
>> +		void		*return_data = NULL;
>> +
>>  		/*
>>  		 * Here buf is either "funcname\n" or "funcname [modname]\n".
>>  		 * The last line may not have a linefeed.
>> @@ -106,10 +115,6 @@ static int populate(dtrace_hdl_t *dtp)
>>  				p++;
>>  		}
>>  
>> -		/* Weed out synthetic symbol names (that are invalid). */
>> -		if (strchr(buf, '.') != NULL)
>> -			continue;
>> -
>>  #define strstarts(var, x) (strncmp(var, x, strlen (x)) == 0)
>>  		/* Weed out __ftrace_invalid_address___* entries. */
>>  		if (strstarts(buf, "__ftrace_invalid_address__") ||
>> @@ -129,6 +134,31 @@ static int populate(dtrace_hdl_t *dtp)
>>  		} else
>>  			mod = p;
>>  
>> +		/* '.'-suffixed functions (e.g. foo.isra.0) must always be
>> +		 * kprobe-backed as fprobe does not support them.  They
>> +		 * are represented with their unsuffixed names however, so
>> +		 * the full name including suffix is stored as data associated
>> +		 * with the tp event.
>> +		 */
>> +		strlcpy(fun, buf, sizeof(fun));
>> +		if (strchr(buf, '.') != NULL) {
>> +			char *suffix;
>> +
>> +			impl = &dt_fbt_kprobe;
>> +			if (!kprobe_prv) {
>> +				kprobe_prv = dt_provider_create(dtp, prvname,
>> +								impl, &pattr,
>> +								NULL);
>> +				if (kprobe_prv == NULL)
>> +					return -1;
>> +			}
>> +			prv = kprobe_prv;
>> +			suffix = strchr(fun, '.');
>> +			*suffix = '\0';
>> +			entry_data = strdup(buf);
>> +			return_data = strdup(buf);
>> +		}
>> +
>>  		/*
>>  		 * Due to the lack of module names in
>>  		 * TRACEFS/available_filter_functions, there are some duplicate
>> @@ -138,14 +168,16 @@ static int populate(dtrace_hdl_t *dtp)
>>  		pd.id = DTRACE_IDNONE;
>>  		pd.prv = prvname;
>>  		pd.mod = mod;
>> -		pd.fun = buf;
>> +		pd.fun = fun;
>>  		pd.prb = "entry";
>>  		if (dt_probe_lookup(dtp, &pd) != NULL)
>>  			continue;
>>  
>> -		if (dt_tp_probe_insert(dtp, prv, prvname, mod, buf, "entry"))
>> +		if (dt_tp_probe_insert_data(dtp, prv, prvname, mod, fun,
>> +					    "entry", entry_data))
>>  			n++;
>> -		if (dt_tp_probe_insert(dtp, prv, prvname, mod, buf, "return"))
>> +		if (dt_tp_probe_insert_data(dtp, prv, prvname, mod, fun,
>> +					    "return", return_data))
>>  			n++;
>>  	}
>>  
>> @@ -363,10 +395,11 @@ static int kprobe_trampoline(dt_pcb_t *pcb, uint_t exitlbl)
>>  static int kprobe_attach(dtrace_hdl_t *dtp, const dt_probe_t *prp, int bpf_fd)
>>  {
>>  	if (!dt_tp_probe_has_info(prp)) {
>> -		char	*fn;
>> -		FILE	*f;
>> -		size_t	len;
>> -		int	fd, rc = -1;
>> +		char		*fn;
>> +		const char	*fun;
>> +		FILE		*f;
>> +		size_t		len;
>> +		int		fd, rc = -1;
>>  
>>  		/*
>>  		 * Register the kprobe with the tracing subsystem.  This will
>> @@ -376,9 +409,18 @@ static int kprobe_attach(dtrace_hdl_t *dtp, const dt_probe_t *prp, int bpf_fd)
>>  		if (fd == -1)
>>  			return -ENOENT;
>>  
>> +		/* If actual function name contained a '.', it is stored in
>> +		 * tp data; use non-suffix name for event as event names
>> +		 * cannot contain a '.'.  User-visible probe name does
>> +		 * not contain a '.'.
>> +		 */
>> +		fun = dt_tp_get_event_data(prp);
>> +		if (!fun)
>> +			fun = prp->desc->fun;
>> +
>>  		rc = dprintf(fd, "%c:" FBT_GROUP_FMT "/%s %s\n",
>>  			     prp->desc->prb[0] == 'e' ? 'p' : 'r',
>> -			     FBT_GROUP_DATA, prp->desc->fun, prp->desc->fun);
>> +			     FBT_GROUP_DATA, prp->desc->fun, fun);
>>  		close(fd);
>>  		if (rc == -1)
>>  			return -ENOENT;
>> -- 
>> 2.43.5
>>




More information about the DTrace-devel mailing list