[DTrace-devel] [PATCH v2 1/2] Omit an aggregation record if [u][sym|mod] translation fails

Eugene Loh eugene.loh at oracle.com
Thu Oct 30 00:16:12 UTC 2025


Thanks for the review.  Yes, ignoring untranslated addresses that, 
nonetheless, are interesting is a problem.  Since related patches have 
relieved the need for this solution, I'm withdrawing this patch.

On 8/12/25 10:06, Kris Van Hees wrote:
> On Tue, Jun 03, 2025 at 05:08:34PM -0400, eugene.loh at oracle.com wrote:
>> From: Eugene Loh <eugene.loh at oracle.com>
>>
>> An aggregation key can be a sym(), mod(), usym(), or umod()
>> translation of an address.  It is passed from producer to user
>> space as an address, and the consumer must translate the address.
>> It is possible for the translation to fail.
>>
>> Omit a record if the translation fails.  This addresses failures
>> seen before OL9 in
>>      test/unittest/profile-n/tst.ufunc.sh
>>      test/unittest/profile-n/tst.usym.sh
>>
>> The problem was that the kernel's aggregation buffers are snapshot
>> multiple times.  If a translation ever fails, the raw address is
>> used instead.  Later on, when the aggregation is printed, if the
>> translation is successful, the raw key will report a count of 0.
>>
>> E.g., we snapshot an aggregation.  The translation of address
>> 0xabcedf fails and so the key remains 0xabcdef.  Later the
>> aggregation is printed and, therefore, again snapshot.  The
>> values are cleared.  This time the translation of the address
>> -- say to the function starting at 0xabcd00 -- is successful.
>> That aggregation key is successfully and correctly reported, but
>> the the raw 0xabcdef will mysteriously be reported with a count 0.
> More in detail: whenever a snapshot of the aggregation data is taken, its
> key/value pairs are stored in a hashtable.  For keys that contain address
> resolution elements like usym(), ufunc(), ... the address in the key is
> replaced with the canonical address for the resolving function (start
> address of a function for ufunc(), ...).
>
> Therefore, if the resolving does not work (no symbol, module, ... found),
> the key with the reported address is used, and a hash entry is added for
> it.
>
> If, at a later snapshot, the address *can* be resolved, the ley gets
> modified with the canonical address in place of the reported address, and
> the data gets hashed using that key.
>
> Before every snapshot is taken, all aggregation data is cleared -- but the
> hash table entries remain -- it is just their data that is cleared.  So,
> the later snapshot that managed to get the address resolved will cause the
> hash entry with the unresolved address to never get data populated, and that
> is why is remains 0 from that point forward.
>
>> It may be argued that some users would like to see addresses that
>> could not be translated, but typically those unsuccessful addresses
>> are not very meaningful.
> The problem with this approach is that you drop data.  If e.g. you were to
> collect this aggregation for a task that does not have symbolic address
> data, all addresses would remain unresolved, and you would be ignoring them
> all.  That is certainly not an acceptable approach.
>
> With the on-demand snapshot code, I expect that this problem is less likely
> to occur.  But it is certainly still a possibility - incidentally, this has
> been a problem with the DTrace implementation from the Solaris days.  It has
> the same potential problem.
>
> Two approaches come to mind:
> (1) account for aggregation values to be 0 (or MIN or MAX for their respective
>      agg funcs), and omit those entries when the aggregation data is printed
> (2) detect this situation (hash entry exists for both unresolved and resolved
>      key), and if so, remove the unresolved hash entry
>
> Both have some performance impact and/or make for more complex code.  In the
> end, perhaps we could choose not to do anything about this and allow it to be
> an artifact of address resolution issues on the system?  If anything, they
> should be easy to spot because they are always going to be entries that have
> (essentially) no value.
>
>> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
>> ---
>>   libdtrace/dt_aggregate.c | 51 ++++++++++++++++++++++++++++------------
>>   1 file changed, 36 insertions(+), 15 deletions(-)
>>
>> diff --git a/libdtrace/dt_aggregate.c b/libdtrace/dt_aggregate.c
>> index a18de3a75..32b0faced 100644
>> --- a/libdtrace/dt_aggregate.c
>> +++ b/libdtrace/dt_aggregate.c
>> @@ -317,61 +317,76 @@ dt_aggregate_quantizedcmp(int64_t *lhs, int64_t *rhs)
>>   	return 0;
>>   }
>>   
>> -static void
>> +static int
>>   dt_aggregate_usym(dtrace_hdl_t *dtp, uint64_t *data)
>>   {
>>   	uint64_t tgid = data[0];
>>   	uint64_t *pc = &data[1];
>>   	pid_t pid;
>>   	GElf_Sym sym;
>> +	int rc = 0;
>>   
>>   	if (dtp->dt_vector != NULL)
>> -		return;
>> +		return -1;
>>   
>>   	pid = dt_proc_grab_lock(dtp, tgid, DTRACE_PROC_WAITING |
>>   	    DTRACE_PROC_SHORTLIVED);
>>   	if (pid < 0)
>> -		return;
>> +		return -1;
>>   
>>   	if (dt_Plookup_by_addr(dtp, pid, *pc, NULL, &sym) == 0)
>>   		*pc = sym.st_value;
>> +	else
>> +		rc = -1;
>>   
>>   	dt_proc_release_unlock(dtp, pid);
>> +
>> +	return rc;
>>   }
>>   
>> -static void
>> +static int
>>   dt_aggregate_umod(dtrace_hdl_t *dtp, uint64_t *data)
>>   {
>>   	uint64_t tgid = data[0];
>>   	uint64_t *pc = &data[1];
>>   	pid_t pid;
>>   	const prmap_t *map;
>> +	int rc = 0;
>>   
>>   	if (dtp->dt_vector != NULL)
>> -		return;
>> +		return -1;
>>   
>>   	pid = dt_proc_grab_lock(dtp, tgid, DTRACE_PROC_WAITING |
>>   	    DTRACE_PROC_SHORTLIVED);
>>   	if (pid < 0)
>> -		return;
>> +		return -1;
>>   
>>   	if ((map = dt_Paddr_to_map(dtp, pid, *pc)) != NULL)
>>   		*pc = map->pr_vaddr;
>> +	else
>> +		rc = -1;
>>   
>>   	dt_proc_release_unlock(dtp, pid);
>> +
>> +	return rc;
>>   }
>>   
>> -static void
>> +static int
>>   dt_aggregate_sym(dtrace_hdl_t *dtp, uint64_t *data)
>>   {
>>   	GElf_Sym sym;
>>   	uint64_t *pc = data;
>> +	int rc = 0;
>>   
>>   	if (dtrace_lookup_by_addr(dtp, *pc, &sym, NULL) == 0)
>>   		*pc = sym.st_value;
>> +	else
>> +		rc = -1;
>> +
>> +	return rc;
>>   }
>>   
>> -static void
>> +static int
>>   dt_aggregate_mod(dtrace_hdl_t *dtp, uint64_t *addr)
>>   {
>>   	dt_module_t *dmp;
>> @@ -385,7 +400,7 @@ dt_aggregate_mod(dtrace_hdl_t *dtp, uint64_t *addr)
>>   		 * appear more than once in aggregation output).  It seems
>>   		 * unlikely that anyone will ever notice or care...
>>   		 */
>> -		return;
>> +		return -1;
>>   	}
>>   
>>   	for (dmp = dt_list_next(&dtp->dt_modlist); dmp != NULL;
>> @@ -400,7 +415,7 @@ dt_aggregate_mod(dtrace_hdl_t *dtp, uint64_t *addr)
>>   			    dtrace_addr_range_cmp) != NULL) {
>>   
>>   			*addr = dmp->dm_text_addrs[0].dar_va;
>> -			return;
>> +			return 0;
>>   		}
>>   
>>   		if (dmp->dm_data_addrs != NULL &&
>> @@ -413,9 +428,11 @@ dt_aggregate_mod(dtrace_hdl_t *dtp, uint64_t *addr)
>>   			else
>>   				*addr = dmp->dm_data_addrs[0].dar_va;
>>   
>> -			return;
>> +			return 0;
>>   		}
>>   	}
>> +
>> +	return -1;
>>   }
>>   
>>   static dtrace_aggid_t
>> @@ -574,16 +591,20 @@ dt_aggregate_snap_one(dtrace_hdl_t *dtp, int aggid, int cpu, const char *key,
>>   
>>   		switch(agg->dtagd_krecs[i].dtrd_action) {
>>   		case DTRACEACT_USYM:
>> -			dt_aggregate_usym(dtp, p);
>> +			if (dt_aggregate_usym(dtp, p) == -1)
>> +				return 0;
>>   			break;
>>   		case DTRACEACT_UMOD:
>> -			dt_aggregate_umod(dtp, p);
>> +			if (dt_aggregate_umod(dtp, p) == -1)
>> +				return 0;
>>   			break;
>>   		case DTRACEACT_SYM:
>> -			dt_aggregate_sym(dtp, p);
>> +			if (dt_aggregate_sym(dtp, p) == -1)
>> +				return 0;
>>   			break;
>>   		case DTRACEACT_MOD:
>> -			dt_aggregate_mod(dtp, p);
>> +			if (dt_aggregate_mod(dtp, p) == -1)
>> +				return 0;
>>   			break;
>>   		default:
>>   			break;
>> -- 
>> 2.43.5
>>



More information about the DTrace-devel mailing list