[DTrace-devel] [PATCH v2] Snapshot aggregations just in time

Eugene Loh eugene.loh at oracle.com
Mon Aug 11 19:30:46 UTC 2025


Reviewed-by: Eugene Loh <eugene.loh at oracle.com>

On 8/11/25 12:35, Kris Van Hees via DTrace-devel wrote:
> Author: Eugene Loh <eugene.loh at oracle.com>
>
> Currently, dtrace periodically calls dtrace_work(), which in turn calls
> dtrace_consume(), which among other things calls dtrace_aggregate_snap().
> But aggregations are kept in their entirety in the kernel's BPF maps.  There
> is no need to snapshot the aggregations into user space unless we're
> actually going to do something with aggregations.
>
> Snapshot aggregations just in time -- that is, if there is a clear(),
> trunc(), or printa() or if aggregations are to be printed at the end
> of a dtrace session.
>
> Skip the aggrate-slow test.  Just-in-time snapshots mean the semantics
> of aggrate have changed.  A fast aggrate means nothing.  A slow aggrate
> means we are supposed to use stale aggregation data, which would be
> baffling.  A future patch should deprecate aggrate entirely.
>
> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
> Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>
> ---
> Changes since v1:
> 	- Changed from using dt_haveagg to using dtat_flags in dt_aggregate
> ---
>   libdtrace/dt_aggregate.c                 | 13 +++++++++++++
>   libdtrace/dt_aggregate.h                 |  1 +
>   libdtrace/dt_consume.c                   |  9 +++++++--
>   libdtrace/dtrace.h                       |  1 +
>   test/unittest/options/tst.aggrate-slow.d |  1 +
>   5 files changed, 23 insertions(+), 2 deletions(-)
>
> diff --git a/libdtrace/dt_aggregate.c b/libdtrace/dt_aggregate.c
> index f9e364435..1fc8294de 100644
> --- a/libdtrace/dt_aggregate.c
> +++ b/libdtrace/dt_aggregate.c
> @@ -73,6 +73,12 @@ dt_aggregate_set_option(dtrace_hdl_t *dtp, uintptr_t opt)
>   	dtp->dt_aggregate->dtat_flags |= opt;
>   }
>   
> +void
> +dt_aggregate_clear_option(dtrace_hdl_t *dtp, uintptr_t opt)
> +{
> +	dtp->dt_aggregate->dtat_flags &= ~opt;
> +}
> +
>   static int
>   dt_aggregate_countcmp(int64_t *lhs, int64_t *rhs)
>   {
> @@ -764,6 +770,10 @@ dtrace_aggregate_snap(dtrace_hdl_t *dtp)
>   			dtp->dt_lastagg = now;
>   	}
>   
> +	if (agp->dtat_flags & DTRACE_A_VALID)
> +		return DTRACE_WORKSTATUS_OKAY;
> +	agp->dtat_flags |= DTRACE_A_VALID;
> +
>   	dtrace_aggregate_clear(dtp);
>   
>   	for (i = 0; i < dtp->dt_conf.num_online_cpus; i++) {
> @@ -1848,6 +1858,9 @@ dtrace_aggregate_print(dtrace_hdl_t *dtp, FILE *fp,
>   {
>   	dtrace_print_aggdata_t pd;
>   
> +	dtp->dt_aggregate->dtat_flags &= ~DTRACE_A_VALID;
> +	dtrace_aggregate_snap(dtp);
> +
>   	if (dtp->dt_maxaggdsize == 0)
>   		return 0;
>   
> diff --git a/libdtrace/dt_aggregate.h b/libdtrace/dt_aggregate.h
> index 0dc126e9e..af2b7f9cc 100644
> --- a/libdtrace/dt_aggregate.h
> +++ b/libdtrace/dt_aggregate.h
> @@ -46,6 +46,7 @@ typedef enum dt_aggfid {
>   
>   extern int dt_aggregate_init(dtrace_hdl_t *);
>   extern void dt_aggregate_set_option(dtrace_hdl_t *, uintptr_t);
> +extern void dt_aggregate_clear_option(dtrace_hdl_t *, uintptr_t);
>   extern int dt_aggregate_go(dtrace_hdl_t *);
>   extern int dt_aggregate_clear_one(const dtrace_aggdata_t *, void *);
>   extern void dt_aggregate_destroy(dtrace_hdl_t *);
> diff --git a/libdtrace/dt_consume.c b/libdtrace/dt_consume.c
> index b0bf73d10..edfe5bbae 100644
> --- a/libdtrace/dt_consume.c
> +++ b/libdtrace/dt_consume.c
> @@ -2352,11 +2352,15 @@ dt_consume_one_probe(dtrace_hdl_t *dtp, FILE *fp, char *data, uint32_t size,
>   				i++;
>   				continue;
>   			case DT_ACT_CLEAR:
> +				if (dtrace_aggregate_snap(dtp) == DTRACE_WORKSTATUS_ERROR)
> +					return DTRACE_WORKSTATUS_ERROR;
>   				if (dt_clear(dtp, data, rec) != 0)
>   					return DTRACE_WORKSTATUS_ERROR;
>   
>   				continue;
>   			case DT_ACT_TRUNC:
> +				if (dtrace_aggregate_snap(dtp) == DTRACE_WORKSTATUS_ERROR)
> +					return DTRACE_WORKSTATUS_ERROR;
>   				if (i == epd->dtdd_nrecs - 1)
>   					return dt_set_errno(dtp, EDT_BADTRUNC);
>   
> @@ -2518,6 +2522,8 @@ dt_consume_one_probe(dtrace_hdl_t *dtp, FILE *fp, char *data, uint32_t size,
>   			func = dtrace_fprintf;
>   			break;
>   		case DTRACEACT_PRINTA:
> +			if (dtrace_aggregate_snap(dtp) == DTRACE_WORKSTATUS_ERROR)
> +				return DTRACE_WORKSTATUS_ERROR;
>   			if (rec->dtrd_format != NULL)
>   				func = dtrace_fprinta;
>   			else
> @@ -3095,8 +3101,7 @@ dtrace_consume(dtrace_hdl_t *dtp, FILE *fp, dtrace_consume_probe_f *pf,
>   		}
>   	}
>   
> -	if (dtrace_aggregate_snap(dtp) == DTRACE_WORKSTATUS_ERROR)
> -		return DTRACE_WORKSTATUS_ERROR;
> +	dt_aggregate_clear_option(dtp, DTRACE_A_VALID);
>   
>   	/*
>   	 * If dtp->dt_beganon is not -1, we did not process the BEGIN probe
> diff --git a/libdtrace/dtrace.h b/libdtrace/dtrace.h
> index c85f3d810..ef8f730ab 100644
> --- a/libdtrace/dtrace.h
> +++ b/libdtrace/dtrace.h
> @@ -365,6 +365,7 @@ extern int dtrace_handle_setopt(dtrace_hdl_t *dtp,
>   #define	DTRACE_A_PERCPU		0x0001
>   #define	DTRACE_A_KEEPDELTA	0x0002
>   #define	DTRACE_A_ANONYMOUS	0x0004
> +#define	DTRACE_A_VALID		0x0008
>   
>   #define	DTRACE_AGGWALK_ERROR		-1	/* error while processing */
>   #define	DTRACE_AGGWALK_NEXT		0	/* proceed to next element */
> diff --git a/test/unittest/options/tst.aggrate-slow.d b/test/unittest/options/tst.aggrate-slow.d
> index e2a0f2cb2..cd91c0a6c 100644
> --- a/test/unittest/options/tst.aggrate-slow.d
> +++ b/test/unittest/options/tst.aggrate-slow.d
> @@ -9,6 +9,7 @@
>    * When the aggrate is slower than the switchrate and the pace of printa()
>    * actions, multiple printa() should all reflect the same stale count.
>    */
> +/* @@skip: aggrate makes no sense */
>   /* @@trigger: periodic_output */
>   /* @@nosort */
>   



More information about the DTrace-devel mailing list