[DTrace-devel] [PATCH v2] Snapshot aggregations just in time
Eugene Loh
eugene.loh at oracle.com
Mon Aug 11 19:30:46 UTC 2025
Reviewed-by: Eugene Loh <eugene.loh at oracle.com>
On 8/11/25 12:35, Kris Van Hees via DTrace-devel wrote:
> Author: Eugene Loh <eugene.loh at oracle.com>
>
> Currently, dtrace periodically calls dtrace_work(), which in turn calls
> dtrace_consume(), which among other things calls dtrace_aggregate_snap().
> But aggregations are kept in their entirety in the kernel's BPF maps. There
> is no need to snapshot the aggregations into user space unless we're
> actually going to do something with aggregations.
>
> Snapshot aggregations just in time -- that is, when there is a clear(),
> trunc(), or printa() action, or when aggregations are to be printed at
> the end of a dtrace session.
>
> Skip the aggrate-slow test. Just-in-time snapshots mean the semantics
> of aggrate have changed. A fast aggrate means nothing. A slow aggrate
> means we are supposed to use stale aggregation data, which would be
> baffling. A future patch should deprecate aggrate entirely.
>
> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
> Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>
> ---
> Changes since v1:
> - Changed from using dt_haveagg to using dtat_flags in dt_aggregate
> ---
> libdtrace/dt_aggregate.c | 13 +++++++++++++
> libdtrace/dt_aggregate.h | 1 +
> libdtrace/dt_consume.c | 9 +++++++--
> libdtrace/dtrace.h | 1 +
> test/unittest/options/tst.aggrate-slow.d | 1 +
> 5 files changed, 23 insertions(+), 2 deletions(-)
>
> diff --git a/libdtrace/dt_aggregate.c b/libdtrace/dt_aggregate.c
> index f9e364435..1fc8294de 100644
> --- a/libdtrace/dt_aggregate.c
> +++ b/libdtrace/dt_aggregate.c
> @@ -73,6 +73,12 @@ dt_aggregate_set_option(dtrace_hdl_t *dtp, uintptr_t opt)
> dtp->dt_aggregate->dtat_flags |= opt;
> }
>
> +void
> +dt_aggregate_clear_option(dtrace_hdl_t *dtp, uintptr_t opt)
> +{
> + dtp->dt_aggregate->dtat_flags &= ~opt;
> +}
> +
> static int
> dt_aggregate_countcmp(int64_t *lhs, int64_t *rhs)
> {
> @@ -764,6 +770,10 @@ dtrace_aggregate_snap(dtrace_hdl_t *dtp)
> dtp->dt_lastagg = now;
> }
>
> + if (agp->dtat_flags & DTRACE_A_VALID)
> + return DTRACE_WORKSTATUS_OKAY;
> + agp->dtat_flags |= DTRACE_A_VALID;
> +
> dtrace_aggregate_clear(dtp);
>
> for (i = 0; i < dtp->dt_conf.num_online_cpus; i++) {
> @@ -1848,6 +1858,9 @@ dtrace_aggregate_print(dtrace_hdl_t *dtp, FILE *fp,
> {
> dtrace_print_aggdata_t pd;
>
> + dtp->dt_aggregate->dtat_flags &= ~DTRACE_A_VALID;
> + dtrace_aggregate_snap(dtp);
> +
> if (dtp->dt_maxaggdsize == 0)
> return 0;
>
> diff --git a/libdtrace/dt_aggregate.h b/libdtrace/dt_aggregate.h
> index 0dc126e9e..af2b7f9cc 100644
> --- a/libdtrace/dt_aggregate.h
> +++ b/libdtrace/dt_aggregate.h
> @@ -46,6 +46,7 @@ typedef enum dt_aggfid {
>
> extern int dt_aggregate_init(dtrace_hdl_t *);
> extern void dt_aggregate_set_option(dtrace_hdl_t *, uintptr_t);
> +extern void dt_aggregate_clear_option(dtrace_hdl_t *, uintptr_t);
> extern int dt_aggregate_go(dtrace_hdl_t *);
> extern int dt_aggregate_clear_one(const dtrace_aggdata_t *, void *);
> extern void dt_aggregate_destroy(dtrace_hdl_t *);
> diff --git a/libdtrace/dt_consume.c b/libdtrace/dt_consume.c
> index b0bf73d10..edfe5bbae 100644
> --- a/libdtrace/dt_consume.c
> +++ b/libdtrace/dt_consume.c
> @@ -2352,11 +2352,15 @@ dt_consume_one_probe(dtrace_hdl_t *dtp, FILE *fp, char *data, uint32_t size,
> i++;
> continue;
> case DT_ACT_CLEAR:
> + if (dtrace_aggregate_snap(dtp) == DTRACE_WORKSTATUS_ERROR)
> + return DTRACE_WORKSTATUS_ERROR;
> if (dt_clear(dtp, data, rec) != 0)
> return DTRACE_WORKSTATUS_ERROR;
>
> continue;
> case DT_ACT_TRUNC:
> + if (dtrace_aggregate_snap(dtp) == DTRACE_WORKSTATUS_ERROR)
> + return DTRACE_WORKSTATUS_ERROR;
> if (i == epd->dtdd_nrecs - 1)
> return dt_set_errno(dtp, EDT_BADTRUNC);
>
> @@ -2518,6 +2522,8 @@ dt_consume_one_probe(dtrace_hdl_t *dtp, FILE *fp, char *data, uint32_t size,
> func = dtrace_fprintf;
> break;
> case DTRACEACT_PRINTA:
> + if (dtrace_aggregate_snap(dtp) == DTRACE_WORKSTATUS_ERROR)
> + return DTRACE_WORKSTATUS_ERROR;
> if (rec->dtrd_format != NULL)
> func = dtrace_fprinta;
> else
> @@ -3095,8 +3101,7 @@ dtrace_consume(dtrace_hdl_t *dtp, FILE *fp, dtrace_consume_probe_f *pf,
> }
> }
>
> - if (dtrace_aggregate_snap(dtp) == DTRACE_WORKSTATUS_ERROR)
> - return DTRACE_WORKSTATUS_ERROR;
> + dt_aggregate_clear_option(dtp, DTRACE_A_VALID);
>
> /*
> * If dtp->dt_beganon is not -1, we did not process the BEGIN probe
> diff --git a/libdtrace/dtrace.h b/libdtrace/dtrace.h
> index c85f3d810..ef8f730ab 100644
> --- a/libdtrace/dtrace.h
> +++ b/libdtrace/dtrace.h
> @@ -365,6 +365,7 @@ extern int dtrace_handle_setopt(dtrace_hdl_t *dtp,
> #define DTRACE_A_PERCPU 0x0001
> #define DTRACE_A_KEEPDELTA 0x0002
> #define DTRACE_A_ANONYMOUS 0x0004
> +#define DTRACE_A_VALID 0x0008
>
> #define DTRACE_AGGWALK_ERROR -1 /* error while processing */
> #define DTRACE_AGGWALK_NEXT 0 /* proceed to next element */
> diff --git a/test/unittest/options/tst.aggrate-slow.d b/test/unittest/options/tst.aggrate-slow.d
> index e2a0f2cb2..cd91c0a6c 100644
> --- a/test/unittest/options/tst.aggrate-slow.d
> +++ b/test/unittest/options/tst.aggrate-slow.d
> @@ -9,6 +9,7 @@
> * When the aggrate is slower than the switchrate and the pace of printa()
> * actions, multiple printa() should all reflect the same stale count.
> */
> +/* @@skip: aggrate makes no sense */
> /* @@trigger: periodic_output */
> /* @@nosort */
>
More information about the DTrace-devel
mailing list