[DTrace-devel] [PATCH v2] Snapshot aggregations just in time

Kris Van Hees kris.van.hees at oracle.com
Mon Aug 11 16:35:45 UTC 2025


Author: Eugene Loh <eugene.loh at oracle.com>

Currently, dtrace periodically calls dtrace_work(), which in turn calls
dtrace_consume(), which among other things calls dtrace_aggregate_snap().
But aggregations are kept in their entirety in the kernel's BPF maps.
There is no need to snapshot the aggregations into user space unless
we're actually going to do something with them.

Snapshot aggregations just in time -- that is, if there is a clear(),
trunc(), or printa() or if aggregations are to be printed at the end
of a dtrace session.

Skip the aggrate-slow test.  Just-in-time snapshots mean the semantics
of aggrate have changed.  A fast aggrate means nothing.  A slow aggrate
means we are supposed to use stale aggregation data, which would be
baffling.  A future patch should deprecate aggrate entirely.

Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>
---
Changes since v1:
	- Changed from using dt_haveagg to using dtat_flags in dt_aggregate
---
 libdtrace/dt_aggregate.c                 | 13 +++++++++++++
 libdtrace/dt_aggregate.h                 |  1 +
 libdtrace/dt_consume.c                   |  9 +++++++--
 libdtrace/dtrace.h                       |  1 +
 test/unittest/options/tst.aggrate-slow.d |  1 +
 5 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/libdtrace/dt_aggregate.c b/libdtrace/dt_aggregate.c
index f9e364435..1fc8294de 100644
--- a/libdtrace/dt_aggregate.c
+++ b/libdtrace/dt_aggregate.c
@@ -73,6 +73,12 @@ dt_aggregate_set_option(dtrace_hdl_t *dtp, uintptr_t opt)
 	dtp->dt_aggregate->dtat_flags |= opt;
 }
 
+void
+dt_aggregate_clear_option(dtrace_hdl_t *dtp, uintptr_t opt)
+{
+	dtp->dt_aggregate->dtat_flags &= ~opt;
+}
+
 static int
 dt_aggregate_countcmp(int64_t *lhs, int64_t *rhs)
 {
@@ -764,6 +770,10 @@ dtrace_aggregate_snap(dtrace_hdl_t *dtp)
 			dtp->dt_lastagg = now;
 	}
 
+	if (agp->dtat_flags & DTRACE_A_VALID)
+		return DTRACE_WORKSTATUS_OKAY;
+	agp->dtat_flags |= DTRACE_A_VALID;
+
 	dtrace_aggregate_clear(dtp);
 
 	for (i = 0; i < dtp->dt_conf.num_online_cpus; i++) {
@@ -1848,6 +1858,9 @@ dtrace_aggregate_print(dtrace_hdl_t *dtp, FILE *fp,
 {
 	dtrace_print_aggdata_t pd;
 
+	dtp->dt_aggregate->dtat_flags &= ~DTRACE_A_VALID;
+	dtrace_aggregate_snap(dtp);
+
 	if (dtp->dt_maxaggdsize == 0)
 		return 0;
 
diff --git a/libdtrace/dt_aggregate.h b/libdtrace/dt_aggregate.h
index 0dc126e9e..af2b7f9cc 100644
--- a/libdtrace/dt_aggregate.h
+++ b/libdtrace/dt_aggregate.h
@@ -46,6 +46,7 @@ typedef enum dt_aggfid {
 
 extern int dt_aggregate_init(dtrace_hdl_t *);
 extern void dt_aggregate_set_option(dtrace_hdl_t *, uintptr_t);
+extern void dt_aggregate_clear_option(dtrace_hdl_t *, uintptr_t);
 extern int dt_aggregate_go(dtrace_hdl_t *);
 extern int dt_aggregate_clear_one(const dtrace_aggdata_t *, void *);
 extern void dt_aggregate_destroy(dtrace_hdl_t *);
diff --git a/libdtrace/dt_consume.c b/libdtrace/dt_consume.c
index b0bf73d10..edfe5bbae 100644
--- a/libdtrace/dt_consume.c
+++ b/libdtrace/dt_consume.c
@@ -2352,11 +2352,15 @@ dt_consume_one_probe(dtrace_hdl_t *dtp, FILE *fp, char *data, uint32_t size,
 				i++;
 				continue;
 			case DT_ACT_CLEAR:
+				if (dtrace_aggregate_snap(dtp) == DTRACE_WORKSTATUS_ERROR)
+					return DTRACE_WORKSTATUS_ERROR;
 				if (dt_clear(dtp, data, rec) != 0)
 					return DTRACE_WORKSTATUS_ERROR;
 
 				continue;
 			case DT_ACT_TRUNC:
+				if (dtrace_aggregate_snap(dtp) == DTRACE_WORKSTATUS_ERROR)
+					return DTRACE_WORKSTATUS_ERROR;
 				if (i == epd->dtdd_nrecs - 1)
 					return dt_set_errno(dtp, EDT_BADTRUNC);
 
@@ -2518,6 +2522,8 @@ dt_consume_one_probe(dtrace_hdl_t *dtp, FILE *fp, char *data, uint32_t size,
 			func = dtrace_fprintf;
 			break;
 		case DTRACEACT_PRINTA:
+			if (dtrace_aggregate_snap(dtp) == DTRACE_WORKSTATUS_ERROR)
+				return DTRACE_WORKSTATUS_ERROR;
 			if (rec->dtrd_format != NULL)
 				func = dtrace_fprinta;
 			else
@@ -3095,8 +3101,7 @@ dtrace_consume(dtrace_hdl_t *dtp, FILE *fp, dtrace_consume_probe_f *pf,
 		}
 	}
 
-	if (dtrace_aggregate_snap(dtp) == DTRACE_WORKSTATUS_ERROR)
-		return DTRACE_WORKSTATUS_ERROR;
+	dt_aggregate_clear_option(dtp, DTRACE_A_VALID);
 
 	/*
 	 * If dtp->dt_beganon is not -1, we did not process the BEGIN probe
diff --git a/libdtrace/dtrace.h b/libdtrace/dtrace.h
index c85f3d810..ef8f730ab 100644
--- a/libdtrace/dtrace.h
+++ b/libdtrace/dtrace.h
@@ -365,6 +365,7 @@ extern int dtrace_handle_setopt(dtrace_hdl_t *dtp,
 #define	DTRACE_A_PERCPU		0x0001
 #define	DTRACE_A_KEEPDELTA	0x0002
 #define	DTRACE_A_ANONYMOUS	0x0004
+#define	DTRACE_A_VALID		0x0008
 
 #define	DTRACE_AGGWALK_ERROR		-1	/* error while processing */
 #define	DTRACE_AGGWALK_NEXT		0	/* proceed to next element */
diff --git a/test/unittest/options/tst.aggrate-slow.d b/test/unittest/options/tst.aggrate-slow.d
index e2a0f2cb2..cd91c0a6c 100644
--- a/test/unittest/options/tst.aggrate-slow.d
+++ b/test/unittest/options/tst.aggrate-slow.d
@@ -9,6 +9,7 @@
  * When the aggrate is slower than the switchrate and the pace of printa()
  * actions, multiple printa() should all reflect the same stale count.
  */
+/* @@skip: aggrate makes no sense */
 /* @@trigger: periodic_output */
 /* @@nosort */
 
-- 
2.45.2




More information about the DTrace-devel mailing list