[DTrace-devel] [PATCH] Fix records for aggregation descriptions

Kris Van Hees kris.van.hees at oracle.com
Fri Aug 12 07:01:21 UTC 2022


The implementation of aggregations erronously registered the value of
an aggregation as a series of 64-bit values, each with their own record
description.  However, aggregatios in their most generic form should
comprise the following records:

(1) variable ID
(2) keys (values of the index tuple, if any)
(3) latch sequence number
(4) aggregation data

(1) and (2) are records that identify the aggregation, whereas (3) and
(4) are data that is generated by the producer.

Change the aggregation record descriptions to match the layout described
above.

Note that the variable ID is now stored as a 32-bit value whereas DTrace
used to store it as a 64-bit value.  This change is valid because the
variable ID is an identifier ID and therefore cannot exceed the range of
a 32-bit integer,

Support for aggregtion keys will be added in later patches.

Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>
---
 include/dtrace/metadesc.h |  5 ++--
 libdtrace/dt_aggregate.c  | 17 ++++++-------
 libdtrace/dt_cg.c         |  9 +++++++
 libdtrace/dt_consume.c    | 17 +++++--------
 libdtrace/dt_impl.h       |  3 +++
 libdtrace/dt_map.c        | 53 ++++++++++++++++++++++-----------------
 libdtrace/dt_printf.c     | 13 +++++-----
 7 files changed, 66 insertions(+), 51 deletions(-)

diff --git a/include/dtrace/metadesc.h b/include/dtrace/metadesc.h
index 20a5b8a0..69906a88 100644
--- a/include/dtrace/metadesc.h
+++ b/include/dtrace/metadesc.h
@@ -58,8 +58,9 @@ typedef struct dtrace_aggdesc {
 	uint64_t dtagd_sig;			/* aggregation signature */
 	uint64_t dtagd_normal;			/* aggregation normalization */
 	uint32_t dtagd_size;			/* size in bytes */
-	int dtagd_nrecs;			/* number of records */
-	dtrace_recdesc_t *dtagd_recs;		/* record descriptions */
+	int dtagd_nrecs;			/* number of key records */
+	dtrace_recdesc_t *dtagd_recs;		/* key record descriptions */
+	dtrace_recdesc_t *dtagd_drecs;		/* data record descriptions */
 } dtrace_aggdesc_t;
 
 typedef struct dtrace_fmtdesc {
diff --git a/libdtrace/dt_aggregate.c b/libdtrace/dt_aggregate.c
index 559a497e..4ed614a9 100644
--- a/libdtrace/dt_aggregate.c
+++ b/libdtrace/dt_aggregate.c
@@ -420,7 +420,6 @@ typedef struct dt_snapstate {
 static void
 dt_agg_one_copy(dt_ident_t *aid, int64_t *dst, int64_t *src, uint_t realsz)
 {
-	src++;  /* skip latch sequence number */
 	memcpy(dst, src, realsz);
 }
 
@@ -432,14 +431,15 @@ dt_agg_one_agg(dt_ident_t *aid, int64_t *dst, int64_t *src, uint_t realsz)
 	if (*src == 0)
 		return;
 
-	src++;  /* skip latch sequence number */
 	switch (((dt_ident_t *)aid->di_iarg)->di_id) {
 	case DT_AGG_MAX:
+		*dst++ = *src++;	/* copy latch sequence number */
 		if (*src > *dst)
 			*dst = *src;
 
 		break;
 	case DT_AGG_MIN:
+		*dst++ = *src++;	/* copy latch sequence number */
 		if (*src < *dst)
 			*dst = *src;
 
@@ -471,8 +471,8 @@ dt_aggregate_snap_one(dt_idhash_t *dhp, dt_ident_t *aid, dt_snapstate_t *st)
 	/* point to the latch sequence number */
 	src = (int64_t *)(st->buf + aid->di_offset);
 
-	/* real size excludes latch sequence number and second data copy, if any */
-	realsz = (aid->di_size - sizeof(uint64_t)) / DT_AGG_NUM_COPIES;
+	/* real size excludes second data copy, if any */
+	realsz = aid->di_size / DT_AGG_NUM_COPIES;
 
 	/* See if we already have an entry for this aggregation. */
 	for (h = agh->dtah_hash[ndx]; h != NULL; h = h->dtahe_next) {
@@ -508,7 +508,6 @@ dt_aggregate_snap_one(dt_idhash_t *dhp, dt_ident_t *aid, dt_snapstate_t *st)
 		return dt_set_errno(st->dtp, EDT_NOMEM);
 	}
 
-	src++;
 	memcpy(agd->dtada_data, src, realsz);
 	agd->dtada_size = realsz;
 	agd->dtada_desc = agg;
@@ -659,8 +658,8 @@ dt_aggregate_keycmp(const void *lhs, const void *rhs)
 	if ((rval = dt_aggregate_hashcmp(lhs, rhs)) != 0)
 		return rval;
 
-	nrecs = lagg->dtagd_nrecs - 1;
-	assert(nrecs == ragg->dtagd_nrecs - 1);
+	nrecs = lagg->dtagd_nrecs;
+	assert(nrecs == ragg->dtagd_nrecs);
 
 	keypos = dt_keypos + 1 >= nrecs ? 0 : dt_keypos;
 
@@ -1089,7 +1088,7 @@ dt_aggwalk_rval(dtrace_hdl_t *dtp, dt_ahashent_t *h, int rval)
 		uint32_t size, offs = 0;
 
 		aggdesc = h->dtahe_data.dtada_desc;
-		rec = &aggdesc->dtagd_recs[aggdesc->dtagd_nrecs - 1];
+		rec = &aggdesc->dtagd_drecs[DT_AGGDATA_RECORD];
 		size = rec->dtrd_size;
 		data = &h->dtahe_data;
 
@@ -1770,7 +1769,7 @@ dtrace_aggregate_clear(dtrace_hdl_t *dtp)
 
 	for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {
 		aggdesc = h->dtahe_data.dtada_desc;
-		rec = &aggdesc->dtagd_recs[aggdesc->dtagd_nrecs - 1];
+		rec = &aggdesc->dtagd_drecs[DT_AGGDATA_RECORD];
 		data = &h->dtahe_data;
 
 		memset(&data->dtada_data[rec->dtrd_offset], 0, rec->dtrd_size);
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index ebd41906..869a44c3 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -6712,6 +6712,7 @@ static dt_cg_aggfunc_f *_dt_cg_agg[DT_AGG_NUM] = {
 static void
 dt_cg_agg(dt_pcb_t *pcb, dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
 {
+	dtrace_hdl_t	*dtp = pcb->pcb_hdl;
 	dt_ident_t	*aid, *fid;
 	dt_cg_aggfunc_f	*aggfp;
 
@@ -6744,6 +6745,14 @@ dt_cg_agg(dt_pcb_t *pcb, dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
 	assert(aggfp != NULL);
 
 	(*aggfp)(pcb, aid, dnp, dlp, drp);
+
+	/*
+	 * Add this aggid if we see it for the first time.  We do this after
+	 * the BPF code generation because aggfp() sets aid->di_size.
+	 */
+	if (dt_aggid_lookup(dtp, aid->di_id, NULL) == -1)
+		dt_aggid_add(dtp, aid);
+
 	dt_aggid_add(pcb->pcb_hdl, aid);
 }
 
diff --git a/libdtrace/dt_consume.c b/libdtrace/dt_consume.c
index 973084d6..ccc54476 100644
--- a/libdtrace/dt_consume.c
+++ b/libdtrace/dt_consume.c
@@ -1001,8 +1001,8 @@ dt_print_bytes(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
 		return dt_printf(dtp, fp, "  %-*s", width, s);
 	}
 
-        /* print the bytes raw */
-        return dt_print_rawbytes(dtp, fp, addr, nbytes);
+	/* print the bytes raw */
+	return dt_print_rawbytes(dtp, fp, addr, nbytes);
 }
 
 #ifdef FIXME
@@ -1697,7 +1697,7 @@ dt_print_datum(dtrace_hdl_t *dtp, FILE *fp, dtrace_recdesc_t *rec,
 static int
 dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
 {
-	int			i, aggact = 0;
+	int			i;
 	dtrace_print_aggdata_t	*pd = arg;
 	const dtrace_aggdata_t	*aggdata = aggsdata[0];
 	dtrace_aggdesc_t	*agg = aggdata->dtada_desc;
@@ -1709,18 +1709,13 @@ dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
 
 	/*
 	 * Iterate over each record description in the key, printing the traced
-	 * data.
+	 * data.  The first record is skipped because it holds the variable ID.
 	 */
-	for (i = 0; i < agg->dtagd_nrecs; i++) {
+	for (i = 1; i < agg->dtagd_nrecs; i++) {
 		rec = &agg->dtagd_recs[i];
 		addr = aggdata->dtada_data + rec->dtrd_offset;
 		size = rec->dtrd_size;
 
-		if (DTRACEACT_ISAGG(rec->dtrd_action)) {
-			aggact = i;
-			break;
-		}
-
 		if (dt_print_datum(dtp, fp, rec, addr, size, 1, 0) < 0)
 			return DTRACE_AGGWALK_ERROR;
 
@@ -1734,7 +1729,7 @@ dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
 
 		aggdata = aggsdata[i];
 		agg = aggdata->dtada_desc;
-		rec = &agg->dtagd_recs[aggact];
+		rec = &agg->dtagd_drecs[DT_AGGDATA_RECORD];
 		addr = aggdata->dtada_data + rec->dtrd_offset;
 		size = agg->dtagd_size;
 
diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
index 0e3cf218..704cfed0 100644
--- a/libdtrace/dt_impl.h
+++ b/libdtrace/dt_impl.h
@@ -188,6 +188,9 @@ typedef struct dt_ahash {
 	size_t		dtah_size;		/* size of hash table */
 } dt_ahash_t;
 
+#define DT_AGGDATA_COUNTER	0
+#define DT_AGGDATA_RECORD	1
+
 /*
  * Why do we need (only) 4 slots?  The maximum amount of string arguments to
  * any function is 2, and if the result is a string as well, that means we may
diff --git a/libdtrace/dt_map.c b/libdtrace/dt_map.c
index 0098e374..0a2563ad 100644
--- a/libdtrace/dt_map.c
+++ b/libdtrace/dt_map.c
@@ -241,11 +241,9 @@ dt_aggid_add(dtrace_hdl_t *dtp, const dt_ident_t *aid)
 {
 	dtrace_id_t		max;
 	dtrace_aggdesc_t	*agg;
-	dtrace_recdesc_t	*recs;
+	dtrace_recdesc_t	*recs, *drecs;
 	dtrace_aggid_t		id = aid->di_id;
-	dt_ident_t		*fid = aid->di_iarg;
-	int			i;
-	uint_t			off = 0;
+	int			nrecs = ((dt_idsig_t *)aid->di_data)->dis_argc;
 
 	while (id >= (max = dtp->dt_maxagg) || dtp->dt_adesc == NULL) {
 		dtrace_id_t		nmax = max ? (max << 1) : 1;
@@ -274,20 +272,16 @@ dt_aggid_add(dtrace_hdl_t *dtp, const dt_ident_t *aid)
 	if (agg == NULL)
 		return dt_set_errno(dtp, EDT_NOMEM);
 
-	/*
-	 * Note the relationship between the aggregation storage
-	 * size (di_size) and the aggregation data size (dtagd_size):
-	 *     di_size = dtagd_size * (num copies) + (size of latch seq #)
-	 */
 	agg->dtagd_id = id;
 	agg->dtagd_name = aid->di_name;
 	agg->dtagd_sig = ((dt_idsig_t *)aid->di_data)->dis_auxinfo;
 	agg->dtagd_normal = 1;
 	agg->dtagd_varid = aid->di_id;
 	agg->dtagd_size = (aid->di_size - sizeof(uint64_t)) / DT_AGG_NUM_COPIES;
-	agg->dtagd_nrecs = agg->dtagd_size / sizeof(uint64_t);
+	agg->dtagd_nrecs = 1 + nrecs;		/* var ID and keys */
 
-	recs = dt_calloc(dtp, agg->dtagd_nrecs, sizeof(dtrace_recdesc_t));
+	/* We allocate all records at once (varid, keys, data counter, data). */
+	recs = dt_calloc(dtp, 1 + nrecs + 2, sizeof(dtrace_recdesc_t));
 	if (recs == NULL) {
 		dt_free(dtp, agg);
 		return dt_set_errno(dtp, EDT_NOMEM);
@@ -295,18 +289,30 @@ dt_aggid_add(dtrace_hdl_t *dtp, const dt_ident_t *aid)
 
 	agg->dtagd_recs = recs;
 
-	for (i = 0; i < agg->dtagd_nrecs; i++) {
-		dtrace_recdesc_t	*rec = &recs[i];
-
-		rec->dtrd_action = fid->di_id;
-		rec->dtrd_size = sizeof(uint64_t);
-		rec->dtrd_offset = off;
-		rec->dtrd_alignment = sizeof(uint64_t);
-		rec->dtrd_format = NULL;
-		rec->dtrd_arg = 1;
-
-		off += sizeof(uint64_t);
-	}
+	/* Set up the variable ID record (first record in the keys). */
+	recs[0].dtrd_action = DTRACEACT_DIFEXPR;
+	recs[0].dtrd_size = sizeof(uint32_t);
+	recs[0].dtrd_offset = 0;
+	recs[0].dtrd_alignment = sizeof(uint32_t);
+	recs[0].dtrd_format = NULL;
+	recs[0].dtrd_arg = 1;
+
+	/* Set up the data records. */
+	agg->dtagd_drecs = drecs = &recs[nrecs];
+
+	drecs[DT_AGGDATA_COUNTER].dtrd_action = ((dt_ident_t *) aid->di_iarg)->di_id;
+	drecs[DT_AGGDATA_COUNTER].dtrd_size = sizeof(uint64_t);
+	drecs[DT_AGGDATA_COUNTER].dtrd_offset = 0;
+	drecs[DT_AGGDATA_COUNTER].dtrd_alignment = sizeof(uint64_t);
+	drecs[DT_AGGDATA_COUNTER].dtrd_format = NULL;
+	drecs[DT_AGGDATA_COUNTER].dtrd_arg = 1;
+
+	drecs[DT_AGGDATA_RECORD].dtrd_action = ((dt_ident_t *) aid->di_iarg)->di_id;
+	drecs[DT_AGGDATA_RECORD].dtrd_size = agg->dtagd_size;
+	drecs[DT_AGGDATA_RECORD].dtrd_offset = sizeof(uint64_t);
+	drecs[DT_AGGDATA_RECORD].dtrd_alignment = sizeof(uint64_t);
+	drecs[DT_AGGDATA_RECORD].dtrd_format = NULL;
+	drecs[DT_AGGDATA_RECORD].dtrd_arg = 1;
 
 	dtp->dt_adesc[id] = agg;
 
@@ -338,6 +344,7 @@ dt_aggid_destroy(dtrace_hdl_t *dtp)
 
 	for (i = 0; i < dtp->dt_maxagg; i++) {
 		if (dtp->dt_adesc[i] != NULL) {
+			/* Freeing dtagd_recs also frees dtagd_drec. */
 			dt_free(dtp, dtp->dt_adesc[i]->dtagd_recs);
 			dt_free(dtp, dtp->dt_adesc[i]);
 		}
diff --git a/libdtrace/dt_printf.c b/libdtrace/dt_printf.c
index 4fb20b23..87ff47b6 100644
--- a/libdtrace/dt_printf.c
+++ b/libdtrace/dt_printf.c
@@ -1264,10 +1264,12 @@ dt_printf_format(dtrace_hdl_t *dtp, FILE *fp, const dt_pfargv_t *pfv,
 	dtrace_aggdesc_t *agg;
 	caddr_t lim = (caddr_t)buf + len, limit;
 	char format[64] = "%";
-	int i, aggrec = 0, curagg = -1;
+	int i, curagg = -1;
 	uint64_t normal, sig;
 
 	/*
+	 * FIXME: Comment needs rewrite.
+	 *
 	 * If we are formatting an aggregation, set 'aggrec' to the index of
 	 * the final record description (the aggregation result) so we can use
 	 * this record index with any conversion where DT_PFCONV_AGG is set.
@@ -1285,7 +1287,6 @@ dt_printf_format(dtrace_hdl_t *dtp, FILE *fp, const dt_pfargv_t *pfv,
 
 		curagg = naggvars > 1 ? 1 : 0;
 		aggdata = aggsdata[0];
-		aggrec = aggdata->dtada_desc->dtagd_nrecs - 1;
 		nrecs--;
 	}
 
@@ -1380,11 +1381,10 @@ dt_printf_format(dtrace_hdl_t *dtp, FILE *fp, const dt_pfargv_t *pfv,
 			if (curagg < naggvars - 1)
 				curagg++;
 
-			rec = &agg->dtagd_recs[aggrec];
-			addr = aggdata->dtada_data;
+			rec = &agg->dtagd_drecs[DT_AGGDATA_RECORD];
+			addr = aggdata->dtada_data + rec->dtrd_offset;
 			limit = addr + aggdata->dtada_size;
 			normal = agg->dtagd_normal;
-			size = agg->dtagd_size;
 			sig = agg->dtagd_sig;
 			flags = DTRACE_BUFDATA_AGGVAL;
 		} else {
@@ -1409,10 +1409,11 @@ dt_printf_format(dtrace_hdl_t *dtp, FILE *fp, const dt_pfargv_t *pfv,
 			addr = (caddr_t)buf + rec->dtrd_offset;
 			limit = lim;
 			normal = 1;
-			size = rec->dtrd_size;
 			sig = 0;
 		}
 
+		size = rec->dtrd_size;
+
 		if (addr + size > limit) {
 			dt_dprintf("bad size: addr=%p size=0x%x lim=%p\n",
 			    (void *)addr, rec->dtrd_size, (void *)lim);
-- 
2.34.1




More information about the DTrace-devel mailing list