[DTrace-devel] [PATCH 40/61] Fix records for aggregation descriptions

eugene.loh at oracle.com eugene.loh at oracle.com
Fri Jul 8 14:45:24 UTC 2022


From: Eugene Loh <eugene.loh at oracle.com>

There is an array of aggregation descriptions, dtp->dt_adesc.

In legacy DTrace, an aggregation description's records, in order, describe:

*)  The 8-byte, dtrace_aggvarid_t, aggregation variable ID.

*)  The aggregation's keys, if any -- one record per key.

*)  The aggregation's data.

In the port to BPF, the records were changed to be one per each 64-bit
word of aggregation data.  Problems include:

*)  This breaks some dependent code.

*)  There is no description of the keys, if any.

*)  There is no use for having a different record for each word of
    data... and there are potentially very many such words for
    *quantize() aggregations.

Change the aggregation description records back to legacy style.

One exception is that the legacy variable ID has changed from an 8-byte
dtrace_aggvarid_t to a 4-byte dtrace_aggid_t, to match the 4-byte
prefix at the beginning of "tuples".  E.g., see dt_cg_arglist().

In legacy DTrace, aggregation descriptions are requested from the
kernel.  In the BPF port, key and data sizes are discovered during
code generation.  Therefore, add function dt_aggid_add_rec_data()
to complement dt_aggid_add().  (Support for aggregation keys will
come in later patches.)

Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
 libdtrace/dt_cg.c      | 20 ++++++++++++-
 libdtrace/dt_consume.c |  6 ++--
 libdtrace/dt_impl.h    |  3 +-
 libdtrace/dt_map.c     | 68 ++++++++++++++++++++++++++++--------------
 libdtrace/dt_printf.c  |  4 +--
 5 files changed, 71 insertions(+), 30 deletions(-)

diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index 063ec113..61611c1d 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -6586,8 +6586,10 @@ static dt_cg_aggfunc_f *_dt_cg_agg[DT_AGG_NUM] = {
 static void
 dt_cg_agg(dt_pcb_t *pcb, dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
 {
+	dtrace_hdl_t	*dtp = pcb->pcb_hdl;
 	dt_ident_t	*aid, *fid;
 	dt_cg_aggfunc_f	*aggfp;
+	int		nrecs, off;
 
 	/*
 	 * If the aggregation has no aggregating function applied to it, then
@@ -6618,7 +6620,23 @@ dt_cg_agg(dt_pcb_t *pcb, dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
 	assert(aggfp != NULL);
 
 	(*aggfp)(pcb, aid, dnp, dlp, drp);
-	dt_aggid_add(pcb->pcb_hdl, aid);
+
+	/* add this agg id if we are seeing it for the first time */
+	/* we do this after the BPF code generation since aggfp() sets aid->di_size */
+	if (dt_aggid_lookup(dtp, aid->di_id, NULL) == -1) {
+
+		/* count the number of records */
+		nrecs = 1;		/* variable ID */
+		nrecs++;		/* data */
+
+		/* add the aid */
+		dt_aggid_add(dtp, aid, nrecs);
+
+		off = sizeof(uint32_t);
+
+		/* add record for aggregation data */
+		off = dt_aggid_add_rec_data(dtp, aid->di_id, off);
+	}
 }
 
 void
diff --git a/libdtrace/dt_consume.c b/libdtrace/dt_consume.c
index 41f99970..28fed161 100644
--- a/libdtrace/dt_consume.c
+++ b/libdtrace/dt_consume.c
@@ -1712,7 +1712,7 @@ dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
 	 * Iterate over each record description in the key, printing the traced
 	 * data.
 	 */
-	for (i = 0; i < agg->dtagd_nrecs; i++) {
+	for (i = 1; i < agg->dtagd_nrecs; i++) {
 		rec = &agg->dtagd_recs[i];
 		addr = aggdata->dtada_data + rec->dtrd_offset;
 		size = rec->dtrd_size;
@@ -1736,8 +1736,8 @@ dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
 		aggdata = aggsdata[i];
 		agg = aggdata->dtada_desc;
 		rec = &agg->dtagd_recs[aggact];
-		addr = aggdata->dtada_data + rec->dtrd_offset;
-		size = agg->dtagd_size;
+		addr = aggdata->dtada_data;
+		size = rec->dtrd_size;
 
 		assert(DTRACEACT_ISAGG(rec->dtrd_action));
 		normal = aggdata->dtada_desc->dtagd_normal;
diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
index b0af2451..581f568f 100644
--- a/libdtrace/dt_impl.h
+++ b/libdtrace/dt_impl.h
@@ -779,7 +779,8 @@ extern void dt_epid_destroy(dtrace_hdl_t *);
 typedef void (*dt_cg_gap_f)(dt_pcb_t *, int);
 extern uint32_t dt_rec_add(dtrace_hdl_t *, dt_cg_gap_f, dtrace_actkind_t,
 			   uint32_t, uint16_t, dt_pfargv_t *, uint64_t);
-extern int dt_aggid_add(dtrace_hdl_t *, const dt_ident_t *);
+extern int dt_aggid_add(dtrace_hdl_t *, const dt_ident_t *, int);
+extern int dt_aggid_add_rec_data(dtrace_hdl_t *, dtrace_aggid_t, int);
 extern int dt_aggid_lookup(dtrace_hdl_t *, dtrace_aggid_t, dtrace_aggdesc_t **);
 extern void dt_aggid_destroy(dtrace_hdl_t *);
 
diff --git a/libdtrace/dt_map.c b/libdtrace/dt_map.c
index 96660fd0..d7e8a8c5 100644
--- a/libdtrace/dt_map.c
+++ b/libdtrace/dt_map.c
@@ -237,15 +237,12 @@ dt_rec_add(dtrace_hdl_t *dtp, dt_cg_gap_f gapf, dtrace_actkind_t kind,
 }
 
 int
-dt_aggid_add(dtrace_hdl_t *dtp, const dt_ident_t *aid)
+dt_aggid_add(dtrace_hdl_t *dtp, const dt_ident_t *aid, int nrecs)
 {
 	dtrace_id_t		max;
 	dtrace_aggdesc_t	*agg;
 	dtrace_recdesc_t	*recs;
 	dtrace_aggid_t		id = aid->di_id;
-	dt_ident_t		*fid = aid->di_iarg;
-	int			i;
-	uint_t			off = 0;
 
 	while (id >= (max = dtp->dt_maxagg) || dtp->dt_adesc == NULL) {
 		dtrace_id_t		nmax = max ? (max << 1) : 1;
@@ -274,18 +271,13 @@ dt_aggid_add(dtrace_hdl_t *dtp, const dt_ident_t *aid)
 	if (agg == NULL)
 		return dt_set_errno(dtp, EDT_NOMEM);
 
-	/*
-	 * Note the relationship between the aggregation storage
-	 * size (di_size) and the aggregation data size (dtagd_size):
-	 *     di_size = dtagd_size * (num copies) + (size of latch seq #)
-	 */
 	agg->dtagd_id = id;
 	agg->dtagd_name = aid->di_name;
 	agg->dtagd_sig = ((dt_idsig_t *)aid->di_data)->dis_auxinfo;
 	agg->dtagd_normal = 1;
 	agg->dtagd_varid = aid->di_id;
-	agg->dtagd_size = (aid->di_size - sizeof(uint64_t)) / DT_AGG_NUM_COPIES;
-	agg->dtagd_nrecs = agg->dtagd_size / sizeof(uint64_t);
+	agg->dtagd_size = -1;		/* to be set in dt_aggid_add_rec_data() */
+	agg->dtagd_nrecs = nrecs;
 
 	recs = dt_calloc(dtp, agg->dtagd_nrecs, sizeof(dtrace_recdesc_t));
 	if (recs == NULL) {
@@ -295,22 +287,52 @@ dt_aggid_add(dtrace_hdl_t *dtp, const dt_ident_t *aid)
 
 	agg->dtagd_recs = recs;
 
-	for (i = 0; i < agg->dtagd_nrecs; i++) {
-		dtrace_recdesc_t	*rec = &recs[i];
+	recs[0].dtrd_action = DTRACEACT_DIFEXPR;  /* does this matter? */
+	recs[0].dtrd_size = sizeof(uint32_t);
+	recs[0].dtrd_offset = 0;
+	recs[0].dtrd_alignment = sizeof(uint64_t);
+	recs[0].dtrd_format = NULL;
+	recs[0].dtrd_arg = 1;
+
+	/* prepopulate some data fields */
+	recs[nrecs - 1].dtrd_action = ((dt_ident_t *) aid->di_iarg)->di_id;
+	recs[nrecs - 1].dtrd_size = (aid->di_size - sizeof(uint64_t)) / DT_AGG_NUM_COPIES;
+ 
+	dtp->dt_adesc[id] = agg;
 
-		rec->dtrd_action = fid->di_id;
-		rec->dtrd_size = sizeof(uint64_t);
-		rec->dtrd_offset = off;
-		rec->dtrd_alignment = sizeof(uint64_t);
-		rec->dtrd_format = NULL;
-		rec->dtrd_arg = 1;
+	return 0;
+}
 
-		off += sizeof(uint64_t);
-	}
+int
+dt_aggid_add_rec_data(dtrace_hdl_t *dtp, dtrace_aggid_t id, int off)
+{
+	int nrecs;
+	dtrace_recdesc_t *rec;
 
-	dtp->dt_adesc[id] = agg;
+	assert(id >= 0 && id < dtp->dt_maxagg && dtp->dt_adesc != NULL && dtp->dt_adesc[id] != NULL);
 
-	return 0;
+	nrecs = dtp->dt_adesc[id]->dtagd_nrecs;
+	rec = &dtp->dt_adesc[id]->dtagd_recs[nrecs - 1];
+
+	/* account for the optional TLS key used in tuples */
+	off += sizeof(uint64_t);
+
+	/* align for data */
+	off = (off + 7) & (-8);  // FIXME: write more elegantly
+
+	/* set last record (data) */
+	rec->dtrd_offset = off;
+	rec->dtrd_alignment = sizeof(uint64_t);
+	rec->dtrd_format = NULL;
+	rec->dtrd_arg = 1;  /* does this matter? */
+
+	/* overall size */
+	/* determine and set the overall size */
+	off += rec->dtrd_size;
+	off = (off + 7) & (-8);  // FIXME: write more elegantly
+	dtp->dt_adesc[id]->dtagd_size = off;
+
+	return off;
 }
 
 int
diff --git a/libdtrace/dt_printf.c b/libdtrace/dt_printf.c
index e7171dcc..cfbf394c 100644
--- a/libdtrace/dt_printf.c
+++ b/libdtrace/dt_printf.c
@@ -1384,7 +1384,6 @@ dt_printf_format(dtrace_hdl_t *dtp, FILE *fp, const dt_pfargv_t *pfv,
 			addr = aggdata->dtada_data;
 			limit = addr + aggdata->dtada_size;
 			normal = agg->dtagd_normal;
-			size = agg->dtagd_size;
 			sig = agg->dtagd_sig;
 			flags = DTRACE_BUFDATA_AGGVAL;
 		} else {
@@ -1409,10 +1408,11 @@ dt_printf_format(dtrace_hdl_t *dtp, FILE *fp, const dt_pfargv_t *pfv,
 			addr = (caddr_t)buf + rec->dtrd_offset;
 			limit = lim;
 			normal = 1;
-			size = rec->dtrd_size;
 			sig = 0;
 		}
 
+		size = rec->dtrd_size;
+
 		if (addr + size > limit) {
 			dt_dprintf("bad size: addr=%p size=0x%x lim=%p\n",
 			    (void *)addr, rec->dtrd_size, (void *)lim);
-- 
2.18.4




More information about the DTrace-devel mailing list