[DTrace-devel] [PATCH 39/61] Move normalization value to aggregation descriptions

eugene.loh at oracle.com eugene.loh at oracle.com
Fri Jul 8 14:45:23 UTC 2022


From: Eugene Loh <eugene.loh at oracle.com>

The normalization value for an aggregation was kept in a consumer
hash table dt_aggregate.dtat_hash.  An aggregation with keys, however,
could have many entries (for different key values) in the hash table.
So to update a normalization value, we must snapshot and walk the
aggregations to look for all copies of the aggregation's
normalization value that must be updated.

Store the normalization value in the aggregation description dt_adesc.
This means only one copy of the value needs to be kept for any
aggregation and that normalize() no longer requires a snapshot of
the aggregation data.

Note that a snapshot in legacy DTrace meant pulling in deltas in
the aggregations, while in the port to BPF a snapshot means reading
in all data.

Note also that the semantics of normalize() had some undocumented
(and presumably unintended) timing issues.  Consider

        # dtrace -qn '
          tick-4s { @a    = sum(30);
                    @b[4] = sum(30); }
          tick-5s { normalize(@a, 3);
                    normalize(@b, 3); }
          tick-6s { @a    = sum(3000);
                    @b[6] = sum(3000); exit(0); }'

We call normalize() on both @a and @b with the same value 3.  Some
data arrives before the normalize() call, some after.  With legacy
DTrace, the output data is

        @a            1010
        @b[4]           10
        @b[6]         3000

This patch changes the behavior so that a normalization value for
an aggregation applies to all keys of that aggregation, in a manner
that is less sensitive to such timing issues.

Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
 include/dtrace/metadesc.h |  1 +
 libdtrace/dt_aggregate.c  |  6 +++--
 libdtrace/dt_consume.c    | 48 +++++++++------------------------------
 libdtrace/dt_map.c        |  1 +
 libdtrace/dt_printf.c     |  2 +-
 libdtrace/dtrace.h        |  1 -
 6 files changed, 18 insertions(+), 41 deletions(-)

diff --git a/include/dtrace/metadesc.h b/include/dtrace/metadesc.h
index 0f12c4f3..97c11b7a 100644
--- a/include/dtrace/metadesc.h
+++ b/include/dtrace/metadesc.h
@@ -56,6 +56,7 @@ typedef struct dtrace_aggdesc {
 	int dtagd_flags;			/* aggregation flags */
 	dtrace_aggid_t dtagd_id;		/* aggregation ID */
 	uint64_t dtagd_sig;			/* aggregation signature */
+	uint64_t dtagd_normal;			/* aggregation normalization */
 	uint32_t dtagd_size;			/* size in bytes */
 	int dtagd_nrecs;			/* number of records */
 	dtrace_recdesc_t *dtagd_recs;		/* record descriptions */
diff --git a/libdtrace/dt_aggregate.c b/libdtrace/dt_aggregate.c
index 4a675366..13595515 100644
--- a/libdtrace/dt_aggregate.c
+++ b/libdtrace/dt_aggregate.c
@@ -513,7 +513,6 @@ dt_aggregate_snap_one(dt_idhash_t *dhp, dt_ident_t *aid, dt_snapstate_t *st)
 	agd->dtada_size = realsz;
 	agd->dtada_desc = agg;
 	agd->dtada_hdl = st->dtp;
-	agd->dtada_normal = 1;
 
 	h->dtahe_hval = hval;
 	h->dtahe_size = realsz;
@@ -1119,12 +1118,16 @@ dt_aggwalk_rval(dtrace_hdl_t *dtp, dt_ahashent_t *h, int rval)
 		return dt_set_errno(dtp, EDT_DIRABORT);
 
 	case DTRACE_AGGWALK_NORMALIZE:
+#if 1
+		assert(0);
+#else
 		if (h->dtahe_data.dtada_normal == 0) {
 			h->dtahe_data.dtada_normal = 1;
 			return dt_set_errno(dtp, EDT_BADRVAL);
 		}
 
 		return 0;
+#endif
 
 	case DTRACE_AGGWALK_REMOVE: {
 		dtrace_aggdata_t *aggdata = &h->dtahe_data;
@@ -1526,7 +1529,6 @@ dtrace_aggregate_walk_joined(dtrace_hdl_t *dtp, dtrace_aggid_t *aggvars,
 				aggdata->dtada_size = agg->dtagd_size;
 				aggdata->dtada_desc = agg;
 				aggdata->dtada_hdl = dtp;
-				aggdata->dtada_normal = 1;
 				zaggdata[i].dtahe_hval = 0;
 				zaggdata[i].dtahe_size = agg->dtagd_size;
 				break;
diff --git a/libdtrace/dt_consume.c b/libdtrace/dt_consume.c
index dbfcf9de..41f99970 100644
--- a/libdtrace/dt_consume.c
+++ b/libdtrace/dt_consume.c
@@ -1455,37 +1455,16 @@ dt_print_pcap(dtrace_hdl_t *dtp, FILE *fp, dtrace_recdesc_t *rec,
 	return 0;
 }
 
-typedef struct dt_normal {
-	dtrace_aggid_t	dtnd_id;
-	uint64_t	dtnd_normal;
-} dt_normal_t;
-
-static int
-dt_normalize_agg(const dtrace_aggdata_t *aggdata, void *arg)
-{
-	dt_normal_t		*normal = arg;
-	dtrace_aggdesc_t	*agg = aggdata->dtada_desc;
-	dtrace_aggid_t		id = normal->dtnd_id;
-
-	if (agg->dtagd_nrecs == 0)
-		return DTRACE_AGGWALK_NEXT;
-
-	if (agg->dtagd_varid != id)
-		return DTRACE_AGGWALK_NEXT;
-
-	((dtrace_aggdata_t *)aggdata)->dtada_normal = normal->dtnd_normal;
-	return DTRACE_AGGWALK_NORMALIZE;
-}
-
 /*
  * This function is also used to denormalize aggregations, because that is
- * equivalent to normalizing them using normal = 1.
+ * equivalent to normalizing with value = 1.
  */
 static int
 dt_normalize(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
 {
 	int		act = rec->dtrd_arg;
-	dt_normal_t	normal;
+	dtrace_aggid_t	id;
+	uint64_t	value;
 	caddr_t		addr;
 
 	/*
@@ -1497,7 +1476,7 @@ dt_normalize(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
 	if (rec->dtrd_size != sizeof(dtrace_aggid_t))
 		return dt_set_errno(dtp, EDT_BADNORMAL);
 
-	normal.dtnd_id = *((dtrace_aggid_t *)addr);
+	id = *((dtrace_aggid_t *)addr);
 
 	if (act == DT_ACT_NORMALIZE) {
 		rec++;
@@ -1512,29 +1491,24 @@ dt_normalize(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
 
 		switch (rec->dtrd_size) {
 		case sizeof(uint64_t):
-			normal.dtnd_normal = *((uint64_t *)addr);
+			value = *((uint64_t *)addr);
 			break;
 		case sizeof(uint32_t):
-			normal.dtnd_normal = *((uint32_t *)addr);
+			value = *((uint32_t *)addr);
 			break;
 		case sizeof(uint16_t):
-			normal.dtnd_normal = *((uint16_t *)addr);
+			value = *((uint16_t *)addr);
 			break;
 		case sizeof(uint8_t):
-			normal.dtnd_normal = *((uint8_t *)addr);
+			value = *((uint8_t *)addr);
 			break;
 		default:
 			return dt_set_errno(dtp, EDT_BADNORMAL);
 		}
 	} else
-		normal.dtnd_normal = 1;
+		value = 1;
 
-	/*
-	 * Retrieve a snapshot of the aggregation data, and apply the normal
-	 * to all aggregations that need it.
-	 */
-	dtrace_aggregate_snap(dtp);
-	dtrace_aggregate_walk(dtp, dt_normalize_agg, &normal);
+	dtp->dt_adesc[id]->dtagd_normal = value;
 
 	return 0;
 }
@@ -1766,7 +1740,7 @@ dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
 		size = agg->dtagd_size;
 
 		assert(DTRACEACT_ISAGG(rec->dtrd_action));
-		normal = aggdata->dtada_normal;
+		normal = aggdata->dtada_desc->dtagd_normal;
 
 		if (dt_print_datum(dtp, fp, rec, addr, size, normal,
 				   agg->dtagd_sig) < 0)
diff --git a/libdtrace/dt_map.c b/libdtrace/dt_map.c
index ea635a16..96660fd0 100644
--- a/libdtrace/dt_map.c
+++ b/libdtrace/dt_map.c
@@ -282,6 +282,7 @@ dt_aggid_add(dtrace_hdl_t *dtp, const dt_ident_t *aid)
 	agg->dtagd_id = id;
 	agg->dtagd_name = aid->di_name;
 	agg->dtagd_sig = ((dt_idsig_t *)aid->di_data)->dis_auxinfo;
+	agg->dtagd_normal = 1;
 	agg->dtagd_varid = aid->di_id;
 	agg->dtagd_size = (aid->di_size - sizeof(uint64_t)) / DT_AGG_NUM_COPIES;
 	agg->dtagd_nrecs = agg->dtagd_size / sizeof(uint64_t);
diff --git a/libdtrace/dt_printf.c b/libdtrace/dt_printf.c
index f0c09178..e7171dcc 100644
--- a/libdtrace/dt_printf.c
+++ b/libdtrace/dt_printf.c
@@ -1383,7 +1383,7 @@ dt_printf_format(dtrace_hdl_t *dtp, FILE *fp, const dt_pfargv_t *pfv,
 			rec = &agg->dtagd_recs[aggrec];
 			addr = aggdata->dtada_data;
 			limit = addr + aggdata->dtada_size;
-			normal = aggdata->dtada_normal;
+			normal = agg->dtagd_normal;
 			size = agg->dtagd_size;
 			sig = agg->dtagd_sig;
 			flags = DTRACE_BUFDATA_AGGVAL;
diff --git a/libdtrace/dtrace.h b/libdtrace/dtrace.h
index a05cc4a9..8a3daf6a 100644
--- a/libdtrace/dtrace.h
+++ b/libdtrace/dtrace.h
@@ -364,7 +364,6 @@ struct dtrace_aggdata {
 	dtrace_hdl_t *dtada_hdl;		/* handle to DTrace library */
 	dtrace_aggdesc_t *dtada_desc;		/* aggregation description */
 	caddr_t dtada_data;			/* pointer to raw data */
-	uint64_t dtada_normal;			/* the normal -- 1 for denorm */
 	size_t dtada_size;			/* total size of the data */
 	caddr_t *dtada_percpu;			/* per CPU data, if avail */
 };
-- 
2.18.4




More information about the DTrace-devel mailing list