[DTrace-devel] [PATCH v2] Add support for aggpercpu option

eugene.loh at oracle.com eugene.loh at oracle.com
Thu Jan 25 01:47:06 UTC 2024


From: Eugene Loh <eugene.loh at oracle.com>

The aggpercpu option appears in the documentation.  Further, the
current implementation has vestigial code that suggests the option
was once supported.

On the other hand, its behavior is not described in any detail in
the documentation.  Further, it seems the option has no effect on
Solaris or with the legacy Linux implementation.  An easy workaround
is to add cpu as a key to an aggregation; this workaround makes the
feature superfluous.

Another challenge is that it's hard to know what behavior makes most
sense if aggpercpu is combined with other features.  For quantize()
output, should the same row values be used for all CPUs as for the
overall aggregation?  What should the output format look like if
aggpercpu is combined with a printa() that has multiple aggregations?

Just implement some reasonable version of aggpercpu support and leave
intricate scenarios for the user to handle.

Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
 libdtrace/dt_aggregate.c            |  64 ++++++++++---
 libdtrace/dt_consume.c              |   9 ++
 test/unittest/aggs/tst.aggpercpu.sh | 143 ++++++++++++++++++++++++++++
 3 files changed, 204 insertions(+), 12 deletions(-)
 create mode 100755 test/unittest/aggs/tst.aggpercpu.sh

diff --git a/libdtrace/dt_aggregate.c b/libdtrace/dt_aggregate.c
index e6275fe3..10efff49 100644
--- a/libdtrace/dt_aggregate.c
+++ b/libdtrace/dt_aggregate.c
@@ -438,6 +438,32 @@ dt_agg_one_agg(dt_ident_t *aid, dtrace_recdesc_t *rec, char *dst,
 	}
 }
 
+static void
+dt_aggregate_clear_one_percpu(const dtrace_aggdata_t *agd,
+			      dtrace_recdesc_t *rec, int max_cpus)
+{
+	dtrace_actkind_t	act = rec->dtrd_action;
+	uint32_t		siz = rec->dtrd_size;
+	int			i;
+
+	for (i = 0; i < max_cpus; i++) {
+		int64_t	*vals;
+
+		vals = (int64_t *) &agd->dtada_percpu[i][rec->dtrd_offset];
+		switch (act) {
+		case DT_AGG_MIN:
+			*vals = INT64_MAX;
+			break;
+		case DT_AGG_MAX:
+			*vals = INT64_MIN;
+			break;
+		default:
+			memset(vals, 0, siz);
+			break;
+		}
+	}
+}
+
 int
 dt_aggregate_clear_one(const dtrace_aggdata_t *agd, void *arg)
 {
@@ -447,7 +473,7 @@ dt_aggregate_clear_one(const dtrace_aggdata_t *agd, void *arg)
 	int64_t			*vals = (int64_t *)
 					&agd->dtada_data[rec->dtrd_offset];
 	uint64_t		agen;
-	int			i, max_cpus = dtp->dt_conf.max_cpuid + 1;
+	int			max_cpus = dtp->dt_conf.max_cpuid + 1;
 
 	/*
 	 * We can pass the entire key because we know that the first uint32_t
@@ -460,24 +486,18 @@ dt_aggregate_clear_one(const dtrace_aggdata_t *agd, void *arg)
 	switch (rec->dtrd_action) {
 	case DT_AGG_MIN:
 		*vals = INT64_MAX;
-		if (agd->dtada_percpu)
-			for (i = 0; i < max_cpus; i++)
-				*((uint64_t*)agd->dtada_percpu[i]) = INT64_MAX;
 		break;
 	case DT_AGG_MAX:
 		*vals = INT64_MIN;
-		if (agd->dtada_percpu)
-			for (i = 0; i < max_cpus; i++)
-				*((uint64_t*)agd->dtada_percpu[i]) = INT64_MIN;
 		break;
 	default:
 		memset(vals, 0, rec->dtrd_size);
-		if (agd->dtada_percpu)
-			for (i = 0; i < max_cpus; i++)
-				memset(agd->dtada_percpu[i], 0, rec->dtrd_size);
 		break;
 	}
 
+	if (agd->dtada_percpu)
+		dt_aggregate_clear_one_percpu(agd, rec, max_cpus);
+
 	return DTRACE_AGGWALK_NEXT;
 }
 
@@ -543,6 +563,9 @@ dt_aggregate_snap_one(dtrace_hdl_t *dtp, int aggid, int cpu, const char *key,
 		assert(aid != NULL);
 		dt_agg_one_agg(aid, &agg->dtagd_drecs[DT_AGGDATA_RECORD],
 			       agd->dtada_data, data);
+		if (agd->dtada_percpu != NULL)
+			dt_agg_one_agg(aid, &agg->dtagd_drecs[DT_AGGDATA_RECORD],
+				       agd->dtada_percpu[cpu], data);
 
 		return 0;
 
@@ -578,8 +601,25 @@ hashnext:
 
 	memcpy(ptr, data, size);
 	agd->dtada_data = ptr;
+	if (dtp->dt_aggregate.dtat_flags & DTRACE_A_PERCPU) {
+		int i, max_cpus = dtp->dt_conf.max_cpuid + 1;
+		dtrace_recdesc_t	*rec = &agg->dtagd_drecs[DT_AGGDATA_RECORD];
+
+		agd->dtada_percpu = dt_alloc(dtp, max_cpus * sizeof(caddr_t));
+		if (agd->dtada_percpu == NULL)
+			return dt_set_errno(dtp, EDT_NOMEM);
+
+		for (i = 0; i < max_cpus; i++) {
+			agd->dtada_percpu[i] = dt_alloc(dtp, size);
+			if (agd->dtada_percpu[i] == NULL)
+				return dt_set_errno(dtp, EDT_NOMEM);
+		}
+
+		dt_aggregate_clear_one_percpu(agd, rec, max_cpus);
+		memcpy(agd->dtada_percpu[cpu], data, size);
+	}
 
-	/* Add the new entru to the hashtable. */
+	/* Add the new entry to the hashtable. */
 	if (agh->dtah_hash[ndx] != NULL)
 		agh->dtah_hash[ndx]->dtahe_prev = h;
 
@@ -1564,7 +1604,7 @@ dtrace_aggregate_walk_joined(dtrace_hdl_t *dtp, dtrace_aggid_t *aggvars,
 
 		if ((zdata = dt_zalloc(dtp, zsize)) == NULL) {
 			/*
-			 * If we failed to allocated some zero-filled data, we
+			 * If we failed to allocate some zero-filled data, we
 			 * need to zero out the remaining dtada_data pointers
 			 * to prevent the wrong data from being freed below.
 			 */
diff --git a/libdtrace/dt_consume.c b/libdtrace/dt_consume.c
index 343d745f..f0775381 100644
--- a/libdtrace/dt_consume.c
+++ b/libdtrace/dt_consume.c
@@ -1760,6 +1760,15 @@ dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
 		if (dt_print_datum(dtp, fp, rec, aggdata->dtada_data, normal,
 				   agg->dtagd_sig) < 0)
 			return DTRACE_AGGWALK_ERROR;
+		if (aggdata->dtada_percpu != NULL) {
+			int j, max_cpus = aggdata->dtada_hdl->dt_conf.max_cpuid + 1;
+			for (j = 0; j < max_cpus; j++) {
+				if (dt_printf(dtp, fp, "\n    [CPU %d]", aggdata->dtada_hdl->dt_conf.cpus[j].cpu_id) < 0)
+					return DTRACE_AGGWALK_ERROR;
+				if (dt_print_datum(dtp, fp, rec, aggdata->dtada_percpu[j], normal, agg->dtagd_sig) < 0)
+					return DTRACE_AGGWALK_ERROR;
+			}
+		}
 
 		if (dt_buffered_flush(dtp, NULL, rec, aggdata,
 				      DTRACE_BUFDATA_AGGVAL) < 0)
diff --git a/test/unittest/aggs/tst.aggpercpu.sh b/test/unittest/aggs/tst.aggpercpu.sh
new file mode 100755
index 00000000..1f47aa3d
--- /dev/null
+++ b/test/unittest/aggs/tst.aggpercpu.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+#
+
+dtrace=$1
+
+DIRNAME="$tmpdir/aggpercpu.$$.$RANDOM"
+mkdir -p $DIRNAME
+cd $DIRNAME
+
+#
+# Run a D script that fires on every CPU,
+# forcing DTrace to aggregate results over all CPUs.
+#
+
+$dtrace -xaggpercpu -qn '
+    profile-600ms
+    {
+        printf("cpu %d\n", cpu);
+        @xcnt = count();
+        @xavg = avg(10 * cpu + 3);
+        @xstd = stddev(20 * cpu + 8);
+        @xmin = min(30 * cpu - 10);
+        @xmax = max(40 * cpu - 15);
+        @xsum = sum(50 * cpu);
+    }
+    tick-900ms
+    {
+        exit(0)
+    }
+' > dtrace.out
+if [ $? -ne 0 ]; then
+    echo DTrace failed
+    cat dtrace.out
+    exit 1
+fi
+
+#
+# Examine the results.
+#
+
+awk '
+    # The expected value for the aggregation is aggval.
+    # The expected value on a CPU is (m * cpu + b).
+    function check(label, aggval, m, b) {
+        # Check the aggregation over all CPUs.
+        getline;
+        print "check:", $0;
+        if ($1 != aggval) { printf("ERROR: %s, expect %d got %d\n", label, aggval, $1) };
+
+        # Check the per-CPU values.
+        for (i = 1; i <= ncpu; i++) {
+            getline;
+            print "check:", $0;
+            if (match($0, "^    \\[CPU ") != 1 ||
+                strtonum($2) != cpu[i] ||
+                strtonum($3) != m * cpu[i] + b)
+                printf("ERROR: %s, agg per cpu %d, line: %s\n", label, cpu[i], $0);
+        }
+    }
+
+    BEGIN {
+        xcnt = xavg = xstm = xstd = xsum = 0;
+        xmin = +1000000000;
+        xmax = -1000000000;
+        ncpu = 0;
+    }
+
+    # The first "cpu" lines provide the inputs to the aggregations.
+    /^cpu [0-9]*$/ {
+	cpu[++ncpu] = strtonum($NF);
+
+        xcnt += 1;
+
+        x = 10 * $2 + 3;
+        xavg += x;
+
+        x = 20 * $2 + 8;
+        xstm += x;
+        xstd += x * x;
+
+        x = 30 * $2 - 10;
+        if (xmin > x) { xmin = x };
+
+        x = 40 * $2 - 15;
+        if (xmax < x) { xmax = x };
+
+        x = 50 * $2;
+        xsum += x;
+
+        next;
+    }
+
+    # The remaining lines are the aggregation results.
+    {
+        # First we finish computing our estimates for avg and stddev.
+        # (The other results require no further action.)
+
+        xavg /= xcnt;
+
+        xstm /= xcnt;
+        xstd /= xcnt;
+        xstd -= xstm * xstm;
+        xstd = int(sqrt(xstd));
+
+        # Sort the cpus.
+
+        asort(cpu);
+
+        # Now read the results and compare.
+
+        check("cnt", xcnt,  0,   1);
+        check("avg", xavg, 10,   3);
+        check("std", xstd,  0,   0);
+        check("min", xmin, 30, -10);
+        check("max", xmax, 40, -15);
+        check("sum", xsum, 50,   0);
+
+        printf("done\n");
+    }
+' dtrace.out > awk.out
+if [ $? -ne 0 ]; then
+    echo awk failed
+    cat dtrace.out
+    exit 1
+fi
+
+if grep -q ERROR awk.out ; then
+    echo ERROR found
+    echo "=================================================="
+    cat dtrace.out
+    echo "=================================================="
+    cat awk.out
+    echo "=================================================="
+    exit 1
+fi
+
+echo success
+exit 0
-- 
2.18.4




More information about the DTrace-devel mailing list