[DTrace-devel] [PATCH] Add support for aggpercpu option
Kris Van Hees
kris.van.hees at oracle.com
Wed Jan 24 20:42:37 UTC 2024
On Tue, Nov 07, 2023 at 11:40:39PM -0500, eugene.loh at oracle.com wrote:
>
> The aggpercpu option appears in the documentation. Further, the
> current implementation has vestigial code that suggests the option
> was once supported.
>
> On the other hand, its behavior is not particularly described in
> the documentation. Further, it seems the option has no effect on
> Solaris or with the legacy Linux implementation. An easy workaround
> is to add cpu as a key to an aggregation; this workaround makes the
> feature superfluous.
>
> Another challenge is that it's hard to know what behavior makes most
> sense if aggpercpu is combined with other features. For quantize()
> output, should the same row values be used for all CPUs as for the
> overall aggregation? What should the output format look like if
> aggpercpu is combined with a printa() that has multiple aggregations?
>
> Just implement some reasonable version of aggpercpu support and leave
> intricate scenarios for the user to handle.
>
> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
Some suggested changes below.
> ---
> libdtrace/dt_aggregate.c | 45 ++++++++-
> libdtrace/dt_consume.c | 9 ++
> test/unittest/aggs/tst.aggpercpu.sh | 143 ++++++++++++++++++++++++++++
> 3 files changed, 192 insertions(+), 5 deletions(-)
> create mode 100755 test/unittest/aggs/tst.aggpercpu.sh
>
> diff --git a/libdtrace/dt_aggregate.c b/libdtrace/dt_aggregate.c
> index e6275fe3..3d4c9b4a 100644
> --- a/libdtrace/dt_aggregate.c
> +++ b/libdtrace/dt_aggregate.c
> @@ -462,19 +462,19 @@ dt_aggregate_clear_one(const dtrace_aggdata_t *agd, void *arg)
> *vals = INT64_MAX;
> if (agd->dtada_percpu)
> for (i = 0; i < max_cpus; i++)
> - *((uint64_t*)agd->dtada_percpu[i]) = INT64_MAX;
> + *((uint64_t*)&agd->dtada_percpu[i][rec->dtrd_offset]) = INT64_MAX;
> break;
> case DT_AGG_MAX:
> *vals = INT64_MIN;
> if (agd->dtada_percpu)
> for (i = 0; i < max_cpus; i++)
> - *((uint64_t*)agd->dtada_percpu[i]) = INT64_MIN;
> + *((uint64_t*)&agd->dtada_percpu[i][rec->dtrd_offset]) = INT64_MIN;
> break;
> default:
> memset(vals, 0, rec->dtrd_size);
> if (agd->dtada_percpu)
> for (i = 0; i < max_cpus; i++)
> - memset(agd->dtada_percpu[i], 0, rec->dtrd_size);
> + memset(&agd->dtada_percpu[i][rec->dtrd_offset], 0, rec->dtrd_size);
> break;
> }
>
> @@ -543,6 +543,9 @@ dt_aggregate_snap_one(dtrace_hdl_t *dtp, int aggid, int cpu, const char *key,
> assert(aid != NULL);
> dt_agg_one_agg(aid, &agg->dtagd_drecs[DT_AGGDATA_RECORD],
> agd->dtada_data, data);
> + if (agd->dtada_percpu != NULL)
> + dt_agg_one_agg(aid, &agg->dtagd_drecs[DT_AGGDATA_RECORD],
> + agd->dtada_percpu[cpu], data);
>
> return 0;
>
> @@ -578,8 +581,40 @@ hashnext:
>
> memcpy(ptr, data, size);
> agd->dtada_data = ptr;
> + if (dtp->dt_aggregate.dtat_flags & DTRACE_A_PERCPU) {
> + int i, max_cpus = dtp->dt_conf.max_cpuid + 1;
> + dtrace_recdesc_t *rec = &agg->dtagd_drecs[DT_AGGDATA_RECORD];
> + dtrace_actkind_t act = rec->dtrd_action;
> + uint32_t siz = rec->dtrd_size;
> +
> + agd->dtada_percpu = dt_alloc(dtp, max_cpus * sizeof(caddr_t));
> + if (agd->dtada_percpu == NULL)
> + return dt_set_errno(dtp, EDT_NOMEM);
> +
> + for (i = 0; i < max_cpus; i++) {
> + int64_t *vals;
> +
> + agd->dtada_percpu[i] = dt_alloc(dtp, size);
> + if (agd->dtada_percpu[i] == NULL)
> + return dt_set_errno(dtp, EDT_NOMEM);
> +
> + vals = (int64_t *) &agd->dtada_percpu[i][rec->dtrd_offset];
> + switch (act) {
> + case DT_AGG_MIN:
> + *vals = INT64_MAX;
> + break;
> + case DT_AGG_MAX:
> + *vals = INT64_MIN;
> + break;
> + default:
> + memset(vals, 0, siz);
> + break;
> + }
> + }
> + memcpy(agd->dtada_percpu[cpu], data, size);
> + }
I wonder whether it might be better to just allocate everything in this loop,
and then call dt_aggregate_clear_one() to do the actual initialization.
Alternatively, the initialization code could be split out of
dt_aggregate_clear_one() and called both from there and from here. The code
involves enough complexity in indirection and array indexing that I think
such a refactoring would be warranted.
> - /* Add the new entru to the hashtable. */
> + /* Add the new entry to the hashtable. */
> if (agh->dtah_hash[ndx] != NULL)
> agh->dtah_hash[ndx]->dtahe_prev = h;
>
> @@ -1564,7 +1599,7 @@ dtrace_aggregate_walk_joined(dtrace_hdl_t *dtp, dtrace_aggid_t *aggvars,
>
> if ((zdata = dt_zalloc(dtp, zsize)) == NULL) {
> /*
> - * If we failed to allocated some zero-filled data, we
> + * If we failed to allocate some zero-filled data, we
> * need to zero out the remaining dtada_data pointers
> * to prevent the wrong data from being freed below.
> */
> diff --git a/libdtrace/dt_consume.c b/libdtrace/dt_consume.c
> index a67c0f7b..1f2f137b 100644
> --- a/libdtrace/dt_consume.c
> +++ b/libdtrace/dt_consume.c
> @@ -1745,6 +1745,15 @@ dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
> if (dt_print_datum(dtp, fp, rec, aggdata->dtada_data, normal,
> agg->dtagd_sig) < 0)
> return DTRACE_AGGWALK_ERROR;
> + if (aggdata->dtada_percpu != NULL) {
> + int j, max_cpus = aggdata->dtada_hdl->dt_conf.max_cpuid + 1;
> + for (j = 0; j < max_cpus; j++) {
> + if (dt_printf(dtp, fp, "\nagg per cpu %d", aggdata->dtada_hdl->dt_conf.cpus[j].cpu_id) < 0)
I think this makes the output look rather weird. How about:
if (dt_printf(dtp, fp, "\n [CPU %d]", aggdata->dtada_hdl->dt_conf.cpus[j].cpu_id) < 0)
That makes it clearer, I think, that this is the data entry for that specific
CPU (belonging to the aggregation whose overall aggregated value was just
printed above). As far as I can see, that still works correctly with
aggregations that are indexed.
> + return DTRACE_AGGWALK_ERROR;
> + if (dt_print_datum(dtp, fp, rec, aggdata->dtada_percpu[j], normal, agg->dtagd_sig) < 0)
> + return DTRACE_AGGWALK_ERROR;
> + }
> + }
>
> if (dt_buffered_flush(dtp, NULL, rec, aggdata,
> DTRACE_BUFDATA_AGGVAL) < 0)
> diff --git a/test/unittest/aggs/tst.aggpercpu.sh b/test/unittest/aggs/tst.aggpercpu.sh
> new file mode 100755
> index 00000000..e78d6103
> --- /dev/null
> +++ b/test/unittest/aggs/tst.aggpercpu.sh
> @@ -0,0 +1,143 @@
> +#!/bin/bash
> +#
> +# Oracle Linux DTrace.
> +# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> +# Licensed under the Universal Permissive License v 1.0 as shown at
> +# http://oss.oracle.com/licenses/upl.
> +#
> +
> +dtrace=$1
> +
> +DIRNAME="$tmpdir/aggpercpu.$$.$RANDOM"
> +mkdir -p $DIRNAME
> +cd $DIRNAME
> +
> +#
> +# Run a D script that fires on every CPU,
> +# forcing DTrace to aggregate results over all CPUs.
> +#
> +
> +$dtrace -xaggpercpu -qn '
> + profile-600ms
> + {
> + printf("cpu %d\n", cpu);
> + @xcnt = count();
> + @xavg = avg(10 * cpu + 3);
> + @xstd = stddev(20 * cpu + 8);
> + @xmin = min(30 * cpu - 10);
> + @xmax = max(40 * cpu - 15);
> + @xsum = sum(50 * cpu);
> + }
> + tick-900ms
> + {
> + exit(0)
> + }
> +' > dtrace.out
> +if [ $? -ne 0 ]; then
> + echo DTrace failed
> + cat dtrace.out
> + exit 1
> +fi
> +
> +#
> +# Examine the results.
> +#
> +
> +awk '
> + # The expected value for the aggregation is aggval.
> + # The expected value on a CPU is (m * cpu + b).
> + function check(label, aggval, m, b) {
> + # Check the aggregation over all CPUs.
> + getline;
> + print "check:", $0;
> + if ($1 != aggval) { printf("ERROR: %s, expect %d got %d\n", label, aggval, $1) };
> +
> + # Check the per-CPU values.
> + for (i = 1; i <= ncpu; i++) {
> + getline;
> + print "check:", $0;
> + if (match($0, "^agg per cpu ") != 1 ||
> + strtonum($4) != cpu[i] ||
> + strtonum($5) != m * cpu[i] + b)
> + printf("ERROR: %s, agg per cpu %d, line: %s\n", label, cpu[i], $0);
> + }
> + }
> +
> + BEGIN {
> + xcnt = xavg = xstm = xstd = xsum = 0;
> + xmin = +1000000000;
> + xmax = -1000000000;
> + ncpu = 0;
> + }
> +
> + # The first "cpu" lines provide the inputs to the aggregations.
> + /^cpu [0-9]*$/ {
> + cpu[++ncpu] = strtonum($NF);
> +
> + xcnt += 1;
> +
> + x = 10 * $2 + 3;
> + xavg += x;
> +
> + x = 20 * $2 + 8;
> + xstm += x;
> + xstd += x * x;
> +
> + x = 30 * $2 - 10;
> + if (xmin > x) { xmin = x };
> +
> + x = 40 * $2 - 15;
> + if (xmax < x) { xmax = x };
> +
> + x = 50 * $2;
> + xsum += x;
> +
> + next;
> + }
> +
> + # The remaining lines are the aggregation results.
> + {
> + # First we finish computing our estimates for avg and stddev.
> + # (The other results require no further action.)
> +
> + xavg /= xcnt;
> +
> + xstm /= xcnt;
> + xstd /= xcnt;
> + xstd -= xstm * xstm;
> + xstd = int(sqrt(xstd));
> +
> + # Sort the cpus.
> +
> + asort(cpu);
> +
> + # Now read the results and compare.
> +
> + check("cnt", xcnt, 0, 1);
> + check("avg", xavg, 10, 3);
> + check("std", xstd, 0, 0);
> + check("min", xmin, 30, -10);
> + check("max", xmax, 40, -15);
> + check("sum", xsum, 50, 0);
> +
> + printf("done\n");
> + }
> +' dtrace.out > awk.out
> +if [ $? -ne 0 ]; then
> + echo awk failed
> + cat dtrace.out
> + exit 1
> +fi
> +
> +if grep -q ERROR awk.out ; then
> + echo ERROR found
> + echo "=================================================="
> + cat dtrace.out
> + echo "=================================================="
> + cat awk.out
> + echo "=================================================="
> + exit 1
> +fi
> +
> +echo success
> +exit 0
> --
> 2.18.4
>
>
More information about the DTrace-devel
mailing list