[DTrace-devel] [PATCH 39/61] Move normalization value to aggregation descriptions

Kris Van Hees kris.van.hees at oracle.com
Sat Aug 6 06:02:49 UTC 2022


On Fri, Jul 08, 2022 at 10:45:23AM -0400, eugene.loh--- via DTrace-devel wrote:
> From: Eugene Loh <eugene.loh at oracle.com>
> 
> The normalization value for an aggregation was kept in a consumer
> hash table dt_aggregate.dtat_hash.  An aggregation with keys, however,
> could have many entries (for different key values) in the hash table.
> So to update a normalization value, we must snapshot and walk the
> aggregations to look for all copies of the aggregation's
> normalization value that must be updated.
> 
> Store the normalization value in the aggregation description dt_adesc.
> This means only one copy of the value needs to be kept for any
> aggregation and that normalize() no longer requires a snapshot of
> the aggregation data.
> 
> Note that a snapshot in legacy DTrace meant pulling in deltas in
> the aggregations, while in the port to BPF a snapshot means reading
> in all data.
> 
> Note also that the semantics of normalize() had some undocumented
> (and presumably unintended) timing issues.  Consider
> 
>         # dtrace -qn '
>           tick-4s { @a    = sum(30);
>                     @b[4] = sum(30); }
>           tick-5s { normalize(@a, 3);
>                     normalize(@b, 3); }
>           tick-6s { @a    = sum(3000);
>                     @b[6] = sum(3000); exit(0); }'
> 
> We call normalize() on both @a and @b with the same value 3.  Some
> data arrives before the normalize() call, some after.  With legacy
> DTrace, the output data is
> 
>         @a            1010
>         @b[4]           10
>         @b[6]         3000
> 
> This patch changes the behavior so that a normalization value for
> an aggregation applies to all keys of that aggregation, in a manner
> that is less sensitive to such timing issues.

I think that this commit message could be trimmed quite a bit.  And the
change in behaviour should probably be explained elsewhere (and later,
when indexed aggregations are actually being introduced or have been
introduced).  It is a bit academimc at this point, and actually, moving the
normal into aggdesc does not change the behaviour for aggregations at this
point because only non-indexed aggregations exist right now.

So, I would just make this patch (and commit message) about moving the
normal into aggdesc.  And perhaps add a sentence to indicate that this will
have impact when indexed aggregations are added.  But leave the explanation
of that impact for a later patch.

> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
> ---
>  include/dtrace/metadesc.h |  1 +
>  libdtrace/dt_aggregate.c  |  6 +++--
>  libdtrace/dt_consume.c    | 48 +++++++++------------------------------
>  libdtrace/dt_map.c        |  1 +
>  libdtrace/dt_printf.c     |  2 +-
>  libdtrace/dtrace.h        |  1 -
>  6 files changed, 18 insertions(+), 41 deletions(-)
> 
> diff --git a/include/dtrace/metadesc.h b/include/dtrace/metadesc.h
> index 0f12c4f3..97c11b7a 100644
> --- a/include/dtrace/metadesc.h
> +++ b/include/dtrace/metadesc.h
> @@ -56,6 +56,7 @@ typedef struct dtrace_aggdesc {
>  	int dtagd_flags;			/* aggregation flags */
>  	dtrace_aggid_t dtagd_id;		/* aggregation ID */
>  	uint64_t dtagd_sig;			/* aggregation signature */
> +	uint64_t dtagd_normal;			/* aggregation normalization */
>  	uint32_t dtagd_size;			/* size in bytes */
>  	int dtagd_nrecs;			/* number of records */
>  	dtrace_recdesc_t *dtagd_recs;		/* record descriptions */
> diff --git a/libdtrace/dt_aggregate.c b/libdtrace/dt_aggregate.c
> index 4a675366..13595515 100644
> --- a/libdtrace/dt_aggregate.c
> +++ b/libdtrace/dt_aggregate.c
> @@ -513,7 +513,6 @@ dt_aggregate_snap_one(dt_idhash_t *dhp, dt_ident_t *aid, dt_snapstate_t *st)
>  	agd->dtada_size = realsz;
>  	agd->dtada_desc = agg;
>  	agd->dtada_hdl = st->dtp;
> -	agd->dtada_normal = 1;
>  
>  	h->dtahe_hval = hval;
>  	h->dtahe_size = realsz;
> @@ -1119,12 +1118,16 @@ dt_aggwalk_rval(dtrace_hdl_t *dtp, dt_ahashent_t *h, int rval)
>  		return dt_set_errno(dtp, EDT_DIRABORT);
>  
>  	case DTRACE_AGGWALK_NORMALIZE:
> +#if 1
> +		assert(0);
> +#else
>  		if (h->dtahe_data.dtada_normal == 0) {
>  			h->dtahe_data.dtada_normal = 1;
>  			return dt_set_errno(dtp, EDT_BADRVAL);
>  		}
>  
>  		return 0;
> +#endif

Hm, what is the purpose of this?  I assume you added this to catch any
possible accidental calls to the agg walker for normalize().  This should be
removed from the patch, or rather, the patch should remove the
DTRACE_AGGWALK_NORMALIZE walker support.

>  	case DTRACE_AGGWALK_REMOVE: {
>  		dtrace_aggdata_t *aggdata = &h->dtahe_data;
> @@ -1526,7 +1529,6 @@ dtrace_aggregate_walk_joined(dtrace_hdl_t *dtp, dtrace_aggid_t *aggvars,
>  				aggdata->dtada_size = agg->dtagd_size;
>  				aggdata->dtada_desc = agg;
>  				aggdata->dtada_hdl = dtp;
> -				aggdata->dtada_normal = 1;
>  				zaggdata[i].dtahe_hval = 0;
>  				zaggdata[i].dtahe_size = agg->dtagd_size;
>  				break;
> diff --git a/libdtrace/dt_consume.c b/libdtrace/dt_consume.c
> index dbfcf9de..41f99970 100644
> --- a/libdtrace/dt_consume.c
> +++ b/libdtrace/dt_consume.c
> @@ -1455,37 +1455,16 @@ dt_print_pcap(dtrace_hdl_t *dtp, FILE *fp, dtrace_recdesc_t *rec,
>  	return 0;
>  }
>  
> -typedef struct dt_normal {
> -	dtrace_aggid_t	dtnd_id;
> -	uint64_t	dtnd_normal;
> -} dt_normal_t;
> -
> -static int
> -dt_normalize_agg(const dtrace_aggdata_t *aggdata, void *arg)
> -{
> -	dt_normal_t		*normal = arg;
> -	dtrace_aggdesc_t	*agg = aggdata->dtada_desc;
> -	dtrace_aggid_t		id = normal->dtnd_id;
> -
> -	if (agg->dtagd_nrecs == 0)
> -		return DTRACE_AGGWALK_NEXT;
> -
> -	if (agg->dtagd_varid != id)
> -		return DTRACE_AGGWALK_NEXT;
> -
> -	((dtrace_aggdata_t *)aggdata)->dtada_normal = normal->dtnd_normal;
> -	return DTRACE_AGGWALK_NORMALIZE;
> -}
> -
>  /*
>   * This function is also used to denormalize aggregations, because that is
> - * equivalent to normalizing them using normal = 1.
> + * equivalent to normalizing with value = 1.

I'd rather keep the original terminology.  After all, the proper term to
refer to the value used for normalization *is* "normal".

>   */
>  static int
>  dt_normalize(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
>  {
>  	int		act = rec->dtrd_arg;
> -	dt_normal_t	normal;
> +	dtrace_aggid_t	id;
> +	uint64_t	value;

	uint64_t	normal;

>  	caddr_t		addr;
>  
>  	/*
> @@ -1497,7 +1476,7 @@ dt_normalize(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
>  	if (rec->dtrd_size != sizeof(dtrace_aggid_t))
>  		return dt_set_errno(dtp, EDT_BADNORMAL);
>  
> -	normal.dtnd_id = *((dtrace_aggid_t *)addr);
> +	id = *((dtrace_aggid_t *)addr);
>  
>  	if (act == DT_ACT_NORMALIZE) {
>  		rec++;
> @@ -1512,29 +1491,24 @@ dt_normalize(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
>  
>  		switch (rec->dtrd_size) {
>  		case sizeof(uint64_t):
> -			normal.dtnd_normal = *((uint64_t *)addr);
> +			value = *((uint64_t *)addr);

			normal = ...

(and same for the rest)

>  			break;
>  		case sizeof(uint32_t):
> -			normal.dtnd_normal = *((uint32_t *)addr);
> +			value = *((uint32_t *)addr);
>  			break;
>  		case sizeof(uint16_t):
> -			normal.dtnd_normal = *((uint16_t *)addr);
> +			value = *((uint16_t *)addr);
>  			break;
>  		case sizeof(uint8_t):
> -			normal.dtnd_normal = *((uint8_t *)addr);
> +			value = *((uint8_t *)addr);
>  			break;
>  		default:
>  			return dt_set_errno(dtp, EDT_BADNORMAL);
>  		}
>  	} else
> -		normal.dtnd_normal = 1;
> +		value = 1;
>  
> -	/*
> -	 * Retrieve a snapshot of the aggregation data, and apply the normal
> -	 * to all aggregations that need it.
> -	 */
> -	dtrace_aggregate_snap(dtp);
> -	dtrace_aggregate_walk(dtp, dt_normalize_agg, &normal);
> +	dtp->dt_adesc[id]->dtagd_normal = value;
>  
>  	return 0;
>  }
> @@ -1766,7 +1740,7 @@ dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
>  		size = agg->dtagd_size;
>  
>  		assert(DTRACEACT_ISAGG(rec->dtrd_action));
> -		normal = aggdata->dtada_normal;
> +		normal = aggdata->dtada_desc->dtagd_normal;
>  
>  		if (dt_print_datum(dtp, fp, rec, addr, size, normal,
>  				   agg->dtagd_sig) < 0)
> diff --git a/libdtrace/dt_map.c b/libdtrace/dt_map.c
> index ea635a16..96660fd0 100644
> --- a/libdtrace/dt_map.c
> +++ b/libdtrace/dt_map.c
> @@ -282,6 +282,7 @@ dt_aggid_add(dtrace_hdl_t *dtp, const dt_ident_t *aid)
>  	agg->dtagd_id = id;
>  	agg->dtagd_name = aid->di_name;
>  	agg->dtagd_sig = ((dt_idsig_t *)aid->di_data)->dis_auxinfo;
> +	agg->dtagd_normal = 1;
>  	agg->dtagd_varid = aid->di_id;
>  	agg->dtagd_size = (aid->di_size - sizeof(uint64_t)) / DT_AGG_NUM_COPIES;
>  	agg->dtagd_nrecs = agg->dtagd_size / sizeof(uint64_t);
> diff --git a/libdtrace/dt_printf.c b/libdtrace/dt_printf.c
> index f0c09178..e7171dcc 100644
> --- a/libdtrace/dt_printf.c
> +++ b/libdtrace/dt_printf.c
> @@ -1383,7 +1383,7 @@ dt_printf_format(dtrace_hdl_t *dtp, FILE *fp, const dt_pfargv_t *pfv,
>  			rec = &agg->dtagd_recs[aggrec];
>  			addr = aggdata->dtada_data;
>  			limit = addr + aggdata->dtada_size;
> -			normal = aggdata->dtada_normal;
> +			normal = agg->dtagd_normal;
>  			size = agg->dtagd_size;
>  			sig = agg->dtagd_sig;
>  			flags = DTRACE_BUFDATA_AGGVAL;
> diff --git a/libdtrace/dtrace.h b/libdtrace/dtrace.h
> index a05cc4a9..8a3daf6a 100644
> --- a/libdtrace/dtrace.h
> +++ b/libdtrace/dtrace.h
> @@ -364,7 +364,6 @@ struct dtrace_aggdata {
>  	dtrace_hdl_t *dtada_hdl;		/* handle to DTrace library */
>  	dtrace_aggdesc_t *dtada_desc;		/* aggregation description */
>  	caddr_t dtada_data;			/* pointer to raw data */
> -	uint64_t dtada_normal;			/* the normal -- 1 for denorm */
>  	size_t dtada_size;			/* total size of the data */
>  	caddr_t *dtada_percpu;			/* per CPU data, if avail */
>  };
> -- 
> 2.18.4
> 
> 
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel



More information about the DTrace-devel mailing list