[DTrace-devel] [PATCH 50/61] Remove DT_AGG_NUM_COPIES

Kris Van Hees kris.van.hees at oracle.com
Tue Aug 16 13:46:20 UTC 2022


On Fri, Jul 08, 2022 at 10:45:34AM -0400, eugene.loh--- via DTrace-devel wrote:
> From: Eugene Loh <eugene.loh at oracle.com>
> 
> Originally, the DTrace port to BPF used two copies of aggregations,
> to allow lockless access to aggregation data for producer and consumer.
> 
> Then DT_AGG_NUM_COPIES was introduced and set to 1 since the consumer
> could not mmap BPF maps;  rather, it read data atomically using bpf()
> system calls.  The use of twice as much locked memory for aggregations
> was not justified.
> 
> Eliminate DT_AGG_NUM_COPIES, committing to the single-copy approach
> and simplifying the code.
> 
> The dual-copy version used a latch sequence number.  It is now simply
> a data counter;  importantly, a nonzero value indicates the presence
> of data.
> 
> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>

Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>

... queued for dev

> ---
>  libdtrace/dt_aggregate.c | 13 +++----
>  libdtrace/dt_bpf.c       |  4 +-
>  libdtrace/dt_cg.c        | 82 ++++------------------------------------
>  libdtrace/dt_impl.h      | 14 -------
>  libdtrace/dt_map.c       |  2 +-
>  5 files changed, 14 insertions(+), 101 deletions(-)
> 
> diff --git a/libdtrace/dt_aggregate.c b/libdtrace/dt_aggregate.c
> index 8fa2755d..10df98da 100644
> --- a/libdtrace/dt_aggregate.c
> +++ b/libdtrace/dt_aggregate.c
> @@ -460,14 +460,14 @@ dt_aggregate_snap_one(dt_idhash_t *dhp, dt_ident_t *aid, dt_snapstate_t *st)
>  
>  	nrecs = agg->dtagd_nrecs;
>  
> -	/* point to the latch sequence number */
> +	/* point to the data counter */
>  	src = (int64_t *)(st->buf + aid->di_offset);
>  
> -	/* skip it if latch sequence number is 0 */
> +	/* skip it if data counter is 0 */
>  	if (*src == 0)
>  		return 0;
>  
> -	/* advance past the latch sequence number */
> +	/* advance past the data counter */
>  	src++;
>  
>  	datasz = agg->dtagd_recs[nrecs - 1].dtrd_size;
> @@ -980,7 +980,7 @@ dt_aggregate_bundlecmp(const void *lhs, const void *rhs)
>   * set for min(), so that any other value fed to the functions will register
>   * properly.
>   *
> - * The latch sequence number of the first aggregation is used as a flag to
> + * The data counter of the first aggregation is used as a flag to
>   * indicate whether an initial value was stored for any aggregation.
>   */
>  static int
> @@ -1003,12 +1003,9 @@ init_minmax(dt_idhash_t *dhp, dt_ident_t *aid, char *buf)
>  	/* Indicate that we are setting initial values. */
>  	*(int64_t *)buf = 1;
>  
> -	/* skip ptr[0], it is the latch sequence number */
> +	/* skip ptr[0], it is the data counter */
>  	ptr = (int64_t *)(buf + aid->di_offset);
>  	ptr[1] = value;
> -#if DT_AGG_NUM_COPIES == 2
> -	ptr[2] = value;
> -#endif
>  
>  	return 0;
>  }
> diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
> index 8a5149d4..8a739891 100644
> --- a/libdtrace/dt_bpf.c
> +++ b/libdtrace/dt_bpf.c
> @@ -196,9 +196,7 @@ populate_probes_map(dtrace_hdl_t *dtp, int fd)
>   *		dt_state.h.
>   * - aggs:	Aggregation data buffer map, associated with each CPU.  The
>   *		map is implemented as a global per-CPU map with a singleton
> - *		element (key 0).  Every aggregation is stored with two copies
> - *		of its data to provide a lockless latch-based mechanism for
> - *		atomic reading and writing.
> + *		element (key 0).
>   * - specs:     Map associating speculation IDs with a dt_bpf_specs_t struct
>   *		giving the number of buffers speculated into for this
>   *		speculation, and the number drained by userspace.
> diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> index fdd32ce9..4a22c1db 100644
> --- a/libdtrace/dt_cg.c
> +++ b/libdtrace/dt_cg.c
> @@ -5437,25 +5437,20 @@ dt_cg_node(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
>   * Macro to set the storage data (offset and size) for the aggregation
>   * identifier (if not set yet).
>   *
> - * We make room for a latch sequence number of sizeof(uint64_t).
> - *
> - * If DT_AGG_NUM_COPIES==2, we consume twice the required data size for
> - * a dual-copy mechanism to provide lockless, write-wait-free operation.
> + * We make room for a data counter of sizeof(uint64_t).
>   */
>  #define DT_CG_AGG_SET_STORAGE(aid, sz) \
>  	do { \
>  		if ((aid)->di_offset == -1) \
>  			dt_ident_set_storage((aid), sizeof(uint64_t), \
> -					     sizeof(uint64_t) + \
> -					     DT_AGG_NUM_COPIES * (sz)); \
> +					     sizeof(uint64_t) + (sz)); \
>  	} while (0)
>  
> -#if DT_AGG_NUM_COPIES == 1
>  /*
>   * Return a register that holds a pointer to the aggregation data to be
>   * updated.
>   *
> - * We update the latch seqcount (first value in the aggregation) to
> + * We update the data counter (first value in the aggregation) to
>   * signal that the aggregation has data.  The location of data for the
>   * given aggregation is stored in the register returned from this function.
>   */
> @@ -5496,80 +5491,17 @@ dt_cg_agg_buf_prepare(dt_ident_t *aid, int size, dt_irlist_t *dlp,
>  
>  	return rptr;
>  }
> -#else
> -/*
> - * Prepare the aggregation buffer for updating for a specific aggregation, and
> - * return a register that holds a pointer to the aggregation data to be
> - * updated.
> - *
> - * We update the latch seqcount (first value in the aggregation) to
> - * signal that reads should be directed to the alternate copy of the data.  We
> - * then determine the location of data for the given aggregation that can be
> - * updated.  This value is stored in the register returned from this function.
> - */
> -static int
> -dt_cg_agg_buf_prepare(dt_ident_t *aid, int size, dt_irlist_t *dlp,
> -		      dt_regset_t *drp)
> -{
> -	int		ragd, roff;
> -
> -	TRACE_REGSET("            Prep: Begin");
> -
> -	dt_regset_xalloc(drp, BPF_REG_0);
> -	ragd = dt_regset_alloc(drp);
> -	roff = dt_regset_alloc(drp);
> -	assert(ragd != -1 && roff != -1);
> -
> -	/*
> -	 *	agd = dctx->agg;	// lddw %r0, [%fp + DT_STK_DCTX]
> -	 *				// lddw %ragd, [%r0 + DCTX_AGG]
> -	 *	agd += aid->di_offset	// %ragd += aid->di_offset
> -	 */
> -	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_FP, DT_STK_DCTX));
> -	emit(dlp, BPF_LOAD(BPF_DW, ragd, BPF_REG_0, DCTX_AGG));
> -	emit(dlp, BPF_ALU64_IMM(BPF_ADD, ragd, aid->di_offset));
> -
> -	/*
> -	 *	off = (*agd & 1) * size	// lddw %roff, [%ragd + 0]
> -	 *	      + sizeof(uint64_t);	// and %roff, 1
> -	 *				// mul %roff, size
> -	 *				// add %roff, sizeof(uint64_t)
> -	 *	(*agd)++;		// mov %r0, 1
> -	 *				// xadd [%ragd + 0], %r0
> -	 *	agd += off;		// add %ragd, %roff
> -	 */
> -	emit(dlp, BPF_LOAD(BPF_DW, roff, ragd, 0));
> -	emit(dlp, BPF_ALU64_IMM(BPF_AND, roff, 1));
> -	emit(dlp, BPF_ALU64_IMM(BPF_MUL, roff, size));
> -	emit(dlp, BPF_ALU64_IMM(BPF_ADD, roff, sizeof(uint64_t)));
> -	emit(dlp, BPF_MOV_IMM(BPF_REG_0, 1));
> -	emit(dlp, BPF_XADD_REG(BPF_DW, ragd, 0, BPF_REG_0));
> -	emit(dlp, BPF_ALU64_REG(BPF_ADD, ragd, roff));
> -
> -	dt_regset_free(drp, roff);
> -	dt_regset_free(drp, BPF_REG_0);
> -
> -	TRACE_REGSET("            Prep: End  ");
> -
> -	return ragd;
> -}
> -#endif
>  
>  #define DT_CG_AGG_IMPL(aid, sz, dlp, drp, f, ...) \
>  	do {								\
> -		int	i, dreg;					\
> +		int	dreg;						\
>  									\
>  		TRACE_REGSET("        Upd: Begin ");			\
>  									\
> -		for (i = 0; i < DT_AGG_NUM_COPIES; i++) {		\
> -			if (i == 1)					\
> -				TRACE_REGSET("        Upd: Switch");	\
> -									\
> -			dreg = dt_cg_agg_buf_prepare((aid), (sz), (dlp), (drp));\
> +		dreg = dt_cg_agg_buf_prepare((aid), (sz), (dlp), (drp));\
>  									\
> -			(f)((dlp), (drp), dreg, ## __VA_ARGS__);	\
> -			dt_regset_free((drp), dreg);			\
> -		}							\
> +		(f)((dlp), (drp), dreg, ## __VA_ARGS__);		\
> +		dt_regset_free((drp), dreg);				\
>  									\
>  		TRACE_REGSET("        Upd: End   ");			\
>  	} while (0)
> diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
> index bf31bd76..be2b0121 100644
> --- a/libdtrace/dt_impl.h
> +++ b/libdtrace/dt_impl.h
> @@ -205,20 +205,6 @@ typedef struct dt_tstring {
>  	int		in_use;			/* In use (1) or not (0) */
>  } dt_tstring_t;
>  
> -/*
> - * To provide a lock-free aggregation write mechanism for the producer,
> - * two copies of each aggregation can be used.  A latch sequence number
> - * for each aggregation can be incremented to indicate to the consumer
> - * which copy should be read and whether a copy has changed during reading.
> - *
> - * Until BPF maps can be mmapped, however, we cannot take advantage of
> - * this technique.  Using only a single-copy, of course, saves precious
> - * BPF map space.
> - *
> - * Pick DT_AGG_NUM_COPIES to be either 1 or 2.
> - */
> -#define DT_AGG_NUM_COPIES 1
> -
>  typedef struct dt_aggregate {
>  	char **dtat_cpu_buf;		/* per-CPU agg snapshot buffers */
>  	char *dtat_buf;			/* aggregation snapshot buffer */
> diff --git a/libdtrace/dt_map.c b/libdtrace/dt_map.c
> index bec12bee..d6c87ad2 100644
> --- a/libdtrace/dt_map.c
> +++ b/libdtrace/dt_map.c
> @@ -296,7 +296,7 @@ dt_aggid_add(dtrace_hdl_t *dtp, const dt_ident_t *aid, int nrecs)
>  
>  	/* prepopulate some data fields */
>  	recs[nrecs - 1].dtrd_action = ((dt_ident_t *) aid->di_iarg)->di_id;
> -	recs[nrecs - 1].dtrd_size = (aid->di_size - sizeof(uint64_t)) / DT_AGG_NUM_COPIES;
> +	recs[nrecs - 1].dtrd_size = aid->di_size - sizeof(uint64_t);
>  
>  	/* update dt_aggsiz */
>  	if (dtp->dt_aggsiz < aid->di_size)
> -- 
> 2.18.4
> 
> 
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel



More information about the DTrace-devel mailing list