[DTrace-devel] [PATCH 50/61] Remove DT_AGG_NUM_COPIES

eugene.loh at oracle.com eugene.loh at oracle.com
Fri Jul 8 14:45:34 UTC 2022


From: Eugene Loh <eugene.loh at oracle.com>

Originally, the DTrace port to BPF used two copies of aggregations,
to allow lockless access to aggregation data for producer and consumer.

Then DT_AGG_NUM_COPIES was introduced and set to 1 since the consumer
could not mmap BPF maps;  rather, it read data atomically using bpf()
system calls.  The use of twice as much locked memory for aggregations
was not justified.

Eliminate DT_AGG_NUM_COPIES, committing to the single-copy approach
and simplifying the code.

The dual-copy version used a latch sequence number.  It is now simply
a data counter;  importantly, a nonzero value indicates the presence
of data.

Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
 libdtrace/dt_aggregate.c | 13 +++----
 libdtrace/dt_bpf.c       |  4 +-
 libdtrace/dt_cg.c        | 82 ++++------------------------------------
 libdtrace/dt_impl.h      | 14 -------
 libdtrace/dt_map.c       |  2 +-
 5 files changed, 14 insertions(+), 101 deletions(-)

diff --git a/libdtrace/dt_aggregate.c b/libdtrace/dt_aggregate.c
index 8fa2755d..10df98da 100644
--- a/libdtrace/dt_aggregate.c
+++ b/libdtrace/dt_aggregate.c
@@ -460,14 +460,14 @@ dt_aggregate_snap_one(dt_idhash_t *dhp, dt_ident_t *aid, dt_snapstate_t *st)
 
 	nrecs = agg->dtagd_nrecs;
 
-	/* point to the latch sequence number */
+	/* point to the data counter */
 	src = (int64_t *)(st->buf + aid->di_offset);
 
-	/* skip it if latch sequence number is 0 */
+	/* skip it if data counter is 0 */
 	if (*src == 0)
 		return 0;
 
-	/* advance past the latch sequence number */
+	/* advance past the data counter */
 	src++;
 
 	datasz = agg->dtagd_recs[nrecs - 1].dtrd_size;
@@ -980,7 +980,7 @@ dt_aggregate_bundlecmp(const void *lhs, const void *rhs)
  * set for min(), so that any other value fed to the functions will register
  * properly.
  *
- * The latch sequence number of the first aggregation is used as a flag to
+ * The data counter of the first aggregation is used as a flag to
  * indicate whether an initial value was stored for any aggregation.
  */
 static int
@@ -1003,12 +1003,9 @@ init_minmax(dt_idhash_t *dhp, dt_ident_t *aid, char *buf)
 	/* Indicate that we are setting initial values. */
 	*(int64_t *)buf = 1;
 
-	/* skip ptr[0], it is the latch sequence number */
+	/* skip ptr[0], it is the data counter */
 	ptr = (int64_t *)(buf + aid->di_offset);
 	ptr[1] = value;
-#if DT_AGG_NUM_COPIES == 2
-	ptr[2] = value;
-#endif
 
 	return 0;
 }
diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
index 8a5149d4..8a739891 100644
--- a/libdtrace/dt_bpf.c
+++ b/libdtrace/dt_bpf.c
@@ -196,9 +196,7 @@ populate_probes_map(dtrace_hdl_t *dtp, int fd)
  *		dt_state.h.
  * - aggs:	Aggregation data buffer map, associated with each CPU.  The
  *		map is implemented as a global per-CPU map with a singleton
- *		element (key 0).  Every aggregation is stored with two copies
- *		of its data to provide a lockless latch-based mechanism for
- *		atomic reading and writing.
+ *		element (key 0).
  * - specs:     Map associating speculation IDs with a dt_bpf_specs_t struct
  *		giving the number of buffers speculated into for this
  *		speculation, and the number drained by userspace.
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index fdd32ce9..4a22c1db 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -5437,25 +5437,20 @@ dt_cg_node(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
  * Macro to set the storage data (offset and size) for the aggregation
  * identifier (if not set yet).
  *
- * We make room for a latch sequence number of sizeof(uint64_t).
- *
- * If DT_AGG_NUM_COPIES==2, we consume twice the required data size for
- * a dual-copy mechanism to provide lockless, write-wait-free operation.
+ * We make room for a data counter of sizeof(uint64_t).
  */
 #define DT_CG_AGG_SET_STORAGE(aid, sz) \
 	do { \
 		if ((aid)->di_offset == -1) \
 			dt_ident_set_storage((aid), sizeof(uint64_t), \
-					     sizeof(uint64_t) + \
-					     DT_AGG_NUM_COPIES * (sz)); \
+					     sizeof(uint64_t) + (sz)); \
 	} while (0)
 
-#if DT_AGG_NUM_COPIES == 1
 /*
  * Return a register that holds a pointer to the aggregation data to be
  * updated.
  *
- * We update the latch seqcount (first value in the aggregation) to
+ * We update the data counter (first value in the aggregation) to
  * signal that the aggregation has data.  The location of data for the
  * given aggregation is stored in the register returned from this function.
  */
@@ -5496,80 +5491,17 @@ dt_cg_agg_buf_prepare(dt_ident_t *aid, int size, dt_irlist_t *dlp,
 
 	return rptr;
 }
-#else
-/*
- * Prepare the aggregation buffer for updating for a specific aggregation, and
- * return a register that holds a pointer to the aggregation data to be
- * updated.
- *
- * We update the latch seqcount (first value in the aggregation) to
- * signal that reads should be directed to the alternate copy of the data.  We
- * then determine the location of data for the given aggregation that can be
- * updated.  This value is stored in the register returned from this function.
- */
-static int
-dt_cg_agg_buf_prepare(dt_ident_t *aid, int size, dt_irlist_t *dlp,
-		      dt_regset_t *drp)
-{
-	int		ragd, roff;
-
-	TRACE_REGSET("            Prep: Begin");
-
-	dt_regset_xalloc(drp, BPF_REG_0);
-	ragd = dt_regset_alloc(drp);
-	roff = dt_regset_alloc(drp);
-	assert(ragd != -1 && roff != -1);
-
-	/*
-	 *	agd = dctx->agg;	// lddw %r0, [%fp + DT_STK_DCTX]
-	 *				// lddw %ragd, [%r0 + DCTX_AGG]
-	 *	agd += aid->di_offset	// %ragd += aid->di_offset
-	 */
-	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_FP, DT_STK_DCTX));
-	emit(dlp, BPF_LOAD(BPF_DW, ragd, BPF_REG_0, DCTX_AGG));
-	emit(dlp, BPF_ALU64_IMM(BPF_ADD, ragd, aid->di_offset));
-
-	/*
-	 *	off = (*agd & 1) * size	// lddw %roff, [%ragd + 0]
-	 *	      + sizeof(uint64_t);	// and %roff, 1
-	 *				// mul %roff, size
-	 *				// add %roff, sizeof(uint64_t)
-	 *	(*agd)++;		// mov %r0, 1
-	 *				// xadd [%ragd + 0], %r0
-	 *	agd += off;		// add %ragd, %roff
-	 */
-	emit(dlp, BPF_LOAD(BPF_DW, roff, ragd, 0));
-	emit(dlp, BPF_ALU64_IMM(BPF_AND, roff, 1));
-	emit(dlp, BPF_ALU64_IMM(BPF_MUL, roff, size));
-	emit(dlp, BPF_ALU64_IMM(BPF_ADD, roff, sizeof(uint64_t)));
-	emit(dlp, BPF_MOV_IMM(BPF_REG_0, 1));
-	emit(dlp, BPF_XADD_REG(BPF_DW, ragd, 0, BPF_REG_0));
-	emit(dlp, BPF_ALU64_REG(BPF_ADD, ragd, roff));
-
-	dt_regset_free(drp, roff);
-	dt_regset_free(drp, BPF_REG_0);
-
-	TRACE_REGSET("            Prep: End  ");
-
-	return ragd;
-}
-#endif
 
 #define DT_CG_AGG_IMPL(aid, sz, dlp, drp, f, ...) \
 	do {								\
-		int	i, dreg;					\
+		int	dreg;						\
 									\
 		TRACE_REGSET("        Upd: Begin ");			\
 									\
-		for (i = 0; i < DT_AGG_NUM_COPIES; i++) {		\
-			if (i == 1)					\
-				TRACE_REGSET("        Upd: Switch");	\
-									\
-			dreg = dt_cg_agg_buf_prepare((aid), (sz), (dlp), (drp));\
+		dreg = dt_cg_agg_buf_prepare((aid), (sz), (dlp), (drp));\
 									\
-			(f)((dlp), (drp), dreg, ## __VA_ARGS__);	\
-			dt_regset_free((drp), dreg);			\
-		}							\
+		(f)((dlp), (drp), dreg, ## __VA_ARGS__);		\
+		dt_regset_free((drp), dreg);				\
 									\
 		TRACE_REGSET("        Upd: End   ");			\
 	} while (0)
diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
index bf31bd76..be2b0121 100644
--- a/libdtrace/dt_impl.h
+++ b/libdtrace/dt_impl.h
@@ -205,20 +205,6 @@ typedef struct dt_tstring {
 	int		in_use;			/* In use (1) or not (0) */
 } dt_tstring_t;
 
-/*
- * To provide a lock-free aggregation write mechanism for the producer,
- * two copies of each aggregation can be used.  A latch sequence number
- * for each aggregation can be incremented to indicate to the consumer
- * which copy should be read and whether a copy has changed during reading.
- *
- * Until BPF maps can be mmapped, however, we cannot take advantage of
- * this technique.  Using only a single-copy, of course, saves precious
- * BPF map space.
- *
- * Pick DT_AGG_NUM_COPIES to be either 1 or 2.
- */
-#define DT_AGG_NUM_COPIES 1
-
 typedef struct dt_aggregate {
 	char **dtat_cpu_buf;		/* per-CPU agg snapshot buffers */
 	char *dtat_buf;			/* aggregation snapshot buffer */
diff --git a/libdtrace/dt_map.c b/libdtrace/dt_map.c
index bec12bee..d6c87ad2 100644
--- a/libdtrace/dt_map.c
+++ b/libdtrace/dt_map.c
@@ -296,7 +296,7 @@ dt_aggid_add(dtrace_hdl_t *dtp, const dt_ident_t *aid, int nrecs)
 
 	/* prepopulate some data fields */
 	recs[nrecs - 1].dtrd_action = ((dt_ident_t *) aid->di_iarg)->di_id;
-	recs[nrecs - 1].dtrd_size = (aid->di_size - sizeof(uint64_t)) / DT_AGG_NUM_COPIES;
+	recs[nrecs - 1].dtrd_size = aid->di_size - sizeof(uint64_t);
 
 	/* update dt_aggsiz */
 	if (dtp->dt_aggsiz < aid->di_size)
-- 
2.18.4




More information about the DTrace-devel mailing list