[DTrace-devel] [PATCH 50/61] Remove DT_AGG_NUM_COPIES
eugene.loh at oracle.com
eugene.loh at oracle.com
Fri Jul 8 14:45:34 UTC 2022
From: Eugene Loh <eugene.loh at oracle.com>
Originally, the DTrace port to BPF used two copies of aggregations,
to allow lockless access to aggregation data for producer and consumer.
Then DT_AGG_NUM_COPIES was introduced and set to 1 since the consumer
could not mmap BPF maps; rather, it read data atomically using bpf()
system calls. The use of twice as much locked memory for aggregations
was not justified.
Eliminate DT_AGG_NUM_COPIES, committing to the single-copy approach
and simplifying the code.
The dual-copy version used a latch sequence number. It is now simply
a data counter; importantly, a nonzero value indicates the presence
of data.
Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
libdtrace/dt_aggregate.c | 13 +++----
libdtrace/dt_bpf.c | 4 +-
libdtrace/dt_cg.c | 82 ++++------------------------------------
libdtrace/dt_impl.h | 14 -------
libdtrace/dt_map.c | 2 +-
5 files changed, 14 insertions(+), 101 deletions(-)
diff --git a/libdtrace/dt_aggregate.c b/libdtrace/dt_aggregate.c
index 8fa2755d..10df98da 100644
--- a/libdtrace/dt_aggregate.c
+++ b/libdtrace/dt_aggregate.c
@@ -460,14 +460,14 @@ dt_aggregate_snap_one(dt_idhash_t *dhp, dt_ident_t *aid, dt_snapstate_t *st)
nrecs = agg->dtagd_nrecs;
- /* point to the latch sequence number */
+ /* point to the data counter */
src = (int64_t *)(st->buf + aid->di_offset);
- /* skip it if latch sequence number is 0 */
+ /* skip it if data counter is 0 */
if (*src == 0)
return 0;
- /* advance past the latch sequence number */
+ /* advance past the data counter */
src++;
datasz = agg->dtagd_recs[nrecs - 1].dtrd_size;
@@ -980,7 +980,7 @@ dt_aggregate_bundlecmp(const void *lhs, const void *rhs)
* set for min(), so that any other value fed to the functions will register
* properly.
*
- * The latch sequence number of the first aggregation is used as a flag to
+ * The data counter of the first aggregation is used as a flag to
* indicate whether an initial value was stored for any aggregation.
*/
static int
@@ -1003,12 +1003,9 @@ init_minmax(dt_idhash_t *dhp, dt_ident_t *aid, char *buf)
/* Indicate that we are setting initial values. */
*(int64_t *)buf = 1;
- /* skip ptr[0], it is the latch sequence number */
+ /* skip ptr[0], it is the data counter */
ptr = (int64_t *)(buf + aid->di_offset);
ptr[1] = value;
-#if DT_AGG_NUM_COPIES == 2
- ptr[2] = value;
-#endif
return 0;
}
diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
index 8a5149d4..8a739891 100644
--- a/libdtrace/dt_bpf.c
+++ b/libdtrace/dt_bpf.c
@@ -196,9 +196,7 @@ populate_probes_map(dtrace_hdl_t *dtp, int fd)
* dt_state.h.
* - aggs: Aggregation data buffer map, associated with each CPU. The
* map is implemented as a global per-CPU map with a singleton
- * element (key 0). Every aggregation is stored with two copies
- * of its data to provide a lockless latch-based mechanism for
- * atomic reading and writing.
+ * element (key 0).
* - specs: Map associating speculation IDs with a dt_bpf_specs_t struct
* giving the number of buffers speculated into for this
* speculation, and the number drained by userspace.
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index fdd32ce9..4a22c1db 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -5437,25 +5437,20 @@ dt_cg_node(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
* Macro to set the storage data (offset and size) for the aggregation
* identifier (if not set yet).
*
- * We make room for a latch sequence number of sizeof(uint64_t).
- *
- * If DT_AGG_NUM_COPIES==2, we consume twice the required data size for
- * a dual-copy mechanism to provide lockless, write-wait-free operation.
+ * We make room for a data counter of sizeof(uint64_t).
*/
#define DT_CG_AGG_SET_STORAGE(aid, sz) \
do { \
if ((aid)->di_offset == -1) \
dt_ident_set_storage((aid), sizeof(uint64_t), \
- sizeof(uint64_t) + \
- DT_AGG_NUM_COPIES * (sz)); \
+ sizeof(uint64_t) + (sz)); \
} while (0)
-#if DT_AGG_NUM_COPIES == 1
/*
* Return a register that holds a pointer to the aggregation data to be
* updated.
*
- * We update the latch seqcount (first value in the aggregation) to
+ * We update the data counter (first value in the aggregation) to
* signal that the aggregation has data. The location of data for the
* given aggregation is stored in the register returned from this function.
*/
@@ -5496,80 +5491,17 @@ dt_cg_agg_buf_prepare(dt_ident_t *aid, int size, dt_irlist_t *dlp,
return rptr;
}
-#else
-/*
- * Prepare the aggregation buffer for updating for a specific aggregation, and
- * return a register that holds a pointer to the aggregation data to be
- * updated.
- *
- * We update the latch seqcount (first value in the aggregation) to
- * signal that reads should be directed to the alternate copy of the data. We
- * then determine the location of data for the given aggregation that can be
- * updated. This value is stored in the register returned from this function.
- */
-static int
-dt_cg_agg_buf_prepare(dt_ident_t *aid, int size, dt_irlist_t *dlp,
- dt_regset_t *drp)
-{
- int ragd, roff;
-
- TRACE_REGSET(" Prep: Begin");
-
- dt_regset_xalloc(drp, BPF_REG_0);
- ragd = dt_regset_alloc(drp);
- roff = dt_regset_alloc(drp);
- assert(ragd != -1 && roff != -1);
-
- /*
- * agd = dctx->agg; // lddw %r0, [%fp + DT_STK_DCTX]
- * // lddw %ragd, [%r0 + DCTX_AGG]
- * agd += aid->di_offset // %ragd += aid->di_offset
- */
- emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_FP, DT_STK_DCTX));
- emit(dlp, BPF_LOAD(BPF_DW, ragd, BPF_REG_0, DCTX_AGG));
- emit(dlp, BPF_ALU64_IMM(BPF_ADD, ragd, aid->di_offset));
-
- /*
- * off = (*agd & 1) * size // lddw %roff, [%ragd + 0]
- * + sizeof(uint64_t); // and %roff, 1
- * // mul %roff, size
- * // add %roff, sizeof(uint64_t)
- * (*agd)++; // mov %r0, 1
- * // xadd [%ragd + 0], %r0
- * agd += off; // add %ragd, %roff
- */
- emit(dlp, BPF_LOAD(BPF_DW, roff, ragd, 0));
- emit(dlp, BPF_ALU64_IMM(BPF_AND, roff, 1));
- emit(dlp, BPF_ALU64_IMM(BPF_MUL, roff, size));
- emit(dlp, BPF_ALU64_IMM(BPF_ADD, roff, sizeof(uint64_t)));
- emit(dlp, BPF_MOV_IMM(BPF_REG_0, 1));
- emit(dlp, BPF_XADD_REG(BPF_DW, ragd, 0, BPF_REG_0));
- emit(dlp, BPF_ALU64_REG(BPF_ADD, ragd, roff));
-
- dt_regset_free(drp, roff);
- dt_regset_free(drp, BPF_REG_0);
-
- TRACE_REGSET(" Prep: End ");
-
- return ragd;
-}
-#endif
#define DT_CG_AGG_IMPL(aid, sz, dlp, drp, f, ...) \
do { \
- int i, dreg; \
+ int dreg; \
\
TRACE_REGSET(" Upd: Begin "); \
\
- for (i = 0; i < DT_AGG_NUM_COPIES; i++) { \
- if (i == 1) \
- TRACE_REGSET(" Upd: Switch"); \
- \
- dreg = dt_cg_agg_buf_prepare((aid), (sz), (dlp), (drp));\
+ dreg = dt_cg_agg_buf_prepare((aid), (sz), (dlp), (drp));\
\
- (f)((dlp), (drp), dreg, ## __VA_ARGS__); \
- dt_regset_free((drp), dreg); \
- } \
+ (f)((dlp), (drp), dreg, ## __VA_ARGS__); \
+ dt_regset_free((drp), dreg); \
\
TRACE_REGSET(" Upd: End "); \
} while (0)
diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
index bf31bd76..be2b0121 100644
--- a/libdtrace/dt_impl.h
+++ b/libdtrace/dt_impl.h
@@ -205,20 +205,6 @@ typedef struct dt_tstring {
int in_use; /* In use (1) or not (0) */
} dt_tstring_t;
-/*
- * To provide a lock-free aggregation write mechanism for the producer,
- * two copies of each aggregation can be used. A latch sequence number
- * for each aggregation can be incremented to indicate to the consumer
- * which copy should be read and whether a copy has changed during reading.
- *
- * Until BPF maps can be mmapped, however, we cannot take advantage of
- * this technique. Using only a single-copy, of course, saves precious
- * BPF map space.
- *
- * Pick DT_AGG_NUM_COPIES to be either 1 or 2.
- */
-#define DT_AGG_NUM_COPIES 1
-
typedef struct dt_aggregate {
char **dtat_cpu_buf; /* per-CPU agg snapshot buffers */
char *dtat_buf; /* aggregation snapshot buffer */
diff --git a/libdtrace/dt_map.c b/libdtrace/dt_map.c
index bec12bee..d6c87ad2 100644
--- a/libdtrace/dt_map.c
+++ b/libdtrace/dt_map.c
@@ -296,7 +296,7 @@ dt_aggid_add(dtrace_hdl_t *dtp, const dt_ident_t *aid, int nrecs)
/* prepopulate some data fields */
recs[nrecs - 1].dtrd_action = ((dt_ident_t *) aid->di_iarg)->di_id;
- recs[nrecs - 1].dtrd_size = (aid->di_size - sizeof(uint64_t)) / DT_AGG_NUM_COPIES;
+ recs[nrecs - 1].dtrd_size = aid->di_size - sizeof(uint64_t);
/* update dt_aggsiz */
if (dtp->dt_aggsiz < aid->di_size)
--
2.18.4
More information about the DTrace-devel
mailing list