[DTrace-devel] [PATCH 50/61] Remove DT_AGG_NUM_COPIES
Kris Van Hees
kris.van.hees at oracle.com
Tue Aug 16 13:46:20 UTC 2022
On Fri, Jul 08, 2022 at 10:45:34AM -0400, eugene.loh--- via DTrace-devel wrote:
> From: Eugene Loh <eugene.loh at oracle.com>
>
> Originally, the DTrace port to BPF used two copies of aggregations,
> to allow lockless access to aggregation data for producer and consumer.
>
> Then DT_AGG_NUM_COPIES was introduced and set to 1 since the consumer
> could not mmap BPF maps; rather, it read data atomically using bpf()
> system calls. The use of twice as much locked memory for aggregations
> was not justified.
>
> Eliminate DT_AGG_NUM_COPIES, committing to the single-copy approach
> and simplifying the code.
>
> The dual-copy version used a latch sequence number. It is now simply
> a data counter; importantly, a nonzero value indicates the presence
> of data.
>
> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>
... queued for dev
> ---
> libdtrace/dt_aggregate.c | 13 +++----
> libdtrace/dt_bpf.c | 4 +-
> libdtrace/dt_cg.c | 82 ++++------------------------------------
> libdtrace/dt_impl.h | 14 -------
> libdtrace/dt_map.c | 2 +-
> 5 files changed, 14 insertions(+), 101 deletions(-)
>
> diff --git a/libdtrace/dt_aggregate.c b/libdtrace/dt_aggregate.c
> index 8fa2755d..10df98da 100644
> --- a/libdtrace/dt_aggregate.c
> +++ b/libdtrace/dt_aggregate.c
> @@ -460,14 +460,14 @@ dt_aggregate_snap_one(dt_idhash_t *dhp, dt_ident_t *aid, dt_snapstate_t *st)
>
> nrecs = agg->dtagd_nrecs;
>
> - /* point to the latch sequence number */
> + /* point to the data counter */
> src = (int64_t *)(st->buf + aid->di_offset);
>
> - /* skip it if latch sequence number is 0 */
> + /* skip it if data counter is 0 */
> if (*src == 0)
> return 0;
>
> - /* advance past the latch sequence number */
> + /* advance past the data counter */
> src++;
>
> datasz = agg->dtagd_recs[nrecs - 1].dtrd_size;
> @@ -980,7 +980,7 @@ dt_aggregate_bundlecmp(const void *lhs, const void *rhs)
> * set for min(), so that any other value fed to the functions will register
> * properly.
> *
> - * The latch sequence number of the first aggregation is used as a flag to
> + * The data counter of the first aggregation is used as a flag to
> * indicate whether an initial value was stored for any aggregation.
> */
> static int
> @@ -1003,12 +1003,9 @@ init_minmax(dt_idhash_t *dhp, dt_ident_t *aid, char *buf)
> /* Indicate that we are setting initial values. */
> *(int64_t *)buf = 1;
>
> - /* skip ptr[0], it is the latch sequence number */
> + /* skip ptr[0], it is the data counter */
> ptr = (int64_t *)(buf + aid->di_offset);
> ptr[1] = value;
> -#if DT_AGG_NUM_COPIES == 2
> - ptr[2] = value;
> -#endif
>
> return 0;
> }
> diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
> index 8a5149d4..8a739891 100644
> --- a/libdtrace/dt_bpf.c
> +++ b/libdtrace/dt_bpf.c
> @@ -196,9 +196,7 @@ populate_probes_map(dtrace_hdl_t *dtp, int fd)
> * dt_state.h.
> * - aggs: Aggregation data buffer map, associated with each CPU. The
> * map is implemented as a global per-CPU map with a singleton
> - * element (key 0). Every aggregation is stored with two copies
> - * of its data to provide a lockless latch-based mechanism for
> - * atomic reading and writing.
> + * element (key 0).
> * - specs: Map associating speculation IDs with a dt_bpf_specs_t struct
> * giving the number of buffers speculated into for this
> * speculation, and the number drained by userspace.
> diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> index fdd32ce9..4a22c1db 100644
> --- a/libdtrace/dt_cg.c
> +++ b/libdtrace/dt_cg.c
> @@ -5437,25 +5437,20 @@ dt_cg_node(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
> * Macro to set the storage data (offset and size) for the aggregation
> * identifier (if not set yet).
> *
> - * We make room for a latch sequence number of sizeof(uint64_t).
> - *
> - * If DT_AGG_NUM_COPIES==2, we consume twice the required data size for
> - * a dual-copy mechanism to provide lockless, write-wait-free operation.
> + * We make room for a data counter of sizeof(uint64_t).
> */
> #define DT_CG_AGG_SET_STORAGE(aid, sz) \
> do { \
> if ((aid)->di_offset == -1) \
> dt_ident_set_storage((aid), sizeof(uint64_t), \
> - sizeof(uint64_t) + \
> - DT_AGG_NUM_COPIES * (sz)); \
> + sizeof(uint64_t) + (sz)); \
> } while (0)
>
> -#if DT_AGG_NUM_COPIES == 1
> /*
> * Return a register that holds a pointer to the aggregation data to be
> * updated.
> *
> - * We update the latch seqcount (first value in the aggregation) to
> + * We update the data counter (first value in the aggregation) to
> * signal that the aggregation has data. The location of data for the
> * given aggregation is stored in the register returned from this function.
> */
> @@ -5496,80 +5491,17 @@ dt_cg_agg_buf_prepare(dt_ident_t *aid, int size, dt_irlist_t *dlp,
>
> return rptr;
> }
> -#else
> -/*
> - * Prepare the aggregation buffer for updating for a specific aggregation, and
> - * return a register that holds a pointer to the aggregation data to be
> - * updated.
> - *
> - * We update the latch seqcount (first value in the aggregation) to
> - * signal that reads should be directed to the alternate copy of the data. We
> - * then determine the location of data for the given aggregation that can be
> - * updated. This value is stored in the register returned from this function.
> - */
> -static int
> -dt_cg_agg_buf_prepare(dt_ident_t *aid, int size, dt_irlist_t *dlp,
> - dt_regset_t *drp)
> -{
> - int ragd, roff;
> -
> - TRACE_REGSET(" Prep: Begin");
> -
> - dt_regset_xalloc(drp, BPF_REG_0);
> - ragd = dt_regset_alloc(drp);
> - roff = dt_regset_alloc(drp);
> - assert(ragd != -1 && roff != -1);
> -
> - /*
> - * agd = dctx->agg; // lddw %r0, [%fp + DT_STK_DCTX]
> - * // lddw %ragd, [%r0 + DCTX_AGG]
> - * agd += aid->di_offset // %ragd += aid->di_offset
> - */
> - emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_FP, DT_STK_DCTX));
> - emit(dlp, BPF_LOAD(BPF_DW, ragd, BPF_REG_0, DCTX_AGG));
> - emit(dlp, BPF_ALU64_IMM(BPF_ADD, ragd, aid->di_offset));
> -
> - /*
> - * off = (*agd & 1) * size // lddw %roff, [%ragd + 0]
> - * + sizeof(uint64_t); // and %roff, 1
> - * // mul %roff, size
> - * // add %roff, sizeof(uint64_t)
> - * (*agd)++; // mov %r0, 1
> - * // xadd [%ragd + 0], %r0
> - * agd += off; // add %ragd, %roff
> - */
> - emit(dlp, BPF_LOAD(BPF_DW, roff, ragd, 0));
> - emit(dlp, BPF_ALU64_IMM(BPF_AND, roff, 1));
> - emit(dlp, BPF_ALU64_IMM(BPF_MUL, roff, size));
> - emit(dlp, BPF_ALU64_IMM(BPF_ADD, roff, sizeof(uint64_t)));
> - emit(dlp, BPF_MOV_IMM(BPF_REG_0, 1));
> - emit(dlp, BPF_XADD_REG(BPF_DW, ragd, 0, BPF_REG_0));
> - emit(dlp, BPF_ALU64_REG(BPF_ADD, ragd, roff));
> -
> - dt_regset_free(drp, roff);
> - dt_regset_free(drp, BPF_REG_0);
> -
> - TRACE_REGSET(" Prep: End ");
> -
> - return ragd;
> -}
> -#endif
>
> #define DT_CG_AGG_IMPL(aid, sz, dlp, drp, f, ...) \
> do { \
> - int i, dreg; \
> + int dreg; \
> \
> TRACE_REGSET(" Upd: Begin "); \
> \
> - for (i = 0; i < DT_AGG_NUM_COPIES; i++) { \
> - if (i == 1) \
> - TRACE_REGSET(" Upd: Switch"); \
> - \
> - dreg = dt_cg_agg_buf_prepare((aid), (sz), (dlp), (drp));\
> + dreg = dt_cg_agg_buf_prepare((aid), (sz), (dlp), (drp));\
> \
> - (f)((dlp), (drp), dreg, ## __VA_ARGS__); \
> - dt_regset_free((drp), dreg); \
> - } \
> + (f)((dlp), (drp), dreg, ## __VA_ARGS__); \
> + dt_regset_free((drp), dreg); \
> \
> TRACE_REGSET(" Upd: End "); \
> } while (0)
> diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
> index bf31bd76..be2b0121 100644
> --- a/libdtrace/dt_impl.h
> +++ b/libdtrace/dt_impl.h
> @@ -205,20 +205,6 @@ typedef struct dt_tstring {
> int in_use; /* In use (1) or not (0) */
> } dt_tstring_t;
>
> -/*
> - * To provide a lock-free aggregation write mechanism for the producer,
> - * two copies of each aggregation can be used. A latch sequence number
> - * for each aggregation can be incremented to indicate to the consumer
> - * which copy should be read and whether a copy has changed during reading.
> - *
> - * Until BPF maps can be mmapped, however, we cannot take advantage of
> - * this technique. Using only a single-copy, of course, saves precious
> - * BPF map space.
> - *
> - * Pick DT_AGG_NUM_COPIES to be either 1 or 2.
> - */
> -#define DT_AGG_NUM_COPIES 1
> -
> typedef struct dt_aggregate {
> char **dtat_cpu_buf; /* per-CPU agg snapshot buffers */
> char *dtat_buf; /* aggregation snapshot buffer */
> diff --git a/libdtrace/dt_map.c b/libdtrace/dt_map.c
> index bec12bee..d6c87ad2 100644
> --- a/libdtrace/dt_map.c
> +++ b/libdtrace/dt_map.c
> @@ -296,7 +296,7 @@ dt_aggid_add(dtrace_hdl_t *dtp, const dt_ident_t *aid, int nrecs)
>
> /* prepopulate some data fields */
> recs[nrecs - 1].dtrd_action = ((dt_ident_t *) aid->di_iarg)->di_id;
> - recs[nrecs - 1].dtrd_size = (aid->di_size - sizeof(uint64_t)) / DT_AGG_NUM_COPIES;
> + recs[nrecs - 1].dtrd_size = aid->di_size - sizeof(uint64_t);
>
> /* update dt_aggsiz */
> if (dtp->dt_aggsiz < aid->di_size)
> --
> 2.18.4
>
>
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel
More information about the DTrace-devel
mailing list