[DTrace-devel] [PATCH 30/61] Add a block of zeroes to use to initialize BPF maps

Kris Van Hees kris.van.hees at oracle.com
Tue Aug 2 05:20:48 UTC 2022


On Fri, Jul 08, 2022 at 10:45:14AM -0400, eugene.loh--- via DTrace-devel wrote:
> From: Eugene Loh <eugene.loh at oracle.com>
> 
> There are cases in BPF code where we want to read a contiguous block of
> zeroes.  Since the block is read-only, it can be shared among all CPUs and
> among multiple purposes to save BPF map space (locked memory).  Create such
> a block.
> 
> The block might as well reside in an already existing BPF map to consolidate
> allocations.  The BPF_MAP_TYPE_ARRAY candidates are arguably:
> 
>               key type    val type    # elems
> 
>     "state"   uint32_t    uint32_t    3
>     "strtab"  uint32_t    N / A       1
>     "gvars"   uint32_t    N / A       1
> 
> The "state" map is an odd choice architecturally, but it has a lot of
> spare room.

The 'state' map is a very odd choice simply because the value size is uint32_t
and in order for an element in that map to be used as a block of zeros, you
need to define the map with a value size equal to the size of the block of
zroes, which means that all elements in that map suddenly become that size.

I would not mention that map here as a candidate because it really does not
make sense to even consider it.

> The "gvars" map is arguably a decent choice;  we can think of there being a
> big "global variable" of rather arbitrary size that we reference by pointer.

Correct.

> The "strtab" map, for completeness, is another choice.

You might want to mention here that strtab already contains a nice chunk of
zeros at the end, which ...
> 
> Let us use gvars.

... would make me be inclined to argue that 'strtab' is the better choice.
Why would we create a block of zeros in 'gvars' when we already have one in
'strtab' (and it is perfectly fine to make that one larger).

> For code generation, we will need to know the offset of this region.
> This patch supports deferring the specification of this offset until
> relocation, but I suspect we might actually know the offset at cg time
> since it's only dependent on dt_idhash_datasize(dtp->dt_globals).

This is equally simple when using 'strtab' because the offset of the zero
is dtp->dt_strlen.

> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
> ---
>  libdtrace/dt_bpf.c   | 5 +++--
>  libdtrace/dt_bpf.h   | 1 +
>  libdtrace/dt_cc.c    | 3 +++
>  libdtrace/dt_cg.c    | 2 +-
>  libdtrace/dt_dlibs.c | 1 +
>  libdtrace/dt_impl.h  | 2 ++
>  6 files changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
> index 87cc0814..ee9aa846 100644
> --- a/libdtrace/dt_bpf.c
> +++ b/libdtrace/dt_bpf.c
> @@ -351,8 +351,9 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
>  	if (pr_mapfd == -1)
>  		return -1;		/* dt_errno is set for us */
>  
> -	/* global variables */
> -	sz = P2ROUNDUP(dt_idhash_datasize(dtp->dt_globals), 8);
> +	/* global variables, including a region of zeroes */
> +	dtp->dt_zerooffset = P2ROUNDUP(dt_idhash_datasize(dtp->dt_globals), 8);
> +	sz = dtp->dt_zerooffset + dtp->dt_zerosize;
>  	if (sz > 0 &&
>  	    create_gmap(dtp, "gvars", BPF_MAP_TYPE_ARRAY,
>  			sizeof(uint32_t), sz, 1) == -1)
> diff --git a/libdtrace/dt_bpf.h b/libdtrace/dt_bpf.h
> index 12b9fce3..40cbcf43 100644
> --- a/libdtrace/dt_bpf.h
> +++ b/libdtrace/dt_bpf.h
> @@ -36,6 +36,7 @@ extern "C" {
>  #define DT_CONST_TASK_COMM	16
>  #define DT_CONST_MUTEX_OWNER	17
>  #define DT_CONST_RWLOCK_CNTS	18
> +#define DT_CONST_ZERO_OFF	19
>  
>  extern int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
>  			   int group_fd, unsigned long flags);
> diff --git a/libdtrace/dt_cc.c b/libdtrace/dt_cc.c
> index e3166647..5d85eb82 100644
> --- a/libdtrace/dt_cc.c
> +++ b/libdtrace/dt_cc.c
> @@ -2454,6 +2454,9 @@ dt_link_construct(dtrace_hdl_t *dtp, const dt_probe_t *prp, dtrace_difo_t *dp,
>  				nrp->dofr_data = total_offset;
>  				continue;
>  			}
> +			case DT_CONST_ZERO_OFF:
> +				nrp->dofr_data = dtp->dt_zerooffset;
> +				continue;
>  			default:
>  				/* probe name -> value is probe id */
>  				if (strchr(idp->di_name, ':') != NULL)
> diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> index a84359ea..f48452ee 100644
> --- a/libdtrace/dt_cg.c
> +++ b/libdtrace/dt_cg.c
> @@ -208,7 +208,7 @@ dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
>  		DT_CG_STORE_MAP_PTR("scratchmem", DCTX_SCRATCHMEM);
>  	if (dt_idhash_datasize(dtp->dt_aggs) > 0)
>  		DT_CG_STORE_MAP_PTR("aggs", DCTX_AGG);
> -	if (dt_idhash_datasize(dtp->dt_globals) > 0)
> +	if (dt_idhash_datasize(dtp->dt_globals) > 0 || dtp->dt_zerosize > 0)
>  		DT_CG_STORE_MAP_PTR("gvars", DCTX_GVARS);
>  	if (dtp->dt_maxlvaralloc > 0)
>  		DT_CG_STORE_MAP_PTR("lvars", DCTX_LVARS);
> diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
> index 89ea062d..41ef28d9 100644
> --- a/libdtrace/dt_dlibs.c
> +++ b/libdtrace/dt_dlibs.c
> @@ -89,6 +89,7 @@ static const dt_ident_t		dt_bpf_symbols[] = {
>  	DT_BPF_SYMBOL_ID(TASK_COMM, DT_IDENT_SCALAR, DT_CONST_TASK_COMM),
>  	DT_BPF_SYMBOL_ID(MUTEX_OWNER, DT_IDENT_SCALAR, DT_CONST_MUTEX_OWNER),
>  	DT_BPF_SYMBOL_ID(RWLOCK_CNTS, DT_IDENT_SCALAR, DT_CONST_RWLOCK_CNTS),
> +	DT_BPF_SYMBOL_ID(ZERO_OFF, DT_IDENT_SCALAR, DT_CONST_ZERO_OFF),
>  
>  	/* End-of-list marker */
>  	{ NULL, }
> diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
> index 37088a5d..149f8b18 100644
> --- a/libdtrace/dt_impl.h
> +++ b/libdtrace/dt_impl.h
> @@ -295,6 +295,8 @@ struct dtrace_hdl {
>  	uint_t dt_maxdvarsize;	/* largest dynamic variable across programs */
>  	uint_t dt_maxtuplesize;	/* largest tuple across programs */
>  	uint_t dt_maxlvaralloc;	/* largest lvar alloc across pcbs */
> +	uint_t dt_zerosize;	/* zero region, size */
> +	uint_t dt_zerooffset;	/* zero region, offset */
>  	dt_tstring_t *dt_tstrings; /* temporary string slots */
>  	dt_list_t dt_modlist;	/* linked list of dt_module_t's */
>  	dt_htab_t *dt_mods;	/* hash table of dt_module_t's */
> -- 
> 2.18.4
> 
> 
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel



More information about the DTrace-devel mailing list