[DTrace-devel] [PATCH 12/12] Implement TLS variables

Eugene Loh eugene.loh at oracle.com
Mon Nov 22 23:08:18 UTC 2021


I assume the 12/12 is to be ignored.

I'm not sure if this post is WIP... there are no tests.  I'll assume 
WIP, so a few questions/comments below.

Actually, first, what's the status of bpf/map_tvar.c?  It includes the 
orphaned map tvars.

On 11/20/21 12:44 AM, Kris Van Hees via DTrace-devel wrote:
> Thread-local storage (TLS) variables are a form of dynamic variables in
> DTrace.  They are implemented using a global BPF hash map, index using
s/index/indexed/?
> a key value that is derived from the task ID and the variable ID (with
> some spwecial magic to handle the idle task that has TID 0 on all CPUs).
s/spwecial/special/
> Access to the TLS variables is handled through a pre-compiled BPF
> function : dt_get_tvar.  It returns the address of the storage location
> for the given variable in the current task.  This is used for both load
> and store operations.
>
> THe dvars BPF map contains an element at key 0 that contains a value of
> all zeros.  This is used to initialize new TLS variables.
s/THe/The/
> Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> ---
>   bpf/Build            |  2 +-
>   bpf/get_tvar.c       | 40 ++++++++++++++++++----
>   bpf/set_tvar.c       | 18 ----------
>   libdtrace/dt_bpf.c   | 48 +++++++++++---------------
>   libdtrace/dt_bpf.h   |  1 +
>   libdtrace/dt_cc.c    |  3 ++
>   libdtrace/dt_cg.c    | 81 ++++++++++++++++++++++++++++++++++++--------
>   libdtrace/dt_dlibs.c |  3 +-
>   libdtrace/dt_open.c  |  3 ++
>   9 files changed, 130 insertions(+), 69 deletions(-)
>   delete mode 100644 bpf/set_tvar.c
>
> diff --git a/bpf/Build b/bpf/Build
> index 5c1bc235..2b17704b 100644
> --- a/bpf/Build
> +++ b/bpf/Build
> @@ -24,7 +24,7 @@ bpf_dlib_SRCDEPS = $(objdir)/include/.dir.stamp
>   bpf_dlib_SOURCES = \
>   	agg_lqbin.c agg_qbin.c \
>   	get_bvar.c \
> -	get_tvar.c set_tvar.c \
> +	get_tvar.c \
>   	index.S \
>   	lltostr.S \
>   	probe_error.c \
> diff --git a/bpf/get_tvar.c b/bpf/get_tvar.c
> index 6c8c1d2a..bafff2a1 100644
> --- a/bpf/get_tvar.c
> +++ b/bpf/get_tvar.c
> @@ -1,6 +1,6 @@
>   // SPDX-License-Identifier: GPL-2.0
>   /*
> - * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
> + * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
>    */
>   #include <linux/bpf.h>
>   #include <stdint.h>
> @@ -10,12 +10,40 @@
>   # define noinline	__attribute__((noinline))
>   #endif
>   
> -extern struct bpf_map_def tvars;
> +extern struct bpf_map_def dvars;
> +extern uint64_t NCPUS;
>   
> -noinline uint64_t dt_get_tvar(uint32_t id)
> +noinline void *dt_get_tvar(uint32_t id)
>   {
> -	uint64_t	*val;
> +	uint64_t	key;
> +	uint64_t	dflt_key = 0;;
> +	void		*val;
>   
> -	val = bpf_map_lookup_elem(&tvars, &id);
> -	return val ? *val : 0;
> +	key = bpf_get_current_pid_tgid();
> +	key &= 0x00000000ffffffffUL;
> +	if (key == 0)
> +		key = bpf_get_smp_processor_id();
> +	else
> +		key += (uint32_t)(uint64_t)&NCPUS;
> +
> +	key++;
I wouldn't mind an extra few comments here.  It took me a while, but I 
guess +=NCPUS so we can detect the pid==0 case and then ++ to make room 
for the dflt_key case?
> +	key = (key << 32) | id;
> +
> +	val = bpf_map_lookup_elem(&dvars, &key);
> +	if (val != 0)
> +		return val;
> +
> +	/* Not found - create it with the default value. */
> +	val = bpf_map_lookup_elem(&dvars, &dflt_key);
> +	if (val == 0)
> +		return 0;
> +
> +	if (bpf_map_update_elem(&dvars, &key, val, BPF_ANY) < 0)
> +		return 0;
> +
> +	val = bpf_map_lookup_elem(&dvars, &key);
> +	if (val != 0)
> +		return val;
> +
> +	return 0;
>   }
> diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
> @@ -213,36 +213,21 @@ populate_probes_map(dtrace_hdl_t *dtp, int fd)
>    *		use.
>    * - gvars:	Global variables map.  This is a global map with a singleton
>    *		element (key 0) addressed by variable offset.
> + * - dvars:	Dynamic variables map.  This is a global hash map index with
> + *		a unique numeric identifier for each variable.
Well, variable/pid combo.
>    * - lvars:	Local variables map.  This is a per-CPU map with a singleton
>    *		element (key 0) addressed by variable offset.
>    */
>   int
>   dt_bpf_gmap_create(dtrace_hdl_t *dtp)
>   {
> -	int		stabsz, gvarsz, lvarsz, tvarc, aggsz, memsz;
> +	int		stabsz, gvarsz, lvarsz, dvarc, aggsz, memsz;
>   	int		ci_mapfd, st_mapfd, pr_mapfd;
>   	uint32_t	key = 0;
>   	size_t		strsize = dtp->dt_options[DTRACEOPT_STRSIZE];
>   	uint8_t		*buf, *end;
>   	char		*strtab;
> +	size_t		strdatasz = P2ROUNDUP(DT_STRLEN_BYTES + strsize + 1, 8);
>   
>   	/* If we already created the global maps, return success. */
>   	if (dt_gmap_done)
> @@ -257,7 +242,7 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
>   	/* Determine sizes for global, local, and TLS maps. */
>   	gvarsz = P2ROUNDUP(dt_idhash_datasize(dtp->dt_globals), 8);
>   	lvarsz = P2ROUNDUP(dtp->dt_maxlvaralloc, 8);
> -	tvarc = dt_idhash_peekid(dtp->dt_tls) - DIF_VAR_OTHER_UBASE;
> +	dvarc = dtp->dt_options[DTRACEOPT_DYNVARSIZE] / strdatasz;
>   
>   	/* Create global maps as long as there are no errors. */
>   	dtp->dt_stmap_fd = create_gmap(dtp, "state", BPF_MAP_TYPE_ARRAY,
> @@ -371,10 +354,19 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
>   			sizeof(uint32_t), lvarsz, 1) == -1)
>   		return -1;		/* dt_errno is set for us */
>   
> -	if (tvarc > 0 &&
> -	    create_gmap(dtp, "tvars", BPF_MAP_TYPE_ARRAY,
> -			sizeof(uint32_t), sizeof(uint64_t), tvarc) == -1)
> -		return -1;		/* dt_errno is set for us */
> +	if (dvarc > 0) {
I guess okay.  Dunno.  Or make sure dvarc is a minimum of 1?  We're 
going to go over DYNVARSIZE anyhow since we add the key==0 case.  I have 
no good ideas here.

Speaking of which, is there a plan to get fancier on the sizing? We're 
allocating string space for everyone.  In many cases, that will be 256/4 
or 256/8 times too much.  Meanwhile, for some structs, I guess it'll be 
too small.  Doesn't need to be fixed in the first patch, but maybe worth 
mentioning?
> +		int	fd;
> +		char	dflt[strdatasz];
> +
> +		/* We allocate room for an additional default value. */
> +		fd = create_gmap(dtp, "dvars", BPF_MAP_TYPE_HASH,
> +				 sizeof(uint64_t), strdatasz, dvarc + 1);
> +		if (fd == -1)
> +			return -1;	/* dt_errno is set for us */
> +
> +		memset(dflt, 0, strdatasz);
> +		dt_bpf_map_update(fd, &key, &dflt);
> +	}
>   
>   	/* Populate the 'cpuinfo' map. */
>   	dt_bpf_map_update(ci_mapfd, &key, dtp->dt_conf.cpus);
> diff --git a/libdtrace/dt_bpf.h b/libdtrace/dt_bpf.h
> index 287178c1..5b9bae80 100644
> --- a/libdtrace/dt_bpf.h
> +++ b/libdtrace/dt_bpf.h
> @@ -27,6 +27,7 @@ extern "C" {
>   #define DT_CONST_STKSIZ	7
>   #define DT_CONST_BOOTTM	8
>   #define DT_CONST_NSPEC	9
> +#define DT_CONST_NCPUS	10
>   
>   extern int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
>   			   int group_fd, unsigned long flags);
> diff --git a/libdtrace/dt_cc.c b/libdtrace/dt_cc.c
> index a25f3548..ce824eb5 100644
> --- a/libdtrace/dt_cc.c
> +++ b/libdtrace/dt_cc.c
> @@ -2355,6 +2355,9 @@ dt_link_construct(dtrace_hdl_t *dtp, const dt_probe_t *prp, dtrace_difo_t *dp,
>   			case DT_CONST_NSPEC:
>   				nrp->dofr_data = dtp->dt_options[DTRACEOPT_NSPEC];
>   				continue;
> +			case DT_CONST_NCPUS:
> +				nrp->dofr_data = dtp->dt_conf.max_cpuid + 1;
> +				continue;
>   			case DT_CONST_STKSIZ:
>   				nrp->dofr_data = sizeof(uint64_t)
>   				    * dtp->dt_options[DTRACEOPT_MAXFRAMES];
> diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> index c64fa78c..d1922f0d 100644
> --- a/libdtrace/dt_cg.c
> +++ b/libdtrace/dt_cg.c
> @@ -2057,13 +2057,40 @@ dt_cg_load_var(dt_node_t *dst, dt_irlist_t *dlp, dt_regset_t *drp)
>   	}
>   
>   	/* otherwise, handle thread-local and built-in variables */
> -	if (dt_regset_xalloc_args(drp) == -1)
> -		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> -
>   	if (idp->di_flags & DT_IDFLG_TLS) {	/* TLS var */
> +		dt_ident_t	*fidp = dt_dlib_get_func(yypcb->pcb_hdl,
> +							 "dt_get_tvar");
> +
> +		/* FIXME */
> +		assert(fidp != NULL);
> +		if ((dst->dn_reg = dt_regset_alloc(drp)) == -1)
> +			longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> +		if (dt_regset_xalloc_args(drp) == -1)
> +			longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
>   		emit(dlp, BPF_MOV_IMM(BPF_REG_1, idp->di_id - DIF_VAR_OTHER_UBASE));
> -		idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_get_tvar");
> +		dt_regset_xalloc(drp, BPF_REG_0);
> +		emite(dlp, BPF_CALL_FUNC(fidp->di_id), fidp);
> +		dt_regset_free_args(drp);
> +		emit(dlp, BPF_MOV_REG(dst->dn_reg, BPF_REG_0));
> +		dt_regset_free(drp, BPF_REG_0);
> +
> +		dt_cg_check_notnull(dlp, drp, dst->dn_reg);
> +
> +		/* load the variable value if not by ref */
> +		if (!(dst->dn_flags & DT_NF_REF)) {
> +			size_t	size = dt_node_type_size(dst);
> +
> +			assert(size > 0 && size <= 8 &&
> +			       (size & (size - 1)) == 0);
> +
> +			emit(dlp, BPF_LOAD(ldstw[size], dst->dn_reg, dst->dn_reg, 0));
> +		}
> +
> +		return;
>   	} else if (idp->di_id < DIF_VAR_OTHER_UBASE) {	/* built-in var */
> +		if (dt_regset_xalloc_args(drp) == -1)
> +			longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> +
>   		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_FP, DT_STK_DCTX));
>   		emit(dlp, BPF_MOV_IMM(BPF_REG_2, idp->di_id));
>   		idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_get_bvar");
> @@ -2301,14 +2328,13 @@ dt_cg_store_var(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp,
>   		dt_ident_t *idp)
>   {
>   	uint_t	varid;
> +	int	reg;
> +	size_t	size;
>   
>   	idp->di_flags |= DT_IDFLG_DIFW;
>   
>   	/* global and local variables (that is, not thread-local) */
>   	if (!(idp->di_flags & DT_IDFLG_TLS)) {
> -		int	reg;
> -		size_t	size;
> -
>   		if ((reg = dt_regset_alloc(drp)) == -1)
>   			longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
>   
> @@ -2351,23 +2377,48 @@ dt_cg_store_var(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp,
>   
>   	/* TLS var */
>   	varid = idp->di_id - DIF_VAR_OTHER_UBASE;
> -	idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_set_tvar");
> +	size = idp->di_size;
> +
> +	idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_get_tvar");
>   	assert(idp != NULL);
>   
>   	if (dt_regset_xalloc_args(drp) == -1)
>   		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
>   
> -	/*
> -	 * We assign the varid in the instruction preceding the call because
> -	 * the disassembler expects this sequence in support for annotating
> -	 * the disassembly with variables names.
> -	 */
> -	emit(dlp,  BPF_MOV_REG(BPF_REG_2, dnp->dn_reg));
>   	emit(dlp,  BPF_MOV_IMM(BPF_REG_1, varid));
>   	dt_regset_xalloc(drp, BPF_REG_0);
>   	emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
> -	dt_regset_free(drp, BPF_REG_0);
>   	dt_regset_free_args(drp);
> +
> +	if ((reg = dt_regset_alloc(drp)) == -1)
> +		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> +
> +	emit(dlp, BPF_MOV_REG(reg, BPF_REG_0));
> +	dt_regset_free(drp, BPF_REG_0);
> +
> +	dt_cg_check_notnull(dlp, drp, reg);
> +
> +	if (dnp->dn_flags & DT_NF_REF) {
> +		size_t		srcsz;
> +
> +		/*
> +		 * Determine the amount of data to be copied.  It is
> +		 * the lesser of the size of the identifier and the
> +		 * size of the data being copied in.
> +		 */
> +		srcsz = dt_node_type_size(dnp->dn_right);
> +		if (dt_node_is_string(dnp))
> +			srcsz += DT_STRLEN_BYTES;
> +		size = MIN(srcsz, size);
I think neither "srcsz = dt_node_type_size(dnp->dn_right)" nor "size = 
idp->di_size" includes the NUL terminating byte.  Same with the 
pre-existing gvar/lvar code.
> +
> +		dt_cg_memcpy(dlp, drp, reg, dnp->dn_reg, size);
> +	} else {
> +		assert(size > 0 && size <= 8 && (size & (size - 1)) == 0);
> +
> +		emit(dlp, BPF_STORE(ldstw[size], reg, 0, dnp->dn_reg));
> +	}
> +
> +	dt_regset_free(drp, reg);
>   }
>   
>   /*
> diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
> index 75c00e9e..f7be9fa2 100644
> --- a/libdtrace/dt_dlibs.c
> +++ b/libdtrace/dt_dlibs.c
> @@ -75,6 +75,7 @@ static const dt_ident_t		dt_bpf_symbols[] = {
>   	DT_BPF_SYMBOL(aggs, DT_IDENT_PTR),
>   	DT_BPF_SYMBOL(buffers, DT_IDENT_PTR),
>   	DT_BPF_SYMBOL(cpuinfo, DT_IDENT_PTR),
> +	DT_BPF_SYMBOL(dvars, DT_IDENT_PTR),
>   	DT_BPF_SYMBOL(gvars, DT_IDENT_PTR),
>   	DT_BPF_SYMBOL(lvars, DT_IDENT_PTR),
>   	DT_BPF_SYMBOL(mem, DT_IDENT_PTR),
> @@ -82,7 +83,6 @@ static const dt_ident_t		dt_bpf_symbols[] = {
>   	DT_BPF_SYMBOL(specs, DT_IDENT_PTR),
>   	DT_BPF_SYMBOL(state, DT_IDENT_PTR),
>   	DT_BPF_SYMBOL(strtab, DT_IDENT_PTR),
> -	DT_BPF_SYMBOL(tvars, DT_IDENT_PTR),
>   	/* BPF internal identifiers */
>   	DT_BPF_SYMBOL_ID(EPID, DT_IDENT_SCALAR, DT_CONST_EPID),
>   	DT_BPF_SYMBOL_ID(PRID, DT_IDENT_SCALAR, DT_CONST_PRID),
> @@ -93,6 +93,7 @@ static const dt_ident_t		dt_bpf_symbols[] = {
>   	DT_BPF_SYMBOL_ID(STKSIZ, DT_IDENT_SCALAR, DT_CONST_STKSIZ),
>   	DT_BPF_SYMBOL_ID(BOOTTM, DT_IDENT_SCALAR, DT_CONST_BOOTTM),
>   	DT_BPF_SYMBOL_ID(NSPEC, DT_IDENT_SCALAR, DT_CONST_NSPEC),
> +	DT_BPF_SYMBOL_ID(NCPUS, DT_IDENT_SCALAR, DT_CONST_NCPUS),
>   	/* End-of-list marker */
>   	{ NULL, }
>   };
> diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
> index f83fb2f0..dde7b309 100644
> --- a/libdtrace/dt_open.c
> +++ b/libdtrace/dt_open.c
> @@ -789,6 +789,9 @@ dt_vopen(int version, int flags, int *errp,
>   	 */
>   	dtp->dt_options[DTRACEOPT_STRSIZE] = 256;
>   
> +	/* Set the default dynamic variable space size. */
> +	dtp->dt_options[DTRACEOPT_DYNVARSIZE] = 1024 * 1024 * 1;
> +
>   	/*
>   	 * Set the default speculation size and number of simultaneously active
>   	 * speculations.



More information about the DTrace-devel mailing list