[DTrace-devel] [PATCH 12/12] Implement TLS variables

Kris Van Hees kris.van.hees at oracle.com
Tue Nov 23 07:45:11 UTC 2021


On Mon, Nov 22, 2021 at 06:08:18PM -0500, Eugene Loh via DTrace-devel wrote:
> I assume the 12/12 is to be ignored.

Ah no, it means I posted the wrong version...  Darn.

> I'm not sure if this post is WIP... there are no tests.  I'll assume WIP, so
> a few questions/comments below.

Yes, v2 coming up which contains what this one was supposed to contain, i.e.
almost the same code but with tests.

> Actually, first, what's the status of bpf/map_tvar.c?  It includes the
> orphaned map tvars.

v2 removes it.

> On 11/20/21 12:44 AM, Kris Van Hees via DTrace-devel wrote:
> > Thread-local storage (TLS) variables are a form of dynamic variables in
> > DTrace.  They are implemented using a global BPF hash map, index using
> s/index/indexed/?
> > a key value that is derived from the task ID and the variable ID (with
> > some spwecial magic to handle the idle task that has TID 0 on all CPUs).
> s/spwecial/special/
> > Access to the TLS variables is handled through a pre-compiled BPF
> > function : dt_get_tvar.  It returns the address of the storage location
> > for the given variable in the current task.  This is used for both load
> > and store operations.
> > 
> > THe dvars BPF map contains an element at key 0 that contains a value of
> > all zeros.  This is used to initialize new TLS variables.
> s/THe/The/

Fixed typos.

> > Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> > ---
> >   bpf/Build            |  2 +-
> >   bpf/get_tvar.c       | 40 ++++++++++++++++++----
> >   bpf/set_tvar.c       | 18 ----------
> >   libdtrace/dt_bpf.c   | 48 +++++++++++---------------
> >   libdtrace/dt_bpf.h   |  1 +
> >   libdtrace/dt_cc.c    |  3 ++
> >   libdtrace/dt_cg.c    | 81 ++++++++++++++++++++++++++++++++++++--------
> >   libdtrace/dt_dlibs.c |  3 +-
> >   libdtrace/dt_open.c  |  3 ++
> >   9 files changed, 130 insertions(+), 69 deletions(-)
> >   delete mode 100644 bpf/set_tvar.c
> > 
> > diff --git a/bpf/Build b/bpf/Build
> > index 5c1bc235..2b17704b 100644
> > --- a/bpf/Build
> > +++ b/bpf/Build
> > @@ -24,7 +24,7 @@ bpf_dlib_SRCDEPS = $(objdir)/include/.dir.stamp
> >   bpf_dlib_SOURCES = \
> >   	agg_lqbin.c agg_qbin.c \
> >   	get_bvar.c \
> > -	get_tvar.c set_tvar.c \
> > +	get_tvar.c \
> >   	index.S \
> >   	lltostr.S \
> >   	probe_error.c \
> > diff --git a/bpf/get_tvar.c b/bpf/get_tvar.c
> > index 6c8c1d2a..bafff2a1 100644
> > --- a/bpf/get_tvar.c
> > +++ b/bpf/get_tvar.c
> > @@ -1,6 +1,6 @@
> >   // SPDX-License-Identifier: GPL-2.0
> >   /*
> > - * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
> > + * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
> >    */
> >   #include <linux/bpf.h>
> >   #include <stdint.h>
> > @@ -10,12 +10,40 @@
> >   # define noinline	__attribute__((noinline))
> >   #endif
> > -extern struct bpf_map_def tvars;
> > +extern struct bpf_map_def dvars;
> > +extern uint64_t NCPUS;
> > -noinline uint64_t dt_get_tvar(uint32_t id)
> > +noinline void *dt_get_tvar(uint32_t id)
> >   {
> > -	uint64_t	*val;
> > +	uint64_t	key;
> > +	uint64_t	dflt_key = 0;;
> > +	void		*val;
> > -	val = bpf_map_lookup_elem(&tvars, &id);
> > -	return val ? *val : 0;
> > +	key = bpf_get_current_pid_tgid();
> > +	key &= 0x00000000ffffffffUL;
> > +	if (key == 0)
> > +		key = bpf_get_smp_processor_id();
> > +	else
> > +		key += (uint32_t)(uint64_t)&NCPUS;
> > +
> > +	key++;
> I wouldn't mind an extra few comments here.  It took me a while, but I guess
> +=NCPUS so we can detect the pid==0 case and then ++ to make room for the
> dflt_key case?
> > +	key = (key << 32) | id;
> > +
> > +	val = bpf_map_lookup_elem(&dvars, &key);
> > +	if (val != 0)
> > +		return val;
> > +
> > +	/* Not found - create it with the default value. */
> > +	val = bpf_map_lookup_elem(&dvars, &dflt_key);
> > +	if (val == 0)
> > +		return 0;
> > +
> > +	if (bpf_map_update_elem(&dvars, &key, val, BPF_ANY) < 0)
> > +		return 0;
> > +
> > +	val = bpf_map_lookup_elem(&dvars, &key);
> > +	if (val != 0)
> > +		return val;
> > +
> > +	return 0;
> >   }
> > diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
> > @@ -213,36 +213,21 @@ populate_probes_map(dtrace_hdl_t *dtp, int fd)
> >    *		use.
> >    * - gvars:	Global variables map.  This is a global map with a singleton
> >    *		element (key 0) addressed by variable offset.
> > + * - dvars:	Dynamic variables map.  This is a global hash map index with
> > + *		a unique numeric identifier for each variable.
> Well, variable/pid combo.
> >    * - lvars:	Local variables map.  This is a per-CPU map with a singleton
> >    *		element (key 0) addressed by variable offset.
> >    */
> >   int
> >   dt_bpf_gmap_create(dtrace_hdl_t *dtp)
> >   {
> > -	int		stabsz, gvarsz, lvarsz, tvarc, aggsz, memsz;
> > +	int		stabsz, gvarsz, lvarsz, dvarc, aggsz, memsz;
> >   	int		ci_mapfd, st_mapfd, pr_mapfd;
> >   	uint32_t	key = 0;
> >   	size_t		strsize = dtp->dt_options[DTRACEOPT_STRSIZE];
> >   	uint8_t		*buf, *end;
> >   	char		*strtab;
> > +	size_t		strdatasz = P2ROUNDUP(DT_STRLEN_BYTES + strsize + 1, 8);
> >   	/* If we already created the global maps, return success. */
> >   	if (dt_gmap_done)
> > @@ -257,7 +242,7 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
> >   	/* Determine sizes for global, local, and TLS maps. */
> >   	gvarsz = P2ROUNDUP(dt_idhash_datasize(dtp->dt_globals), 8);
> >   	lvarsz = P2ROUNDUP(dtp->dt_maxlvaralloc, 8);
> > -	tvarc = dt_idhash_peekid(dtp->dt_tls) - DIF_VAR_OTHER_UBASE;
> > +	dvarc = dtp->dt_options[DTRACEOPT_DYNVARSIZE] / strdatasz;
> >   	/* Create global maps as long as there are no errors. */
> >   	dtp->dt_stmap_fd = create_gmap(dtp, "state", BPF_MAP_TYPE_ARRAY,
> > @@ -371,10 +354,19 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
> >   			sizeof(uint32_t), lvarsz, 1) == -1)
> >   		return -1;		/* dt_errno is set for us */
> > -	if (tvarc > 0 &&
> > -	    create_gmap(dtp, "tvars", BPF_MAP_TYPE_ARRAY,
> > -			sizeof(uint32_t), sizeof(uint64_t), tvarc) == -1)
> > -		return -1;		/* dt_errno is set for us */
> > +	if (dvarc > 0) {
> I guess okay.  Dunno.  Or make sure dvarc is a minimum of 1?  We're going to
> go over DYNVARSIZE anyhow since we add the key==0 case.  I have no good
> ideas here.

If there is not enough space to hold a single TLS var, there is no point in
allocating space for key = 0 since that is the default value which we then
would never be able to copy into any variable anyway.

> Speaking of which, is there a plan to get fancier on the sizing? We're
> allocating string space for everyone.  In many cases, that will be 256/4 or
> 256/8 times too much.  Meanwhile, for some structs, I guess it'll be too
> small.  Doesn't need to be fixed in the first patch, but maybe worth
> mentioning?

This is the challenge of doing tracing and supporting dynamic variables.  You
cannot really get into implementing fancy memory management.  And yes, later on
we will need to make the size be based on the largest datatype a variable can
have rather than strsize, so indeed it can be larger than strsize.

> > +		int	fd;
> > +		char	dflt[strdatasz];
> > +
> > +		/* We allocate room for an additional default value. */
> > +		fd = create_gmap(dtp, "dvars", BPF_MAP_TYPE_HASH,
> > +				 sizeof(uint64_t), strdatasz, dvarc + 1);
> > +		if (fd == -1)
> > +			return -1;	/* dt_errno is set for us */
> > +
> > +		memset(dflt, 0, strdatasz);
> > +		dt_bpf_map_update(fd, &key, &dflt);
> > +	}
> >   	/* Populate the 'cpuinfo' map. */
> >   	dt_bpf_map_update(ci_mapfd, &key, dtp->dt_conf.cpus);
> > diff --git a/libdtrace/dt_bpf.h b/libdtrace/dt_bpf.h
> > index 287178c1..5b9bae80 100644
> > --- a/libdtrace/dt_bpf.h
> > +++ b/libdtrace/dt_bpf.h
> > @@ -27,6 +27,7 @@ extern "C" {
> >   #define DT_CONST_STKSIZ	7
> >   #define DT_CONST_BOOTTM	8
> >   #define DT_CONST_NSPEC	9
> > +#define DT_CONST_NCPUS	10
> >   extern int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
> >   			   int group_fd, unsigned long flags);
> > diff --git a/libdtrace/dt_cc.c b/libdtrace/dt_cc.c
> > index a25f3548..ce824eb5 100644
> > --- a/libdtrace/dt_cc.c
> > +++ b/libdtrace/dt_cc.c
> > @@ -2355,6 +2355,9 @@ dt_link_construct(dtrace_hdl_t *dtp, const dt_probe_t *prp, dtrace_difo_t *dp,
> >   			case DT_CONST_NSPEC:
> >   				nrp->dofr_data = dtp->dt_options[DTRACEOPT_NSPEC];
> >   				continue;
> > +			case DT_CONST_NCPUS:
> > +				nrp->dofr_data = dtp->dt_conf.max_cpuid + 1;
> > +				continue;
> >   			case DT_CONST_STKSIZ:
> >   				nrp->dofr_data = sizeof(uint64_t)
> >   				    * dtp->dt_options[DTRACEOPT_MAXFRAMES];
> > diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> > index c64fa78c..d1922f0d 100644
> > --- a/libdtrace/dt_cg.c
> > +++ b/libdtrace/dt_cg.c
> > @@ -2057,13 +2057,40 @@ dt_cg_load_var(dt_node_t *dst, dt_irlist_t *dlp, dt_regset_t *drp)
> >   	}
> >   	/* otherwise, handle thread-local and built-in variables */
> > -	if (dt_regset_xalloc_args(drp) == -1)
> > -		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> > -
> >   	if (idp->di_flags & DT_IDFLG_TLS) {	/* TLS var */
> > +		dt_ident_t	*fidp = dt_dlib_get_func(yypcb->pcb_hdl,
> > +							 "dt_get_tvar");
> > +
> > +		/* FIXME */
> > +		assert(fidp != NULL);
> > +		if ((dst->dn_reg = dt_regset_alloc(drp)) == -1)
> > +			longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> > +		if (dt_regset_xalloc_args(drp) == -1)
> > +			longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> >   		emit(dlp, BPF_MOV_IMM(BPF_REG_1, idp->di_id - DIF_VAR_OTHER_UBASE));
> > -		idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_get_tvar");
> > +		dt_regset_xalloc(drp, BPF_REG_0);
> > +		emite(dlp, BPF_CALL_FUNC(fidp->di_id), fidp);
> > +		dt_regset_free_args(drp);
> > +		emit(dlp, BPF_MOV_REG(dst->dn_reg, BPF_REG_0));
> > +		dt_regset_free(drp, BPF_REG_0);
> > +
> > +		dt_cg_check_notnull(dlp, drp, dst->dn_reg);
> > +
> > +		/* load the variable value if not by ref */
> > +		if (!(dst->dn_flags & DT_NF_REF)) {
> > +			size_t	size = dt_node_type_size(dst);
> > +
> > +			assert(size > 0 && size <= 8 &&
> > +			       (size & (size - 1)) == 0);
> > +
> > +			emit(dlp, BPF_LOAD(ldstw[size], dst->dn_reg, dst->dn_reg, 0));
> > +		}
> > +
> > +		return;
> >   	} else if (idp->di_id < DIF_VAR_OTHER_UBASE) {	/* built-in var */
> > +		if (dt_regset_xalloc_args(drp) == -1)
> > +			longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> > +
> >   		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_FP, DT_STK_DCTX));
> >   		emit(dlp, BPF_MOV_IMM(BPF_REG_2, idp->di_id));
> >   		idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_get_bvar");
> > @@ -2301,14 +2328,13 @@ dt_cg_store_var(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp,
> >   		dt_ident_t *idp)
> >   {
> >   	uint_t	varid;
> > +	int	reg;
> > +	size_t	size;
> >   	idp->di_flags |= DT_IDFLG_DIFW;
> >   	/* global and local variables (that is, not thread-local) */
> >   	if (!(idp->di_flags & DT_IDFLG_TLS)) {
> > -		int	reg;
> > -		size_t	size;
> > -
> >   		if ((reg = dt_regset_alloc(drp)) == -1)
> >   			longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> > @@ -2351,23 +2377,48 @@ dt_cg_store_var(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp,
> >   	/* TLS var */
> >   	varid = idp->di_id - DIF_VAR_OTHER_UBASE;
> > -	idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_set_tvar");
> > +	size = idp->di_size;
> > +
> > +	idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_get_tvar");
> >   	assert(idp != NULL);
> >   	if (dt_regset_xalloc_args(drp) == -1)
> >   		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> > -	/*
> > -	 * We assign the varid in the instruction preceding the call because
> > -	 * the disassembler expects this sequence in support for annotating
> > -	 * the disassembly with variables names.
> > -	 */
> > -	emit(dlp,  BPF_MOV_REG(BPF_REG_2, dnp->dn_reg));
> >   	emit(dlp,  BPF_MOV_IMM(BPF_REG_1, varid));
> >   	dt_regset_xalloc(drp, BPF_REG_0);
> >   	emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
> > -	dt_regset_free(drp, BPF_REG_0);
> >   	dt_regset_free_args(drp);
> > +
> > +	if ((reg = dt_regset_alloc(drp)) == -1)
> > +		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> > +
> > +	emit(dlp, BPF_MOV_REG(reg, BPF_REG_0));
> > +	dt_regset_free(drp, BPF_REG_0);
> > +
> > +	dt_cg_check_notnull(dlp, drp, reg);
> > +
> > +	if (dnp->dn_flags & DT_NF_REF) {
> > +		size_t		srcsz;
> > +
> > +		/*
> > +		 * Determine the amount of data to be copied.  It is
> > +		 * the lesser of the size of the identifier and the
> > +		 * size of the data being copied in.
> > +		 */
> > +		srcsz = dt_node_type_size(dnp->dn_right);
> > +		if (dt_node_is_string(dnp))
> > +			srcsz += DT_STRLEN_BYTES;
> > +		size = MIN(srcsz, size);
> I think neither "srcsz = dt_node_type_size(dnp->dn_right)" nor "size =
> idp->di_size" includes the NUL terminating byte.  Same with the pre-existing
> gvar/lvar code.

This is actually an interesting situation because dt_node_type_size() does
include the terminating NUL byte for string constants but not for regular
string types.  And di_size generally will not contain it either.  And as you
say, this applies to gvar and lvar as well.  So, I am inclined to leave it as
is in this patch and simply fix it for all three cases with a single patch.

> > +
> > +		dt_cg_memcpy(dlp, drp, reg, dnp->dn_reg, size);
> > +	} else {
> > +		assert(size > 0 && size <= 8 && (size & (size - 1)) == 0);
> > +
> > +		emit(dlp, BPF_STORE(ldstw[size], reg, 0, dnp->dn_reg));
> > +	}
> > +
> > +	dt_regset_free(drp, reg);
> >   }
> >   /*
> > diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
> > index 75c00e9e..f7be9fa2 100644
> > --- a/libdtrace/dt_dlibs.c
> > +++ b/libdtrace/dt_dlibs.c
> > @@ -75,6 +75,7 @@ static const dt_ident_t		dt_bpf_symbols[] = {
> >   	DT_BPF_SYMBOL(aggs, DT_IDENT_PTR),
> >   	DT_BPF_SYMBOL(buffers, DT_IDENT_PTR),
> >   	DT_BPF_SYMBOL(cpuinfo, DT_IDENT_PTR),
> > +	DT_BPF_SYMBOL(dvars, DT_IDENT_PTR),
> >   	DT_BPF_SYMBOL(gvars, DT_IDENT_PTR),
> >   	DT_BPF_SYMBOL(lvars, DT_IDENT_PTR),
> >   	DT_BPF_SYMBOL(mem, DT_IDENT_PTR),
> > @@ -82,7 +83,6 @@ static const dt_ident_t		dt_bpf_symbols[] = {
> >   	DT_BPF_SYMBOL(specs, DT_IDENT_PTR),
> >   	DT_BPF_SYMBOL(state, DT_IDENT_PTR),
> >   	DT_BPF_SYMBOL(strtab, DT_IDENT_PTR),
> > -	DT_BPF_SYMBOL(tvars, DT_IDENT_PTR),
> >   	/* BPF internal identifiers */
> >   	DT_BPF_SYMBOL_ID(EPID, DT_IDENT_SCALAR, DT_CONST_EPID),
> >   	DT_BPF_SYMBOL_ID(PRID, DT_IDENT_SCALAR, DT_CONST_PRID),
> > @@ -93,6 +93,7 @@ static const dt_ident_t		dt_bpf_symbols[] = {
> >   	DT_BPF_SYMBOL_ID(STKSIZ, DT_IDENT_SCALAR, DT_CONST_STKSIZ),
> >   	DT_BPF_SYMBOL_ID(BOOTTM, DT_IDENT_SCALAR, DT_CONST_BOOTTM),
> >   	DT_BPF_SYMBOL_ID(NSPEC, DT_IDENT_SCALAR, DT_CONST_NSPEC),
> > +	DT_BPF_SYMBOL_ID(NCPUS, DT_IDENT_SCALAR, DT_CONST_NCPUS),
> >   	/* End-of-list marker */
> >   	{ NULL, }
> >   };
> > diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
> > index f83fb2f0..dde7b309 100644
> > --- a/libdtrace/dt_open.c
> > +++ b/libdtrace/dt_open.c
> > @@ -789,6 +789,9 @@ dt_vopen(int version, int flags, int *errp,
> >   	 */
> >   	dtp->dt_options[DTRACEOPT_STRSIZE] = 256;
> > +	/* Set the default dynamic variable space size. */
> > +	dtp->dt_options[DTRACEOPT_DYNVARSIZE] = 1024 * 1024 * 1;
> > +
> >   	/*
> >   	 * Set the default speculation size and number of simultaneously active
> >   	 * speculations.
> 
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel



More information about the DTrace-devel mailing list