[DTrace-devel] [PATCH 12/12] Implement TLS variables

Kris Van Hees kris.van.hees at oracle.com
Sat Nov 20 05:44:24 UTC 2021


Thread-local storage (TLS) variables are a form of dynamic variables in
DTrace.  They are implemented using a global BPF hash map, index using
a key value that is derived from the task ID and the variable ID (with
some spwecial magic to handle the idle task that has TID 0 on all CPUs).

Access to the TLS variables is handled through a pre-compiled BPF
function : dt_get_tvar.  It returns the address of the storage location
for the given variable in the current task.  This is used for both load
and store operations.

THe dvars BPF map contains an element at key 0 that contains a value of
all zeros.  This is used to initialize new TLS variables.

Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
---
 bpf/Build            |  2 +-
 bpf/get_tvar.c       | 40 ++++++++++++++++++----
 bpf/set_tvar.c       | 18 ----------
 libdtrace/dt_bpf.c   | 48 +++++++++++---------------
 libdtrace/dt_bpf.h   |  1 +
 libdtrace/dt_cc.c    |  3 ++
 libdtrace/dt_cg.c    | 81 ++++++++++++++++++++++++++++++++++++--------
 libdtrace/dt_dlibs.c |  3 +-
 libdtrace/dt_open.c  |  3 ++
 9 files changed, 130 insertions(+), 69 deletions(-)
 delete mode 100644 bpf/set_tvar.c

diff --git a/bpf/Build b/bpf/Build
index 5c1bc235..2b17704b 100644
--- a/bpf/Build
+++ b/bpf/Build
@@ -24,7 +24,7 @@ bpf_dlib_SRCDEPS = $(objdir)/include/.dir.stamp
 bpf_dlib_SOURCES = \
 	agg_lqbin.c agg_qbin.c \
 	get_bvar.c \
-	get_tvar.c set_tvar.c \
+	get_tvar.c \
 	index.S \
 	lltostr.S \
 	probe_error.c \
diff --git a/bpf/get_tvar.c b/bpf/get_tvar.c
index 6c8c1d2a..bafff2a1 100644
--- a/bpf/get_tvar.c
+++ b/bpf/get_tvar.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
  */
 #include <linux/bpf.h>
 #include <stdint.h>
@@ -10,12 +10,40 @@
 # define noinline	__attribute__((noinline))
 #endif
 
-extern struct bpf_map_def tvars;
+extern struct bpf_map_def dvars;
+extern uint64_t NCPUS;
 
-noinline uint64_t dt_get_tvar(uint32_t id)
+noinline void *dt_get_tvar(uint32_t id)
 {
-	uint64_t	*val;
+	uint64_t	key;
+	uint64_t	dflt_key = 0;;
+	void		*val;
 
-	val = bpf_map_lookup_elem(&tvars, &id);
-	return val ? *val : 0;
+	key = bpf_get_current_pid_tgid();
+	key &= 0x00000000ffffffffUL;
+	if (key == 0)
+		key = bpf_get_smp_processor_id();
+	else
+		key += (uint32_t)(uint64_t)&NCPUS;
+
+	key++;
+	key = (key << 32) | id;
+
+	val = bpf_map_lookup_elem(&dvars, &key);
+	if (val != 0)
+		return val;
+
+	/* Not found - create it with the default value. */
+	val = bpf_map_lookup_elem(&dvars, &dflt_key);
+	if (val == 0)
+		return 0;
+
+	if (bpf_map_update_elem(&dvars, &key, val, BPF_ANY) < 0)
+		return 0;
+
+	val = bpf_map_lookup_elem(&dvars, &key);
+	if (val != 0)
+		return val;
+
+	return 0;
 }
diff --git a/bpf/set_tvar.c b/bpf/set_tvar.c
deleted file mode 100644
index d7c48665..00000000
--- a/bpf/set_tvar.c
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
- */
-#include <linux/bpf.h>
-#include <stdint.h>
-#include <bpf-helpers.h>
-
-#ifndef noinline
-# define noinline	__attribute__((noinline))
-#endif
-
-extern struct bpf_map_def tvars;
-
-noinline void dt_set_tvar(uint32_t id, uint64_t val)
-{
-	bpf_map_update_elem(&tvars, &id, &val, BPF_ANY);
-}
diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
index fc1e5e8f..6ef41870 100644
--- a/libdtrace/dt_bpf.c
+++ b/libdtrace/dt_bpf.c
@@ -98,7 +98,7 @@ int dt_bpf_map_update(int fd, const void *key, const void *val)
 	attr.map_fd = fd;
 	attr.key = (uint64_t)(unsigned long)key;
 	attr.value = (uint64_t)(unsigned long)val;
-	attr.flags = 0;
+	attr.flags = BPF_ANY;
 
 	return bpf(BPF_MAP_UPDATE_ELEM, &attr);
 }
@@ -213,36 +213,21 @@ populate_probes_map(dtrace_hdl_t *dtp, int fd)
  *		use.
  * - gvars:	Global variables map.  This is a global map with a singleton
  *		element (key 0) addressed by variable offset.
+ * - dvars:	Dynamic variables map.  This is a global hash map index with
+ *		a unique numeric identifier for each variable.
  * - lvars:	Local variables map.  This is a per-CPU map with a singleton
  *		element (key 0) addressed by variable offset.
- *
- * FIXME: TLS variable storage is still being designed further so this is just
- *	  a temporary placeholder and will most likely be replaced by something
- *	  else.  If we stick to the legacy DTrace approach, we will need to
- *	  determine the maximum overall key size for TLS variables *and* the
- *	  maximum value size.  Based on these values, the legacy code would
- *	  take the memory size set aside for dynamic variables, and divide it by
- *	  the storage size needed for the largest dynamic variable (associative
- *	  array element or TLS variable).
- *
- * - tvars:	Thread-local (TLS) variables map, associating a 64-bit value
- *		with each thread-local variable.  The map is indexed by a value
- *		computed based on the thread-local variable id and execution
- *		thread information to ensure each thread has its own copy of a
- *		given thread-local variable.  The amount of TLS variable space
- *		to allocate for these dynamic variables is calculated based on
- *		the number of uniquely named TLS variables (next-to-be-assigned
- *		id minus the base id).
  */
 int
 dt_bpf_gmap_create(dtrace_hdl_t *dtp)
 {
-	int		stabsz, gvarsz, lvarsz, tvarc, aggsz, memsz;
+	int		stabsz, gvarsz, lvarsz, dvarc, aggsz, memsz;
 	int		ci_mapfd, st_mapfd, pr_mapfd;
 	uint32_t	key = 0;
 	size_t		strsize = dtp->dt_options[DTRACEOPT_STRSIZE];
 	uint8_t		*buf, *end;
 	char		*strtab;
+	size_t		strdatasz = P2ROUNDUP(DT_STRLEN_BYTES + strsize + 1, 8);
 
 	/* If we already created the global maps, return success. */
 	if (dt_gmap_done)
@@ -257,7 +242,7 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
 	/* Determine sizes for global, local, and TLS maps. */
 	gvarsz = P2ROUNDUP(dt_idhash_datasize(dtp->dt_globals), 8);
 	lvarsz = P2ROUNDUP(dtp->dt_maxlvaralloc, 8);
-	tvarc = dt_idhash_peekid(dtp->dt_tls) - DIF_VAR_OTHER_UBASE;
+	dvarc = dtp->dt_options[DTRACEOPT_DYNVARSIZE] / strdatasz;
 
 	/* Create global maps as long as there are no errors. */
 	dtp->dt_stmap_fd = create_gmap(dtp, "state", BPF_MAP_TYPE_ARRAY,
@@ -312,9 +297,7 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
 		8 +
 		roundup(dtp->dt_maxreclen, 8) +
 		MAX(sizeof(uint64_t) * dtp->dt_options[DTRACEOPT_MAXFRAMES],
-		    DT_TSTRING_SLOTS *
-			roundup(DT_STRLEN_BYTES +
-				dtp->dt_options[DTRACEOPT_STRSIZE] + 1, 8) +
+		    DT_TSTRING_SLOTS * strdatasz +
 		    dtp->dt_options[DTRACEOPT_STRSIZE] + 1
 		);
 	if (create_gmap(dtp, "mem", BPF_MAP_TYPE_PERCPU_ARRAY,
@@ -371,10 +354,19 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
 			sizeof(uint32_t), lvarsz, 1) == -1)
 		return -1;		/* dt_errno is set for us */
 
-	if (tvarc > 0 &&
-	    create_gmap(dtp, "tvars", BPF_MAP_TYPE_ARRAY,
-			sizeof(uint32_t), sizeof(uint64_t), tvarc) == -1)
-		return -1;		/* dt_errno is set for us */
+	if (dvarc > 0) {
+		int	fd;
+		char	dflt[strdatasz];
+
+		/* We allocate room for an additional default value. */
+		fd = create_gmap(dtp, "dvars", BPF_MAP_TYPE_HASH,
+				 sizeof(uint64_t), strdatasz, dvarc + 1);
+		if (fd == -1)
+			return -1;	/* dt_errno is set for us */
+
+		memset(dflt, 0, strdatasz);
+		dt_bpf_map_update(fd, &key, &dflt);
+	}
 
 	/* Populate the 'cpuinfo' map. */
 	dt_bpf_map_update(ci_mapfd, &key, dtp->dt_conf.cpus);
diff --git a/libdtrace/dt_bpf.h b/libdtrace/dt_bpf.h
index 287178c1..5b9bae80 100644
--- a/libdtrace/dt_bpf.h
+++ b/libdtrace/dt_bpf.h
@@ -27,6 +27,7 @@ extern "C" {
 #define DT_CONST_STKSIZ	7
 #define DT_CONST_BOOTTM	8
 #define DT_CONST_NSPEC	9
+#define DT_CONST_NCPUS	10
 
 extern int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
 			   int group_fd, unsigned long flags);
diff --git a/libdtrace/dt_cc.c b/libdtrace/dt_cc.c
index a25f3548..ce824eb5 100644
--- a/libdtrace/dt_cc.c
+++ b/libdtrace/dt_cc.c
@@ -2355,6 +2355,9 @@ dt_link_construct(dtrace_hdl_t *dtp, const dt_probe_t *prp, dtrace_difo_t *dp,
 			case DT_CONST_NSPEC:
 				nrp->dofr_data = dtp->dt_options[DTRACEOPT_NSPEC];
 				continue;
+			case DT_CONST_NCPUS:
+				nrp->dofr_data = dtp->dt_conf.max_cpuid + 1;
+				continue;
 			case DT_CONST_STKSIZ:
 				nrp->dofr_data = sizeof(uint64_t)
 				    * dtp->dt_options[DTRACEOPT_MAXFRAMES];
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index c64fa78c..d1922f0d 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -2057,13 +2057,40 @@ dt_cg_load_var(dt_node_t *dst, dt_irlist_t *dlp, dt_regset_t *drp)
 	}
 
 	/* otherwise, handle thread-local and built-in variables */
-	if (dt_regset_xalloc_args(drp) == -1)
-		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
-
 	if (idp->di_flags & DT_IDFLG_TLS) {	/* TLS var */
+		dt_ident_t	*fidp = dt_dlib_get_func(yypcb->pcb_hdl,
+							 "dt_get_tvar");
+
+		/* FIXME */
+		assert(fidp != NULL);
+		if ((dst->dn_reg = dt_regset_alloc(drp)) == -1)
+			longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+		if (dt_regset_xalloc_args(drp) == -1)
+			longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
 		emit(dlp, BPF_MOV_IMM(BPF_REG_1, idp->di_id - DIF_VAR_OTHER_UBASE));
-		idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_get_tvar");
+		dt_regset_xalloc(drp, BPF_REG_0);
+		emite(dlp, BPF_CALL_FUNC(fidp->di_id), fidp);
+		dt_regset_free_args(drp);
+		emit(dlp, BPF_MOV_REG(dst->dn_reg, BPF_REG_0));
+		dt_regset_free(drp, BPF_REG_0);
+
+		dt_cg_check_notnull(dlp, drp, dst->dn_reg);
+
+		/* load the variable value if not by ref */
+		if (!(dst->dn_flags & DT_NF_REF)) {
+			size_t	size = dt_node_type_size(dst);
+
+			assert(size > 0 && size <= 8 &&
+			       (size & (size - 1)) == 0);
+
+			emit(dlp, BPF_LOAD(ldstw[size], dst->dn_reg, dst->dn_reg, 0));
+		}
+
+		return;
 	} else if (idp->di_id < DIF_VAR_OTHER_UBASE) {	/* built-in var */
+		if (dt_regset_xalloc_args(drp) == -1)
+			longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+
 		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_FP, DT_STK_DCTX));
 		emit(dlp, BPF_MOV_IMM(BPF_REG_2, idp->di_id));
 		idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_get_bvar");
@@ -2301,14 +2328,13 @@ dt_cg_store_var(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp,
 		dt_ident_t *idp)
 {
 	uint_t	varid;
+	int	reg;
+	size_t	size;
 
 	idp->di_flags |= DT_IDFLG_DIFW;
 
 	/* global and local variables (that is, not thread-local) */
 	if (!(idp->di_flags & DT_IDFLG_TLS)) {
-		int	reg;
-		size_t	size;
-
 		if ((reg = dt_regset_alloc(drp)) == -1)
 			longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
 
@@ -2351,23 +2377,48 @@ dt_cg_store_var(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp,
 
 	/* TLS var */
 	varid = idp->di_id - DIF_VAR_OTHER_UBASE;
-	idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_set_tvar");
+	size = idp->di_size;
+
+	idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_get_tvar");
 	assert(idp != NULL);
 
 	if (dt_regset_xalloc_args(drp) == -1)
 		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
 
-	/*
-	 * We assign the varid in the instruction preceding the call because
-	 * the disassembler expects this sequence in support for annotating
-	 * the disassembly with variables names.
-	 */
-	emit(dlp,  BPF_MOV_REG(BPF_REG_2, dnp->dn_reg));
 	emit(dlp,  BPF_MOV_IMM(BPF_REG_1, varid));
 	dt_regset_xalloc(drp, BPF_REG_0);
 	emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
-	dt_regset_free(drp, BPF_REG_0);
 	dt_regset_free_args(drp);
+
+	if ((reg = dt_regset_alloc(drp)) == -1)
+		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+
+	emit(dlp, BPF_MOV_REG(reg, BPF_REG_0));
+	dt_regset_free(drp, BPF_REG_0);
+
+	dt_cg_check_notnull(dlp, drp, reg);
+
+	if (dnp->dn_flags & DT_NF_REF) {
+		size_t		srcsz;
+
+		/*
+		 * Determine the amount of data to be copied.  It is
+		 * the lesser of the size of the identifier and the
+		 * size of the data being copied in.
+		 */
+		srcsz = dt_node_type_size(dnp->dn_right);
+		if (dt_node_is_string(dnp))
+			srcsz += DT_STRLEN_BYTES;
+		size = MIN(srcsz, size);
+
+		dt_cg_memcpy(dlp, drp, reg, dnp->dn_reg, size);
+	} else {
+		assert(size > 0 && size <= 8 && (size & (size - 1)) == 0);
+
+		emit(dlp, BPF_STORE(ldstw[size], reg, 0, dnp->dn_reg));
+	}
+
+	dt_regset_free(drp, reg);
 }
 
 /*
diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
index 75c00e9e..f7be9fa2 100644
--- a/libdtrace/dt_dlibs.c
+++ b/libdtrace/dt_dlibs.c
@@ -75,6 +75,7 @@ static const dt_ident_t		dt_bpf_symbols[] = {
 	DT_BPF_SYMBOL(aggs, DT_IDENT_PTR),
 	DT_BPF_SYMBOL(buffers, DT_IDENT_PTR),
 	DT_BPF_SYMBOL(cpuinfo, DT_IDENT_PTR),
+	DT_BPF_SYMBOL(dvars, DT_IDENT_PTR),
 	DT_BPF_SYMBOL(gvars, DT_IDENT_PTR),
 	DT_BPF_SYMBOL(lvars, DT_IDENT_PTR),
 	DT_BPF_SYMBOL(mem, DT_IDENT_PTR),
@@ -82,7 +83,6 @@ static const dt_ident_t		dt_bpf_symbols[] = {
 	DT_BPF_SYMBOL(specs, DT_IDENT_PTR),
 	DT_BPF_SYMBOL(state, DT_IDENT_PTR),
 	DT_BPF_SYMBOL(strtab, DT_IDENT_PTR),
-	DT_BPF_SYMBOL(tvars, DT_IDENT_PTR),
 	/* BPF internal identifiers */
 	DT_BPF_SYMBOL_ID(EPID, DT_IDENT_SCALAR, DT_CONST_EPID),
 	DT_BPF_SYMBOL_ID(PRID, DT_IDENT_SCALAR, DT_CONST_PRID),
@@ -93,6 +93,7 @@ static const dt_ident_t		dt_bpf_symbols[] = {
 	DT_BPF_SYMBOL_ID(STKSIZ, DT_IDENT_SCALAR, DT_CONST_STKSIZ),
 	DT_BPF_SYMBOL_ID(BOOTTM, DT_IDENT_SCALAR, DT_CONST_BOOTTM),
 	DT_BPF_SYMBOL_ID(NSPEC, DT_IDENT_SCALAR, DT_CONST_NSPEC),
+	DT_BPF_SYMBOL_ID(NCPUS, DT_IDENT_SCALAR, DT_CONST_NCPUS),
 	/* End-of-list marker */
 	{ NULL, }
 };
diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
index f83fb2f0..dde7b309 100644
--- a/libdtrace/dt_open.c
+++ b/libdtrace/dt_open.c
@@ -789,6 +789,9 @@ dt_vopen(int version, int flags, int *errp,
 	 */
 	dtp->dt_options[DTRACEOPT_STRSIZE] = 256;
 
+	/* Set the default dynamic variable space size. */
+	dtp->dt_options[DTRACEOPT_DYNVARSIZE] = 1024 * 1024 * 1;
+
 	/*
 	 * Set the default speculation size and number of simultaneously active
 	 * speculations.
-- 
2.33.0




More information about the DTrace-devel mailing list