[DTrace-devel] [PATCH] Add a block of zeroes to use to initialize BPF maps

Kris Van Hees kris.van.hees at oracle.com
Wed Aug 3 16:34:41 UTC 2022


There are cases in BPF code where we want to read a contiguous block of
zeroes.  Since the block is read-only, it can be shared among all CPUs and
among multiple purposes to save BPF map space (locked memory).  Create such
a block.

The 'strtab' map is a BPF_MAP_TYPE_ARRAY map that already reserves a block
of memory at the end of its value to placate the BPF verifier, so we can
just use that (and enlarge it if needed).

For code generation, we need to know the offset of this region in the
strtab.  The offset cannot be known until after compilation, so the
relocation mechanism is used to patch in its value.

The dt_cg_zerosptr() function should be called to obtain the pointer value
to the memory block of zeros.  It will be stored in the given register.

Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>
---
 libdtrace/dt_bpf.c   | 30 +++++++++++++++++-------------
 libdtrace/dt_bpf.h   |  1 +
 libdtrace/dt_cc.c    |  3 +++
 libdtrace/dt_cg.c    | 14 ++++++++++++++
 libdtrace/dt_dlibs.c |  1 +
 libdtrace/dt_impl.h  |  2 ++
 6 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
index 94879b4e..17562b9d 100644
--- a/libdtrace/dt_bpf.c
+++ b/libdtrace/dt_bpf.c
@@ -292,7 +292,7 @@ populate_probes_map(dtrace_hdl_t *dtp, int fd)
  *		element (key 0).  Every aggregation is stored with two copies
  *		of its data to provide a lockless latch-based mechanism for
  *		atomic reading and writing.
- * - specs:     Map associating speculation IDs with a dt_bpf_specs_t struct
+ * - specs:	Map associating speculation IDs with a dt_bpf_specs_t struct
  *		giving the number of buffers speculated into for this
  *		speculation, and the number drained by userspace.
  * - buffers:	Perf event output buffer map, associating a perf event output
@@ -307,14 +307,16 @@ populate_probes_map(dtrace_hdl_t *dtp, int fd)
  *		buffer, and temporary storage for stack traces, string
  *		manipulation, etc.
  * - scratchmem: Storage for alloca() and other per-clause scratch space,
- *               implemented just as for mem.
+ *		implemented just as for mem.
  * - strtab:	String table map.  This is a global map with a singleton
  *		element (key 0) that contains the entire string table as a
  *		concatenation of all unique strings (each terminated with a
  *		NUL byte).  The string table size is taken from the DTrace
- *		consumer handle (dt_strlen), and increased by the maximum
- *		string size to ensure that the BPF verifier can validate all
- *		access requests for dynamic references to string constants.
+ *		consumer handle (dt_strlen).  Extra memory is allocated as a
+ *		memory block of zeros for initializing memory regions.  Its
+ *		size is at least the maximum string size to ensure the BPF
+ *		verifier can validate all access requests for dynamic
+ *		references to string constants.
  * - probes:	Probe information map.  This is a global map indexed by probe
  *		ID.  The value is a struct that contains static probe info.
  *		The map only contains entries for probes that are actually in
@@ -337,7 +339,7 @@ populate_probes_map(dtrace_hdl_t *dtp, int fd)
 int
 dt_bpf_gmap_create(dtrace_hdl_t *dtp)
 {
-	int		stabsz, sz;
+	int		sz;
 	int		dvarc = 0;
 	int		ci_mapfd, st_mapfd, pr_mapfd;
 	uint64_t	key = 0;
@@ -422,13 +424,15 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
 
 	/*
 	 * We need to create the global (consolidated) string table.  We store
-	 * the actual length (for in-code BPF validation purposes) but augment
-	 * it by the maximum string storage size to determine the size of the
-	 * BPF map value that is used to store the strtab.
+	 * the actual length (for in-code BPF validation purposes).  The size
+	 * of the map value is the string table size plus the greater of:
+	 *	- size of the memory block of zeros
+	 *	- maximum string size (plus 1 for the NUL byte)
 	 */
 	dtp->dt_strlen = dt_strtab_size(dtp->dt_ccstab);
-	stabsz = dtp->dt_strlen + strsize + 1;
-	strtab = dt_zalloc(dtp, stabsz);
+	dtp->dt_zerooffset = P2ROUNDUP(dtp->dt_strlen, 8);
+	sz = dtp->dt_zerooffset + MAX(strsize + 1, dtp->dt_zerosize);
+	strtab = dt_zalloc(dtp, sz);
 	if (strtab == NULL)
 		return dt_set_errno(dtp, EDT_NOMEM);
 
@@ -448,7 +452,7 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
 	}
 
 	st_mapfd = create_gmap(dtp, "strtab", BPF_MAP_TYPE_ARRAY,
-			       sizeof(uint32_t), stabsz, 1);
+			       sizeof(uint32_t), sz, 1);
 	if (st_mapfd == -1)
 		return -1;		/* dt_errno is set for us */
 
@@ -460,7 +464,7 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
 		return -1;		/* dt_errno is set for us */
 
 	/* global variables */
-	sz = P2ROUNDUP(dt_idhash_datasize(dtp->dt_globals), 8);
+	sz = dt_idhash_datasize(dtp->dt_globals);
 	if (sz > 0 &&
 	    create_gmap(dtp, "gvars", BPF_MAP_TYPE_ARRAY,
 			sizeof(uint32_t), sz, 1) == -1)
diff --git a/libdtrace/dt_bpf.h b/libdtrace/dt_bpf.h
index fc35ef44..91dc8290 100644
--- a/libdtrace/dt_bpf.h
+++ b/libdtrace/dt_bpf.h
@@ -36,6 +36,7 @@ extern "C" {
 #define DT_CONST_TASK_COMM	16
 #define DT_CONST_MUTEX_OWNER	17
 #define DT_CONST_RWLOCK_CNTS	18
+#define DT_CONST_ZERO_OFF	19
 
 #define DT_BPF_LOG_SIZE_DEFAULT	(UINT32_MAX >> 8)
 #define DT_BPF_LOG_SIZE_SMALL	4096
diff --git a/libdtrace/dt_cc.c b/libdtrace/dt_cc.c
index 2822a57b..bbc203c3 100644
--- a/libdtrace/dt_cc.c
+++ b/libdtrace/dt_cc.c
@@ -2466,6 +2466,9 @@ dt_link_construct(dtrace_hdl_t *dtp, const dt_probe_t *prp, dtrace_difo_t *dp,
 				nrp->dofr_data = total_offset;
 				continue;
 			}
+			case DT_CONST_ZERO_OFF:
+				nrp->dofr_data = dtp->dt_zerooffset;
+				continue;
 			default:
 				/* probe name -> value is probe id */
 				if (strchr(idp->di_name, ':') != NULL)
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index 6e20ba23..f37785a5 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -2563,6 +2563,20 @@ dt_cg_typecast(const dt_node_t *src, const dt_node_t *dst,
 	}
 }
 
+/*
+ * Store a pointer to the 'memory block of zeros' in reg.
+ */
+static void
+dt_cg_zerosptr(int reg, dt_irlist_t *dlp, dt_regset_t *drp)
+{
+	dtrace_hdl_t	*dtp = yypcb->pcb_hdl;
+	dt_ident_t	*zero_off = dt_dlib_get_var(dtp, "ZERO_OFF");
+
+	emit(dlp,  BPF_LOAD(BPF_DW, reg, BPF_REG_FP, DT_STK_DCTX));
+	emit(dlp,  BPF_LOAD(BPF_DW, reg, reg, DCTX_STRTAB));
+	emite(dlp, BPF_ALU64_IMM(BPF_ADD, reg, -1), zero_off);
+}
+
 /*
  * Generate code to push the specified argument list on to the tuple stack.
  * We use this routine for handling the index tuple for associative arrays.
diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
index 35936945..c302b19c 100644
--- a/libdtrace/dt_dlibs.c
+++ b/libdtrace/dt_dlibs.c
@@ -89,6 +89,7 @@ static const dt_ident_t		dt_bpf_symbols[] = {
 	DT_BPF_SYMBOL_ID(TASK_COMM, DT_IDENT_SCALAR, DT_CONST_TASK_COMM),
 	DT_BPF_SYMBOL_ID(MUTEX_OWNER, DT_IDENT_SCALAR, DT_CONST_MUTEX_OWNER),
 	DT_BPF_SYMBOL_ID(RWLOCK_CNTS, DT_IDENT_SCALAR, DT_CONST_RWLOCK_CNTS),
+	DT_BPF_SYMBOL_ID(ZERO_OFF, DT_IDENT_SCALAR, DT_CONST_ZERO_OFF),
 
 	/* End-of-list marker */
 	{ NULL, }
diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
index 46c81cd0..eccc1fb7 100644
--- a/libdtrace/dt_impl.h
+++ b/libdtrace/dt_impl.h
@@ -295,6 +295,8 @@ struct dtrace_hdl {
 	uint_t dt_maxdvarsize;	/* largest dynamic variable across programs */
 	uint_t dt_maxtuplesize;	/* largest tuple across programs */
 	uint_t dt_maxlvaralloc;	/* largest lvar alloc across pcbs */
+	uint_t dt_zerosize;	/* zero region, size */
+	uint_t dt_zerooffset;	/* zero region, offset */
 	dt_tstring_t *dt_tstrings; /* temporary string slots */
 	dt_list_t dt_modlist;	/* linked list of dt_module_t's */
 	dt_htab_t *dt_mods;	/* hash table of dt_module_t's */
-- 
2.34.1




More information about the DTrace-devel mailing list