[DTrace-devel] [PATCH 13/20] Store strings in the string table with varint length prefix

Kris Van Hees kris.van.hees at oracle.com
Tue Jun 1 22:48:09 PDT 2021


With the introduction of variable length integers, strings in D (and
then also BPF) can be stored with their length embedded in the byte
stream.  This makes it possible to perform operations on strings
without the need to recalculate the length of the string multiple
times.

The string constant table stored in DIFOs contains strings that will
be made available to the BPF program by loading the string constant
table into a BPF map.  It makes sense to already store strings using
the variable length prefix when the string constant table is
constructed, which then also requires code that makes use of it to know
about the variable length integer prefixing each string.

This patch introduces a dt_difo_getstr() function that returns a
regular C char * entity given a DIFO and a string offset into the
string constant table for that DIFO.  Any code that needs to access
a string constant from a DIFO is updated to make use of this new
function.

The string constant table creation code is updated to account for
the fact that the empty string will now occupy 2 bytes (one for the
length that is 0, and one for the terminating 0-byte).

Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
---
 libdtrace/dt_bpf.c    |  5 +--
 libdtrace/dt_cc.c     | 13 ++++----
 libdtrace/dt_dis.c    | 27 +++++++--------
 libdtrace/dt_impl.h   |  1 +
 libdtrace/dt_link.c   |  8 ++---
 libdtrace/dt_strtab.c | 76 ++++++++++++++++++++++++++++++-------------
 libdtrace/dt_subr.c   |  9 +++++
 7 files changed, 91 insertions(+), 48 deletions(-)

diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
index 67c36e97..41bd8ecd 100644
--- a/libdtrace/dt_bpf.c
+++ b/libdtrace/dt_bpf.c
@@ -17,6 +17,7 @@
 #include <dt_dctx.h>
 #include <dt_probe.h>
 #include <dt_state.h>
+#include <dt_varint.h>
 #include <dt_bpf.h>
 #include <port.h>
 
@@ -310,7 +311,7 @@ dt_bpf_reloc_prog(dtrace_hdl_t *dtp, const dtrace_difo_t *dp)
 	struct bpf_insn		*text = dp->dtdo_buf;
 
 	for (; len != 0; len--, rp++) {
-		char		*name = &dp->dtdo_strtab[rp->dofr_name];
+		const char	*name = dt_difo_getstr(dp, rp->dofr_name);
 		dt_ident_t	*idp = dt_idhash_lookup(dtp->dt_bpfsyms, name);
 		int		ioff = rp->dofr_offset /
 					sizeof(struct bpf_insn);
@@ -426,7 +427,7 @@ dt_bpf_reloc_error_prog(dtrace_hdl_t *dtp, dtrace_difo_t *dp)
 	struct bpf_insn		*text = dp->dtdo_buf;
 
 	for (; len != 0; len--, rp++) {
-		char		*name = &dp->dtdo_strtab[rp->dofr_name];
+		const char	*name = dt_difo_getstr(dp, rp->dofr_name);
 		dt_ident_t	*idp = dt_idhash_lookup(dtp->dt_bpfsyms, name);
 		int		ioff = rp->dofr_offset /
 					sizeof(struct bpf_insn);
diff --git a/libdtrace/dt_cc.c b/libdtrace/dt_cc.c
index 2798ed5c..66d93f3a 100644
--- a/libdtrace/dt_cc.c
+++ b/libdtrace/dt_cc.c
@@ -87,6 +87,7 @@
 #include <dt_grammar.h>
 #include <dt_ident.h>
 #include <dt_string.h>
+#include <dt_varint.h>
 #include <dt_impl.h>
 #include <dt_dis.h>
 #include <dt_cg.h>
@@ -2188,8 +2189,8 @@ dt_link_layout(dtrace_hdl_t *dtp, const dtrace_difo_t *dp, uint_t *pcp,
 		return pc;
 
 	for (; len != 0; len--, rp++) {
-		char            *name = &dp->dtdo_strtab[rp->dofr_name];
-		dtrace_difo_t   *rdp;
+		const char	*name = dt_difo_getstr(dp, rp->dofr_name);
+		dtrace_difo_t	*rdp;
 		int		ipc;
 
 		idp = dt_dlib_get_func(dtp, name);
@@ -2251,7 +2252,7 @@ dt_link_construct(dtrace_hdl_t *dtp, const dt_probe_t *prp, dtrace_difo_t *dp,
 	 */
 	(*vcp) += vlen;
 	for (; vlen != 0; vlen--, vp++, nvp++) {
-		const char	*name = &sdp->dtdo_strtab[vp->dtdv_name];
+		const char	*name = dt_difo_getstr(sdp, vp->dtdv_name);
 
 		*nvp = *vp;
 		nvp->dtdv_name = dt_strtab_insert(stab, name);
@@ -2268,7 +2269,7 @@ dt_link_construct(dtrace_hdl_t *dtp, const dt_probe_t *prp, dtrace_difo_t *dp,
 	 */
 	(*rcp) += len;
 	for (; len != 0; len--, rp++, nrp++) {
-		const char	*name = &sdp->dtdo_strtab[rp->dofr_name];
+		const char	*name = dt_difo_getstr(sdp, rp->dofr_name);
 		dt_ident_t	*idp = dt_dlib_get_func(dtp, name);
 
 		nrp->dofr_name = dt_strtab_insert(stab, name);
@@ -2294,7 +2295,7 @@ dt_link_construct(dtrace_hdl_t *dtp, const dt_probe_t *prp, dtrace_difo_t *dp,
 	rp = sdp->dtdo_breltab;
 	nrp = &dp->dtdo_breltab[rc];
 	for (; len != 0; len--, rp++, nrp++) {
-		const char	*name = &sdp->dtdo_strtab[rp->dofr_name];
+		const char	*name = dt_difo_getstr(sdp, rp->dofr_name);
 		dtrace_difo_t	*rdp;
 		dtrace_epid_t	nepid;
 		int		ipc;
@@ -2376,7 +2377,7 @@ dt_link_resolve(dtrace_hdl_t *dtp, dtrace_difo_t *dp)
 	const dof_relodesc_t	*rp = dp->dtdo_breltab;
 
 	for (; len != 0; len--, rp++) {
-		const char	*name = &dp->dtdo_strtab[rp->dofr_name];
+		const char	*name = dt_difo_getstr(dp, rp->dofr_name);
 		dt_ident_t	*idp = dt_dlib_get_sym(dtp, name);
 		uint_t		ioff = rp->dofr_offset /
 				       sizeof(struct bpf_insn);
diff --git a/libdtrace/dt_dis.c b/libdtrace/dt_dis.c
index 8094e40d..85b70b47 100644
--- a/libdtrace/dt_dis.c
+++ b/libdtrace/dt_dis.c
@@ -13,6 +13,7 @@
 #include <dt_ident.h>
 #include <dt_printf.h>
 #include <dt_string.h>
+#include <dt_varint.h>
 #include <bpf_asm.h>
 #include <port.h>
 
@@ -55,7 +56,7 @@ dt_dis_varname_id(const dtrace_difo_t *dp, uint_t id, uint_t scope, uint_t addr)
 		if (dvp->dtdv_id == id && dvp->dtdv_scope == scope &&
 		    dvp->dtdv_insn_from <= addr && addr <= dvp->dtdv_insn_to) {
 			if (dvp->dtdv_name < dp->dtdo_strlen)
-				return dp->dtdo_strtab + dvp->dtdv_name;
+				return dt_difo_getstr(dp, dvp->dtdv_name);
 			break;
 		}
 	}
@@ -73,7 +74,7 @@ dt_dis_varname_off(const dtrace_difo_t *dp, uint_t off, uint_t scope, uint_t add
 		if (dvp->dtdv_offset == off && dvp->dtdv_scope == scope &&
 		    dvp->dtdv_insn_from <= addr && addr <= dvp->dtdv_insn_to) {
 			if (dvp->dtdv_name < dp->dtdo_strlen)
-				return dp->dtdo_strtab + dvp->dtdv_name;
+				return dt_difo_getstr(dp, dvp->dtdv_name);
 			break;
 		}
 	}
@@ -296,8 +297,6 @@ static char *
 dt_dis_bpf_args(const dtrace_difo_t *dp, const char *fn,
 		const struct bpf_insn *in, char *buf, size_t len, uint_t addr)
 {
-	char		*s;
-
 	if (strcmp(fn, "dt_get_bvar") == 0) {
 		/*
 		 * We know that the previous instruction exists and assigns
@@ -321,6 +320,9 @@ dt_dis_bpf_args(const dtrace_difo_t *dp, const char *fn,
 					DIFV_SCOPE_THREAD, addr));
 		return buf;
 	} else if (strcmp(fn, "dt_get_string") == 0) {
+		const char	*s;
+		char		*se;
+
 		/*
 		 * We know that the previous instruction exists and assigns
 		 * the string offset to %r1 (because we wrote the code
@@ -330,11 +332,10 @@ dt_dis_bpf_args(const dtrace_difo_t *dp, const char *fn,
 		if (in->imm >= dp->dtdo_strlen)
 			return NULL;
 
-		s = dp->dtdo_strtab + in->imm;
-		s = strchr2esc(s, strlen(s));
-		snprintf(buf, len, "\"%s\"n",
-			 s ? s : dp->dtdo_strtab + in->imm);
-		free(s);
+		s = dt_difo_getstr(dp, in->imm);
+		se = strchr2esc(s, strlen(s));
+		snprintf(buf, len, "\"%s\"n", se ? se : s);
+		free(se);
 		return buf;
 	}
 
@@ -501,12 +502,12 @@ dt_dis_rtab(const char *rtag, const dtrace_difo_t *dp, FILE *fp,
 			fprintf(fp, "%-17s %-8llu %-8llu %s\n", tstr,
 				(u_longlong_t)rp->dofr_offset,
 				(u_longlong_t)rp->dofr_data,
-				&dp->dtdo_strtab[rp->dofr_name]);
+				dt_difo_getstr(dp, rp->dofr_name));
 		else
 			fprintf(fp, "%-17s %-8llu %-8s %s\n", tstr,
 				(u_longlong_t)rp->dofr_offset,
 				"*UND*",
-				&dp->dtdo_strtab[rp->dofr_name]);
+				dt_difo_getstr(dp, rp->dofr_name));
 	}
 }
 
@@ -693,7 +694,7 @@ dt_dis_difo(const dtrace_difo_t *dp, FILE *fp, const dt_ident_t *idp,
 			if (rp->dofr_offset < i * sizeof(uint64_t))
 				continue;
 			if (rp->dofr_offset == i * sizeof(uint64_t))
-				rname = &dp->dtdo_strtab[rp->dofr_name];
+				rname = dt_difo_getstr(dp, rp->dofr_name);
 
 			break;
 		}
@@ -764,7 +765,7 @@ dt_dis_difo(const dtrace_difo_t *dp, FILE *fp, const dt_ident_t *idp,
 			strcat(flags, "/w");
 
 		fprintf(fp, "%-16s %-4x %-6s %-3s %-3s %-11s %-4s %s\n",
-			&dp->dtdo_strtab[v->dtdv_name], v->dtdv_id,
+			dt_difo_getstr(dp, v->dtdv_name), v->dtdv_id,
 			offset, kind, scope, range, flags + 1,
 			dt_dis_typestr(&v->dtdv_type, type, sizeof(type)));
 	}
diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
index 5070c891..c2d32863 100644
--- a/libdtrace/dt_impl.h
+++ b/libdtrace/dt_impl.h
@@ -677,6 +677,7 @@ extern void *dt_calloc(dtrace_hdl_t *, size_t, size_t);
 extern void *dt_alloc(dtrace_hdl_t *, size_t);
 extern void dt_free(dtrace_hdl_t *, void *);
 extern void dt_difo_free(dtrace_hdl_t *, dtrace_difo_t *);
+extern const char *dt_difo_getstr(const dtrace_difo_t *, ssize_t);
 
 extern void dt_conf_init(dtrace_hdl_t *);
 
diff --git a/libdtrace/dt_link.c b/libdtrace/dt_link.c
index 0b03bf8c..73391b9d 100644
--- a/libdtrace/dt_link.c
+++ b/libdtrace/dt_link.c
@@ -1255,7 +1255,7 @@ process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
 		 * populate our string table and count the number of extra
 		 * symbols we'll require.
 		 */
-		strtab = dt_strtab_create(1);
+		strtab = dt_strtab_create(BUFSIZ);
 		nsym = 0;
 		isym = data_sym->d_size / symsize;
 		istr = data_str->d_size;
@@ -1337,10 +1337,10 @@ process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
 		 */
 		if (nsym > 0) {
 			/*
-			 * The first byte of the string table is reserved for
-			 * the \0 entry.
+			 * The first two bytes of the string table are reserved
+			 * for the \0 entry.
 			 */
-			len = dt_strtab_size(strtab) - 1;
+			len = dt_strtab_size(strtab) - 2;
 
 			assert(len > 0);
 			assert(dt_strtab_index(strtab, "") == 0);
diff --git a/libdtrace/dt_strtab.c b/libdtrace/dt_strtab.c
index 605b21ae..76e4e95a 100644
--- a/libdtrace/dt_strtab.c
+++ b/libdtrace/dt_strtab.c
@@ -12,6 +12,7 @@
 
 #include <dt_strtab.h>
 #include <dt_string.h>
+#include <dt_varint.h>
 #include <dt_impl.h>
 
 static int
@@ -40,8 +41,9 @@ dt_strtab_grow(dt_strtab_t *sp)
 dt_strtab_t *
 dt_strtab_create(size_t bufsz)
 {
-	dt_strtab_t *sp = malloc(sizeof(dt_strtab_t));
-	uint_t nbuckets = _dtrace_strbuckets;
+	dt_strtab_t	*sp = malloc(sizeof(dt_strtab_t));
+	uint_t		nbuckets = _dtrace_strbuckets;
+	int		n;
 
 	assert(bufsz != 0);
 
@@ -71,7 +73,12 @@ dt_strtab_create(size_t bufsz)
 	 * at offset 0.  We use this guarantee in dt_strtab_insert() and
 	 * dt_strtab_index().
 	 */
+	n = dt_int2vint(0, sp->str_ptr);
+	sp->str_ptr += n;
 	*sp->str_ptr++ = '\0';
+	sp->str_size = n + 1;
+	sp->str_nstrs = 1;
+
 	return sp;
 
 err:
@@ -169,17 +176,9 @@ err:
 }
 
 ssize_t
-dt_strtab_index(dt_strtab_t *sp, const char *str)
+dt_strtab_xindex(dt_strtab_t *sp, const char *str, size_t len, ulong_t h)
 {
-	dt_strhash_t *hp;
-	size_t len;
-	ulong_t h;
-
-	if (str == NULL || str[0] == '\0')
-		return 0;	/* The empty string is always at offset 0. */
-
-	len = strlen(str);
-	h = str2hval(str, 0) % sp->str_hashsz;
+	dt_strhash_t	*hp;
 
 	for (hp = sp->str_hash[h]; hp != NULL; hp = hp->str_next) {
 		if (dt_strtab_compare(sp, hp, str, len + 1) == 0)
@@ -189,22 +188,53 @@ dt_strtab_index(dt_strtab_t *sp, const char *str)
 	return -1;
 }
 
+ssize_t
+dt_strtab_index(dt_strtab_t *sp, const char *str)
+{
+	size_t		plen, slen;
+	ssize_t		off;
+	char		*s;
+
+	if (str == NULL || str[0] == '\0')
+		return 0;	/* The empty string is always at offset 0. */
+
+	slen = strlen(str);
+	s = malloc(VARINT_MAX_BYTES + slen + 1);
+	if (s == NULL)
+		return -1L;
+	plen = dt_int2vint(slen, s);	/* Build the length-prefixed key. */
+	memcpy(s + plen, str, slen + 1);
+	off = dt_strtab_xindex(sp, s, plen + slen,
+			       str2hval(str, slen) % sp->str_hashsz);
+	free(s);	/* The key is only needed for the lookup; don't leak it. */
+	return off;
+}
+
 ssize_t
 dt_strtab_insert(dt_strtab_t *sp, const char *str)
 {
-	dt_strhash_t *hp;
-	size_t len;
-	ssize_t off;
-	ulong_t h;
+	dt_strhash_t	*hp;
+	size_t		slen, plen;
+	ssize_t		off;
+	ulong_t		h;
+	char		*s;
 
 	if (str == NULL || str[0] == '\0')
 		return 0;	/* The empty string is always at offset 0. */
 
-	if ((off = dt_strtab_index(sp, str)) != -1)
-		return off;
+	slen = strlen(str);
+	s = malloc(VARINT_MAX_BYTES + slen + 1);
+	if (s == NULL)
+		return -1L;
 
-	len = strlen(str);
-	h = str2hval(str, 0) % sp->str_hashsz;
+	plen = dt_int2vint(slen, s);
+	memcpy(s + plen, str, slen + 1);
+
+	h = str2hval(str, slen) % sp->str_hashsz;
+	slen += plen;
+	off = dt_strtab_xindex(sp, s, slen, h);
+	if (off != -1)
+		return off;
 
 	/*
 	 * Create a new hash bucket, initialize it, and insert it at the front
@@ -216,18 +246,18 @@ dt_strtab_insert(dt_strtab_t *sp, const char *str)
 	hp->str_data = sp->str_ptr;
 	hp->str_buf = sp->str_nbufs - 1;
 	hp->str_off = sp->str_size;
-	hp->str_len = len;
+	hp->str_len = slen;
 	hp->str_next = sp->str_hash[h];
 
 	/*
 	 * Now copy the string data into our buffer list, and then update
 	 * the global counts of strings and bytes.  Return str's byte offset.
 	 */
-	if (dt_strtab_copyin(sp, str, len + 1) == -1)
+	if (dt_strtab_copyin(sp, s, slen + 1) == -1)
 		return -1L;
 
 	sp->str_nstrs++;
-	sp->str_size += len + 1;
+	sp->str_size += slen + 1;
 	sp->str_hash[h] = hp;
 
 	return hp->str_off;
diff --git a/libdtrace/dt_subr.c b/libdtrace/dt_subr.c
index 51ba6439..f34fb4ae 100644
--- a/libdtrace/dt_subr.c
+++ b/libdtrace/dt_subr.c
@@ -23,6 +23,7 @@
 #include <sys/ioctl.h>
 #include <port.h>
 
+#include <dt_varint.h>
 #include <dt_impl.h>
 #include <sys/dtrace.h>
 
@@ -747,6 +748,14 @@ dt_difo_free(dtrace_hdl_t *dtp, dtrace_difo_t *dp)
 	dt_free(dtp, dp);
 }
 
+const char *
+dt_difo_getstr(const dtrace_difo_t *dp, ssize_t idx)
+{
+	assert(idx < dp->dtdo_strlen);
+	/* Strings carry a varint length prefix; return the text after it. */
+	return dt_vint_skip(&dp->dtdo_strtab[idx]);
+}
+
 /*
  * dt_gmatch() is similar to gmatch(3GEN) and dtrace(7D) globbing, but also
  * implements the behavior that an empty pattern matches any string.
-- 
2.31.1




More information about the DTrace-devel mailing list