[DTrace-devel] [PATCH 1/3] Transition from variable-length string size to 2-byte string size

Kris Van Hees kris.van.hees at oracle.com
Wed Sep 1 23:38:49 PDT 2021


The use of variable-length integers to store string sizes has turned
out to be prohibitively complex for the BPF verifier to handle.  In
order to provide string manipulation functions we have decided to
transition away from the variable-length string size prefix.

This patch provides a transition to 2-byte string sizes stored ahead
of the actual string data.  A future patch will provide a more
permanent solution.

Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
---
 bpf/Build                    |   3 +-
 bpf/get_bvar.c               |   1 -
 bpf/strlen.c                 |  26 +++++
 bpf/strnlen.c                |  82 --------------
 bpf/varint.c                 | 199 ----------------------------------
 libdtrace/Build              |   1 -
 libdtrace/dt_cg.c            |  41 +++----
 libdtrace/dt_consume.c       |   4 +-
 libdtrace/dt_printf.c        |   3 +-
 libdtrace/dt_string.c        |  17 +++
 libdtrace/dt_string.h        |   7 +-
 libdtrace/dt_strtab.c        |  34 +++---
 libdtrace/dt_subr.c          |   4 +-
 libdtrace/dt_varint.c        | 203 -----------------------------------
 libdtrace/dt_varint.h        | 114 --------------------
 test/internals/tst.varint.sh |   9 --
 test/utils/Build             |   2 +-
 test/utils/tst.varint.c      |  97 -----------------
 18 files changed, 90 insertions(+), 757 deletions(-)
 create mode 100644 bpf/strlen.c
 delete mode 100644 bpf/strnlen.c
 delete mode 100644 bpf/varint.c
 delete mode 100644 libdtrace/dt_varint.c
 delete mode 100644 libdtrace/dt_varint.h
 delete mode 100755 test/internals/tst.varint.sh
 delete mode 100644 test/utils/tst.varint.c

diff --git a/bpf/Build b/bpf/Build
index e08a28b6..e7682bc7 100644
--- a/bpf/Build
+++ b/bpf/Build
@@ -26,8 +26,7 @@ bpf_dlib_SOURCES = \
 	get_bvar.c \
 	get_tvar.c set_tvar.c \
 	probe_error.c \
-	strnlen.c \
-	varint.c
+	strlen.c
 
 bpf-check: $(objdir)/include/.dir.stamp
 	$(BPFC) $(BPFCPPFLAGS) $(bpf_dlib_CPPFLAGS) $(BPFCFLAGS) -S \
diff --git a/bpf/get_bvar.c b/bpf/get_bvar.c
index ddada70e..7c98e166 100644
--- a/bpf/get_bvar.c
+++ b/bpf/get_bvar.c
@@ -12,7 +12,6 @@
 #include <dt_bpf_maps.h>
 #include <dt_dctx.h>
 #include <dt_state.h>
-#include <dt_varint.h>
 
 #include "probe_error.h"
 
diff --git a/bpf/strlen.c b/bpf/strlen.c
new file mode 100644
index 00000000..b2a8c740
--- /dev/null
+++ b/bpf/strlen.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ */
+#include <stdint.h>
+
+#define DT_STRLEN_BYTES	2
+
+#ifndef noinline
+# define noinline	__attribute__((noinline))
+#endif
+
+noinline void dt_strlen_store(uint64_t val, char *str)
+{
+	uint8_t		*buf = (uint8_t *)str;
+
+	buf[0] = (uint8_t)(val >> 8);
+	buf[1] = (uint8_t)(val & 0xff);
+}
+
+noinline uint64_t dt_strlen(const char *str)
+{
+	const uint8_t	*buf = (const uint8_t *)str;
+
+	return ((uint64_t)buf[0] << 8) + (uint64_t)buf[1];
+}
diff --git a/bpf/strnlen.c b/bpf/strnlen.c
deleted file mode 100644
index 885100e2..00000000
--- a/bpf/strnlen.c
+++ /dev/null
@@ -1,82 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
- */
-#include <stddef.h>
-#include <stdint.h>
-
-#ifndef noinline
-# define noinline	__attribute__((noinline))
-#endif
-
-/*
- * Determine the length of a string, no longer than a given size.
- *
- * Currently, only strings smaller than 256 characters are supported.
- *
- * Strings are expected to be allocated in 64-bit chunks, which guarantees that
- * every string starts on a 64-bit boundary and that the string data can be
- * read in chunks of 64-bit values.
- *
- * Algorithm based on the strlen() implementation in the GNU C Library, written
- * by Torbjorn Granlund with help from Dan Sahlin.
- */
-#define STRNLEN_SUBV	0x0101010101010101UL
-#define STRNLEN_MASK	0x8080808080808080UL
-noinline int dt_strnlen_dw(const uint64_t *p, size_t n)
-{
-	uint64_t	v = *p;
-	char		*s = (char *)p;
-
-	if (((v - STRNLEN_SUBV) & ~v & STRNLEN_MASK) != 0) {
-		if (s[0] == 0)
-			return 0;
-		if (s[1] == 0)
-			return 1;
-		if (s[2] == 0)
-			return 2;
-		if (s[3] == 0)
-			return 3;
-		if (s[4] == 0)
-			return 4;
-		if (s[5] == 0)
-			return 5;
-		if (s[6] == 0)
-			return 6;
-		if (s[7] == 0)
-			return 7;
-	}
-
-	return 8;
-}
-
-noinline int dt_strnlen(const char *s, size_t maxlen)
-{
-	uint64_t	*p = (uint64_t *)s;
-	int		l = 0;
-	int		n;
-
-#define STRNLEN_1_DW(p, n, l) \
-	do { \
-		n = dt_strnlen_dw(p++, 1); \
-		l += n; \
-		if (n < 8) \
-			return l; \
-		if ((char *)p - s > maxlen) \
-			return -1; \
-	} while (0)
-#define STRNLEN_4_DW(p, n, l) \
-	do { \
-		STRNLEN_1_DW(p, n, l); \
-		STRNLEN_1_DW(p, n, l); \
-		STRNLEN_1_DW(p, n, l); \
-		STRNLEN_1_DW(p, n, l); \
-	} while (0)
-
-	STRNLEN_4_DW(p, n, l);
-	STRNLEN_4_DW(p, n, l);
-	STRNLEN_4_DW(p, n, l);
-	STRNLEN_4_DW(p, n, l);
-
-	return -1;
-}
diff --git a/bpf/varint.c b/bpf/varint.c
deleted file mode 100644
index 02308a7a..00000000
--- a/bpf/varint.c
+++ /dev/null
@@ -1,199 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
- */
-#include <stdint.h>
-#include <dt_varint.h>
-
-#ifndef noinline
-# define noinline	__attribute__((noinline))
-#endif
-
-noinline uint64_t dt_int2vint(uint64_t val, char *str)
-{
-	uint64_t	len;
-	uint8_t		*buf = (uint8_t *)str;
-
-	if (val <= VARINT_1_MAX) {
-		buf[0] = (uint8_t)val;
-		return 1;
-	} else if (val <= VARINT_2_MAX) {
-		val -= VARINT_2_MIN;
-		len = 2;
-		buf[0] = VARINT_2_PREFIX | (uint8_t)(val >> VARINT_2_SHIFT);
-		goto two;
-	} else if (val <= VARINT_3_MAX) {
-		val -= VARINT_3_MIN;
-		len = 3;
-		buf[0] = VARINT_3_PREFIX | (uint8_t)(val >> VARINT_3_SHIFT);
-		goto three;
-	} else if (val <= VARINT_4_MAX) {
-		val -= VARINT_4_MIN;
-		len = 4;
-		buf[0] = VARINT_4_PREFIX | (uint8_t)(val >> VARINT_4_SHIFT);
-		goto four;
-	} else if (val <= VARINT_5_MAX) {
-		val -= VARINT_5_MIN;
-		len = 5;
-		buf[0] = VARINT_5_PREFIX | (uint8_t)(val >> VARINT_5_SHIFT);
-		goto five;
-	} else if (val <= VARINT_6_MAX) {
-		val -= VARINT_6_MIN;
-		len = 6;
-		buf[0] = VARINT_6_PREFIX | (uint8_t)(val >> VARINT_6_SHIFT);
-		goto six;
-	} else if (val <= VARINT_7_MAX) {
-		val -= VARINT_7_MIN;
-		len = 7;
-		buf[0] = VARINT_7_PREFIX | (uint8_t)(val >> VARINT_7_SHIFT);
-		goto seven;
-	} else if (val <= VARINT_8_MAX) {
-		val -= VARINT_8_MIN;
-		len = 8;
-		buf[0] = VARINT_8_PREFIX | (uint8_t)(val >> VARINT_8_SHIFT);
-		goto eight;
-	}
-
-	val -= VARINT_9_MIN;
-	len = 9;
-	buf[0] = VARINT_9_PREFIX;
-	buf[8] = (uint8_t)((val >> 56) & 0xff);
-eight:
-	buf[7] = (uint8_t)((val >> 48) & 0xff);
-seven:
-	buf[6] = (uint8_t)((val >> 40) & 0xff);
-six:
-	buf[5] = (uint8_t)((val >> 32) & 0xff);
-five:
-	buf[4] = (uint8_t)((val >> 24) & 0xff);
-four:
-	buf[3] = (uint8_t)((val >> 16) & 0xff);
-three:
-	buf[2] = (uint8_t)((val >> 8) & 0xff);
-two:
-	buf[1] = (uint8_t)(val & 0xff);
-
-	return len;
-}
-
-noinline uint64_t dt_vint2int(const char *str)
-{
-	const uint8_t	*buf = (const uint8_t *)str;
-	uint64_t	hi = buf[0];
-	uint64_t	base;
-	uint64_t	val = 0;
-
-	if (hi < VARINT_1_PLIM)	  /* 0xxxxxxx -> 0x00 - 0x7f */
-		return hi;
-	if (hi < VARINT_2_PLIM) { /* 10xxxxxx -> 0x0080 - 0x407f */
-		hi &= VARINT_HI_MASK(VARINT_2_PLIM);
-		hi <<= VARINT_2_SHIFT;
-		base = VARINT_2_MIN;
-		goto two;
-	}
-	if (hi < VARINT_3_PLIM) { /* 110xxxxx -> 0x4080 - 0x20407f */
-		hi &= VARINT_HI_MASK(VARINT_3_PLIM);
-		hi <<= VARINT_3_SHIFT;
-		base = VARINT_3_MIN;
-		goto three;
-	}
-	if (hi < VARINT_4_PLIM) { /* 1110xxxx -> 0x204080 - 0x1020407f */
-		hi &= VARINT_HI_MASK(VARINT_4_PLIM);
-		hi <<= VARINT_4_SHIFT;
-		base = VARINT_4_MIN;
-		goto four;
-	}
-	if (hi < VARINT_5_PLIM) { /* 11110xxx -> 0x10204080 - 0x081020407f */
-		hi &= VARINT_HI_MASK(VARINT_5_PLIM);
-		hi <<= VARINT_5_SHIFT;
-		base = VARINT_5_MIN;
-		goto five;
-	}
-	if (hi < VARINT_6_PLIM) { /* 111110xx -> 0x0810204080 - 0x4081020407f */
-		hi &= VARINT_HI_MASK(VARINT_6_PLIM);
-		hi <<= VARINT_6_SHIFT;
-		base = VARINT_6_MIN;
-		goto six;
-	}
-	if (hi < VARINT_7_PLIM) { /* 1111110x -> 0x40810204080 - 0x204081020407f */
-		hi &= VARINT_HI_MASK(VARINT_7_PLIM);
-		hi <<= VARINT_7_SHIFT;
-		base = VARINT_7_MIN;
-		goto seven;
-	}
-	if (hi < VARINT_8_PLIM) { /* 11111110 -> 0x2040810204080 - 0x10204081020407f */
-		hi = 0;
-		base = VARINT_8_MIN;
-		goto eight;
-	}
-
-	/* 11111111 -> 0x102040810204080 - 0xffffffffffffffff */
-	hi = 0;
-	base = VARINT_9_MIN;
-
-	val += ((uint64_t)buf[8]) << 56;
-eight:
-	val += ((uint64_t)buf[7]) << 48;
-seven:
-	val += ((uint64_t)buf[6]) << 40;
-six:
-	val += ((uint64_t)buf[5]) << 32;
-five:
-	val += ((uint64_t)buf[4]) << 24;
-four:
-	val += ((uint64_t)buf[3]) << 16;
-three:
-	val += ((uint64_t)buf[2]) << 8;
-two:
-	val += (uint64_t)buf[1];
-	val += hi;
-
-	return base + val;
-}
-
-noinline uint64_t dt_vint_size(uint64_t val)
-{
-	if (val <= VARINT_1_MAX)
-		return 1;
-	if (val <= VARINT_2_MAX)
-		return 2;
-	if (val <= VARINT_3_MAX)
-		return 3;
-	if (val <= VARINT_4_MAX)
-		return 4;
-	if (val <= VARINT_5_MAX)
-		return 5;
-	if (val <= VARINT_6_MAX)
-		return 6;
-	if (val <= VARINT_7_MAX)
-		return 7;
-	if (val <= VARINT_8_MAX)
-		return 8;
-
-	return 9;
-}
-
-noinline const char *dt_vint_skip(const char *str)
-{
-	const uint8_t	*buf = (const uint8_t *)str;
-	uint64_t	hi = buf[0];
-
-	if (hi < VARINT_1_PLIM)	 /* 0xxxxxxx -> 0x00 - 0x7f */
-		return &str[1];
-	if (hi < VARINT_2_PLIM)  /* 10xxxxxx -> 0x0080 - 0x407f */
-		return &str[2];
-	if (hi < VARINT_3_PLIM)  /* 110xxxxx -> 0x4080 - 0x20407f */
-		return &str[3];
-	if (hi < VARINT_4_PLIM)  /* 1110xxxx -> 0x204080 - 0x1020407f */
-		return &str[4];
-	if (hi < VARINT_5_PLIM)  /* 11110xxx -> 0x10204080 - 0x081020407f */
-		return &str[5];
-	if (hi < VARINT_6_PLIM)  /* 111110xx -> 0x0810204080 - 0x4081020407f */
-		return &str[6];
-	if (hi < VARINT_7_PLIM)  /* 1111110x -> 0x40810204080 - 0x204081020407f */
-		return &str[7];
-	if (hi < VARINT_8_PLIM)  /* 11111110 -> 0x2040810204080 - 0x10204081020407f */
-		return &str[8];
-
-	return &str[9];
-}
diff --git a/libdtrace/Build b/libdtrace/Build
index 3555c857..47e92736 100644
--- a/libdtrace/Build
+++ b/libdtrace/Build
@@ -61,7 +61,6 @@ libdtrace-build_SOURCES = dt_aggregate.c \
 			  dt_strtab.c \
 			  dt_subr.c \
 			  dt_symtab.c \
-			  dt_varint.c \
 			  dt_work.c \
 			  dt_xlator.c
 
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index b94f4485..67621be6 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -21,7 +21,7 @@
 #include <dt_printf.h>
 #include <dt_provider.h>
 #include <dt_probe.h>
-#include <dt_varint.h>
+#include <dt_string.h>
 #include <bpf_asm.h>
 
 static void dt_cg_node(dt_node_t *, dt_irlist_t *, dt_regset_t *);
@@ -749,7 +749,7 @@ dt_cg_memcpy(dt_irlist_t *dlp, dt_regset_t *drp, int dst, int src, size_t size)
 static void
 dt_cg_strlen(dt_irlist_t *dlp, dt_regset_t *drp, int dst, int src)
 {
-	dt_ident_t	*idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_vint2int");
+	dt_ident_t	*idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_strlen");
 	size_t		size = yypcb->pcb_hdl->dt_options[DTRACEOPT_STRSIZE];
 	uint_t		lbl_ok = dt_irlist_label(dlp);
 
@@ -850,40 +850,31 @@ dt_cg_store_val(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind,
 
 		return 0;
 	} else if (dt_node_is_string(dnp)) {
-		dt_ident_t	*idp;
 		uint_t		size_ok = dt_irlist_label(dlp);
 		int		reg = dt_regset_alloc(drp);
 
 		off = dt_rec_add(pcb->pcb_hdl, dt_cg_fill_gap, kind,
 				 size, 1, pfp, arg);
 
-		/* Retrieve the length of the string.  */
+		/*
+		 * Retrieve the length of the string, and adjust for the
+		 * terminating NUL byte and the length prefix.
+		 */
 		dt_cg_strlen(dlp, drp, reg, dnp->dn_reg);
-
-		if (dt_regset_xalloc_args(drp) == -1)
-			longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
-
-		/* Determine the number of bytes used for the length. */
-		emit(dlp,  BPF_MOV_REG(BPF_REG_1, reg));
-		idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_vint_size");
-		assert(idp != NULL);
-		dt_regset_xalloc(drp, BPF_REG_0);
-		emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
-
-		/* Add length of the string (adjusted for terminating byte). */
-		emit(dlp,  BPF_ALU64_IMM(BPF_ADD, reg, 1));
-		emit(dlp,  BPF_ALU64_REG(BPF_ADD, BPF_REG_0, reg));
-		dt_regset_free(drp, reg);
+		emit(dlp,  BPF_ALU64_IMM(BPF_ADD, reg, 1 + DT_STRLEN_BYTES));
 
 		/*
 		 * Copy string data (varint length + string content) to the
 		 * output buffer at [%r9 + off].  The amount of bytes copied is
 		 * the lesser of the data size and the maximum string size.
 		 */
+		if (dt_regset_xalloc_args(drp) == -1)
+			longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+
 		emit(dlp,  BPF_MOV_REG(BPF_REG_1, BPF_REG_9));
 		emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, off));
-		emit(dlp,  BPF_MOV_REG(BPF_REG_2, BPF_REG_0));
-		dt_regset_free(drp, BPF_REG_0);
+		emit(dlp,  BPF_MOV_REG(BPF_REG_2, reg));
+		dt_regset_free(drp, reg);
 		emit(dlp,  BPF_BRANCH_IMM(BPF_JLT, BPF_REG_2, size, size_ok));
 		emit(dlp,  BPF_MOV_IMM(BPF_REG_2, size));
 		emitl(dlp, size_ok,
@@ -2166,10 +2157,10 @@ dt_cg_store_var(dt_node_t *src, dt_irlist_t *dlp, dt_regset_t *drp,
 			 * DT_TOK_STRING.
 			 */
 			if (dt_node_is_string(src) &&
-			    src->dn_right->dn_op == DT_TOK_STRING) {
-				size = dt_node_type_size(src->dn_right);
-				size += dt_vint_size(size);
-			} else
+			    src->dn_right->dn_op == DT_TOK_STRING)
+				size = dt_node_type_size(src->dn_right) +
+				       DT_STRLEN_BYTES;
+			else
 				size = idp->di_size;
 
 			dt_cg_memcpy(dlp, drp, reg, src->dn_reg, size);
diff --git a/libdtrace/dt_consume.c b/libdtrace/dt_consume.c
index db0e1dea..f1601bab 100644
--- a/libdtrace/dt_consume.c
+++ b/libdtrace/dt_consume.c
@@ -17,7 +17,7 @@
 #include <dt_pcap.h>
 #include <dt_peb.h>
 #include <dt_state.h>
-#include <dt_varint.h>
+#include <dt_string.h>
 #include <libproc.h>
 #include <port.h>
 #include <sys/epoll.h>
@@ -1880,7 +1880,7 @@ dt_print_trace(dtrace_hdl_t *dtp, FILE *fp, dtrace_recdesc_t *rec,
 			return dt_print_rawbytes(dtp, fp, data, rec->dtrd_size);
 
 		/* We have a string.  Skip the length prefix and print it. */
-		s = (char *)dt_vint_skip(s);
+		s += DT_STRLEN_BYTES;
 		if (quiet)
 			return dt_printf(dtp, fp, "%s", s);
 		else
diff --git a/libdtrace/dt_printf.c b/libdtrace/dt_printf.c
index 703c37b3..5c6fcd49 100644
--- a/libdtrace/dt_printf.c
+++ b/libdtrace/dt_printf.c
@@ -16,7 +16,6 @@
 
 #include <dt_printf.h>
 #include <dt_string.h>
-#include <dt_varint.h>
 #include <dt_impl.h>
 
 /*ARGSUSED*/
@@ -490,7 +489,7 @@ pfprint_cstr(dtrace_hdl_t *dtp, FILE *fp, const char *format,
 
 	memcpy(s, addr, size);
 	s[size] = '\0';
-	s = (char *)dt_vint_skip(s);
+	s += DT_STRLEN_BYTES;
 	return dt_printf(dtp, fp, format, s);
 }
 
diff --git a/libdtrace/dt_string.c b/libdtrace/dt_string.c
index 9b7fea5a..e9eccb94 100644
--- a/libdtrace/dt_string.c
+++ b/libdtrace/dt_string.c
@@ -331,3 +331,20 @@ char *strrstr(const char *haystack, const char *needle)
 
 	return (char *)prev_s;
 }
+
+void
+dt_strlen_store(uint64_t val, char *str)
+{
+	uint8_t	*buf = (uint8_t *)str;
+
+	buf[0] = (uint8_t)(val >> 8);
+	buf[1] = (uint8_t)(val & 0xff);
+}
+
+uint64_t
+dt_strlen(const char *str)
+{
+	const uint8_t	*buf = (const uint8_t *)str;
+
+	return ((uint64_t)buf[0] << 8) + (uint64_t)buf[1];
+}
diff --git a/libdtrace/dt_string.h b/libdtrace/dt_string.h
index 5ff665ce..83af7551 100644
--- a/libdtrace/dt_string.h
+++ b/libdtrace/dt_string.h
@@ -1,6 +1,6 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2004, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2004, 2021, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -27,6 +27,11 @@ extern char *strhyphenate(char *);
 extern char *strrstr(const char *, const char *);
 #endif
 
+#define DT_STRLEN_BYTES	2
+
+extern void dt_strlen_store(uint64_t val, char *str);
+extern uint64_t dt_strlen(const char *str);
+
 /*
  * To get around issues with strncpy:
  * - strncpy() use is generally discouraged due to:
diff --git a/libdtrace/dt_strtab.c b/libdtrace/dt_strtab.c
index 3ca83e62..9362f722 100644
--- a/libdtrace/dt_strtab.c
+++ b/libdtrace/dt_strtab.c
@@ -12,7 +12,6 @@
 
 #include <dt_strtab.h>
 #include <dt_string.h>
-#include <dt_varint.h>
 #include <dt_impl.h>
 
 static int
@@ -43,7 +42,6 @@ dt_strtab_create(size_t bufsz)
 {
 	dt_strtab_t	*sp = malloc(sizeof(dt_strtab_t));
 	uint_t		nbuckets = _dtrace_strbuckets;
-	int		n;
 
 	assert(bufsz != 0);
 
@@ -73,10 +71,10 @@ dt_strtab_create(size_t bufsz)
 	 * at offset 0.  We use this guarantee in dt_strtab_insert() and
 	 * dt_strtab_index().
 	 */
-	n = dt_int2vint(0, sp->str_ptr);
-	sp->str_ptr += n;
+	dt_strlen_store(0, sp->str_ptr);
+	sp->str_ptr += DT_STRLEN_BYTES;
 	*sp->str_ptr++ = '\0';
-	sp->str_size = n + 1;
+	sp->str_size = DT_STRLEN_BYTES + 1;
 	sp->str_nstrs = 1;
 
 	return sp;
@@ -129,7 +127,11 @@ dt_strtab_compare(dt_strtab_t *sp, dt_strhash_t *hp,
 		resid = sp->str_bufs[b] + sp->str_bufsz - buf;
 		n = MIN(resid, len);
 
-		if ((rv = strncmp(buf, str, n)) != 0)
+		if ((rv = buf[0] - str[0]) != 0)
+			return rv;
+		if ((rv = buf[1] - str[1]) != 0)
+			return rv;
+		if ((rv = strncmp(buf + 2, str + 2, n)) != 0)
 			return rv;
 
 		buf += n;
@@ -191,7 +193,7 @@ dt_strtab_xindex(dt_strtab_t *sp, const char *str, size_t len, ulong_t h)
 ssize_t
 dt_strtab_index(dt_strtab_t *sp, const char *str)
 {
-	size_t	plen, slen;
+	size_t	slen;
 	ssize_t	rc;
 	ulong_t	h;
 	char	*s;
@@ -200,15 +202,15 @@ dt_strtab_index(dt_strtab_t *sp, const char *str)
 		return 0;	/* The empty string is always at offset 0. */
 
 	slen = strlen(str);
-	s = malloc(VARINT_MAX_BYTES + slen + 1);
+	s = malloc(DT_STRLEN_BYTES + slen + 1);
 	if (s == NULL)
 		return -1L;
 
-	plen = dt_int2vint(slen, s);
-	memcpy(s + plen, str, slen + 1);
+	dt_strlen_store(slen, s);
+	memcpy(s + DT_STRLEN_BYTES, str, slen + 1);
 
 	h = str2hval(str, slen) % sp->str_hashsz;
-	rc = dt_strtab_xindex(sp, s, plen + slen, h);
+	rc = dt_strtab_xindex(sp, s, DT_STRLEN_BYTES + slen, h);
 	free(s);
 
 	return rc;
@@ -218,7 +220,7 @@ ssize_t
 dt_strtab_insert(dt_strtab_t *sp, const char *str)
 {
 	dt_strhash_t	*hp;
-	size_t		slen, plen;
+	size_t		slen;
 	ssize_t		off;
 	ulong_t		h;
 	char		*s;
@@ -227,15 +229,15 @@ dt_strtab_insert(dt_strtab_t *sp, const char *str)
 		return 0;	/* The empty string is always at offset 0. */
 
 	slen = strlen(str);
-	s = malloc(VARINT_MAX_BYTES + slen + 1);
+	s = malloc(DT_STRLEN_BYTES + slen + 1);
 	if (s == NULL)
 		return -1L;
 
-	plen = dt_int2vint(slen, s);
-	memcpy(s + plen, str, slen + 1);
+	dt_strlen_store(slen, s);
+	memcpy(s + DT_STRLEN_BYTES, str, slen + 1);
 
 	h = str2hval(str, slen) % sp->str_hashsz;
-	slen += plen;
+	slen += DT_STRLEN_BYTES;
 	off = dt_strtab_xindex(sp, s, slen, h);
 	if (off != -1) {
 		free(s);
diff --git a/libdtrace/dt_subr.c b/libdtrace/dt_subr.c
index eba6a81f..2c8a4a22 100644
--- a/libdtrace/dt_subr.c
+++ b/libdtrace/dt_subr.c
@@ -23,8 +23,8 @@
 #include <sys/ioctl.h>
 #include <port.h>
 
-#include <dt_varint.h>
 #include <dt_impl.h>
+#include <dt_string.h>
 #include <sys/dtrace.h>
 
 int
@@ -753,7 +753,7 @@ dt_difo_getstr(const dtrace_difo_t *dp, ssize_t idx)
 {
 	assert(idx < dp->dtdo_strlen);
 
-	return dt_vint_skip(&dp->dtdo_strtab[idx]);
+	return &dp->dtdo_strtab[idx] + DT_STRLEN_BYTES;
 }
 
 /*
diff --git a/libdtrace/dt_varint.c b/libdtrace/dt_varint.c
deleted file mode 100644
index 5eb51a77..00000000
--- a/libdtrace/dt_varint.c
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * Oracle Linux DTrace.
- * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
- * Licensed under the Universal Permissive License v 1.0 as shown at
- * http://oss.oracle.com/licenses/upl.
- */
-
-#include <sys/types.h>
-#include <stdint.h>
-#include <dt_varint.h>
-
-uint64_t
-dt_int2vint(uint64_t val, char *str)
-{
-	uint8_t	*buf = (uint8_t *)str;
-	int	len;
-
-	if (val <= VARINT_1_MAX) {
-		buf[0] = (uint8_t)val;
-		return 1;
-	} else if (val <= VARINT_2_MAX) {
-		val -= VARINT_2_MIN;
-		len = 2;
-		buf[0] = VARINT_2_PREFIX | (uint8_t)(val >> VARINT_2_SHIFT);
-		goto two;
-	} else if (val <= VARINT_3_MAX) {
-		val -= VARINT_3_MIN;
-		len = 3;
-		buf[0] = VARINT_3_PREFIX | (uint8_t)(val >> VARINT_3_SHIFT);
-		goto three;
-	} else if (val <= VARINT_4_MAX) {
-		val -= VARINT_4_MIN;
-		len = 4;
-		buf[0] = VARINT_4_PREFIX | (uint8_t)(val >> VARINT_4_SHIFT);
-		goto four;
-	} else if (val <= VARINT_5_MAX) {
-		val -= VARINT_5_MIN;
-		len = 5;
-		buf[0] = VARINT_5_PREFIX | (uint8_t)(val >> VARINT_5_SHIFT);
-		goto five;
-	} else if (val <= VARINT_6_MAX) {
-		val -= VARINT_6_MIN;
-		len = 6;
-		buf[0] = VARINT_6_PREFIX | (uint8_t)(val >> VARINT_6_SHIFT);
-		goto six;
-	} else if (val <= VARINT_7_MAX) {
-		val -= VARINT_7_MIN;
-		len = 7;
-		buf[0] = VARINT_7_PREFIX | (uint8_t)(val >> VARINT_7_SHIFT);
-		goto seven;
-	} else if (val <= VARINT_8_MAX) {
-		val -= VARINT_8_MIN;
-		len = 8;
-		buf[0] = VARINT_8_PREFIX | (uint8_t)(val >> VARINT_8_SHIFT);
-		goto eight;
-	}
-
-	val -= VARINT_9_MIN;
-	len = 9;
-	buf[0] = VARINT_9_PREFIX;
-	buf[8] = (uint8_t)((val >> 56) & 0xff);
-eight:
-	buf[7] = (uint8_t)((val >> 48) & 0xff);
-seven:
-	buf[6] = (uint8_t)((val >> 40) & 0xff);
-six:
-	buf[5] = (uint8_t)((val >> 32) & 0xff);
-five:
-	buf[4] = (uint8_t)((val >> 24) & 0xff);
-four:
-	buf[3] = (uint8_t)((val >> 16) & 0xff);
-three:
-	buf[2] = (uint8_t)((val >> 8) & 0xff);
-two:
-	buf[1] = (uint8_t)(val & 0xff);
-
-	return len;
-}
-
-uint64_t
-dt_vint2int(const char *str)
-{
-	const uint8_t	*buf = (const uint8_t *)str;
-	uint64_t	hi = buf[0];
-	uint64_t	base;
-	uint64_t	val = 0;
-
-	if (hi < VARINT_1_PLIM)	  /* 0xxxxxxx -> 0x00 - 0x7f */
-		return hi;
-	if (hi < VARINT_2_PLIM) { /* 10xxxxxx -> 0x0080 - 0x407f */
-		hi &= VARINT_HI_MASK(VARINT_2_PLIM);
-		hi <<= VARINT_2_SHIFT;
-		base = VARINT_2_MIN;
-		goto two;
-	}
-	if (hi < VARINT_3_PLIM) { /* 110xxxxx -> 0x4080 - 0x20407f */
-		hi &= VARINT_HI_MASK(VARINT_3_PLIM);
-		hi <<= VARINT_3_SHIFT;
-		base = VARINT_3_MIN;
-		goto three;
-	}
-	if (hi < VARINT_4_PLIM) { /* 1110xxxx -> 0x204080 - 0x1020407f */
-		hi &= VARINT_HI_MASK(VARINT_4_PLIM);
-		hi <<= VARINT_4_SHIFT;
-		base = VARINT_4_MIN;
-		goto four;
-	}
-	if (hi < VARINT_5_PLIM) { /* 11110xxx -> 0x10204080 - 0x081020407f */
-		hi &= VARINT_HI_MASK(VARINT_5_PLIM);
-		hi <<= VARINT_5_SHIFT;
-		base = VARINT_5_MIN;
-		goto five;
-	}
-	if (hi < VARINT_6_PLIM) { /* 111110xx -> 0x0810204080 - 0x4081020407f */
-		hi &= VARINT_HI_MASK(VARINT_6_PLIM);
-		hi <<= VARINT_6_SHIFT;
-		base = VARINT_6_MIN;
-		goto six;
-	}
-	if (hi < VARINT_7_PLIM) { /* 1111110x -> 0x40810204080 - 0x204081020407f */
-		hi &= VARINT_HI_MASK(VARINT_7_PLIM);
-		hi <<= VARINT_7_SHIFT;
-		base = VARINT_7_MIN;
-		goto seven;
-	}
-	if (hi < VARINT_8_PLIM) { /* 11111110 -> 0x2040810204080 - 0x10204081020407f */
-		hi = 0;
-		base = VARINT_8_MIN;
-		goto eight;
-	}
-
-	/* 11111111 -> 0x102040810204080 - 0xffffffffffffffff */
-	hi = 0;
-	base = VARINT_9_MIN;
-
-	val += ((uint64_t)buf[8]) << 56;
-eight:
-	val += ((uint64_t)buf[7]) << 48;
-seven:
-	val += ((uint64_t)buf[6]) << 40;
-six:
-	val += ((uint64_t)buf[5]) << 32;
-five:
-	val += ((uint64_t)buf[4]) << 24;
-four:
-	val += ((uint64_t)buf[3]) << 16;
-three:
-	val += ((uint64_t)buf[2]) << 8;
-two:
-	val += (uint64_t)buf[1];
-	val += hi;
-
-	return base + val;
-}
-
-uint64_t
-dt_vint_size(uint64_t val)
-{
-	if (val <= VARINT_1_MAX)
-		return 1;
-	if (val <= VARINT_2_MAX)
-		return 2;
-	if (val <= VARINT_3_MAX)
-		return 3;
-	if (val <= VARINT_4_MAX)
-		return 4;
-	if (val <= VARINT_5_MAX)
-		return 5;
-	if (val <= VARINT_6_MAX)
-		return 6;
-	if (val <= VARINT_7_MAX)
-		return 7;
-	if (val <= VARINT_8_MAX)
-		return 8;
-
-	return 9;
-}
-
-const char *
-dt_vint_skip(const char *str)
-{
-	const uint8_t	*buf = (const uint8_t *)str;
-	uint64_t	hi = buf[0];
-
-	if (hi < VARINT_1_PLIM)	 /* 0xxxxxxx -> 0x00 - 0x7f */
-		return &str[1];
-	if (hi < VARINT_2_PLIM)  /* 10xxxxxx -> 0x0080 - 0x407f */
-		return &str[2];
-	if (hi < VARINT_3_PLIM)  /* 110xxxxx -> 0x4080 - 0x20407f */
-		return &str[3];
-	if (hi < VARINT_4_PLIM)  /* 1110xxxx -> 0x204080 - 0x1020407f */
-		return &str[4];
-	if (hi < VARINT_5_PLIM)  /* 11110xxx -> 0x10204080 - 0x081020407f */
-		return &str[5];
-	if (hi < VARINT_6_PLIM)  /* 111110xx -> 0x0810204080 - 0x4081020407f */
-		return &str[6];
-	if (hi < VARINT_7_PLIM)  /* 1111110x -> 0x40810204080 - 0x204081020407f */
-		return &str[7];
-	if (hi < VARINT_8_PLIM)  /* 11111110 -> 0x2040810204080 - 0x10204081020407f */
-		return &str[8];
-
-	return &str[9];
-}
diff --git a/libdtrace/dt_varint.h b/libdtrace/dt_varint.h
deleted file mode 100644
index 7eaeaa2b..00000000
--- a/libdtrace/dt_varint.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Oracle Linux DTrace.
- * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
- * Licensed under the Universal Permissive License v 1.0 as shown at
- * http://oss.oracle.com/licenses/upl.
- */
-
-#ifndef	_DT_VARINT_H
-#define	_DT_VARINT_H
-
-#ifdef	__cplusplus
-extern "C" {
-#endif
-
-/*
- * Variable-length integers
- *
- * These functions convert between uint64_t integers and strings of 1-9
- * bytes.  The first 1<<7 integers are stored in a single byte with leading
- * bit 0.  The next 1<<14 integers are stored in two bytes with leading bits
- * 1 and 0.  And so on.  Here are the ranges of integers:
- *
- *      minimum integer    # of    leading    # of bits
- *        in this range    bytes     byte     left over       # of integers
- *                                (in bits)   for values      in this range
- *
- *                    0      1     0???????    1*8-1= 7 0x80
- *                 0x80      2     10??????    2*8-2=14 0x4000
- *               0x4080      3     110?????    3*8-3=21 0x200000
- *             0x204080      4     1110????    4*8-4=28 0x10000000
- *           0x10204080      5     11110???    5*8-5=35 0x800000000
- *          0x810204080      6     111110??    6*8-6=42 0x40000000000
- *        0x40810204080      7     1111110?    7*8-7=49 0x2000000000000
- *      0x2040810204080      8     11111110    8*8-8=56 0x100000000000000
- *    0x102040810204080      9     11111111    9*8-8=64        1 << 64
- *
- * If n is the number of bytes:
- *   VARINT_$n_PREFIX  =  leading byte (with 0 for ?), shown above
- *   VARINT_$n_PLIM    =  VARINT_${n+1}_PREFIX
- *   VARINT_$n_SHIFT   =  8*(n-1)
- *   VARINT_$n_MIN     =  VARINT_${n-1}_MAX + 1   with VARINT_1_MIN = 0
- *   VARINT_$n_MAX     =  inclusive maximum
- *
- * Notice that since we go up to at most 9 bytes.  So if the first 8 bits
- * are 1s, we we know the next 8 bytes represent the value.
- */
-#define VARINT_HI_MASK(b)	((uint8_t)~(b))
-
-#define VARINT_1_PREFIX		(uint8_t)0x00
-#define VARINT_1_PLIM		0x80
-#define VARINT_1_SHIFT		0
-#define VARINT_1_MIN		0
-#define VARINT_1_MAX		0x7f
-
-#define VARINT_2_PREFIX		(uint8_t)0x80
-#define VARINT_2_PLIM		0xc0
-#define VARINT_2_SHIFT		8
-#define VARINT_2_MIN		(VARINT_1_MAX + 1)
-#define VARINT_2_MAX		0x407f
-
-#define VARINT_3_PREFIX		(uint8_t)0xc0
-#define VARINT_3_PLIM		0xe0
-#define VARINT_3_SHIFT		16
-#define VARINT_3_MIN		(VARINT_2_MAX + 1)
-#define VARINT_3_MAX		0x20407f
-
-#define VARINT_4_PREFIX		(uint8_t)0xe0
-#define VARINT_4_PLIM		0xf0
-#define VARINT_4_SHIFT		24
-#define VARINT_4_MIN		(VARINT_3_MAX + 1)
-#define VARINT_4_MAX		0x1020407f
-
-#define VARINT_5_PREFIX		(uint8_t)0xf0
-#define VARINT_5_PLIM		0xf8
-#define VARINT_5_SHIFT		32
-#define VARINT_5_MIN		(VARINT_4_MAX + 1)
-#define VARINT_5_MAX		0x081020407f
-
-#define VARINT_6_PREFIX		(uint8_t)0xf8
-#define VARINT_6_PLIM		0xfc
-#define VARINT_6_SHIFT		40
-#define VARINT_6_MIN		(VARINT_5_MAX + 1)
-#define VARINT_6_MAX		0x04081020407f
-
-#define VARINT_7_PREFIX		(uint8_t)0xfc
-#define VARINT_7_PLIM		0xfe
-#define VARINT_7_SHIFT		48
-#define VARINT_7_MIN		(VARINT_6_MAX + 1)
-#define VARINT_7_MAX		0x0204081020407f
-
-#define VARINT_8_PREFIX		(uint8_t)0xfe
-#define VARINT_8_PLIM		0xff
-#define VARINT_8_SHIFT		56
-#define VARINT_8_MIN		(VARINT_7_MAX + 1)
-#define VARINT_8_MAX		0x010204081020407f
-
-#define VARINT_9_PREFIX		(uint8_t)0xff
-#define VARINT_9_PLIM		0xff
-#define VARINT_9_SHIFT		0
-#define VARINT_9_MIN		(VARINT_8_MAX + 1)
-#define VARINT_9_MAX		0xffffffffffffffff
-
-#define VARINT_MAX_BYTES	9
-
-extern uint64_t dt_int2vint(uint64_t num, char *str);
-extern uint64_t dt_vint2int(const char *str);
-extern uint64_t dt_vint_size(uint64_t val);
-extern const char *dt_vint_skip(const char *str);
-
-#ifdef	__cplusplus
-}
-#endif
-
-#endif	/* _DT_VARINT_H */
diff --git a/test/internals/tst.varint.sh b/test/internals/tst.varint.sh
deleted file mode 100755
index 390a4414..00000000
--- a/test/internals/tst.varint.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-#
-# Oracle Linux DTrace.
-# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
-# Licensed under the Universal Permissive License v 1.0 as shown at
-# http://oss.oracle.com/licenses/upl.
-#
-
-exec test/utils/tst.varint
diff --git a/test/utils/Build b/test/utils/Build
index 8cfec179..3204d054 100644
--- a/test/utils/Build
+++ b/test/utils/Build
@@ -3,7 +3,7 @@
 # Licensed under the Universal Permissive License v 1.0 as shown at
 # http://oss.oracle.com/licenses/upl.
 
-TEST_UTILS = baddof badioctl showUSDT print-stack-layout tst.varint
+TEST_UTILS = baddof badioctl showUSDT print-stack-layout
 
 define test-util-template
 CMDS += $(1)
diff --git a/test/utils/tst.varint.c b/test/utils/tst.varint.c
deleted file mode 100644
index 4f80d3c2..00000000
--- a/test/utils/tst.varint.c
+++ /dev/null
@@ -1,97 +0,0 @@
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <dt_varint.h>
-
-void
-check(uint64_t val, int exp)
-{
-	char		s[VARINT_MAX_BYTES];
-	const char	*p;
-	int		rc, len;
-	uint64_t	dval;
-
-	rc = dt_int2vint(val, s);
-	if (rc != exp) {
-		printf("Length wrong for %lu: %d vs %d\n", val, rc, exp);
-		exit(1);
-	}
-	len = dt_vint_size(val);
-	if (len != exp) {
-		printf("Size wrong for %lu: %d vs %d\n", val, len, exp);
-		exit(1);
-	}
-	p = dt_vint_skip(s);
-	if (!p) {
-		printf("Skip wrong for %lu: %d vs %d\n", val, 0, exp);
-		exit(1);
-	} else if ((p - s) != exp) {
-		printf("Skip wrong for %lu: %ld vs %d\n", val, p - s, exp);
-		exit(1);
-	}
-	dval = dt_vint2int(s);
-	if (dval != val) {
-		printf("Value decode error (%d byte prefix): %lx vs %lx\n",
-		       rc, dval, val);
-		exit(1);
-	}
-}
-
-void
-check_range(uint64_t lo, uint64_t hi, int len)
-{
-	uint64_t	val;
-
-	/* taste test!  Here are two styles to choose from: */
-#if 0
-	for (val = lo - 10000; val < lo; val++)
-		check(val, len-1);
-	for (val = lo; val < lo + 10000; val++)
-		check(val, len);
-	for (val = hi - 10000; val <= hi; val++)
-		check(val, len);
-	for (val = hi + 1; val < hi + 10000; val++)
-		check(val, len + 1);
-#else
-	for (val = lo - 10000; val < lo + 10000; val++)
-		check(val, val < lo ? len-1 : len);
-	for (val = hi - 10000; val < hi + 10000; val++)
-		check(val, val <= hi ? len : len + 1);
-#endif
-}
-
-int
-main(void)
-{
-	uint64_t	val;
-
-	/* First range: we go through all 16-bit values. */
-	for (val = 0; val <= VARINT_1_MAX; val++)
-		check(val, 1);
-	for (val = VARINT_1_MAX + 1; val <= VARINT_2_MAX; val++)
-		check(val, 2);
-	for (val = VARINT_2_MAX + 1; val < 0xffff; val++)
-		check(val, 3);
-
-	/* For higher ranges, verify the low and high boundary ranges. */
-	check_range(VARINT_3_MIN, VARINT_3_MAX, 3);
-	check_range(VARINT_4_MIN, VARINT_4_MAX, 4);
-	check_range(VARINT_5_MIN, VARINT_5_MAX, 5);
-	check_range(VARINT_6_MIN, VARINT_6_MAX, 6);
-	check_range(VARINT_7_MIN, VARINT_7_MAX, 7);
-	check_range(VARINT_8_MIN, VARINT_8_MAX, 8);
-
-	/* Verify the final range. */
-	for (val = VARINT_9_MIN - 10000; val < VARINT_9_MIN; val++)
-		check(val, 8);
-	for (val = VARINT_9_MIN; val < VARINT_9_MIN + 10000; val++)
-		check(val, 9);
-	for (val = VARINT_9_MAX - 10000; val < VARINT_9_MAX; val++) {
-		check(val, 9);
-	}
-	check(VARINT_9_MAX, 9);
-
-	return 0;
-}
-
-#include "dt_varint.c"
-- 
2.33.0




More information about the DTrace-devel mailing list