[DTrace-devel] [PATCH 12/20] Implement variable length integer support

Kris Van Hees kris.van.hees at oracle.com
Tue Jun 1 22:48:05 PDT 2021


This patch provides an implementation of variable-length integers,
both in libdtrace and as precompiled BPF code.  It is based on the
PrefixVarint algorithm.  Unsigned integers are encoded as follows:

  0x0 - 0x7f		1 byte:  0xxxxxxx
  0x80 - 0x407f		2 bytes: 10xxxxxx xxxxxxxx
  0x4080 - 0x20407f	3 bytes: 110xxxxx xxxxxxxx xxxxxxxx

and so on.  This algorithm ensures that there is exactly one
representation for a given unsigned integers.  It also ensures that
the number of bytes in the variable length representation can be
determined based on the first byte.  This makes decoding simpler.

The following functions are provided:

  int dt_int2vint(uint64_t val, char *str)
	Store a 64-bit unsigned integer in the provided string and
	return the number of bytes used.

  uint64_t dt_vint2int(const char *str)
	Retrieve a 64-bit unsigned integer from a variable length
	integer in the provided string.

  int dt_vint_size(uint64_t val)
	Return the number of bytes necessary to represent the given
	64-bit unsigned integer as a variable length integer.

  const char *dt_vint_skip(const char *str)
	Skip over the variable length integer at the start of the
	given string and return a pointer to the first byte beyond
	the variable length integer.

Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
---
 bpf/Build             |   3 +-
 bpf/varint.c          | 199 +++++++++++++++++++++++++++++++++++++++++
 libdtrace/Build       |   1 +
 libdtrace/dt_varint.c | 203 ++++++++++++++++++++++++++++++++++++++++++
 libdtrace/dt_varint.h |  82 +++++++++++++++++
 5 files changed, 487 insertions(+), 1 deletion(-)
 create mode 100644 bpf/varint.c
 create mode 100644 libdtrace/dt_varint.c
 create mode 100644 libdtrace/dt_varint.h

diff --git a/bpf/Build b/bpf/Build
index 5133a2f2..a436bc96 100644
--- a/bpf/Build
+++ b/bpf/Build
@@ -27,7 +27,8 @@ bpf_dlib_SOURCES = \
 	get_tvar.c set_tvar.c \
 	memcpy.c \
 	probe_error.c \
-	strnlen.c
+	strnlen.c \
+	varint.c
 
 bpf-check: $(objdir)/include/.dir.stamp
 	$(BPFC) $(BPFCPPFLAGS) $(bpf_dlib_CPPFLAGS) $(BPFCFLAGS) -S \
diff --git a/bpf/varint.c b/bpf/varint.c
new file mode 100644
index 00000000..2b525cd1
--- /dev/null
+++ b/bpf/varint.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ */
+#include <stdint.h>
+#include <dt_varint.h>
+
+#ifndef noinline
+# define noinline	__attribute__((noinline))
+#endif
+
+noinline int dt_int2vint(uint64_t val, char *str)
+{
+	int	len;
+	uint8_t	*buf = (uint8_t *)str;
+
+	if (val <= VARINT_1_MAX) {
+		buf[0] = (uint8_t)val;
+		return 1;
+	} else if (val <= VARINT_2_MAX) {
+		val -= VARINT_2_MIN;
+		len = 2;
+		buf[0] = VARINT_2_PREFIX | (uint8_t)(val >> VARINT_2_SHIFT);
+		goto two;
+	} else if (val <= VARINT_3_MAX) {
+		val -= VARINT_3_MIN;
+		len = 3;
+		buf[0] = VARINT_3_PREFIX | (uint8_t)(val >> VARINT_3_SHIFT);
+		goto three;
+	} else if (val <= VARINT_4_MAX) {
+		val -= VARINT_4_MIN;
+		len = 4;
+		buf[0] = VARINT_4_PREFIX | (uint8_t)(val >> VARINT_4_SHIFT);
+		goto four;
+	} else if (val <= VARINT_5_MAX) {
+		val -= VARINT_5_MIN;
+		len = 5;
+		buf[0] = VARINT_5_PREFIX | (uint8_t)(val >> VARINT_5_SHIFT);
+		goto five;
+	} else if (val <= VARINT_6_MAX) {
+		val -= VARINT_6_MIN;
+		len = 6;
+		buf[0] = VARINT_6_PREFIX | (uint8_t)(val >> VARINT_6_SHIFT);
+		goto six;
+	} else if (val <= VARINT_7_MAX) {
+		val -= VARINT_7_MIN;
+		len = 7;
+		buf[0] = VARINT_7_PREFIX | (uint8_t)(val >> VARINT_7_SHIFT);
+		goto seven;
+	} else if (val <= VARINT_8_MAX) {
+		val -= VARINT_8_MIN;
+		len = 8;
+		buf[0] = VARINT_8_PREFIX | (uint8_t)(val >> VARINT_8_SHIFT);
+		goto eight;
+	}
+
+	val -= VARINT_9_MIN;
+	len = 9;
+	buf[0] = VARINT_9_PREFIX;
+	buf[8] = (uint8_t)((val >> 56) & 0xff);
+eight:
+	buf[7] = (uint8_t)((val >> 48) & 0xff);
+seven:
+	buf[6] = (uint8_t)((val >> 40) & 0xff);
+six:
+	buf[5] = (uint8_t)((val >> 32) & 0xff);
+five:
+	buf[4] = (uint8_t)((val >> 24) & 0xff);
+four:
+	buf[3] = (uint8_t)((val >> 16) & 0xff);
+three:
+	buf[2] = (uint8_t)((val >> 8) & 0xff);
+two:
+	buf[1] = (uint8_t)(val & 0xff);
+
+	return len;
+}
+
+noinline uint64_t dt_vint2int(const char *str)
+{
+	const uint8_t	*buf = (const uint8_t *)str;
+	uint64_t	hi = buf[0];
+	uint64_t	bs;
+	uint64_t	len = 0;
+
+	if (hi < VARINT_1_PLIM)	  /* 0xxxxxxx -> 0x00 - 0x7f */
+		return hi;
+	if (hi < VARINT_2_PLIM) { /* 10xxxxxx -> 0x0080 - 0x407f */
+		hi &= VARINT_HI_MASK(VARINT_2_PLIM);
+		hi <<= VARINT_2_SHIFT;
+		bs = VARINT_2_MIN;
+		goto two;
+	}
+	if (hi < VARINT_3_PLIM) { /* 110xxxxx -> 0x4080 - 0x20407f */
+		hi &= VARINT_HI_MASK(VARINT_3_PLIM);
+		hi <<= VARINT_3_SHIFT;
+		bs = VARINT_3_MIN;
+		goto three;
+	}
+	if (hi < VARINT_4_PLIM) { /* 1110xxxx -> 0x204080 - 0x1020407f */
+		hi &= VARINT_HI_MASK(VARINT_4_PLIM);
+		hi <<= VARINT_4_SHIFT;
+		bs = VARINT_4_MIN;
+		goto four;
+	}
+	if (hi < VARINT_5_PLIM) { /* 11110xxx -> 0x10204080 - 0x081020407f */
+		hi &= VARINT_HI_MASK(VARINT_5_PLIM);
+		hi <<= VARINT_5_SHIFT;
+		bs = VARINT_5_MIN;
+		goto five;
+	}
+	if (hi < VARINT_6_PLIM) { /* 111110xx -> 0x0810204080 -> 0x4081020407f */
+		hi &= VARINT_HI_MASK(VARINT_6_PLIM);
+		hi <<= VARINT_6_SHIFT;
+		bs = VARINT_6_MIN;
+		goto six;
+	}
+	if (hi < VARINT_7_PLIM) { /* 1111110x -> 0x40810204080 -> 0x204081020407f */
+		hi &= VARINT_HI_MASK(VARINT_7_PLIM);
+		hi <<= VARINT_7_SHIFT;
+		bs = VARINT_7_MIN;
+		goto seven;
+	}
+	if (hi < VARINT_8_PLIM) { /* 11111110 -> 0x2040810204080 -> 0x10204081020407f */
+		hi = 0;
+		bs = VARINT_8_MIN;
+		goto eight;
+	}
+
+	/* 11111111 -> 0x102040810204080 - 0xffffffffffffffff */
+	hi = 0;
+	bs = VARINT_9_MIN;
+
+	len += ((uint64_t)buf[8]) << 56;
+eight:
+	len += ((uint64_t)buf[7]) << 48;
+seven:
+	len += ((uint64_t)buf[6]) << 40;
+six:
+	len += ((uint64_t)buf[5]) << 32;
+five:
+	len += ((uint64_t)buf[4]) << 24;
+four:
+	len += ((uint64_t)buf[3]) << 16;
+three:
+	len += ((uint64_t)buf[2]) << 8;
+two:
+	len += (uint64_t)buf[1];
+	len += hi;
+
+	return bs + len;
+}
+
+noinline int dt_vint_size(uint64_t val)
+{
+	if (val <= VARINT_1_MAX)
+		return 1;
+	if (val <= VARINT_2_MAX)
+		return 2;
+	if (val <= VARINT_3_MAX)
+		return 3;
+	if (val <= VARINT_4_MAX)
+		return 4;
+	if (val <= VARINT_5_MAX)
+		return 5;
+	if (val <= VARINT_6_MAX)
+		return 6;
+	if (val <= VARINT_7_MAX)
+		return 7;
+	if (val <= VARINT_8_MAX)
+		return 8;
+
+	return 9;
+}
+
+noinline const char *dt_vint_skip(const char *str)
+{
+	const uint8_t	*buf = (const uint8_t *)str;
+	uint64_t	hi = buf[0];
+
+	if (hi < VARINT_1_PLIM)	 /* 0xxxxxxx -> 0x00 - 0x7f */
+		return &str[1];
+	if (hi < VARINT_2_PLIM)  /* 10xxxxxx -> 0x0080 - 0x407f */
+		return &str[2];
+	if (hi < VARINT_3_PLIM)  /* 110xxxxx -> 0x4080 - 0x20407f */
+		return &str[3];
+	if (hi < VARINT_4_PLIM)  /* 1110xxxx -> 0x204080 - 0x1020407f */
+		return &str[4];
+	if (hi < VARINT_5_PLIM)  /* 11110xxx -> 0x10204080 - 0x081020407f */
+		return &str[5];
+	if (hi < VARINT_6_PLIM)  /* 111110xx -> 0x0810204080 -> 0x4081020407f */
+		return &str[6];
+	if (hi < VARINT_7_PLIM)  /* 1111110x -> 0x40810204080 -> 0x204081020407f */
+		return &str[7];
+	if (hi < VARINT_8_PLIM)  /* 11111110 -> 0x2040810204080 -> 0x10204081020407f */
+		return &str[8];
+
+	return &str[9];
+}
diff --git a/libdtrace/Build b/libdtrace/Build
index 47e92736..3555c857 100644
--- a/libdtrace/Build
+++ b/libdtrace/Build
@@ -61,6 +61,7 @@ libdtrace-build_SOURCES = dt_aggregate.c \
 			  dt_strtab.c \
 			  dt_subr.c \
 			  dt_symtab.c \
+			  dt_varint.c \
 			  dt_work.c \
 			  dt_xlator.c
 
diff --git a/libdtrace/dt_varint.c b/libdtrace/dt_varint.c
new file mode 100644
index 00000000..72801acd
--- /dev/null
+++ b/libdtrace/dt_varint.c
@@ -0,0 +1,203 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ */
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <dt_varint.h>
+
+int
+dt_int2vint(uint64_t val, char *str)
+{
+	uint8_t	*buf = (uint8_t *)str;
+	int	len;
+
+	if (val <= VARINT_1_MAX) {
+		buf[0] = (uint8_t)val;
+		return 1;
+	} else if (val <= VARINT_2_MAX) {
+		val -= VARINT_2_MIN;
+		len = 2;
+		buf[0] = VARINT_2_PREFIX | (uint8_t)(val >> VARINT_2_SHIFT);
+		goto two;
+	} else if (val <= VARINT_3_MAX) {
+		val -= VARINT_3_MIN;
+		len = 3;
+		buf[0] = VARINT_3_PREFIX | (uint8_t)(val >> VARINT_3_SHIFT);
+		goto three;
+	} else if (val <= VARINT_4_MAX) {
+		val -= VARINT_4_MIN;
+		len = 4;
+		buf[0] = VARINT_4_PREFIX | (uint8_t)(val >> VARINT_4_SHIFT);
+		goto four;
+	} else if (val <= VARINT_5_MAX) {
+		val -= VARINT_5_MIN;
+		len = 5;
+		buf[0] = VARINT_5_PREFIX | (uint8_t)(val >> VARINT_5_SHIFT);
+		goto five;
+	} else if (val <= VARINT_6_MAX) {
+		val -= VARINT_6_MIN;
+		len = 6;
+		buf[0] = VARINT_6_PREFIX | (uint8_t)(val >> VARINT_6_SHIFT);
+		goto six;
+	} else if (val <= VARINT_7_MAX) {
+		val -= VARINT_7_MIN;
+		len = 7;
+		buf[0] = VARINT_7_PREFIX | (uint8_t)(val >> VARINT_7_SHIFT);
+		goto seven;
+	} else if (val <= VARINT_8_MAX) {
+		val -= VARINT_8_MIN;
+		len = 8;
+		buf[0] = VARINT_8_PREFIX | (uint8_t)(val >> VARINT_8_SHIFT);
+		goto eight;
+	}
+
+	val -= VARINT_9_MIN;
+	len = 9;
+	buf[0] = VARINT_9_PREFIX;
+	buf[8] = (uint8_t)((val >> 56) & 0xff);
+eight:
+	buf[7] = (uint8_t)((val >> 48) & 0xff);
+seven:
+	buf[6] = (uint8_t)((val >> 40) & 0xff);
+six:
+	buf[5] = (uint8_t)((val >> 32) & 0xff);
+five:
+	buf[4] = (uint8_t)((val >> 24) & 0xff);
+four:
+	buf[3] = (uint8_t)((val >> 16) & 0xff);
+three:
+	buf[2] = (uint8_t)((val >> 8) & 0xff);
+two:
+	buf[1] = (uint8_t)(val & 0xff);
+
+	return len;
+}
+
+uint64_t
+dt_vint2int(const char *str)
+{
+	const uint8_t	*buf = (const uint8_t *)str;
+	uint64_t	hi = buf[0];
+	uint64_t	bs;
+	uint64_t	len = 0;
+
+	if (hi < VARINT_1_PLIM)	  /* 0xxxxxxx -> 0x00 - 0x7f */
+		return hi;
+	if (hi < VARINT_2_PLIM) { /* 10xxxxxx -> 0x0080 - 0x407f */
+		hi &= VARINT_HI_MASK(VARINT_2_PLIM);
+		hi <<= VARINT_2_SHIFT;
+		bs = VARINT_2_MIN;
+		goto two;
+	}
+	if (hi < VARINT_3_PLIM) { /* 110xxxxx -> 0x4080 - 0x20407f */
+		hi &= VARINT_HI_MASK(VARINT_3_PLIM);
+		hi <<= VARINT_3_SHIFT;
+		bs = VARINT_3_MIN;
+		goto three;
+	}
+	if (hi < VARINT_4_PLIM) { /* 1110xxxx -> 0x204080 - 0x1020407f */
+		hi &= VARINT_HI_MASK(VARINT_4_PLIM);
+		hi <<= VARINT_4_SHIFT;
+		bs = VARINT_4_MIN;
+		goto four;
+	}
+	if (hi < VARINT_5_PLIM) { /* 11110xxx -> 0x10204080 - 0x081020407f */
+		hi &= VARINT_HI_MASK(VARINT_5_PLIM);
+		hi <<= VARINT_5_SHIFT;
+		bs = VARINT_5_MIN;
+		goto five;
+	}
+	if (hi < VARINT_6_PLIM) { /* 111110xx -> 0x0810204080 -> 0x4081020407f */
+		hi &= VARINT_HI_MASK(VARINT_6_PLIM);
+		hi <<= VARINT_6_SHIFT;
+		bs = VARINT_6_MIN;
+		goto six;
+	}
+	if (hi < VARINT_7_PLIM) { /* 1111110x -> 0x40810204080 -> 0x204081020407f */
+		hi &= VARINT_HI_MASK(VARINT_7_PLIM);
+		hi <<= VARINT_7_SHIFT;
+		bs = VARINT_7_MIN;
+		goto seven;
+	}
+	if (hi < VARINT_8_PLIM) { /* 11111110 -> 0x2040810204080 -> 0x10204081020407f */
+		hi = 0;
+		bs = VARINT_8_MIN;
+		goto eight;
+	}
+
+	/* 11111111 -> 0x102040810204080 - 0xffffffffffffffff */
+	hi = 0;
+	bs = VARINT_9_MIN;
+
+	len += ((uint64_t)buf[8]) << 56;
+eight:
+	len += ((uint64_t)buf[7]) << 48;
+seven:
+	len += ((uint64_t)buf[6]) << 40;
+six:
+	len += ((uint64_t)buf[5]) << 32;
+five:
+	len += ((uint64_t)buf[4]) << 24;
+four:
+	len += ((uint64_t)buf[3]) << 16;
+three:
+	len += ((uint64_t)buf[2]) << 8;
+two:
+	len += (uint64_t)buf[1];
+	len += hi;
+
+	return bs + len;
+}
+
+int
+dt_vint_size(uint64_t val)
+{
+	if (val <= VARINT_1_MAX)
+		return 1;
+	if (val <= VARINT_2_MAX)
+		return 2;
+	if (val <= VARINT_3_MAX)
+		return 3;
+	if (val <= VARINT_4_MAX)
+		return 4;
+	if (val <= VARINT_5_MAX)
+		return 5;
+	if (val <= VARINT_6_MAX)
+		return 6;
+	if (val <= VARINT_7_MAX)
+		return 7;
+	if (val <= VARINT_8_MAX)
+		return 8;
+
+	return 9;
+}
+
+const char *
+dt_vint_skip(const char *str)
+{
+	const uint8_t	*buf = (const uint8_t *)str;
+	uint64_t	hi = buf[0];
+
+	if (hi < VARINT_1_PLIM)	 /* 0xxxxxxx -> 0x00 - 0x7f */
+		return &str[1];
+	if (hi < VARINT_2_PLIM)  /* 10xxxxxx -> 0x0080 - 0x407f */
+		return &str[2];
+	if (hi < VARINT_3_PLIM)  /* 110xxxxx -> 0x4080 - 0x20407f */
+		return &str[3];
+	if (hi < VARINT_4_PLIM)  /* 1110xxxx -> 0x204080 - 0x1020407f */
+		return &str[4];
+	if (hi < VARINT_5_PLIM)  /* 11110xxx -> 0x10204080 - 0x081020407f */
+		return &str[5];
+	if (hi < VARINT_6_PLIM)  /* 111110xx -> 0x0810204080 -> 0x4081020407f */
+		return &str[6];
+	if (hi < VARINT_7_PLIM)  /* 1111110x -> 0x40810204080 -> 0x204081020407f */
+		return &str[7];
+	if (hi < VARINT_8_PLIM)  /* 11111110 -> 0x2040810204080 -> 0x10204081020407f */
+		return &str[8];
+
+	return &str[9];
+}
diff --git a/libdtrace/dt_varint.h b/libdtrace/dt_varint.h
new file mode 100644
index 00000000..fe68153b
--- /dev/null
+++ b/libdtrace/dt_varint.h
@@ -0,0 +1,82 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ */
+
+#ifndef	_DT_VARINT_H
+#define	_DT_VARINT_H
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#define VARINT_HI_MASK(b)	((uint8_t)~(b))
+
+#define VARINT_1_PREFIX		(uint8_t)0x00
+#define VARINT_1_PLIM		0x80
+#define VARINT_1_SHIFT		0
+#define VARINT_1_MIN		0
+#define VARINT_1_MAX		0x7f
+
+#define VARINT_2_PREFIX		(uint8_t)0x80
+#define VARINT_2_PLIM		0xc0
+#define VARINT_2_SHIFT		8
+#define VARINT_2_MIN		(VARINT_1_MAX + 1)
+#define VARINT_2_MAX		0x407f
+
+#define VARINT_3_PREFIX		(uint8_t)0xc0
+#define VARINT_3_PLIM		0xe0
+#define VARINT_3_SHIFT		16
+#define VARINT_3_MIN		(VARINT_2_MAX + 1)
+#define VARINT_3_MAX		0x20407f
+
+#define VARINT_4_PREFIX		(uint8_t)0xe0
+#define VARINT_4_PLIM		0xf0
+#define VARINT_4_SHIFT		24
+#define VARINT_4_MIN		(VARINT_3_MAX + 1)
+#define VARINT_4_MAX		0x1020407f
+
+#define VARINT_5_PREFIX		(uint8_t)0xf0
+#define VARINT_5_PLIM		0xf8
+#define VARINT_5_SHIFT		32
+#define VARINT_5_MIN		(VARINT_4_MAX + 1)
+#define VARINT_5_MAX		0x081020407f
+
+#define VARINT_6_PREFIX		(uint8_t)0xf8
+#define VARINT_6_PLIM		0xfc
+#define VARINT_6_SHIFT		40
+#define VARINT_6_MIN		(VARINT_5_MAX + 1)
+#define VARINT_6_MAX		0x04081020407f
+
+#define VARINT_7_PREFIX		(uint8_t)0xfc
+#define VARINT_7_PLIM		0xfe
+#define VARINT_7_SHIFT		48
+#define VARINT_7_MIN		(VARINT_6_MAX + 1)
+#define VARINT_7_MAX		0x0204081020407f
+
+#define VARINT_8_PREFIX		(uint8_t)0xfe
+#define VARINT_8_PLIM		0xff
+#define VARINT_8_SHIFT		56
+#define VARINT_8_MIN		(VARINT_7_MAX + 1)
+#define VARINT_8_MAX		0x010204081020407f
+
+#define VARINT_9_PREFIX		(uint8_t)0xff
+#define VARINT_9_PLIM		0xff
+#define VARINT_9_SHIFT		0
+#define VARINT_9_MIN		(VARINT_8_MAX + 1)
+#define VARINT_9_MAX		0xffffffffffffffff
+
+#define VARINT_MAX_BYTES	9
+
+extern int dt_int2vint(uint64_t num, char *str);
+extern uint64_t dt_vint2int(const char *str);
+extern int dt_vint_size(uint64_t val);
+extern const char *dt_vint_skip(const char *str);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _DT_VARINT_H */
-- 
2.31.1




More information about the DTrace-devel mailing list