[DTrace-devel] [PATCH v3] Add support for strtok() subroutine

eugene.loh at oracle.com eugene.loh at oracle.com
Thu Dec 2 20:18:08 UTC 2021


From: Eugene Loh <eugene.loh at oracle.com>

Internal state holds a copy of the string being parsed, along with an
offset where in the string the next strtok(NULL, ...) call should start.
This state cannot overlap the scratch memory used for stack() calls.

Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
 bpf/Build                                     |   1 +
 bpf/strtok.S                                  | 498 ++++++++++++++++++
 libdtrace/dt_bpf.c                            |  17 +-
 libdtrace/dt_cg.c                             | 103 +++-
 libdtrace/dt_dctx.h                           |   2 +
 libdtrace/dt_dlibs.c                          |   1 +
 test/stress/fbtsafety/tst.shortstr.d          |   1 -
 test/unittest/funcs/{ => strtok}/tst.strtok.d |   0
 test/unittest/funcs/{ => strtok}/tst.strtok.r |   0
 test/unittest/funcs/strtok/tst.strtok2.d      |  39 ++
 test/unittest/funcs/strtok/tst.strtok2.r      |   9 +
 test/unittest/funcs/strtok/tst.strtok_long.d  |  35 ++
 test/unittest/funcs/strtok/tst.strtok_long.r  |   5 +
 .../funcs/{ => strtok}/tst.strtok_null.d      |   3 +-
 test/unittest/funcs/strtok/tst.strtok_null.r  |   6 +
 test/unittest/funcs/tst.strtok_null.r         |   6 -
 16 files changed, 707 insertions(+), 19 deletions(-)
 create mode 100644 bpf/strtok.S
 rename test/unittest/funcs/{ => strtok}/tst.strtok.d (100%)
 rename test/unittest/funcs/{ => strtok}/tst.strtok.r (100%)
 create mode 100644 test/unittest/funcs/strtok/tst.strtok2.d
 create mode 100644 test/unittest/funcs/strtok/tst.strtok2.r
 create mode 100644 test/unittest/funcs/strtok/tst.strtok_long.d
 create mode 100644 test/unittest/funcs/strtok/tst.strtok_long.r
 rename test/unittest/funcs/{ => strtok}/tst.strtok_null.d (79%)
 create mode 100644 test/unittest/funcs/strtok/tst.strtok_null.r
 delete mode 100644 test/unittest/funcs/tst.strtok_null.r

diff --git a/bpf/Build b/bpf/Build
index 8d4fb0fa..062381b8 100644
--- a/bpf/Build
+++ b/bpf/Build
@@ -37,6 +37,7 @@ bpf_dlib_SOURCES = \
 	strjoin.S \
 	strlen.c \
 	strrchr.S \
+	strtok.S \
 	substr.S
 
 bpf-check: $(objdir)/include/.dir.stamp
diff --git a/bpf/strtok.S b/bpf/strtok.S
new file mode 100644
index 00000000..9b2563e0
--- /dev/null
+++ b/bpf/strtok.S
@@ -0,0 +1,498 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#define DT_STRLEN_BYTES		2
+
+#define BPF_FUNC_probe_read	4
+#define BPF_FUNC_probe_read_str	45
+
+	.text
+
+/*
+ * The algorithm stores information about the delimiters in a table for
+ * 256 ASCII chars, each table entry a single bit indicating whether the
+ * char is a delimiter.  This version stores the 256-bit (32-byte) table
+ * on the stack as four, 8-byte bins.  The bit for char c is bit c&63 in
+ * bin c>>6.  The bins are stored at:
+ *     bin 0    [TAB+- 8]
+ *     bin 1    [TAB+-16]
+ *     bin 2    [TAB+-24]
+ *     bin 3    [TAB+-32]
+ * where TAB will be dt_strtok()'s FP.
+ */
+
+/* ================================================== */
+
+/*
+ * // Initialize the delimiter table (place a nonzero bit for each delimiter found).
+ * // Note that:
+ * //   - we expect the delimiter string to be NULL-terminated
+ * //   - the NULL byte is always going to be set in the delimiter table
+ * //   - we also pass a "length" argument in to help the BPF verifier
+ * void dt_strtok_init_table(uint64_t *tab, char *del, uint64_t len)
+ * {
+ *     idx = 0
+ * Linit_table:
+ *     if (idx >= len) return
+ *     chr = del[idx]
+ *     bit = (1 << (chr & 63))
+ *     bin = chr >> 6
+ *     tab[bin] |= bit
+ *     if (chr == '\0') return
+ *     idx++
+ *     goto Linit_table
+ * }
+ */
+	.align	4
+	.global	dt_strtok_init_table
+	.type	dt_strtok_init_table, @function
+dt_strtok_init_table:
+#define TAB %r1
+#define DEL %r2
+#define LEN %r3
+#define IDX %r4
+#define CHR %r5
+#define BIN %r6
+#define BIT %r7
+#define TMP %r8
+#define USE %r9
+	mov	IDX, 0			/* idx = 0 */
+
+.Linit_table:
+	jlt	IDX, LEN, 1		/* if (idx >= len) return */
+	exit
+
+	mov	CHR, DEL
+	add	CHR, IDX
+	ldxb	CHR, [CHR+0]		/* chr = del[idx] */
+	and	CHR, 0xff
+
+	mov	BIT, 1			/* bit = (1 << (chr & 63)) */
+	mov	TMP, CHR
+	and	TMP, 63
+	lsh	BIT, TMP
+
+	mov	BIN, CHR		/* bin = chr >> 6 */
+	rsh	BIN, 6
+
+	/* tab[bin] |= bit */
+	/*
+	 * Since the table is on the stack and we cannot use variable
+	 * offsets into the stack, we iterate over all four bins:
+	 *     tab[- 8] |= (0 == bin ? 1 : 0) * bit
+	 *     tab[-16] |= (1 == bin ? 1 : 0) * bit
+	 *     tab[-24] |= (2 == bin ? 1 : 0) * bit
+	 *     tab[-32] |= (3 == bin ? 1 : 0) * bit
+	 *
+	 * Further, since the BPF verifier is easily stymied by conditional
+	 * branching, we compute the conditionalized operators with:
+	 *                         //    try == bin    try != bin
+	 *     use = (try ^ bin)   //    use == 0      use >  0
+	 *     use--               //    use <  0      use >= 0
+	 *     use >>= 63          //    use == 1      use == 0
+	 *     tab[off] |= use * bit
+	 */
+.macro macro_init try off
+	mov	USE, BIN
+	xor	USE, \try
+	sub	USE, 1
+	rsh	USE, 63
+	mul	USE, BIT
+
+	ldxdw	TMP, [TAB+\off]
+	or	TMP, USE
+	stxdw	[TAB+\off], TMP
+.endm
+	macro_init 0,-8
+	macro_init 1,-16
+	macro_init 2,-24
+	macro_init 3,-32
+
+	jne	CHR, 0, 1		/* if (chr == '\0') return */
+	exit
+
+	add	IDX, 1			/* idx++ */
+
+	ja	.Linit_table		/* goto Linit_table */
+#undef TAB
+#undef DEL
+#undef LEN
+#undef IDX
+#undef CHR
+#undef BIN
+#undef BIT
+#undef TMP
+#undef USE
+	.size	dt_strtok_init_table, .-dt_strtok_init_table
+
+/* ================================================== */
+
+/*
+ * // Look up each byte, replacing it by 1 or 0 using the table.
+ * void dt_strtok_lookup(uint64_t *tab, char *str, uint64_t len)
+ * {
+ *     idx = 0
+ * Llookup:
+ *     if (idx >= len) return
+ *     ptr = &str[idx]
+ *     bin = *ptr >> 6
+ *     val = tab[bin]
+ *     val >>= *ptr & 63
+ *     val &= 1
+ *     *ptr = val
+ *     idx++
+ *     goto Llookup
+ * }
+ */
+	.align	4
+	.global	dt_strtok_lookup
+	.type	dt_strtok_lookup, @function
+dt_strtok_lookup:
+
+#define VAL %r0
+#define TAB %r1
+#define STR %r2
+#define LEN %r3
+#define BIN %r4
+#define IDX %r5
+#define PTR %r6
+#define TMP %r7
+#define USE %r8
+
+	mov	IDX, 0			/* idx = 0 */
+
+.Llookup:
+	jlt	IDX, LEN, 1		/* if (idx >= len) return */
+	exit
+
+	mov	PTR, STR
+	add	PTR, IDX		/* ptr = &str[idx] */
+
+	ldxb	BIN, [PTR+0]
+	and	BIN, 255
+	rsh	BIN, 6			/* bin = *ptr >> 6 */
+
+	/* val = tab[bin] */
+	/*
+	 * The tab[bin] lookup is tricky for the same reasons as
+	 * described for dt_strtok_init_table().  This time, we use
+	 *     val = 0
+	 *     val += (0 == bin ? 1 : 0) * tab[- 8]
+	 *     val += (1 == bin ? 1 : 0) * tab[-16]
+	 *     val += (2 == bin ? 1 : 0) * tab[-24]
+	 *     val += (3 == bin ? 1 : 0) * tab[-32]
+	 * with each conditionalized operator replaced by
+	 *                         //    try == bin    try != bin
+	 *     use = (try ^ bin)   //    use == 0      use >  0
+	 *     use--               //    use <  0      use >= 0
+	 *     use >>= 63          //    use == 1      use == 0
+	 *     val += use * tab[off]
+	 */
+	mov	VAL, 0			/* val = 0 */
+.macro macro_look try off
+	mov	USE, \try
+	xor	USE, BIN		/* use = try ^ bin */
+	sub	USE, 1			/* use-- */
+	rsh	USE, 63			/* use >>= 63 */
+	ldxdw	TMP, [TAB+\off]
+	mul	TMP, USE
+	add	VAL, TMP		/* val += use * tab[try] */
+.endm
+	macro_look 0,-8
+	macro_look 1,-16
+	macro_look 2,-24
+	macro_look 3,-32
+
+	ldxb	TMP, [PTR+0]
+	and	TMP, 63
+	rsh	VAL, TMP		/* val >>= *ptr & 63 */
+
+	and	VAL, 1			/* val &= 1 */
+
+	stxb	[PTR+0], VAL		/* *ptr = val */
+
+	add	IDX, 1			/* idx++ */
+
+	ja	.Llookup		/* goto Llookup */
+#undef VAL
+#undef TAB
+#undef STR
+#undef LEN
+#undef BIN
+#undef IDX
+#undef PTR
+#undef TMP
+#undef USE
+	.size	dt_strtok_lookup, .-dt_strtok_lookup
+
+/* ================================================== */
+
+/*
+ * // Each byte is 1 or 0, flip its value.
+ * // Do 8 bytes at a time; len must be a multiple of 8.
+ * void dt_strtok_flip(char *str, uint64_t len)
+ * {
+ *     msk = 0x0101010101010101
+ *     idx = 0
+ * Lflip:
+ *     if (idx >= len) return
+ *     ptr = (uint64_t*)&str[idx]
+ *     val = *ptr
+ *     val ^= msk
+ *     *ptr = val
+ *     idx += 8
+ *     goto Lflip
+ * }
+ */
+	.align	4
+	.global	dt_strtok_flip
+	.type	dt_strtok_flip, @function
+dt_strtok_flip:
+
+#define STR %r1
+#define LEN %r2
+#define MSK %r3
+#define IDX %r4
+#define PTR %r5
+#define VAL %r6
+
+	lddw	MSK, 0x0101010101010101	/* msk = 0x0101010101010101 */
+	mov	IDX, 0			/* idx = 0 */
+
+.Lflip:
+	jlt	IDX, LEN, 1		/* if (idx >= siz) return */
+	exit
+
+	mov	PTR, STR
+	add	PTR, IDX		/* ptr = (uint64_t*)&str[idx] */
+
+	ldxdw	VAL, [PTR+0]		/* val = *ptr */
+	xor	VAL, MSK		/* val ^= msk */
+	stxdw	[PTR+0], VAL		/* *ptr = val */
+
+	add	IDX, 8			/* idx += 8 */
+
+	ja	.Lflip			/* goto Lflip */
+#undef STR
+#undef LEN
+#undef MSK
+#undef IDX
+#undef PTR
+#undef VAL
+	.size	dt_strtok_flip, .-dt_strtok_flip
+
+/* ================================================== */
+
+/*
+ * // str has an 8-byte prefix giving the offset to start at.
+ * // So the actual string starts at str+8+*((uint64_t*)str).
+ * void dt_strtok(char *dst, char *str, const char *del, char *tmp)
+ * {
+ *     // discard delimiter length prefix; we look for the NULL terminator anyhow
+ *     del += DT_STRLEN_BYTES
+ *
+ *     // len = roundup(STRSZ + 1, 8)
+ *     len = ((STRSZ + 1) + 7) & -8
+ *
+ *     // zero the delimiter table (do here so we see the stack usage)
+ *     *((uint64_t *)(*fp +  -8)) = 0
+ *     *((uint64_t *)(*fp + -16)) = 0
+ *     *((uint64_t *)(*fp + -24)) = 0
+ *     *((uint64_t *)(*fp + -32)) = 0
+ *
+ *     // initialize the delimiter table
+ *     // Note:
+ *     //   - we provide "len" as a hint to the BPF verifier
+ *     //   - we copy "del" into "tmp" to guarantee "len" room
+ *     bpf_probe_read_str(tmp, STRSZ + 1, del)
+ *     dt_strtok_init_table(fp, tmp, len)
+ *
+ *     // copy str into tmp
+ *     end = bpf_probe_read_str(tmp, STRSZ + 1, str+8+*((uint64_t*)str))
+ *     end--
+ *
+ *     // check each byte against delimiter table
+ *     dt_strtok_lookup(fp, tmp, len)
+ *
+ *     // find first non-delimiting char
+ *     bgn = bpf_probe_read_str(dst, STRSZ + 1, tmp)
+ *     if (bgn s<= 0) goto Lnull
+ *     bgn--
+ *
+ *     // make sure there is at least one char
+ *     if (bgn >= end) goto Lnull
+ *
+ *     // flip each byte
+ *     dt_strtok_flip(tmp, len)
+ *
+ *     // find actual length
+ *     len = bpf_probe_read_str(dst, STRSZ + 1 - bgn, tmp + bgn)
+ *     if (len s<= 0) goto Lnull
+ *     len--
+ *
+ *     // adjust bgn to account for strtok offset
+ *     bgn += *((uint64_t*)str)
+ *
+ *     // copy str + bgn to destination
+ *     dt_strlen_store(len, dst)
+ *     bpf_probe_read(dst + DT_STRLEN_BYTES, len, str + 8 + bgn)
+ *     dst[DT_STRLEN_BYTES + len] = '\0'
+ *
+ *     // update the 8-byte prefix (strtok offset)
+ *     bgn += len
+ *     *((uint64_t*)str) = bgn
+ *
+ *     return dst
+ *
+ * Lnull:
+ *     // advance the strtok offset to the end and return 0
+ *     *((uint64_t*)str) = end
+ *     return 0
+ * }
+ */
+
+	.align	4
+	.global	dt_strtok
+	.type	dt_strtok, @function
+dt_strtok:
+/* the stack has the delimiter table starting at %fp+-32 */
+#define DST_X [%fp+-40]
+#define END_X [%fp+-48]
+#define STR %r6
+#define DEL %r7
+#define LEN %r8
+#define TMP %r9
+
+	/* stash variables */
+	stxdw	DST_X, %r1
+	mov	STR, %r2
+	mov	DEL, %r3
+	mov	TMP, %r4
+
+	/* discard delimiter length prefix; we look for the NULL terminator anyhow */
+	add	DEL, DT_STRLEN_BYTES	/* del += DT_STRLEN_BYTES */
+
+	/* len = roundup(STRSZ + 1, 8) */
+	lddw	LEN, STRSZ
+	add	LEN, 8
+	and	LEN, -8			/* len = ((STRSZ + 1) + 7) & -8 */
+
+	/* zero the delimiter table (do here so we see the stack usage) */
+	mov	%r1, 0
+	stxdw	[%fp+- 8], %r1		/* *((uint64_t *)(*fp +  -8)) = 0 */
+	stxdw	[%fp+-16], %r1		/* *((uint64_t *)(*fp + -16)) = 0 */
+	stxdw	[%fp+-24], %r1		/* *((uint64_t *)(*fp + -24)) = 0 */
+	stxdw	[%fp+-32], %r1		/* *((uint64_t *)(*fp + -32)) = 0 */
+
+	/* initialize the delimiter table */
+	mov	%r1, TMP
+	lddw	%r2, STRSZ
+	add	%r2, 1
+	mov	%r3, DEL
+	call	BPF_FUNC_probe_read_str	/* bpf_probe_read_str(tmp, STRSZ + 1, del) */
+#undef DEL
+	mov	%r1, %fp
+	mov	%r2, TMP
+	mov	%r3, LEN
+	call	dt_strtok_init_table	/* dt_strtok_init_table(fp, tmp, len) */
+
+	/* copy str into tmp */
+	mov	%r1, TMP
+	lddw	%r2, STRSZ
+	add	%r2, 1
+	mov	%r3, STR
+	add	%r3, 8
+	ldxdw	%r4, [STR+0]
+	jsge	%r4, 0, 1
+	mov	%r4, 0
+	add	%r3, %r4
+	call	BPF_FUNC_probe_read_str	/* end = bpf_probe_read_str(tmp, STRSZ + 1, str+8+*((uint64_t*)str)) */
+	sub	%r0, 1			/* end-- */
+	stxdw	END_X, %r0
+
+	/* check each byte against delimiter table */
+	mov	%r1, %fp
+	mov	%r2, TMP
+	mov	%r3, LEN
+	call	dt_strtok_lookup	/* dt_strtok_lookup(fp, tmp, len) */
+
+	/* find first non-delimiting char */
+#define BGN %r7
+	ldxdw	%r1, DST_X
+	lddw	%r2, STRSZ
+	add	%r2, 1
+	mov	%r3, TMP
+	call	BPF_FUNC_probe_read_str	/* bgn = bpf_probe_read_str(dst, STRSZ + 1, tmp) */
+	mov	BGN, %r0
+	jsle	BGN, 0, .Lnull		/* if (bgn s<= 0) goto Lnull */
+	sub	BGN, 1			/* bgn-- */
+
+	/* make sure there is at least one char */
+	ldxdw	%r1, END_X
+	jge	BGN, %r1, .Lnull	/* if (bgn >= end) goto Lnull */
+
+	/* flip each byte */
+	mov	%r1, TMP
+	mov	%r2, LEN
+	call	dt_strtok_flip		/* dt_strtok_flip(tmp, len) */
+
+	/* find actual length */
+	ldxdw	%r1, DST_X
+	lddw	%r2, STRSZ
+	add	%r2, 1
+	sub	%r2, BGN
+	mov	%r3, TMP
+	add	%r3, BGN
+	call	BPF_FUNC_probe_read_str	/* len = bpf_probe_read_str(dst, STRSZ + 1 - bgn, tmp + bgn) */
+	mov	LEN, %r0
+	jsle	LEN, 0, .Lnull		/* if (len s<= 0) goto Lnull */
+	sub	LEN, 1			/* len-- */
+
+	/* adjust bgn to account for strtok offset */
+	ldxdw	%r1, [STR+0]
+	add	BGN, %r1		/* bgn += *((uint64_t*)str) */
+
+	/* (help the BPF verifier) */
+	jsge	BGN, 0, 1
+	mov	BGN, 0
+
+	/* copy str + bgn to destination */
+	mov	%r1, LEN
+	ldxdw	%r2, DST_X
+	call	dt_strlen_store		/* dt_strlen_store(len, dst) */
+
+	ldxdw	%r1, DST_X
+	add	%r1, DT_STRLEN_BYTES
+	mov	%r2, LEN
+	mov	%r3, STR
+	add	%r3, 8
+	add	%r3, BGN
+	call	BPF_FUNC_probe_read	/* bpf_probe_read(dst + DT_STRLEN_BYTES, len, str + 8 + bgn) */
+
+	mov	%r1, 0
+	ldxdw	%r2, DST_X
+	add	%r2, LEN
+	stxb	[%r2+DT_STRLEN_BYTES], %r1
+					/* dst[DT_STRLEN_BYTES + len] = '\0' */
+
+	/* update the 8-byte prefix (strtok offset) */
+	add	BGN, LEN		/* bgn += len */
+	stxdw	[STR+0], BGN		/* *((uint64_t*)str) = bgn */
+
+	ldxdw	%r0, DST_X		/* return dst */
+	exit
+
+	/* advance the strtok offset to the end and return 0 */
+.Lnull:
+	ldxdw	%r0, END_X
+	stxdw	[STR+0], %r0		/* *((uint64_t*)str) = end */
+	mov	%r0, 0			/* return 0 */
+	exit
+#undef STR
+#undef TMP
+#undef LEN
+#undef BGN
+#undef DST_X
+	.size	dt_strtok, .-dt_strtok
diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
index a85fe5af..913d26a7 100644
--- a/libdtrace/dt_bpf.c
+++ b/libdtrace/dt_bpf.c
@@ -292,20 +292,19 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
 	 *	- maximum trace buffer record size, rounded up to the nearest
 	 *	  multiple of 8
 	 *	- the greater of:
-	 *		+ the maximum stack trace size
-	 *		+ DT_TSTRING_SLOTS times the maximum string size (plus
-	 *		  space for length and terminating '\0') rounded up to
-	 *		  the nearest multiple of 8),
-	 *		  plus the maximum string size plus space for '\0' (to
-	 *		  accomodate the BPF verifier)
+	 *		- the maximum stack trace size
+	 *		- the size for all tstrings
+	 *	- size of the internal strtok() state
+	 *		- 8 bytes for the offset index
+	 *		- the maximum string size
+	 *		- a byte for the terminating NULL char
 	 */
 	memsz = roundup(sizeof(dt_mstate_t), 8) +
 		8 +
 		roundup(dtp->dt_maxreclen, 8) +
 		MAX(sizeof(uint64_t) * dtp->dt_options[DTRACEOPT_MAXFRAMES],
-		    DT_TSTRING_SLOTS * strdatasz +
-		    dtp->dt_options[DTRACEOPT_STRSIZE] + 1
-		);
+		    DT_TSTRING_SLOTS * strdatasz) +
+		sizeof(uint64_t) + dtp->dt_options[DTRACEOPT_STRSIZE] + 1;
 	if (create_gmap(dtp, "mem", BPF_MAP_TYPE_PERCPU_ARRAY,
 			sizeof(uint32_t), memsz, 1) == -1)
 		return -1;		/* dt_errno is set for us */
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index b01d3a76..72b680cc 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -119,6 +119,8 @@ dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
 	 *	dctx.mst->prid = PRID;	// stw [%r7 + DMST_PRID], PRID
 	 *	dctx.mst->syscall_errno = 0;
 	 *				// stw [%r7 + DMST_ERRNO], 0
+	 *	dctx.mst->strtok_init = 0;
+	 *				// stw [%r7 + DMST_STRTOK], 0
 	 */
 	emit(dlp,  BPF_STORE_IMM(BPF_W, BPF_REG_FP, DCTX_FP(DCTX_MST), 0));
 	dt_cg_xsetx(dlp, mem, DT_LBL_NONE, BPF_REG_1, mem->di_id);
@@ -130,6 +132,7 @@ dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
 	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_FP, DCTX_FP(DCTX_MST), BPF_REG_7));
 	emite(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_7, DMST_PRID, -1), prid);
 	emit(dlp,  BPF_STORE_IMM(BPF_W, BPF_REG_7, DMST_ERRNO, 0));
+	emit(dlp,  BPF_STORE_IMM(BPF_W, BPF_REG_7, DMST_STRTOK, 0));
 
 	/*
 	 *	buf = rc + roundup(sizeof(dt_mstate_t), 8);
@@ -3872,6 +3875,104 @@ dt_cg_subr_strstr(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
 	TRACE_REGSET("    subr-strstr:End  ");
 }
 
+#define DMEM_STRTOK MAX(sizeof(uint64_t) * dtp->dt_options[DTRACEOPT_MAXFRAMES], \
+    DT_TSTRING_SLOTS * roundup(DT_STRLEN_BYTES + dtp->dt_options[DTRACEOPT_STRSIZE] + 1, 8))
+static void
+dt_cg_subr_strtok(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
+{
+	dtrace_hdl_t	*dtp = yypcb->pcb_hdl;
+	dt_node_t	*str = dnp->dn_args;
+	dt_node_t	*del = str->dn_list;
+	dt_ident_t	*idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_strtok");
+	uint64_t	off;
+
+	assert(idp != NULL);
+
+	TRACE_REGSET("    subr-strtok:Begin");
+
+	/* static check for NULL string */
+	if (str->dn_op != DT_TOK_INT || str->dn_value != 0) {
+		/* string is present:  copy it to internal state */
+		dt_cg_node(str, dlp, drp);
+
+		if (dt_regset_xalloc_args(drp) == -1)
+			longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+
+		/* set flag that the state is initialized */
+		emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_FP, DT_STK_DCTX));
+		emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_1, DCTX_MST));
+		emit(dlp,  BPF_STORE_IMM(BPF_W, BPF_REG_1, DMST_STRTOK, 1));
+
+		/* the 8-byte prefix is the offset, which we initialize to 0 */
+		emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_FP, DT_STK_DCTX));
+		emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_1, DCTX_MEM));
+		emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, DMEM_STRTOK));
+		emit(dlp,  BPF_STORE_IMM(BPF_DW, BPF_REG_1, 0, 0));
+		emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8));
+		emit(dlp,  BPF_MOV_IMM(BPF_REG_2, dtp->dt_options[DTRACEOPT_STRSIZE] + 1));
+		emit(dlp,  BPF_MOV_REG(BPF_REG_3, str->dn_reg));
+		dt_regset_free(drp, str->dn_reg);
+		if (str->dn_tstring)
+			dt_cg_tstring_free(yypcb, str);
+		emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, DT_STRLEN_BYTES));
+		dt_regset_xalloc(drp, BPF_REG_0);
+		emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_probe_read_str));
+		dt_regset_free(drp, BPF_REG_0);
+		dt_regset_free_args(drp);
+	} else {
+		/* NULL string:  error if internal state is uninitialized */
+		emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_FP, DT_STK_DCTX));
+		emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_0, DCTX_MST));
+		emit(dlp,  BPF_LOAD(BPF_W, BPF_REG_0, BPF_REG_0, DMST_STRTOK));
+		emit(dlp,  BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 32));
+		dt_cg_check_notnull(dlp, drp, BPF_REG_0);
+	}
+
+	/* get delimiters */
+	dt_cg_node(del, dlp, drp);
+	dt_cg_check_notnull(dlp, drp, del->dn_reg);
+
+	/* allocate temporary string for result */
+	dnp->dn_reg = dt_regset_alloc(drp);
+	if (dnp->dn_reg == -1)
+		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+	dt_cg_tstring_alloc(yypcb, dnp);
+	emit(dlp,  BPF_LOAD(BPF_DW, dnp->dn_reg, BPF_REG_FP, DT_STK_DCTX));
+	emit(dlp,  BPF_LOAD(BPF_DW, dnp->dn_reg, dnp->dn_reg, DCTX_MEM));
+	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, dnp->dn_reg, dnp->dn_tstring->dn_value));
+
+	/* allocate temporary string for internal purposes */
+	off = dt_cg_tstring_xalloc(yypcb);
+
+	/* function call */
+
+	if (dt_regset_xalloc_args(drp) == -1)
+		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+
+	emit(dlp,  BPF_MOV_REG(BPF_REG_1, dnp->dn_reg));
+	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_2, BPF_REG_FP, DT_STK_DCTX));
+	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_2, BPF_REG_2, DCTX_MEM));
+	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DMEM_STRTOK));
+	emit(dlp,  BPF_MOV_REG(BPF_REG_3, del->dn_reg));
+	dt_regset_free(drp, del->dn_reg);
+	if (del->dn_tstring)
+		dt_cg_tstring_free(yypcb, del);
+
+	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_FP, DT_STK_DCTX));
+	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_4, DCTX_MEM));
+	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, off));
+
+	dt_regset_xalloc(drp, BPF_REG_0);
+	emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
+	dt_regset_free_args(drp);
+	dt_cg_tstring_xfree(yypcb, off);
+	emit(dlp,  BPF_MOV_REG(dnp->dn_reg, BPF_REG_0));
+	dt_regset_free(drp, BPF_REG_0);
+
+	TRACE_REGSET("    subr-strtok:End  ");
+}
+#undef DMEM_STRTOK
+
 static void
 dt_cg_subr_substr(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
 {
@@ -3986,7 +4087,7 @@ static dt_cg_subr_f *_dt_cg_subr[DIF_SUBR_MAX + 1] = {
 	[DIF_SUBR_STRCHR]		= &dt_cg_subr_strchr,
 	[DIF_SUBR_STRRCHR]		= &dt_cg_subr_strrchr,
 	[DIF_SUBR_STRSTR]		= &dt_cg_subr_strstr,
-	[DIF_SUBR_STRTOK]		= NULL,
+	[DIF_SUBR_STRTOK]		= &dt_cg_subr_strtok,
 	[DIF_SUBR_SUBSTR]		= &dt_cg_subr_substr,
 	[DIF_SUBR_INDEX]		= &dt_cg_subr_index,
 	[DIF_SUBR_RINDEX]		= &dt_cg_subr_rindex,
diff --git a/libdtrace/dt_dctx.h b/libdtrace/dt_dctx.h
index d19ea0cc..20105ee5 100644
--- a/libdtrace/dt_dctx.h
+++ b/libdtrace/dt_dctx.h
@@ -23,6 +23,7 @@ typedef struct dt_mstate {
 	uint32_t	clid;		/* Clause ID (unique per probe) */
 	uint32_t	tag;		/* Tag (for future use) */
 	int32_t		syscall_errno;	/* syscall errno */
+	int32_t		strtok_init;	/* Flag if strtok state inited */
 	uint64_t	fault;		/* DTrace fault flags */
 	uint64_t	tstamp;		/* cached timestamp value */
 	dt_pt_regs	regs;		/* CPU registers */
@@ -66,6 +67,7 @@ typedef struct dt_dctx {
 #define DMST_CLID	offsetof(dt_mstate_t, clid)
 #define DMST_TAG	offsetof(dt_mstate_t, tag)
 #define DMST_ERRNO	offsetof(dt_mstate_t, syscall_errno)
+#define DMST_STRTOK	offsetof(dt_mstate_t, strtok_init)
 #define DMST_FAULT	offsetof(dt_mstate_t, fault)
 #define DMST_TSTAMP	offsetof(dt_mstate_t, tstamp)
 #define DMST_REGS	offsetof(dt_mstate_t, regs)
diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
index 10965eb5..a15b8269 100644
--- a/libdtrace/dt_dlibs.c
+++ b/libdtrace/dt_dlibs.c
@@ -66,6 +66,7 @@ static const dt_ident_t		dt_bpf_symbols[] = {
 	DT_BPF_SYMBOL(dt_strcmp, DT_IDENT_SYMBOL),
 	DT_BPF_SYMBOL(dt_strjoin, DT_IDENT_SYMBOL),
 	DT_BPF_SYMBOL(dt_strrchr, DT_IDENT_SYMBOL),
+	DT_BPF_SYMBOL(dt_strtok, DT_IDENT_SYMBOL),
 	DT_BPF_SYMBOL(dt_substr, DT_IDENT_SYMBOL),
 	DT_BPF_SYMBOL(dt_speculation, DT_IDENT_SYMBOL),
 	DT_BPF_SYMBOL(dt_speculation_speculate, DT_IDENT_SYMBOL),
diff --git a/test/stress/fbtsafety/tst.shortstr.d b/test/stress/fbtsafety/tst.shortstr.d
index 714d4833..dbf71b07 100644
--- a/test/stress/fbtsafety/tst.shortstr.d
+++ b/test/stress/fbtsafety/tst.shortstr.d
@@ -4,7 +4,6 @@
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
-/* @@xfail: dtv2 */
 
 #pragma D option quiet
 #pragma D option strsize=16
diff --git a/test/unittest/funcs/tst.strtok.d b/test/unittest/funcs/strtok/tst.strtok.d
similarity index 100%
rename from test/unittest/funcs/tst.strtok.d
rename to test/unittest/funcs/strtok/tst.strtok.d
diff --git a/test/unittest/funcs/tst.strtok.r b/test/unittest/funcs/strtok/tst.strtok.r
similarity index 100%
rename from test/unittest/funcs/tst.strtok.r
rename to test/unittest/funcs/strtok/tst.strtok.r
diff --git a/test/unittest/funcs/strtok/tst.strtok2.d b/test/unittest/funcs/strtok/tst.strtok2.d
new file mode 100644
index 00000000..14f34043
--- /dev/null
+++ b/test/unittest/funcs/strtok/tst.strtok2.d
@@ -0,0 +1,39 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ */
+
+#pragma D option quiet
+
+BEGIN
+{
+	x = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
+	y = "abcABC";
+	printf("%s\n", strtok(x, y));
+	y = "MNO";
+	printf("%s\n", strtok(NULL, y));
+	printf("%s\n", strtok(NULL, y));
+	y = "0123456789";
+	printf("%s\n", strtok(NULL, y));
+	printf("%s\n", strtok(NULL, y) == NULL ? "got expected NULL" : "ERROR: non-NULL");
+
+	x = "";
+	y = "";
+	printf("%s\n", strtok(x, y) == NULL ? "got expected NULL" : "ERROR: non-NULL");
+
+	x = "foo";
+	printf("%s\n", strtok(x, y));
+
+	x = "";
+	y = "foo";
+	printf("%s\n", strtok(x, y) == NULL ? "got expected NULL" : "ERROR: non-NULL");
+
+	exit(0);
+}
+
+ERROR
+{
+	exit(1);
+}
diff --git a/test/unittest/funcs/strtok/tst.strtok2.r b/test/unittest/funcs/strtok/tst.strtok2.r
new file mode 100644
index 00000000..c58ea336
--- /dev/null
+++ b/test/unittest/funcs/strtok/tst.strtok2.r
@@ -0,0 +1,9 @@
+defghijklmnopqrstuvwxyz
+ABCDEFGHIJKL
+PQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKL
+MNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
+got expected NULL
+got expected NULL
+foo
+got expected NULL
+
diff --git a/test/unittest/funcs/strtok/tst.strtok_long.d b/test/unittest/funcs/strtok/tst.strtok_long.d
new file mode 100644
index 00000000..1fbe415d
--- /dev/null
+++ b/test/unittest/funcs/strtok/tst.strtok_long.d
@@ -0,0 +1,35 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ */
+
+#pragma D option quiet
+
+BEGIN
+{
+	/* 256-char string ending in "XYZ" */
+	x = "_____________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________XYZ";
+
+	/* check whether the last char of a long string is seen */
+	y = "a";
+	printf("%s\n", strtok(x, y));
+
+	/* check whether the last char of a long delimiter string is seen */
+	z = "zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA";
+	printf("%s\n", strtok(z, x));
+
+	/* check that strtok internal state can handle a large offset */
+	y = "Z";
+	printf("%s\n", strtok(x, y));
+	y = "a";
+	printf("%s\n", strtok(NULL, y));
+
+	exit(0);
+}
+
+ERROR
+{
+	exit(1);
+}
diff --git a/test/unittest/funcs/strtok/tst.strtok_long.r b/test/unittest/funcs/strtok/tst.strtok_long.r
new file mode 100644
index 00000000..a752554e
--- /dev/null
+++ b/test/unittest/funcs/strtok/tst.strtok_long.r
@@ -0,0 +1,5 @@
+_____________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________XYZ
+zyxwvutsrqponmlkjihgfedcba
+_____________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________XY
+Z
+
diff --git a/test/unittest/funcs/tst.strtok_null.d b/test/unittest/funcs/strtok/tst.strtok_null.d
similarity index 79%
rename from test/unittest/funcs/tst.strtok_null.d
rename to test/unittest/funcs/strtok/tst.strtok_null.d
index 45301a78..1d536d60 100644
--- a/test/unittest/funcs/tst.strtok_null.d
+++ b/test/unittest/funcs/strtok/tst.strtok_null.d
@@ -1,10 +1,9 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
-/* @@xfail: dtv2 */
 
 /*
  * Test that a strtok(NULL, ...) without first calling strtok(string, ...)
diff --git a/test/unittest/funcs/strtok/tst.strtok_null.r b/test/unittest/funcs/strtok/tst.strtok_null.r
new file mode 100644
index 00000000..4002d6ef
--- /dev/null
+++ b/test/unittest/funcs/strtok/tst.strtok_null.r
@@ -0,0 +1,6 @@
+                   FUNCTION:NAME
+                          :ERROR 
+
+-- @@stderr --
+dtrace: script 'test/unittest/funcs/strtok/tst.strtok_null.d' matched 2 probes
+dtrace: error on enabled probe ID 3 (ID 1: dtrace:::BEGIN): invalid address ({ptr}) in action #1
diff --git a/test/unittest/funcs/tst.strtok_null.r b/test/unittest/funcs/tst.strtok_null.r
deleted file mode 100644
index a1f82f91..00000000
--- a/test/unittest/funcs/tst.strtok_null.r
+++ /dev/null
@@ -1,6 +0,0 @@
-                   FUNCTION:NAME
-                          :ERROR 
-
--- @@stderr --
-dtrace: script 'test/unittest/funcs/tst.strtok_null.d' matched 2 probes
-dtrace: error on enabled probe ID 1 (ID 1: dtrace:::BEGIN): invalid address ({ptr}) in action #1 at DIF offset 40
-- 
2.18.4




More information about the DTrace-devel mailing list