[DTrace-devel] [PATCH] Add support for strtok() subroutine

Kris Van Hees kris.van.hees at oracle.com
Sat Nov 13 05:22:04 UTC 2021


Sending of comments already while I am still going through the assembler code.

On Mon, Nov 08, 2021 at 02:50:44PM -0500, eugene.loh at oracle.com wrote:
> From: Eugene Loh <eugene.loh at oracle.com>
> 
> Note that the space for tstrings used to allocate an extra tstring at
> the end in order to pacify the BPF verifier.  Use this extra space

The "used to allocate" makes no sense here because we still do that, and
your patch does not change that either.  Just say that the space for tstrings
allocates space for an extra string to pacify the BPF verifier.

> for the internal storage needed by strtok() between calls.  Since a
> stack() call might occur between strtok() calls, this space can no
> longer overlap the stack scratch space.

You should include some comment here on what internal storage you need,
i.e. what it is used for..

> Compute the tstring length once -- in dt_cg_tstring_reset -- so that
> its value does not need to be maintained in multiple places.

This should be done differently and in a separate patch.  Really, the only
things that needs to be done is put the string size value outside of the loop.
There should not be any need to store it in dtp (more on that below).

> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
> ---
>  bpf/Build                                     |   1 +
>  bpf/strtok.S                                  | 498 ++++++++++++++++++
>  libdtrace/dt_bpf.c                            |  18 +-
>  libdtrace/dt_cg.c                             | 122 ++++-
>  libdtrace/dt_dlibs.c                          |   1 +
>  libdtrace/dt_impl.h                           |   2 +
>  test/stress/fbtsafety/tst.shortstr.d          |   1 -
>  test/unittest/funcs/{ => strtok}/tst.strtok.d |   0
>  test/unittest/funcs/{ => strtok}/tst.strtok.r |   0
>  test/unittest/funcs/strtok/tst.strtok2.d      |  39 ++
>  test/unittest/funcs/strtok/tst.strtok2.r      |   9 +
>  test/unittest/funcs/strtok/tst.strtok_long.d  |  35 ++
>  test/unittest/funcs/strtok/tst.strtok_long.r  |   5 +
>  .../funcs/{ => strtok}/tst.strtok_null.d      |   3 +-
>  test/unittest/funcs/strtok/tst.strtok_null.r  |   6 +
>  test/unittest/funcs/tst.strtok_null.r         |   6 -
>  16 files changed, 721 insertions(+), 25 deletions(-)
>  create mode 100644 bpf/strtok.S
>  rename test/unittest/funcs/{ => strtok}/tst.strtok.d (100%)
>  rename test/unittest/funcs/{ => strtok}/tst.strtok.r (100%)
>  create mode 100644 test/unittest/funcs/strtok/tst.strtok2.d
>  create mode 100644 test/unittest/funcs/strtok/tst.strtok2.r
>  create mode 100644 test/unittest/funcs/strtok/tst.strtok_long.d
>  create mode 100644 test/unittest/funcs/strtok/tst.strtok_long.r
>  rename test/unittest/funcs/{ => strtok}/tst.strtok_null.d (79%)
>  create mode 100644 test/unittest/funcs/strtok/tst.strtok_null.r
>  delete mode 100644 test/unittest/funcs/tst.strtok_null.r
> 
> diff --git a/bpf/Build b/bpf/Build
> index 5c1bc235..d8279411 100644
> --- a/bpf/Build
> +++ b/bpf/Build
> @@ -35,6 +35,7 @@ bpf_dlib_SOURCES = \
>  	strjoin.S \
>  	strlen.c \
>  	strrchr.S \
> +	strtok.S \
>  	substr.S
>  
>  bpf-check: $(objdir)/include/.dir.stamp
> diff --git a/bpf/strtok.S b/bpf/strtok.S
> new file mode 100644
> index 00000000..9b2563e0
> --- /dev/null
> +++ b/bpf/strtok.S
> @@ -0,0 +1,498 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
> + */
> +
> +#define DT_STRLEN_BYTES		2
> +
> +#define BPF_FUNC_probe_read	4
> +#define BPF_FUNC_probe_read_str	45
> +
> +	.text
> +
> +/*
> + * The algorithm stores information about the delimiters in a table for
> + * 256 ASCII chars, each table entry a single bit indicating whether the
> + * char is a delimiter.  This version stores the 256-bit (32-byte) table
> + * on the stack as four, 8-byte bins.  The bit for char c is bit c&63 in
> + * bin c>>6.  The bins are stored at:
> + *     bin 0    [TAB+- 8]
> + *     bin 1    [TAB+-16]
> + *     bin 2    [TAB+-24]
> + *     bin 3    [TAB+-32]
> + * where TAB will be dt_strtok()'s FP.
> + */
> +
> +/* ================================================== */
> +
> +/*
> + * // Initialize the delimiter table (place a nonzero bit for each delimiter found).
> + * // Note that:
> + * //   - we expect the delimiter string to be NULL-terminated
> + * //   - the NULL byte is always going to be set in the delimiter table
> + * //   - we also pass a "length" argument in to help the BPF verifier
> + * void dt_strtok_init_table(uint64_t *tab, char *del, uint64_t len)
> + * {
> + *     idx = 0
> + * Linit_table:
> + *     if (idx >= len) return
> + *     chr = del[idx]
> + *     bit = (1 << (chr & 63))
> + *     bin = chr >> 6
> + *     tab[bin] |= bit
> + *     if (chr == '\0') return
> + *     idx++
> + *     goto Linit_table
> + * }
> + */
> +	.align	4
> +	.global	dt_strtok_init_table
> +	.type	dt_strtok_init_table, @function
> +dt_strtok_init_table:
> +#define TAB %r1
> +#define DEL %r2
> +#define LEN %r3
> +#define IDX %r4
> +#define CHR %r5
> +#define BIN %r6
> +#define BIT %r7
> +#define TMP %r8
> +#define USE %r9
> +	mov	IDX, 0			/* idx = 0 */
> +
> +.Linit_table:
> +	jlt	IDX, LEN, 1		/* if (idx >= len) return */
> +	exit
> +
> +	mov	CHR, DEL
> +	add	CHR, IDX
> +	ldxb	CHR, [CHR+0]		/* chr = del[idx] */
> +	and	CHR, 0xff
> +
> +	mov	BIT, 1			/* bit = (1 << (chr & 63)) */
> +	mov	TMP, CHR
> +	and	TMP, 63
> +	lsh	BIT, TMP
> +
> +	mov	BIN, CHR		/* bin = chr >> 6 */
> +	rsh	BIN, 6
> +
> +	/* tab[bin] |= bit */
> +	/*
> +	 * Since the table is on the stack and we cannot use variable
> +	 * offsets into the stack, we iterate over all four bins:
> +	 *     tab[- 8] |= (0 == bin ? 1 : 0) * bit
> +	 *     tab[-16] |= (1 == bin ? 1 : 0) * bit
> +	 *     tab[-24] |= (2 == bin ? 1 : 0) * bit
> +	 *     tab[-32] |= (3 == bin ? 1 : 0) * bit
> +	 *
> +	 * Further, since the BPF verifier is easily stymied by conditional
> +	 * branching, we compute the conditionalized operators with:
> +	 *                         //    try == bin    try != bin
> +	 *     use = (try ^ bin)   //    use == 0      use >  0
> +	 *     use--               //    use <  0      use >= 0
> +	 *     use >>= 63          //    use == 1      use == 0
> +	 *     tab[off] |= use * bit
> +	 */
> +.macro macro_init try off
> +	mov	USE, BIN
> +	xor	USE, \try
> +	sub	USE, 1
> +	rsh	USE, 63
> +	mul	USE, BIT
> +
> +	ldxdw	TMP, [TAB+\off]
> +	or	TMP, USE
> +	stxdw	[TAB+\off], TMP
> +.endm
> +	macro_init 0,-8
> +	macro_init 1,-16
> +	macro_init 2,-24
> +	macro_init 3,-32
> +
> +	jne	CHR, 0, 1		/* if (chr == '\0') return */
> +	exit
> +
> +	add	IDX, 1			/* idx++ */
> +
> +	ja	.Linit_table		/* goto Linit_table */
> +#undef TAB
> +#undef DEL
> +#undef LEN
> +#undef IDX
> +#undef CHR
> +#undef BIN
> +#undef BIT
> +#undef TMP
> +#undef USE
> +	.size	dt_strtok_init_table, .-dt_strtok_init_table
> +
> +/* ================================================== */
> +
> +/*
> + * // Look up each byte, replacing it by 1 or 0 using the table.
> + * void dt_strtok_lookup(uint64_t *tab, char *str, uint64_t len)
> + * {
> + *     idx = 0
> + * Llookup:
> + *     if (idx >= len) return
> + *     ptr = &str[idx]
> + *     bin = *ptr >> 6
> + *     val = tab[bin]
> + *     val >>= *ptr & 63
> + *     val &= 1
> + *     *ptr = val
> + *     idx++
> + *     goto Llookup
> + * }
> + */
> +	.align	4
> +	.global	dt_strtok_lookup
> +	.type	dt_strtok_lookup, @function
> +dt_strtok_lookup:
> +
> +#define VAL %r0
> +#define TAB %r1
> +#define STR %r2
> +#define LEN %r3
> +#define BIN %r4
> +#define IDX %r5
> +#define PTR %r6
> +#define TMP %r7
> +#define USE %r8
> +
> +	mov	IDX, 0			/* idx = 0 */
> +
> +.Llookup:
> +	jlt	IDX, LEN, 1		/* if (idx >= len) return */
> +	exit
> +
> +	mov	PTR, STR
> +	add	PTR, IDX		/* ptr = &str[idx] */
> +
> +	ldxb	BIN, [PTR+0]
> +	and	BIN, 255
> +	rsh	BIN, 6			/* bin = *ptr >> 6 */
> +
> +	/* val = tab[bin] */
> +	/*
> +	 * The tab[bin] lookup is tricky for the same reasons as
> +	 * described for dt_strtok_init_table().  This time, we use
> +	 *     val = 0
> +	 *     val += (0 == bin ? 1 : 0) * tab[- 8]
> +	 *     val += (1 == bin ? 1 : 0) * tab[-16]
> +	 *     val += (2 == bin ? 1 : 0) * tab[-24]
> +	 *     val += (3 == bin ? 1 : 0) * tab[-32]
> +	 * with each conditionalized operator replaced by
> +	 *                         //    try == bin    try != bin
> +	 *     use = (try ^ bin)   //    use == 0      use >  0
> +	 *     use--               //    use <  0      use >= 0
> +	 *     use >>= 63          //    use == 1      use == 0
> +	 *     val += use * tab[off]
> +	 */
> +	mov	VAL, 0			/* val = 0 */
> +.macro macro_look try off
> +	mov	USE, \try
> +	xor	USE, BIN		/* use = try ^ bin */
> +	sub	USE, 1			/* use-- */
> +	rsh	USE, 63			/* use >>= 63 */
> +	ldxdw	TMP, [TAB+\off]
> +	mul	TMP, USE
> +	add	VAL, TMP		/* val += use * tab[try] */
> +.endm
> +	macro_look 0,-8
> +	macro_look 1,-16
> +	macro_look 2,-24
> +	macro_look 3,-32
> +
> +	ldxb	TMP, [PTR+0]
> +	and	TMP, 63
> +	rsh	VAL, TMP		/* val >>= *ptr & 63 */
> +
> +	and	VAL, 1			/* val &= 1 */
> +
> +	stxb	[PTR+0], VAL		/* *ptr = val */
> +
> +	add	IDX, 1			/* idx++ */
> +
> +	ja	.Llookup		/* goto Llookup */
> +#undef VAL
> +#undef TAB
> +#undef STR
> +#undef LEN
> +#undef BIN
> +#undef IDX
> +#undef PTR
> +#undef TMP
> +#undef USE
> +	.size	dt_strtok_lookup, .-dt_strtok_lookup
> +
> +/* ================================================== */
> +
> +/*
> + * // Each byte is 1 or 0, flip its value.
> + * // Do 8 bytes at a time; len must be a multiple of 8.
> + * void dt_strtok_flip(char *str, uint64_t len)
> + * {
> + *     msk = 0x0101010101010101
> + *     idx = 0
> + * Lflip:
> + *     if (idx >= len) return
> + *     ptr = (uint64_t*)&str[idx]
> + *     val = *ptr
> + *     val ^= msk
> + *     *ptr = val
> + *     idx += 8
> + *     goto Lflip
> + * }
> + */
> +	.align	4
> +	.global	dt_strtok_flip
> +	.type	dt_strtok_flip, @function
> +dt_strtok_flip:
> +
> +#define STR %r1
> +#define LEN %r2
> +#define MSK %r3
> +#define IDX %r4
> +#define PTR %r5
> +#define VAL %r6
> +
> +	lddw	MSK, 0x0101010101010101	/* msk = 0x0101010101010101 */
> +	mov	IDX, 0			/* idx = 0 */
> +
> +.Lflip:
> +	jlt	IDX, LEN, 1		/* if (idx >= siz) return */
> +	exit
> +
> +	mov	PTR, STR
> +	add	PTR, IDX		/* ptr = (uint64_t*)&str[idx] */
> +
> +	ldxdw	VAL, [PTR+0]		/* val = *ptr */
> +	xor	VAL, MSK		/* val ^= msk */
> +	stxdw	[PTR+0], VAL		/* *ptr = val */
> +
> +	add	IDX, 8			/* idx += 8 */
> +
> +	ja	.Lflip			/* goto Lflip */
> +#undef STR
> +#undef LEN
> +#undef MSK
> +#undef IDX
> +#undef PTR
> +#undef VAL
> +	.size	dt_strtok_flip, .-dt_strtok_flip
> +
> +/* ================================================== */
> +
> +/*
> + * // str has an 8-byte prefix giving the offset to start at.
> + * // So the actual string starts at str+8+*((uint64_t*)str).
> + * void dt_strtok(char *dst, char *str, const char *del, char *tmp)
> + * {
> + *     // discard delimiter length prefix; we look for the NULL terminator anyhow
> + *     del += DT_STRLEN_BYTES
> + *
> + *     // len = roundup(STRSZ + 1, 8)
> + *     len = ((STRSZ + 1) + 7) & -8
> + *
> + *     // zero the delimiter table (do here so we see the stack usage)
> + *     *((uint64_t *)(*fp +  -8)) = 0
> + *     *((uint64_t *)(*fp + -16)) = 0
> + *     *((uint64_t *)(*fp + -24)) = 0
> + *     *((uint64_t *)(*fp + -32)) = 0
> + *
> + *     // initialize the delimiter table
> + *     // Note:
> + *     //   - we provide "len" as a hint to the BPF verifier
> + *     //   - we copy "del" into "tmp" to guarantee "len" room
> + *     bpf_probe_read_str(tmp, STRSZ + 1, del)
> + *     dt_strtok_init_table(fp, tmp, len)
> + *
> + *     // copy str into tmp
> + *     end = bpf_probe_read_str(tmp, STRSZ + 1, str+8+*((uint64_t*)str))
> + *     end--
> + *
> + *     // check each byte against delimiter table
> + *     dt_strtok_lookup(fp, tmp, len)
> + *
> + *     // find first non-delimiting char
> + *     bgn = bpf_probe_read_str(dst, STRSZ + 1, tmp)
> + *     if (bgn s<= 0) goto Lnull
> + *     bgn--
> + *
> + *     // make sure there is at least one char
> + *     if (bgn >= end) goto Lnull
> + *
> + *     // flip each byte
> + *     dt_strtok_flip(tmp, len)
> + *
> + *     // find actual length
> + *     len = bpf_probe_read_str(dst, STRSZ + 1 - bgn, tmp + bgn)
> + *     if (len s<= 0) goto Lnull
> + *     len--
> + *
> + *     // adjust bgn to account for strtok offset
> + *     bgn += *((uint64_t*)str)
> + *
> + *     // copy str + bgn to destination
> + *     dt_strlen_store(len, dst)
> + *     bpf_probe_read(dst + DT_STRLEN_BYTES, len, str + 8 + bgn)
> + *     dst[DT_STRLEN_BYTES + len] = '\0'
> + *
> + *     // update the 8-byte prefix (strtok offset)
> + *     bgn += len
> + *     *((uint64_t*)str) = bgn
> + *
> + *     return dst
> + *
> + * Lnull:
> + *     // advance the strtok offset to the end and return 0
> + *     *((uint64_t*)str) = end
> + *     return 0
> + * }
> + */
> +
> +	.align	4
> +	.global	dt_strtok
> +	.type	dt_strtok, @function
> +dt_strtok:
> +/* the stack has the delimiter table starting at %fp+-32 */
> +#define DST_X [%fp+-40]
> +#define END_X [%fp+-48]
> +#define STR %r6
> +#define DEL %r7
> +#define LEN %r8
> +#define TMP %r9
> +
> +	/* stash variables */
> +	stxdw	DST_X, %r1
> +	mov	STR, %r2
> +	mov	DEL, %r3
> +	mov	TMP, %r4
> +
> +	/* discard delimiter length prefix; we look for the NULL terminator anyhow */
> +	add	DEL, DT_STRLEN_BYTES	/* del += DT_STRLEN_BYTES */
> +
> +	/* len = roundup(STRSZ + 1, 8) */
> +	lddw	LEN, STRSZ
> +	add	LEN, 8
> +	and	LEN, -8			/* len = ((STRSZ + 1) + 7) & -8 */
> +
> +	/* zero the delimiter table (do here so we see the stack usage) */
> +	mov	%r1, 0
> +	stxdw	[%fp+- 8], %r1		/* *((uint64_t *)(*fp +  -8)) = 0 */
> +	stxdw	[%fp+-16], %r1		/* *((uint64_t *)(*fp + -16)) = 0 */
> +	stxdw	[%fp+-24], %r1		/* *((uint64_t *)(*fp + -24)) = 0 */
> +	stxdw	[%fp+-32], %r1		/* *((uint64_t *)(*fp + -32)) = 0 */
> +
> +	/* initialize the delimiter table */
> +	mov	%r1, TMP
> +	lddw	%r2, STRSZ
> +	add	%r2, 1
> +	mov	%r3, DEL
> +	call	BPF_FUNC_probe_read_str	/* bpf_probe_read_str(tmp, STRSZ + 1, del) */
> +#undef DEL
> +	mov	%r1, %fp
> +	mov	%r2, TMP
> +	mov	%r3, LEN
> +	call	dt_strtok_init_table	/* dt_strtok_init_table(fp, tmp, len) */
> +
> +	/* copy str into tmp */
> +	mov	%r1, TMP
> +	lddw	%r2, STRSZ
> +	add	%r2, 1
> +	mov	%r3, STR
> +	add	%r3, 8
> +	ldxdw	%r4, [STR+0]
> +	jsge	%r4, 0, 1
> +	mov	%r4, 0
> +	add	%r3, %r4
> +	call	BPF_FUNC_probe_read_str	/* end = bpf_probe_read_str(tmp, STRSZ + 1, str+8+*((uint64_t*)str)) */
> +	sub	%r0, 1			/* end-- */
> +	stxdw	END_X, %r0
> +
> +	/* check each byte against delimiter table */
> +	mov	%r1, %fp
> +	mov	%r2, TMP
> +	mov	%r3, LEN
> +	call	dt_strtok_lookup	/* dt_strtok_lookup(fp, tmp, len) */
> +
> +	/* find first non-delimiting char */
> +#define BGN %r7
> +	ldxdw	%r1, DST_X
> +	lddw	%r2, STRSZ
> +	add	%r2, 1
> +	mov	%r3, TMP
> +	call	BPF_FUNC_probe_read_str	/* bgn = bpf_probe_read_str(dst, STRSZ + 1, tmp) */
> +	mov	BGN, %r0
> +	jsle	BGN, 0, .Lnull		/* if (bgn s<= 0) goto Lnull */
> +	sub	BGN, 1			/* bgn-- */
> +
> +	/* make sure there is at least one char */
> +	ldxdw	%r1, END_X
> +	jge	BGN, %r1, .Lnull	/* if (bgn >= end) goto Lnull */
> +
> +	/* flip each byte */
> +	mov	%r1, TMP
> +	mov	%r2, LEN
> +	call	dt_strtok_flip		/* dt_strtok_flip(tmp, len) */
> +
> +	/* find actual length */
> +	ldxdw	%r1, DST_X
> +	lddw	%r2, STRSZ
> +	add	%r2, 1
> +	sub	%r2, BGN
> +	mov	%r3, TMP
> +	add	%r3, BGN
> +	call	BPF_FUNC_probe_read_str	/* len = bpf_probe_read_str(dst, STRSZ + 1 - bgn, tmp + bgn) */
> +	mov	LEN, %r0
> +	jsle	LEN, 0, .Lnull		/* if (len s<= 0) goto Lnull */
> +	sub	LEN, 1			/* len-- */
> +
> +	/* adjust bgn to account for strtok offset */
> +	ldxdw	%r1, [STR+0]
> +	add	BGN, %r1		/* bgn += *((uint64_t*)str) */
> +
> +	/* (help the BPF verifier) */
> +	jsge	BGN, 0, 1
> +	mov	BGN, 0
> +
> +	/* copy str + bgn to destination */
> +	mov	%r1, LEN
> +	ldxdw	%r2, DST_X
> +	call	dt_strlen_store		/* dt_strlen_store(len, dst) */
> +
> +	ldxdw	%r1, DST_X
> +	add	%r1, DT_STRLEN_BYTES
> +	mov	%r2, LEN
> +	mov	%r3, STR
> +	add	%r3, 8
> +	add	%r3, BGN
> +	call	BPF_FUNC_probe_read	/* bpf_probe_read(dst + DT_STRLEN_BYTES, len, str + 8 + bgn) */
> +
> +	mov	%r1, 0
> +	ldxdw	%r2, DST_X
> +	add	%r2, LEN
> +	stxb	[%r2+DT_STRLEN_BYTES], %r1
> +					/* dst[DT_STRLEN_BYTES + len] = '\0' */
> +
> +	/* update the 8-byte prefix (strtok offset) */
> +	add	BGN, LEN		/* bgn += len */
> +	stxdw	[STR+0], BGN		/* *((uint64_t*)str) = bgn */
> +
> +	ldxdw	%r0, DST_X		/* return dst */
> +	exit
> +
> +	/* advance the strtok offset to the end and return 0 */
> +.Lnull:
> +	ldxdw	%r0, END_X
> +	stxdw	[STR+0], %r0		/* *((uint64_t*)str) = end */
> +	mov	%r0, 0			/* return 0 */
> +	exit
> +#undef STR
> +#undef TMP
> +#undef LEN
> +#undef BGN
> +#undef DST_X
> +	.size	dt_strtok, .-dt_strtok
> diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
> index 3be73809..4cf8e97f 100644
> --- a/libdtrace/dt_bpf.c
> +++ b/libdtrace/dt_bpf.c
> @@ -298,21 +298,19 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
>  	 *	- maximum trace buffer record size, rounded up to the
>  	 *	  multiple of 8
>  	 *	- the greater of:
> -	 *		+ the maximum stack trace size
> -	 *		+ four times the maximum string size (incl. length
> -	 *		  and allowing round up to multiple of 8)
> -	 *		  plus the maximum string size (to accomodate the BPF
> -	 *		  verifier)
> +	 *		- the maximum stack trace size
> +	 *		- the size for all tstrings
> +	 *	- internal state for strtok()
> +	 *		- the offset index
> +	 *		- the maximum string size
> +	 *		- a terminating NULL char

Please follow the style of the existing comments, i.e. the main point should be
a size like "size of the internal strtok() state:" and then describe the items
in that, indicating their sizes.  So, the offset index should state how large
that data item is, etc.

>  	 */
>  	memsz = roundup(sizeof(dt_mstate_t), 8) +
>  		8 +
>  		roundup(dtp->dt_maxreclen, 8) +
>  		MAX(sizeof(uint64_t) * dtp->dt_options[DTRACEOPT_MAXFRAMES],
> -		    DT_TSTRING_SLOTS *
> -			roundup(DT_STRLEN_BYTES +
> -				dtp->dt_options[DTRACEOPT_STRSIZE], 8) +
> -		    dtp->dt_options[DTRACEOPT_STRSIZE] + 1
> -		);
> +		    DT_TSTRING_SLOTS * dtp->dt_tstringlen) +

I don't think a dtp->dt_tstringlen is warranted.  And if it were, it should be
dtp->dt_strsize or something like that because tstrings are simply string
values.  The size is not specific to tstrings.

> +		sizeof(uint64_t) + dtp->dt_options[DTRACEOPT_STRSIZE] + 1;

So, the temporary source string storage does not need to be rounded up to the
nearest multiple of 8?  And no need to accomodate the trailing NUL byte?  Why?

>  	if (create_gmap(dtp, "mem", BPF_MAP_TYPE_PERCPU_ARRAY,
>  			sizeof(uint32_t), memsz, 1) == -1)
>  		return -1;		/* dt_errno is set for us */
> diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> index 99b4ac20..cd2a080b 100644
> --- a/libdtrace/dt_cg.c
> +++ b/libdtrace/dt_cg.c
> @@ -154,6 +154,11 @@ dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
>  	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, roundup(dtp->dt_maxreclen, 8)));
>  	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_FP, DCTX_FP(DCTX_MEM), BPF_REG_0));
>  
> +	/* store -1 ("uninitialized") in the offset prefix of strtok storage */
> +	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, dtp->dt_tstrtok));
> +	emit(dlp,  BPF_MOV_IMM(BPF_REG_1, -1));
> +	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_0, 0, BPF_REG_1));

It might require an extra instruction, but I think I would like it more if a
strtok state pointer were to be added to the mstate instead of having this
hardcoded offset approach in the code generator.

> +
>  	/*
>  	 * Store pointer to BPF map "name" in the DTrace context field "fld" at
>  	 * "offset".
> @@ -807,6 +812,16 @@ dt_cg_tstring_reset(dtrace_hdl_t *dtp)
>  	int		i;
>  	dt_tstring_t	*ts;
>  
> +	/*
> +	 * A tstring might be used to hold a string, along with its length
> +	 * prefix.  Alternatively, there may be a terminating NULL char.
> +	 *
> +	 * Some functions also process strings 8 chars at a time for
> +	 * greater efficiency.  So round up to a multiple of 8.
> +	 */
> +	dtp->dt_tstringlen = roundup(MAX(1, DT_STRLEN_BYTES) +
> +				     dtp->dt_options[DTRACEOPT_STRSIZE], 8);

This is wrong,  There is no alternatively...  Strings have a size prefix, their
character bytes, and a terminating NUL byte.  Anyway, as mentioned above, I am
posting a patch to put this size calculation outside the loop, but not as a
dtp member.  There may be a case to be made for a dtp->dt_strsize since we do
use the DT_STRLEN_BYTES + dtp->dt_options[DTRACEOPT_STRSIZE] + 1 value in
various places.  All probably should be rounded up to the nearest multiple of 8
since some of your functions want to go through the string in blocks of 8 bytes.

> +
>  	if (dtp->dt_tstrings == NULL) {
>  		dtp->dt_tstrings = dt_calloc(dtp, DT_TSTRING_SLOTS,
>  					    sizeof(dt_tstring_t));
> @@ -815,9 +830,7 @@ dt_cg_tstring_reset(dtrace_hdl_t *dtp)
>  
>  		ts = dtp->dt_tstrings;
>  		for (i = 0; i < DT_TSTRING_SLOTS; i++, ts++)
> -			ts->offset = i *
> -				roundup(DT_STRLEN_BYTES +
> -					dtp->dt_options[DTRACEOPT_STRSIZE], 8);
> +			ts->offset = i * dtp->dt_tstringlen;
>  	}
>  
>  	ts = dtp->dt_tstrings;
> @@ -3772,6 +3785,99 @@ dt_cg_subr_strstr(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
>  	TRACE_REGSET("    subr-strstr:End  ");
>  }
>  
> +static void
> +dt_cg_subr_strtok(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
> +{
> +	dtrace_hdl_t	*dtp = yypcb->pcb_hdl;
> +	dt_node_t	*s1 = dnp->dn_args;
> +	dt_node_t	*s2 = s1->dn_list;

More descriptive names would be much better.  These are not just two arbitrary
strings  The first is the source string and the second is the delimiter list.
Using the name from the doc would be useful: str and delim.

> +	dt_ident_t	*idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_strtok");
> +	uint64_t	off;
> +	uint_t		Lhave_str = dt_irlist_label(dlp);
> +	int		reg;
> +
> +	assert(idp != NULL);
> +
> +	TRACE_REGSET("    subr-strtok:Begin");
> +
> +	/* get string and copy internally if not NULL */
> +	/* (the 8-byte prefix is the offset, which we initialize to 0) */
> +	dt_cg_node(s1, dlp, drp);
> +	emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, s1->dn_reg, 0, Lhave_str));

Lhave_str seems to be a strange label to use for the case where str == NULL.
Perhaps using something like Lkeep_str or Lnext_token or something that
indicates this is the case where we have the string from a previous invocation.

> +	if (dt_regset_xalloc_args(drp) == -1)
> +		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> +	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_FP, DT_STK_DCTX));
> +	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_1, DCTX_MEM));
> +	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, dtp->dt_tstrtok));

See earlier comment about using mstate (similar to the legacy version).  Also,
it would make for nicer code to allocate a reg to store the ptr to the strtok
state rather than calculating it here in %r1 and then doing that again further
down.

> +	emit(dlp,  BPF_STORE_IMM(BPF_DW, BPF_REG_1, 0, 0));
> +	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8));
> +	emit(dlp,  BPF_MOV_IMM(BPF_REG_2, dtp->dt_options[DTRACEOPT_STRSIZE] + 1));
> +	emit(dlp,  BPF_MOV_REG(BPF_REG_3, s1->dn_reg));
> +	dt_regset_free(drp, s1->dn_reg);
> +	if (s1->dn_tstring)
> +		dt_cg_tstring_free(yypcb, s1);
> +	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, DT_STRLEN_BYTES));
> +	dt_regset_xalloc(drp, BPF_REG_0);
> +	emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_probe_read_str));
> +	dt_regset_free(drp, BPF_REG_0);
> +	dt_regset_free_args(drp);
> +	emitl(dlp, Lhave_str,
> +		   BPF_NOP());
> +
> +	/* make sure strtok internal storage is initialized; that is, not -1 */
> +	reg = dt_regset_alloc(drp);
> +	emit(dlp,  BPF_LOAD(BPF_DW, reg, BPF_REG_FP, DT_STK_DCTX));
> +	emit(dlp,  BPF_LOAD(BPF_DW, reg, reg, DCTX_MEM));
> +	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, reg, dtp->dt_tstrtok));

See comment above how this storing of ptr to the strtok state to a reg should
be done above and then used here rather than calculating it twice.

> +	emit(dlp,  BPF_LOAD(BPF_DW, reg, reg, 0));
> +	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, reg, 1));
> +	dt_cg_check_notnull(dlp, drp, reg);
> +	dt_regset_free(drp, reg);
> +
> +	/* get delimiters */
> +	dt_cg_node(s2, dlp, drp);
> +	dt_cg_check_notnull(dlp, drp, s2->dn_reg);
> +
> +	/* allocate temporary string for result */
> +	dnp->dn_reg = dt_regset_alloc(drp);
> +	if (dnp->dn_reg == -1)
> +		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> +	dt_cg_tstring_alloc(yypcb, dnp);
> +	emit(dlp,  BPF_LOAD(BPF_DW, dnp->dn_reg, BPF_REG_FP, DT_STK_DCTX));
> +	emit(dlp,  BPF_LOAD(BPF_DW, dnp->dn_reg, dnp->dn_reg, DCTX_MEM));
> +	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, dnp->dn_reg, dnp->dn_tstring->dn_value));
> +
> +	/* allocate temporary string for internal purposes */
> +	off = dt_cg_tstring_xalloc(yypcb);
> +
> +	/* function call */
> +
> +	if (dt_regset_xalloc_args(drp) == -1)
> +		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> +
> +	emit(dlp,  BPF_MOV_REG(BPF_REG_1, dnp->dn_reg));
> +	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_2, BPF_REG_FP, DT_STK_DCTX));
> +	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_2, BPF_REG_2, DCTX_MEM));
> +	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, dtp->dt_tstrtok));

Third time you calculate this.

> +	emit(dlp,  BPF_MOV_REG(BPF_REG_3, s2->dn_reg));
> +	dt_regset_free(drp, s2->dn_reg);
> +	if (s2->dn_tstring)
> +		dt_cg_tstring_free(yypcb, s2);
> +
> +	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_FP, DT_STK_DCTX));
> +	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_4, DCTX_MEM));
> +	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, off));
> +
> +	dt_regset_xalloc(drp, BPF_REG_0);
> +	emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
> +	dt_regset_free_args(drp);
> +	dt_cg_tstring_xfree(yypcb, off);
> +	emit(dlp,  BPF_MOV_REG(dnp->dn_reg, BPF_REG_0));
> +	dt_regset_free(drp, BPF_REG_0);
> +
> +	TRACE_REGSET("    subr-strtok:End  ");
> +}
> +
>  static void
>  dt_cg_subr_substr(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
>  {
> @@ -3886,7 +3992,7 @@ static dt_cg_subr_f *_dt_cg_subr[DIF_SUBR_MAX + 1] = {
>  	[DIF_SUBR_STRCHR]		= &dt_cg_subr_strchr,
>  	[DIF_SUBR_STRRCHR]		= &dt_cg_subr_strrchr,
>  	[DIF_SUBR_STRSTR]		= &dt_cg_subr_strstr,
> -	[DIF_SUBR_STRTOK]		= NULL,
> +	[DIF_SUBR_STRTOK]		= &dt_cg_subr_strtok,
>  	[DIF_SUBR_SUBSTR]		= &dt_cg_subr_substr,
>  	[DIF_SUBR_INDEX]		= &dt_cg_subr_index,
>  	[DIF_SUBR_RINDEX]		= &dt_cg_subr_rindex,
> @@ -5676,17 +5782,21 @@ dt_cg(dt_pcb_t *pcb, dt_node_t *dnp)
>  {
>  	dt_xlator_t	*dxp = NULL;
>  	dt_node_t	*act;
> +	dtrace_hdl_t	*dtp = pcb->pcb_hdl;
>  
>  	if (pcb->pcb_regs == NULL) {
>  		pcb->pcb_regs = dt_regset_create(
> -					pcb->pcb_hdl->dt_conf.dtc_difintregs,
> +					dtp->dt_conf.dtc_difintregs,
>  					dt_cg_spill_store, dt_cg_spill_load);
>  		if (pcb->pcb_regs == NULL)
>  			longjmp(pcb->pcb_jmpbuf, EDT_NOMEM);
>  	}
>  
>  	dt_regset_reset(pcb->pcb_regs);
> -	dt_cg_tstring_reset(pcb->pcb_hdl);
> +	dt_cg_tstring_reset(dtp);
> +
> +	dtp->dt_tstrtok = MAX(DT_TSTRING_SLOTS * dtp->dt_tstringlen,
> +	    sizeof(uint64_t) * dtp->dt_options[DTRACEOPT_MAXFRAMES]);

This should not be done for every dt_cg invocation.  It should be done once for
all compilations in a single dtrace invocation..

>  
>  	dt_irlist_destroy(&pcb->pcb_ir);
>  	dt_irlist_create(&pcb->pcb_ir);
> diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
> index 75c00e9e..50acc0dc 100644
> --- a/libdtrace/dt_dlibs.c
> +++ b/libdtrace/dt_dlibs.c
> @@ -66,6 +66,7 @@ static const dt_ident_t		dt_bpf_symbols[] = {
>  	DT_BPF_SYMBOL(dt_strcmp, DT_IDENT_SYMBOL),
>  	DT_BPF_SYMBOL(dt_strjoin, DT_IDENT_SYMBOL),
>  	DT_BPF_SYMBOL(dt_strrchr, DT_IDENT_SYMBOL),
> +	DT_BPF_SYMBOL(dt_strtok, DT_IDENT_SYMBOL),
>  	DT_BPF_SYMBOL(dt_substr, DT_IDENT_SYMBOL),
>  	DT_BPF_SYMBOL(dt_speculation, DT_IDENT_SYMBOL),
>  	DT_BPF_SYMBOL(dt_speculation_speculate, DT_IDENT_SYMBOL),
> diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
> index b461d73d..5682edec 100644
> --- a/libdtrace/dt_impl.h
> +++ b/libdtrace/dt_impl.h
> @@ -296,6 +296,8 @@ struct dtrace_hdl {
>  	uint_t dt_maxreclen;	/* largest record size across programs */
>  	uint_t dt_maxlvaralloc;	/* largest lvar alloc across pcbs */
>  	dt_tstring_t *dt_tstrings; /* temporary string slots */
> +	size_t dt_tstringlen;	/* length of a tstring */

Not needed.

> +	uint64_t dt_tstrtok;	/* offset to internal state for strtok() calls */

I think it would make more sense to put the pointer to strtok data in mstate.

>  	dt_list_t dt_modlist;	/* linked list of dt_module_t's */
>  	dt_module_t **dt_mods;	/* hash table of dt_module_t's */
>  	uint_t dt_modbuckets;	/* number of module hash buckets */
> diff --git a/test/stress/fbtsafety/tst.shortstr.d b/test/stress/fbtsafety/tst.shortstr.d
> index 714d4833..dbf71b07 100644
> --- a/test/stress/fbtsafety/tst.shortstr.d
> +++ b/test/stress/fbtsafety/tst.shortstr.d
> @@ -4,7 +4,6 @@
>   * Licensed under the Universal Permissive License v 1.0 as shown at
>   * http://oss.oracle.com/licenses/upl.
>   */
> -/* @@xfail: dtv2 */
>  
>  #pragma D option quiet
>  #pragma D option strsize=16
> diff --git a/test/unittest/funcs/tst.strtok.d b/test/unittest/funcs/strtok/tst.strtok.d
> similarity index 100%
> rename from test/unittest/funcs/tst.strtok.d
> rename to test/unittest/funcs/strtok/tst.strtok.d
> diff --git a/test/unittest/funcs/tst.strtok.r b/test/unittest/funcs/strtok/tst.strtok.r
> similarity index 100%
> rename from test/unittest/funcs/tst.strtok.r
> rename to test/unittest/funcs/strtok/tst.strtok.r
> diff --git a/test/unittest/funcs/strtok/tst.strtok2.d b/test/unittest/funcs/strtok/tst.strtok2.d
> new file mode 100644
> index 00000000..14f34043
> --- /dev/null
> +++ b/test/unittest/funcs/strtok/tst.strtok2.d
> @@ -0,0 +1,39 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +#pragma D option quiet
> +
> +BEGIN
> +{
> +	x = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
> +	y = "abcABC";
> +	printf("%s\n", strtok(x, y));
> +	y = "MNO";
> +	printf("%s\n", strtok(NULL, y));
> +	printf("%s\n", strtok(NULL, y));
> +	y = "0123456789";
> +	printf("%s\n", strtok(NULL, y));
> +	printf("%s\n", strtok(NULL, y) == NULL ? "got expected NULL" : "ERROR: non-NULL");
> +
> +	x = "";
> +	y = "";
> +	printf("%s\n", strtok(x, y) == NULL ? "got expected NULL" : "ERROR: non-NULL");
> +
> +	x = "foo";
> +	printf("%s\n", strtok(x, y));
> +
> +	x = "";
> +	y = "foo";
> +	printf("%s\n", strtok(x, y) == NULL ? "got expected NULL" : "ERROR: non-NULL");
> +
> +	exit(0);
> +}
> +
> +ERROR
> +{
> +	exit(1);
> +}
> diff --git a/test/unittest/funcs/strtok/tst.strtok2.r b/test/unittest/funcs/strtok/tst.strtok2.r
> new file mode 100644
> index 00000000..c58ea336
> --- /dev/null
> +++ b/test/unittest/funcs/strtok/tst.strtok2.r
> @@ -0,0 +1,9 @@
> +defghijklmnopqrstuvwxyz
> +ABCDEFGHIJKL
> +PQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKL
> +MNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
> +got expected NULL
> +got expected NULL
> +foo
> +got expected NULL
> +
> diff --git a/test/unittest/funcs/strtok/tst.strtok_long.d b/test/unittest/funcs/strtok/tst.strtok_long.d
> new file mode 100644
> index 00000000..1fbe415d
> --- /dev/null
> +++ b/test/unittest/funcs/strtok/tst.strtok_long.d
> @@ -0,0 +1,35 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +#pragma D option quiet
> +
> +BEGIN
> +{
> +	/* 256-char string ending in "XYZ" */
> +	x = "_____________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________XYZ";
> +
> +	/* check whether the last char of a long string is seen */
> +	y = "a";
> +	printf("%s\n", strtok(x, y));
> +
> +	/* check whether the last char of a long delimiter string is seen */
> +	z = "zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA";
> +	printf("%s\n", strtok(z, x));
> +
> +	/* check that strtok internal state can handle a large offset */
> +	y = "Z";
> +	printf("%s\n", strtok(x, y));
> +	y = "a";
> +	printf("%s\n", strtok(NULL, y));
> +
> +	exit(0);
> +}
> +
> +ERROR
> +{
> +	exit(1);
> +}
> diff --git a/test/unittest/funcs/strtok/tst.strtok_long.r b/test/unittest/funcs/strtok/tst.strtok_long.r
> new file mode 100644
> index 00000000..a752554e
> --- /dev/null
> +++ b/test/unittest/funcs/strtok/tst.strtok_long.r
> @@ -0,0 +1,5 @@
> +_____________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________XYZ
> +zyxwvutsrqponmlkjihgfedcba
> +_____________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________XY
> +Z
> +
> diff --git a/test/unittest/funcs/tst.strtok_null.d b/test/unittest/funcs/strtok/tst.strtok_null.d
> similarity index 79%
> rename from test/unittest/funcs/tst.strtok_null.d
> rename to test/unittest/funcs/strtok/tst.strtok_null.d
> index 45301a78..1d536d60 100644
> --- a/test/unittest/funcs/tst.strtok_null.d
> +++ b/test/unittest/funcs/strtok/tst.strtok_null.d
> @@ -1,10 +1,9 @@
>  /*
>   * Oracle Linux DTrace.
> - * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
> + * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
>   * Licensed under the Universal Permissive License v 1.0 as shown at
>   * http://oss.oracle.com/licenses/upl.
>   */
> -/* @@xfail: dtv2 */
>  
>  /*
>   * Test that a strtok(NULL, ...) without first calling strtok(string, ...)
> diff --git a/test/unittest/funcs/strtok/tst.strtok_null.r b/test/unittest/funcs/strtok/tst.strtok_null.r
> new file mode 100644
> index 00000000..4002d6ef
> --- /dev/null
> +++ b/test/unittest/funcs/strtok/tst.strtok_null.r
> @@ -0,0 +1,6 @@
> +                   FUNCTION:NAME
> +                          :ERROR 
> +
> +-- @@stderr --
> +dtrace: script 'test/unittest/funcs/strtok/tst.strtok_null.d' matched 2 probes
> +dtrace: error on enabled probe ID 3 (ID 1: dtrace:::BEGIN): invalid address ({ptr}) in action #1
> diff --git a/test/unittest/funcs/tst.strtok_null.r b/test/unittest/funcs/tst.strtok_null.r
> deleted file mode 100644
> index a1f82f91..00000000
> --- a/test/unittest/funcs/tst.strtok_null.r
> +++ /dev/null
> @@ -1,6 +0,0 @@
> -                   FUNCTION:NAME
> -                          :ERROR 
> -
> --- @@stderr --
> -dtrace: script 'test/unittest/funcs/tst.strtok_null.d' matched 2 probes
> -dtrace: error on enabled probe ID 1 (ID 1: dtrace:::BEGIN): invalid address ({ptr}) in action #1 at DIF offset 40
> -- 
> 2.18.4
> 
> 
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel



More information about the DTrace-devel mailing list