[DTrace-devel] [PATCH] Add support for strtok() subroutine
eugene.loh at oracle.com
eugene.loh at oracle.com
Mon Nov 8 19:50:44 UTC 2021
From: Eugene Loh <eugene.loh at oracle.com>
Note that the space for tstrings used to allocate an extra tstring at
the end in order to pacify the BPF verifier. Use this extra space
for the internal storage needed by strtok() between calls. Since a
stack() call might occur between strtok() calls, this space can no
longer overlap the stack scratch space.
Compute the tstring length once -- in dt_cg_tstring_reset -- so that
its value does not need to be maintained in multiple places.
Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
bpf/Build | 1 +
bpf/strtok.S | 498 ++++++++++++++++++
libdtrace/dt_bpf.c | 18 +-
libdtrace/dt_cg.c | 122 ++++-
libdtrace/dt_dlibs.c | 1 +
libdtrace/dt_impl.h | 2 +
test/stress/fbtsafety/tst.shortstr.d | 1 -
test/unittest/funcs/{ => strtok}/tst.strtok.d | 0
test/unittest/funcs/{ => strtok}/tst.strtok.r | 0
test/unittest/funcs/strtok/tst.strtok2.d | 39 ++
test/unittest/funcs/strtok/tst.strtok2.r | 9 +
test/unittest/funcs/strtok/tst.strtok_long.d | 35 ++
test/unittest/funcs/strtok/tst.strtok_long.r | 5 +
.../funcs/{ => strtok}/tst.strtok_null.d | 3 +-
test/unittest/funcs/strtok/tst.strtok_null.r | 6 +
test/unittest/funcs/tst.strtok_null.r | 6 -
16 files changed, 721 insertions(+), 25 deletions(-)
create mode 100644 bpf/strtok.S
rename test/unittest/funcs/{ => strtok}/tst.strtok.d (100%)
rename test/unittest/funcs/{ => strtok}/tst.strtok.r (100%)
create mode 100644 test/unittest/funcs/strtok/tst.strtok2.d
create mode 100644 test/unittest/funcs/strtok/tst.strtok2.r
create mode 100644 test/unittest/funcs/strtok/tst.strtok_long.d
create mode 100644 test/unittest/funcs/strtok/tst.strtok_long.r
rename test/unittest/funcs/{ => strtok}/tst.strtok_null.d (79%)
create mode 100644 test/unittest/funcs/strtok/tst.strtok_null.r
delete mode 100644 test/unittest/funcs/tst.strtok_null.r
diff --git a/bpf/Build b/bpf/Build
index 5c1bc235..d8279411 100644
--- a/bpf/Build
+++ b/bpf/Build
@@ -35,6 +35,7 @@ bpf_dlib_SOURCES = \
strjoin.S \
strlen.c \
strrchr.S \
+ strtok.S \
substr.S
bpf-check: $(objdir)/include/.dir.stamp
diff --git a/bpf/strtok.S b/bpf/strtok.S
new file mode 100644
index 00000000..9b2563e0
--- /dev/null
+++ b/bpf/strtok.S
@@ -0,0 +1,498 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#define DT_STRLEN_BYTES 2
+
+#define BPF_FUNC_probe_read 4
+#define BPF_FUNC_probe_read_str 45
+
+ .text
+
+/*
+ * The algorithm stores information about the delimiters in a table for
+ * 256 ASCII chars, each table entry a single bit indicating whether the
+ * char is a delimiter. This version stores the 256-bit (32-byte) table
+ * on the stack as four, 8-byte bins. The bit for char c is bit c&63 in
+ * bin c>>6. The bins are stored at:
+ * bin 0 [TAB+- 8]
+ * bin 1 [TAB+-16]
+ * bin 2 [TAB+-24]
+ * bin 3 [TAB+-32]
+ * where TAB will be dt_strtok()'s FP.
+ */
+
+/* ================================================== */
+
+/*
+ * // Initialize the delimiter table (place a nonzero bit for each delimiter found).
+ * // Note that:
+ * // - we expect the delimiter string to be NULL-terminated
+ * // - the NULL byte is always going to be set in the delimiter table
+ * // - we also pass a "length" argument in to help the BPF verifier
+ * void dt_strtok_init_table(uint64_t *tab, char *del, uint64_t len)
+ * {
+ * idx = 0
+ * Linit_table:
+ * if (idx >= len) return
+ * chr = del[idx]
+ * bit = (1 << (chr & 63))
+ * bin = chr >> 6
+ * tab[bin] |= bit
+ * if (chr == '\0') return
+ * idx++
+ * goto Linit_table
+ * }
+ */
+ .align 4
+ .global dt_strtok_init_table
+ .type dt_strtok_init_table, @function
+dt_strtok_init_table:
+#define TAB %r1
+#define DEL %r2
+#define LEN %r3
+#define IDX %r4
+#define CHR %r5
+#define BIN %r6
+#define BIT %r7
+#define TMP %r8
+#define USE %r9
+ mov IDX, 0 /* idx = 0 */
+
+.Linit_table:
+ jlt IDX, LEN, 1 /* if (idx >= len) return */
+ exit
+
+ mov CHR, DEL
+ add CHR, IDX
+ ldxb CHR, [CHR+0] /* chr = del[idx] */
+ and CHR, 0xff
+
+ mov BIT, 1 /* bit = (1 << (chr & 63)) */
+ mov TMP, CHR
+ and TMP, 63
+ lsh BIT, TMP
+
+ mov BIN, CHR /* bin = chr >> 6 */
+ rsh BIN, 6
+
+ /* tab[bin] |= bit */
+ /*
+ * Since the table is on the stack and we cannot use variable
+ * offsets into the stack, we iterate over all four bins:
+ * tab[- 8] |= (0 == bin ? 1 : 0) * bit
+ * tab[-16] |= (1 == bin ? 1 : 0) * bit
+ * tab[-24] |= (2 == bin ? 1 : 0) * bit
+ * tab[-32] |= (3 == bin ? 1 : 0) * bit
+ *
+ * Further, since the BPF verifier is easily stymied by conditional
+ * branching, we compute the conditionalized operators with:
+ * // try == bin try != bin
+ * use = (try ^ bin) // use == 0 use > 0
+ * use-- // use < 0 use >= 0
+ * use >>= 63 // use == 1 use == 0
+ * tab[off] |= use * bit
+ */
+.macro macro_init try off
+ mov USE, BIN
+ xor USE, \try
+ sub USE, 1
+ rsh USE, 63
+ mul USE, BIT
+
+ ldxdw TMP, [TAB+\off]
+ or TMP, USE
+ stxdw [TAB+\off], TMP
+.endm
+ macro_init 0,-8
+ macro_init 1,-16
+ macro_init 2,-24
+ macro_init 3,-32
+
+ jne CHR, 0, 1 /* if (chr == '\0') return */
+ exit
+
+ add IDX, 1 /* idx++ */
+
+ ja .Linit_table /* goto Linit_table */
+#undef TAB
+#undef DEL
+#undef LEN
+#undef IDX
+#undef CHR
+#undef BIN
+#undef BIT
+#undef TMP
+#undef USE
+ .size dt_strtok_init_table, .-dt_strtok_init_table
+
+/* ================================================== */
+
+/*
+ * // Look up each byte, replacing it by 1 or 0 using the table.
+ * void dt_strtok_lookup(uint64_t *tab, char *str, uint64_t len)
+ * {
+ * idx = 0
+ * Llookup:
+ * if (idx >= len) return
+ * ptr = &str[idx]
+ * bin = *ptr >> 6
+ * val = tab[bin]
+ * val >>= *ptr & 63
+ * val &= 1
+ * *ptr = val
+ * idx++
+ * goto Llookup
+ * }
+ */
+ .align 4
+ .global dt_strtok_lookup
+ .type dt_strtok_lookup, @function
+dt_strtok_lookup:
+
+#define VAL %r0
+#define TAB %r1
+#define STR %r2
+#define LEN %r3
+#define BIN %r4
+#define IDX %r5
+#define PTR %r6
+#define TMP %r7
+#define USE %r8
+
+ mov IDX, 0 /* idx = 0 */
+
+.Llookup:
+ jlt IDX, LEN, 1 /* if (idx >= len) return */
+ exit
+
+ mov PTR, STR
+ add PTR, IDX /* ptr = &str[idx] */
+
+ ldxb BIN, [PTR+0]
+ and BIN, 255
+ rsh BIN, 6 /* bin = *ptr >> 6 */
+
+ /* val = tab[bin] */
+ /*
+ * The tab[bin] lookup is tricky for the same reasons as
+ * described for dt_strtok_init_table(). This time, we use
+ * val = 0
+ * val += (0 == bin ? 1 : 0) * tab[- 8]
+ * val += (1 == bin ? 1 : 0) * tab[-16]
+ * val += (2 == bin ? 1 : 0) * tab[-24]
+ * val += (3 == bin ? 1 : 0) * tab[-32]
+ * with each conditionalized operator replaced by
+ * // try == bin try != bin
+ * use = (try ^ bin) // use == 0 use > 0
+ * use-- // use < 0 use >= 0
+ * use >>= 63 // use == 1 use == 0
+ * val += use * tab[off]
+ */
+ mov VAL, 0 /* val = 0 */
+.macro macro_look try off
+ mov USE, \try
+ xor USE, BIN /* use = try ^ bin */
+ sub USE, 1 /* use-- */
+ rsh USE, 63 /* use >>= 63 */
+ ldxdw TMP, [TAB+\off]
+ mul TMP, USE
+ add VAL, TMP /* val += use * tab[try] */
+.endm
+ macro_look 0,-8
+ macro_look 1,-16
+ macro_look 2,-24
+ macro_look 3,-32
+
+ ldxb TMP, [PTR+0]
+ and TMP, 63
+ rsh VAL, TMP /* val >>= *ptr & 63 */
+
+ and VAL, 1 /* val &= 1 */
+
+ stxb [PTR+0], VAL /* *ptr = val */
+
+ add IDX, 1 /* idx++ */
+
+ ja .Llookup /* goto Llookup */
+#undef VAL
+#undef TAB
+#undef STR
+#undef LEN
+#undef BIN
+#undef IDX
+#undef PTR
+#undef TMP
+#undef USE
+ .size dt_strtok_lookup, .-dt_strtok_lookup
+
+/* ================================================== */
+
+/*
+ * // Each byte is 1 or 0, flip its value.
+ * // Do 8 bytes at a time; len must be a multiple of 8.
+ * void dt_strtok_flip(char *str, uint64_t len)
+ * {
+ * msk = 0x0101010101010101
+ * idx = 0
+ * Lflip:
+ * if (idx >= len) return
+ * ptr = (uint64_t*)&str[idx]
+ * val = *ptr
+ * val ^= msk
+ * *ptr = val
+ * idx += 8
+ * goto Lflip
+ * }
+ */
+ .align 4
+ .global dt_strtok_flip
+ .type dt_strtok_flip, @function
+dt_strtok_flip:
+
+#define STR %r1
+#define LEN %r2
+#define MSK %r3
+#define IDX %r4
+#define PTR %r5
+#define VAL %r6
+
+ lddw MSK, 0x0101010101010101 /* msk = 0x0101010101010101 */
+ mov IDX, 0 /* idx = 0 */
+
+.Lflip:
+ jlt IDX, LEN, 1 /* if (idx >= siz) return */
+ exit
+
+ mov PTR, STR
+ add PTR, IDX /* ptr = (uint64_t*)&str[idx] */
+
+ ldxdw VAL, [PTR+0] /* val = *ptr */
+ xor VAL, MSK /* val ^= msk */
+ stxdw [PTR+0], VAL /* *ptr = val */
+
+ add IDX, 8 /* idx += 8 */
+
+ ja .Lflip /* goto Lflip */
+#undef STR
+#undef LEN
+#undef MSK
+#undef IDX
+#undef PTR
+#undef VAL
+ .size dt_strtok_flip, .-dt_strtok_flip
+
+/* ================================================== */
+
+/*
+ * // str has an 8-byte prefix giving the offset to start at.
+ * // So the actual string starts at str+8+*((uint64_t*)str).
+ * void dt_strtok(char *dst, char *str, const char *del, char *tmp)
+ * {
+ * // discard delimiter length prefix; we look for the NULL terminator anyhow
+ * del += DT_STRLEN_BYTES
+ *
+ * // len = roundup(STRSZ + 1, 8)
+ * len = ((STRSZ + 1) + 7) & -8
+ *
+ * // zero the delimiter table (do here so we see the stack usage)
+ * *((uint64_t *)(*fp + -8)) = 0
+ * *((uint64_t *)(*fp + -16)) = 0
+ * *((uint64_t *)(*fp + -24)) = 0
+ * *((uint64_t *)(*fp + -32)) = 0
+ *
+ * // initialize the delimiter table
+ * // Note:
+ * // - we provide "len" as a hint to the BPF verifier
+ * // - we copy "del" into "tmp" to guarantee "len" room
+ * bpf_probe_read_str(tmp, STRSZ + 1, del)
+ * dt_strtok_init_table(fp, tmp, len)
+ *
+ * // copy str into tmp
+ * end = bpf_probe_read_str(tmp, STRSZ + 1, str+8+*((uint64_t*)str))
+ * end--
+ *
+ * // check each byte against delimiter table
+ * dt_strtok_lookup(fp, tmp, len)
+ *
+ * // find first non-delimiting char
+ * bgn = bpf_probe_read_str(dst, STRSZ + 1, tmp)
+ * if (bgn s<= 0) goto Lnull
+ * bgn--
+ *
+ * // make sure there is at least one char
+ * if (bgn >= end) goto Lnull
+ *
+ * // flip each byte
+ * dt_strtok_flip(tmp, len)
+ *
+ * // find actual length
+ * len = bpf_probe_read_str(dst, STRSZ + 1 - bgn, tmp + bgn)
+ * if (len s<= 0) goto Lnull
+ * len--
+ *
+ * // adjust bgn to account for strtok offset
+ * bgn += *((uint64_t*)str)
+ *
+ * // copy str + bgn to destination
+ * dt_strlen_store(len, dst)
+ * bpf_probe_read(dst + DT_STRLEN_BYTES, len, str + 8 + bgn)
+ * dst[DT_STRLEN_BYTES + len] = '\0'
+ *
+ * // update the 8-byte prefix (strtok offset)
+ * bgn += len
+ * *((uint64_t*)str) = bgn
+ *
+ * return dst
+ *
+ * Lnull:
+ * // advance the strtok offset to the end and return 0
+ * *((uint64_t*)str) = end
+ * return 0
+ * }
+ */
+
+ .align 4
+ .global dt_strtok
+ .type dt_strtok, @function
+dt_strtok:
+/* the stack has the delimiter table starting at %fp+-32 */
+#define DST_X [%fp+-40]
+#define END_X [%fp+-48]
+#define STR %r6
+#define DEL %r7
+#define LEN %r8
+#define TMP %r9
+
+ /* stash variables */
+ stxdw DST_X, %r1
+ mov STR, %r2
+ mov DEL, %r3
+ mov TMP, %r4
+
+ /* discard delimiter length prefix; we look for the NULL terminator anyhow */
+ add DEL, DT_STRLEN_BYTES /* del += DT_STRLEN_BYTES */
+
+ /* len = roundup(STRSZ + 1, 8) */
+ lddw LEN, STRSZ
+ add LEN, 8
+ and LEN, -8 /* len = ((STRSZ + 1) + 7) & -8 */
+
+ /* zero the delimiter table (do here so we see the stack usage) */
+ mov %r1, 0
+ stxdw [%fp+- 8], %r1 /* *((uint64_t *)(*fp + -8)) = 0 */
+ stxdw [%fp+-16], %r1 /* *((uint64_t *)(*fp + -16)) = 0 */
+ stxdw [%fp+-24], %r1 /* *((uint64_t *)(*fp + -24)) = 0 */
+ stxdw [%fp+-32], %r1 /* *((uint64_t *)(*fp + -32)) = 0 */
+
+ /* initialize the delimiter table */
+ mov %r1, TMP
+ lddw %r2, STRSZ
+ add %r2, 1
+ mov %r3, DEL
+ call BPF_FUNC_probe_read_str /* bpf_probe_read_str(tmp, STRSZ + 1, del) */
+#undef DEL
+ mov %r1, %fp
+ mov %r2, TMP
+ mov %r3, LEN
+ call dt_strtok_init_table /* dt_strtok_init_table(fp, tmp, len) */
+
+ /* copy str into tmp */
+ mov %r1, TMP
+ lddw %r2, STRSZ
+ add %r2, 1
+ mov %r3, STR
+ add %r3, 8
+ ldxdw %r4, [STR+0]
+ jsge %r4, 0, 1
+ mov %r4, 0
+ add %r3, %r4
+ call BPF_FUNC_probe_read_str /* end = bpf_probe_read_str(tmp, STRSZ + 1, str+8+*((uint64_t*)str)) */
+ sub %r0, 1 /* end-- */
+ stxdw END_X, %r0
+
+ /* check each byte against delimiter table */
+ mov %r1, %fp
+ mov %r2, TMP
+ mov %r3, LEN
+ call dt_strtok_lookup /* dt_strtok_lookup(fp, tmp, len) */
+
+ /* find first non-delimiting char */
+#define BGN %r7
+ ldxdw %r1, DST_X
+ lddw %r2, STRSZ
+ add %r2, 1
+ mov %r3, TMP
+ call BPF_FUNC_probe_read_str /* bgn = bpf_probe_read_str(dst, STRSZ + 1, tmp) */
+ mov BGN, %r0
+ jsle BGN, 0, .Lnull /* if (bgn s<= 0) goto Lnull */
+ sub BGN, 1 /* bgn-- */
+
+ /* make sure there is at least one char */
+ ldxdw %r1, END_X
+ jge BGN, %r1, .Lnull /* if (bgn >= end) goto Lnull */
+
+ /* flip each byte */
+ mov %r1, TMP
+ mov %r2, LEN
+ call dt_strtok_flip /* dt_strtok_flip(tmp, len) */
+
+ /* find actual length */
+ ldxdw %r1, DST_X
+ lddw %r2, STRSZ
+ add %r2, 1
+ sub %r2, BGN
+ mov %r3, TMP
+ add %r3, BGN
+ call BPF_FUNC_probe_read_str /* len = bpf_probe_read_str(dst, STRSZ + 1 - bgn, tmp + bgn) */
+ mov LEN, %r0
+ jsle LEN, 0, .Lnull /* if (len s<= 0) goto Lnull */
+ sub LEN, 1 /* len-- */
+
+ /* adjust bgn to account for strtok offset */
+ ldxdw %r1, [STR+0]
+ add BGN, %r1 /* bgn += *((uint64_t*)str) */
+
+ /* (help the BPF verifier) */
+ jsge BGN, 0, 1
+ mov BGN, 0
+
+ /* copy str + bgn to destination */
+ mov %r1, LEN
+ ldxdw %r2, DST_X
+ call dt_strlen_store /* dt_strlen_store(len, dst) */
+
+ ldxdw %r1, DST_X
+ add %r1, DT_STRLEN_BYTES
+ mov %r2, LEN
+ mov %r3, STR
+ add %r3, 8
+ add %r3, BGN
+ call BPF_FUNC_probe_read /* bpf_probe_read(dst + DT_STRLEN_BYTES, len, str + 8 + bgn) */
+
+ mov %r1, 0
+ ldxdw %r2, DST_X
+ add %r2, LEN
+ stxb [%r2+DT_STRLEN_BYTES], %r1
+ /* dst[DT_STRLEN_BYTES + len] = '\0' */
+
+ /* update the 8-byte prefix (strtok offset) */
+ add BGN, LEN /* bgn += len */
+ stxdw [STR+0], BGN /* *((uint64_t*)str) = bgn */
+
+ ldxdw %r0, DST_X /* return dst */
+ exit
+
+ /* advance the strtok offset to the end and return 0 */
+.Lnull:
+ ldxdw %r0, END_X
+ stxdw [STR+0], %r0 /* *((uint64_t*)str) = end */
+ mov %r0, 0 /* return 0 */
+ exit
+#undef STR
+#undef TMP
+#undef LEN
+#undef BGN
+#undef DST_X
+ .size dt_strtok, .-dt_strtok
diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
index 3be73809..4cf8e97f 100644
--- a/libdtrace/dt_bpf.c
+++ b/libdtrace/dt_bpf.c
@@ -298,21 +298,19 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
* - maximum trace buffer record size, rounded up to the
* multiple of 8
* - the greater of:
- * + the maximum stack trace size
- * + four times the maximum string size (incl. length
- * and allowing round up to multiple of 8)
- * plus the maximum string size (to accomodate the BPF
- * verifier)
+ * - the maximum stack trace size
+ * - the size for all tstrings
+ * - internal state for strtok()
+ * - the offset index
+ * - the maximum string size
+ * - a terminating NULL char
*/
memsz = roundup(sizeof(dt_mstate_t), 8) +
8 +
roundup(dtp->dt_maxreclen, 8) +
MAX(sizeof(uint64_t) * dtp->dt_options[DTRACEOPT_MAXFRAMES],
- DT_TSTRING_SLOTS *
- roundup(DT_STRLEN_BYTES +
- dtp->dt_options[DTRACEOPT_STRSIZE], 8) +
- dtp->dt_options[DTRACEOPT_STRSIZE] + 1
- );
+ DT_TSTRING_SLOTS * dtp->dt_tstringlen) +
+ sizeof(uint64_t) + dtp->dt_options[DTRACEOPT_STRSIZE] + 1;
if (create_gmap(dtp, "mem", BPF_MAP_TYPE_PERCPU_ARRAY,
sizeof(uint32_t), memsz, 1) == -1)
return -1; /* dt_errno is set for us */
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index 99b4ac20..cd2a080b 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -154,6 +154,11 @@ dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, roundup(dtp->dt_maxreclen, 8)));
emit(dlp, BPF_STORE(BPF_DW, BPF_REG_FP, DCTX_FP(DCTX_MEM), BPF_REG_0));
+ /* store -1 ("uninitialized") in the offset prefix of strtok storage */
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, dtp->dt_tstrtok));
+ emit(dlp, BPF_MOV_IMM(BPF_REG_1, -1));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, 0, BPF_REG_1));
+
/*
* Store pointer to BPF map "name" in the DTrace context field "fld" at
* "offset".
@@ -807,6 +812,16 @@ dt_cg_tstring_reset(dtrace_hdl_t *dtp)
int i;
dt_tstring_t *ts;
+ /*
+ * A tstring might be used to hold a string, along with its length
+ * prefix. Alternatively, there may be a terminating NULL char.
+ *
+ * Some functions also process strings 8 chars at a time for
+ * greater efficiency. So round up to a multiple of 8.
+ */
+ dtp->dt_tstringlen = roundup(MAX(1, DT_STRLEN_BYTES) +
+ dtp->dt_options[DTRACEOPT_STRSIZE], 8);
+
if (dtp->dt_tstrings == NULL) {
dtp->dt_tstrings = dt_calloc(dtp, DT_TSTRING_SLOTS,
sizeof(dt_tstring_t));
@@ -815,9 +830,7 @@ dt_cg_tstring_reset(dtrace_hdl_t *dtp)
ts = dtp->dt_tstrings;
for (i = 0; i < DT_TSTRING_SLOTS; i++, ts++)
- ts->offset = i *
- roundup(DT_STRLEN_BYTES +
- dtp->dt_options[DTRACEOPT_STRSIZE], 8);
+ ts->offset = i * dtp->dt_tstringlen;
}
ts = dtp->dt_tstrings;
@@ -3772,6 +3785,99 @@ dt_cg_subr_strstr(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
TRACE_REGSET(" subr-strstr:End ");
}
+static void
+dt_cg_subr_strtok(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
+{
+ dtrace_hdl_t *dtp = yypcb->pcb_hdl;
+ dt_node_t *s1 = dnp->dn_args;
+ dt_node_t *s2 = s1->dn_list;
+ dt_ident_t *idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_strtok");
+ uint64_t off;
+ uint_t Lhave_str = dt_irlist_label(dlp);
+ int reg;
+
+ assert(idp != NULL);
+
+ TRACE_REGSET(" subr-strtok:Begin");
+
+ /* get string and copy internally if not NULL */
+ /* (the 8-byte prefix is the offset, which we initialize to 0) */
+ dt_cg_node(s1, dlp, drp);
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, s1->dn_reg, 0, Lhave_str));
+ if (dt_regset_xalloc_args(drp) == -1)
+ longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_FP, DT_STK_DCTX));
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_1, DCTX_MEM));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, dtp->dt_tstrtok));
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_1, 0, 0));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8));
+ emit(dlp, BPF_MOV_IMM(BPF_REG_2, dtp->dt_options[DTRACEOPT_STRSIZE] + 1));
+ emit(dlp, BPF_MOV_REG(BPF_REG_3, s1->dn_reg));
+ dt_regset_free(drp, s1->dn_reg);
+ if (s1->dn_tstring)
+ dt_cg_tstring_free(yypcb, s1);
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, DT_STRLEN_BYTES));
+ dt_regset_xalloc(drp, BPF_REG_0);
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_probe_read_str));
+ dt_regset_free(drp, BPF_REG_0);
+ dt_regset_free_args(drp);
+ emitl(dlp, Lhave_str,
+ BPF_NOP());
+
+ /* make sure strtok internal storage is initialized; that is, not -1 */
+ reg = dt_regset_alloc(drp);
+ emit(dlp, BPF_LOAD(BPF_DW, reg, BPF_REG_FP, DT_STK_DCTX));
+ emit(dlp, BPF_LOAD(BPF_DW, reg, reg, DCTX_MEM));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, reg, dtp->dt_tstrtok));
+ emit(dlp, BPF_LOAD(BPF_DW, reg, reg, 0));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, reg, 1));
+ dt_cg_check_notnull(dlp, drp, reg);
+ dt_regset_free(drp, reg);
+
+ /* get delimiters */
+ dt_cg_node(s2, dlp, drp);
+ dt_cg_check_notnull(dlp, drp, s2->dn_reg);
+
+ /* allocate temporary string for result */
+ dnp->dn_reg = dt_regset_alloc(drp);
+ if (dnp->dn_reg == -1)
+ longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+ dt_cg_tstring_alloc(yypcb, dnp);
+ emit(dlp, BPF_LOAD(BPF_DW, dnp->dn_reg, BPF_REG_FP, DT_STK_DCTX));
+ emit(dlp, BPF_LOAD(BPF_DW, dnp->dn_reg, dnp->dn_reg, DCTX_MEM));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, dnp->dn_reg, dnp->dn_tstring->dn_value));
+
+ /* allocate temporary string for internal purposes */
+ off = dt_cg_tstring_xalloc(yypcb);
+
+ /* function call */
+
+ if (dt_regset_xalloc_args(drp) == -1)
+ longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+
+ emit(dlp, BPF_MOV_REG(BPF_REG_1, dnp->dn_reg));
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_2, BPF_REG_FP, DT_STK_DCTX));
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_2, BPF_REG_2, DCTX_MEM));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, dtp->dt_tstrtok));
+ emit(dlp, BPF_MOV_REG(BPF_REG_3, s2->dn_reg));
+ dt_regset_free(drp, s2->dn_reg);
+ if (s2->dn_tstring)
+ dt_cg_tstring_free(yypcb, s2);
+
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_FP, DT_STK_DCTX));
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_4, DCTX_MEM));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, off));
+
+ dt_regset_xalloc(drp, BPF_REG_0);
+ emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
+ dt_regset_free_args(drp);
+ dt_cg_tstring_xfree(yypcb, off);
+ emit(dlp, BPF_MOV_REG(dnp->dn_reg, BPF_REG_0));
+ dt_regset_free(drp, BPF_REG_0);
+
+ TRACE_REGSET(" subr-strtok:End ");
+}
+
static void
dt_cg_subr_substr(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
{
@@ -3886,7 +3992,7 @@ static dt_cg_subr_f *_dt_cg_subr[DIF_SUBR_MAX + 1] = {
[DIF_SUBR_STRCHR] = &dt_cg_subr_strchr,
[DIF_SUBR_STRRCHR] = &dt_cg_subr_strrchr,
[DIF_SUBR_STRSTR] = &dt_cg_subr_strstr,
- [DIF_SUBR_STRTOK] = NULL,
+ [DIF_SUBR_STRTOK] = &dt_cg_subr_strtok,
[DIF_SUBR_SUBSTR] = &dt_cg_subr_substr,
[DIF_SUBR_INDEX] = &dt_cg_subr_index,
[DIF_SUBR_RINDEX] = &dt_cg_subr_rindex,
@@ -5676,17 +5782,21 @@ dt_cg(dt_pcb_t *pcb, dt_node_t *dnp)
{
dt_xlator_t *dxp = NULL;
dt_node_t *act;
+ dtrace_hdl_t *dtp = pcb->pcb_hdl;
if (pcb->pcb_regs == NULL) {
pcb->pcb_regs = dt_regset_create(
- pcb->pcb_hdl->dt_conf.dtc_difintregs,
+ dtp->dt_conf.dtc_difintregs,
dt_cg_spill_store, dt_cg_spill_load);
if (pcb->pcb_regs == NULL)
longjmp(pcb->pcb_jmpbuf, EDT_NOMEM);
}
dt_regset_reset(pcb->pcb_regs);
- dt_cg_tstring_reset(pcb->pcb_hdl);
+ dt_cg_tstring_reset(dtp);
+
+ dtp->dt_tstrtok = MAX(DT_TSTRING_SLOTS * dtp->dt_tstringlen,
+ sizeof(uint64_t) * dtp->dt_options[DTRACEOPT_MAXFRAMES]);
dt_irlist_destroy(&pcb->pcb_ir);
dt_irlist_create(&pcb->pcb_ir);
diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
index 75c00e9e..50acc0dc 100644
--- a/libdtrace/dt_dlibs.c
+++ b/libdtrace/dt_dlibs.c
@@ -66,6 +66,7 @@ static const dt_ident_t dt_bpf_symbols[] = {
DT_BPF_SYMBOL(dt_strcmp, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_strjoin, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_strrchr, DT_IDENT_SYMBOL),
+ DT_BPF_SYMBOL(dt_strtok, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_substr, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_speculation, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_speculation_speculate, DT_IDENT_SYMBOL),
diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
index b461d73d..5682edec 100644
--- a/libdtrace/dt_impl.h
+++ b/libdtrace/dt_impl.h
@@ -296,6 +296,8 @@ struct dtrace_hdl {
uint_t dt_maxreclen; /* largest record size across programs */
uint_t dt_maxlvaralloc; /* largest lvar alloc across pcbs */
dt_tstring_t *dt_tstrings; /* temporary string slots */
+ size_t dt_tstringlen; /* length of a tstring */
+ uint64_t dt_tstrtok; /* offset to internal state for strtok() calls */
dt_list_t dt_modlist; /* linked list of dt_module_t's */
dt_module_t **dt_mods; /* hash table of dt_module_t's */
uint_t dt_modbuckets; /* number of module hash buckets */
diff --git a/test/stress/fbtsafety/tst.shortstr.d b/test/stress/fbtsafety/tst.shortstr.d
index 714d4833..dbf71b07 100644
--- a/test/stress/fbtsafety/tst.shortstr.d
+++ b/test/stress/fbtsafety/tst.shortstr.d
@@ -4,7 +4,6 @@
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/
-/* @@xfail: dtv2 */
#pragma D option quiet
#pragma D option strsize=16
diff --git a/test/unittest/funcs/tst.strtok.d b/test/unittest/funcs/strtok/tst.strtok.d
similarity index 100%
rename from test/unittest/funcs/tst.strtok.d
rename to test/unittest/funcs/strtok/tst.strtok.d
diff --git a/test/unittest/funcs/tst.strtok.r b/test/unittest/funcs/strtok/tst.strtok.r
similarity index 100%
rename from test/unittest/funcs/tst.strtok.r
rename to test/unittest/funcs/strtok/tst.strtok.r
diff --git a/test/unittest/funcs/strtok/tst.strtok2.d b/test/unittest/funcs/strtok/tst.strtok2.d
new file mode 100644
index 00000000..14f34043
--- /dev/null
+++ b/test/unittest/funcs/strtok/tst.strtok2.d
@@ -0,0 +1,39 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ */
+
+#pragma D option quiet
+
+BEGIN
+{
+ x = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ y = "abcABC";
+ printf("%s\n", strtok(x, y));
+ y = "MNO";
+ printf("%s\n", strtok(NULL, y));
+ printf("%s\n", strtok(NULL, y));
+ y = "0123456789";
+ printf("%s\n", strtok(NULL, y));
+ printf("%s\n", strtok(NULL, y) == NULL ? "got expected NULL" : "ERROR: non-NULL");
+
+ x = "";
+ y = "";
+ printf("%s\n", strtok(x, y) == NULL ? "got expected NULL" : "ERROR: non-NULL");
+
+ x = "foo";
+ printf("%s\n", strtok(x, y));
+
+ x = "";
+ y = "foo";
+ printf("%s\n", strtok(x, y) == NULL ? "got expected NULL" : "ERROR: non-NULL");
+
+ exit(0);
+}
+
+ERROR
+{
+ exit(1);
+}
diff --git a/test/unittest/funcs/strtok/tst.strtok2.r b/test/unittest/funcs/strtok/tst.strtok2.r
new file mode 100644
index 00000000..c58ea336
--- /dev/null
+++ b/test/unittest/funcs/strtok/tst.strtok2.r
@@ -0,0 +1,9 @@
+defghijklmnopqrstuvwxyz
+ABCDEFGHIJKL
+PQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKL
+MNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
+got expected NULL
+got expected NULL
+foo
+got expected NULL
+
diff --git a/test/unittest/funcs/strtok/tst.strtok_long.d b/test/unittest/funcs/strtok/tst.strtok_long.d
new file mode 100644
index 00000000..1fbe415d
--- /dev/null
+++ b/test/unittest/funcs/strtok/tst.strtok_long.d
@@ -0,0 +1,35 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ */
+
+#pragma D option quiet
+
+BEGIN
+{
+ /* 256-char string ending in "XYZ" */
+ x = "_____________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________XYZ";
+
+ /* check whether the last char of a long string is seen */
+ y = "a";
+ printf("%s\n", strtok(x, y));
+
+ /* check whether the last char of a long delimiter string is seen */
+ z = "zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA";
+ printf("%s\n", strtok(z, x));
+
+ /* check that strtok internal state can handle a large offset */
+ y = "Z";
+ printf("%s\n", strtok(x, y));
+ y = "a";
+ printf("%s\n", strtok(NULL, y));
+
+ exit(0);
+}
+
+ERROR
+{
+ exit(1);
+}
diff --git a/test/unittest/funcs/strtok/tst.strtok_long.r b/test/unittest/funcs/strtok/tst.strtok_long.r
new file mode 100644
index 00000000..a752554e
--- /dev/null
+++ b/test/unittest/funcs/strtok/tst.strtok_long.r
@@ -0,0 +1,5 @@
+_____________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________XYZ
+zyxwvutsrqponmlkjihgfedcba
+_____________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________XY
+Z
+
diff --git a/test/unittest/funcs/tst.strtok_null.d b/test/unittest/funcs/strtok/tst.strtok_null.d
similarity index 79%
rename from test/unittest/funcs/tst.strtok_null.d
rename to test/unittest/funcs/strtok/tst.strtok_null.d
index 45301a78..1d536d60 100644
--- a/test/unittest/funcs/tst.strtok_null.d
+++ b/test/unittest/funcs/strtok/tst.strtok_null.d
@@ -1,10 +1,9 @@
/*
* Oracle Linux DTrace.
- * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/
-/* @@xfail: dtv2 */
/*
* Test that a strtok(NULL, ...) without first calling strtok(string, ...)
diff --git a/test/unittest/funcs/strtok/tst.strtok_null.r b/test/unittest/funcs/strtok/tst.strtok_null.r
new file mode 100644
index 00000000..4002d6ef
--- /dev/null
+++ b/test/unittest/funcs/strtok/tst.strtok_null.r
@@ -0,0 +1,6 @@
+ FUNCTION:NAME
+ :ERROR
+
+-- @@stderr --
+dtrace: script 'test/unittest/funcs/strtok/tst.strtok_null.d' matched 2 probes
+dtrace: error on enabled probe ID 3 (ID 1: dtrace:::BEGIN): invalid address ({ptr}) in action #1
diff --git a/test/unittest/funcs/tst.strtok_null.r b/test/unittest/funcs/tst.strtok_null.r
deleted file mode 100644
index a1f82f91..00000000
--- a/test/unittest/funcs/tst.strtok_null.r
+++ /dev/null
@@ -1,6 +0,0 @@
- FUNCTION:NAME
- :ERROR
-
--- @@stderr --
-dtrace: script 'test/unittest/funcs/tst.strtok_null.d' matched 2 probes
-dtrace: error on enabled probe ID 1 (ID 1: dtrace:::BEGIN): invalid address ({ptr}) in action #1 at DIF offset 40
--
2.18.4
More information about the DTrace-devel
mailing list