[DTrace-devel] [PATCH] Add support for index() subroutine

eugene.loh at oracle.com eugene.loh at oracle.com
Tue Sep 7 22:27:33 PDT 2021


From: Eugene Loh <eugene.loh at oracle.com>

Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
 bpf/Build                     |   1 +
 bpf/index.S                   | 232 ++++++++++++++++++++++++++++++++++
 libdtrace/dt_cg.c             |  63 ++++++++-
 libdtrace/dt_dlibs.c          |   1 +
 test/unittest/dif/index2arg.d |   1 -
 test/unittest/dif/index3arg.d |   1 -
 6 files changed, 296 insertions(+), 3 deletions(-)
 create mode 100644 bpf/index.S

diff --git a/bpf/Build b/bpf/Build
index 6e9ffa20..5f359293 100644
--- a/bpf/Build
+++ b/bpf/Build
@@ -25,6 +25,7 @@ bpf_dlib_SOURCES = \
 	agg_lqbin.c agg_qbin.c \
 	get_bvar.c \
 	get_tvar.c set_tvar.c \
+	index.S \
 	probe_error.c \
 	strchr.S \
 	strcmp.S \
diff --git a/bpf/index.S b/bpf/index.S
new file mode 100644
index 00000000..1c813d9f
--- /dev/null
+++ b/bpf/index.S
@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * This approach to implementing index(s, t) is rather different
+ * from what one would do in normal C programming, in which loops
+ * would exit as soon as possible to avoid unnecessary computation.
+ * The reason is that the BPF verifier tries to walk every possible
+ * instruction path and is easily overwhelmed by branching.  Since
+ * the BPF verifier inherently walks the worst-case scenario, the
+ * approach here is to implement the worst case with as little
+ * branching as possible.
+ */
+
+#define DT_STRLEN_BYTES		2
+
+#define BPF_FUNC_probe_read	4
+#define BPF_FUNC_probe_read_str	45
+
+	.text
+/*
+ * For two fixed-length buffers, return:
+ *     1 if the two buffers match in every bit
+ *     0 if the two buffers mismatch in any bit
+ * Minimize branching by:
+ *   - operating on 64 bits at a time
+ *   - replacing conditional branching with arithmetic operations
+ *       such as ^ | & etc.
+ *
+ * uint64_t dt_index_match(char *tmp1, char *tmp2, uint64_t len)
+ * {
+ *     r0 = 0;          // accumulate bits indicating mismatches
+ *     r6 = 0;          // loop counter
+ *     L1:
+ *         if (r6 >= len) goto L2;
+ *
+ *         r4 = *((uint64_t*)&tmp1[r6]);
+ *         r5 = *((uint64_t*)&tmp2[r6]);
+ *         r0 |= (r4 ^ r5);
+ *
+ *         r6 += 8;
+ *         goto L1;
+ *
+ *     L2:               //            value of r0
+ *
+ *                       //   perfect match    any mismatches
+ *                       //        == 0              != 0
+ *     r0 |= (r0 >> 32); //        == 0              != 0
+ *     r0 &= 0xffffffff; //        == 0              >  0
+ *     r0 -= 1;          //        <  0              >= 0
+ *     r0 >>= 63;        //        == 1              == 0
+ *
+ *     return r0;
+ * }
+ */
+	.align	4
+	.global	dt_index_match
+dt_index_match:
+	mov	%r0, 0
+	mov	%r6, 0
+
+.L1:
+	jge	%r6, %r3, .L2
+
+	mov	%r4, %r1
+	add	%r4, %r6
+	ldxdw	%r4, [%r4+0]
+	mov	%r5, %r2
+	add	%r5, %r6
+	ldxdw	%r5, [%r5+0]
+	xor	%r4, %r5
+	or	%r0, %r4
+
+	add	%r6, 8
+	ja	.L1
+
+.L2:
+	mov	%r4, %r0
+	rsh	%r4, 32
+	or	%r0, %r4
+	and	%r0, 0xffffffff
+	sub	%r0, 1
+	rsh	%r0, 63
+
+	exit
+/*
+ * int dt_index(const char *s, const char *t, int start, char *tmp1, char *tmp2)
+ * {
+ *     uint64_t r0, tlen;
+ *     uint64_t idx, nonesofar, buflen;
+ *
+ *     s += DT_STRLEN_BYTES;
+ *     t += DT_STRLEN_BYTES;
+ *     if (start < 0) start = 0;
+ *
+ *     buflen = STRSZ rounded up to multiple of 8;
+ *
+ *     tlen = bpf_probe_read_str(tmp2, buflen, t);
+ *     maxi = bpf_probe_read_str(tmp1, buflen, s);
+ *     maxi -= tlen;
+ *     tlen--;           // drop terminating NULL
+ *
+ *     nonesofar = 1;    // no matches so far
+ *     idx = 0;          // matching index (value "0" is ambiguous!)
+ *     Lloop:
+ *         if (start >= buflen) goto Ldone;
+ *
+ *         // Fill tmp1 with s, starting at the proposed index.
+ *         // Fill the end of tmp1 with contents from tmp2.
+ *         // That will suppress spurious mismatches.
+ *         bpf_probe_read_str(tmp1, buflen, s + start);
+ *         bpf_probe_read(tmp1 + tlen, buflen - tlen, tmp2 + tlen);
+ *
+ *         r0 = dt_index_match(tmp1, tmp2);
+ *
+ *         // if (r0 == 1 && nonesofar) idx = start;
+ *         // if (r0 == 1) nonesofar = 0;
+ *         idx += r0 * nonesofar * start;
+ *         nonesofar *= (1 - r0);
+ *
+ *         start++;
+ *         goto Lloop;
+ *
+ *     Ldone:
+ *         // resolve the ambiguity:  if nonesofar, idx==0 means nothing found
+ *         idx -= nonesofar;
+ *         // idx > maxi means a spurious match
+ *         if (idx > maxi) idx = -1;
+ *         return idx;
+ * }
+ *
+ * Note that some variables are kept in registers:
+ *     r6=start    r7=tmp1    r8=tmp2    r9=tlen
+ * Some are spilled to the stack:
+ *     [%fp+-8]=s    [%fp+-16]=idx    [%fp+-24]=nonesofar    [%fp+-32]=buflen    [%fp+-40]=maxi
+ * And t is not needed once we have copied its contents to tmp2.
+ */
+	.align	4
+	.global	dt_index
+dt_index:
+	add	%r1, DT_STRLEN_BYTES		/* s += DT_STRLEN_BYTES */
+	add	%r2, DT_STRLEN_BYTES		/* t += DT_STRLEN_BYTES */
+	jsge	%r3, 0, 1
+	mov	%r3, 0				/* if (start < 0) start = 0 */
+
+	lddw	%r6, STRSZ
+	add	%r6, 7
+	mov	%r7, %r6
+	and	%r7, 7
+	sub	%r6, %r7
+	stxdw	[%fp+-32], %r6			/* buflen = round up to multiple of 8 */
+
+	stxdw	[%fp+-8], %r1
+	mov	%r6, %r3
+	mov	%r7, %r4
+	mov	%r8, %r5
+
+	mov	%r3, %r2
+	ldxdw	%r2, [%fp+-32]
+	mov	%r1, %r8
+	call	BPF_FUNC_probe_read_str		/* tlen = bpf_probe_read_str(tmp2, buflen, t) */
+	jsgt	%r0, 0, 1
+	exit
+	mov	%r9, %r0
+
+	mov	%r1, %r7
+	ldxdw	%r2, [%fp+-32]
+	ldxdw	%r3, [%fp+-8]
+	call	BPF_FUNC_probe_read_str		/* maxi = bpf_probe_read_str(tmp1, buflen, s) */
+
+	sub	%r0, %r9			/* maxi -= tlen */
+	stxdw	[%fp+-40], %r0
+
+	sub	%r9, 1				/* tlen--;     // drop terminating NULL */
+
+	mov	%r4, 1
+	stxdw	[%fp+-24], %r4			/* nonesofar = 1 */
+	mov	%r4, 0
+	stxdw	[%fp+-16], %r4			/* idx = 0 */
+
+.Lloop:
+	ldxdw	%r2, [%fp+-32]
+	jge	%r6, %r2, .Ldone		/* if (start >= buflen) goto Ldone */
+
+	mov	%r1, %r7
+	ldxdw	%r2, [%fp+-32]
+	ldxdw	%r3, [%fp+-8]
+	add	%r3, %r6
+	call	BPF_FUNC_probe_read_str		/* bpf_probe_read_str(tmp1, buflen, s + start) */
+
+	mov	%r1, %r7
+	add	%r1, %r9
+	ldxdw	%r2, [%fp+-32]
+	sub	%r2, %r9
+	mov	%r3, %r8
+	add	%r3, %r9
+	call	BPF_FUNC_probe_read		/* bpf_probe_read(tmp1 + tlen, buflen - tlen, tmp2 + tlen) */
+
+	mov	%r1, %r7
+	mov	%r2, %r8
+	ldxdw	%r3, [%fp+-32]
+	call	dt_index_match			/* r0 = dt_index_match(tmp1, tmp2) */
+
+	ldxdw	%r1, [%fp+-24]
+	mul	%r1, %r6
+	mul	%r1, %r0
+	ldxdw	%r2, [%fp+-16]
+	add	%r2, %r1
+	stxdw	[%fp+-16], %r2			/* idx += r0 * nonesofar * start */
+
+	ldxdw	%r1, [%fp+-24]
+	mov	%r2, 1
+	sub	%r2, %r0
+	mul	%r1, %r2
+	stxdw	[%fp+-24], %r1			/* nonesofar *= (1 - r0) */
+
+	add	%r6, 1				/* start++ */
+	ja	.Lloop				/* goto Lloop */
+
+.Ldone:
+	ldxdw	%r0, [%fp+-16]
+
+	ldxdw	%r1, [%fp+-24]
+	sub	%r0, %r1			/* idx -= nonesofar */
+
+	ldxdw	%r1, [%fp+-40]
+	jsle	%r0, %r1, 1			/* if (idx > maxi) idx = -1 */
+	mov	%r0, -1
+
+	exit
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index d71ce6fd..7d2a26c6 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -3156,6 +3156,67 @@ dt_cg_array_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
 	emit(dlp, BPF_ALU64_REG((dnp->dn_flags & DT_NF_SIGNED) ? BPF_ARSH : BPF_RSH, dnp->dn_reg, n));
 }
 
+static void
+dt_cg_subr_index(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
+{
+	dt_node_t	*s = dnp->dn_args;
+	dt_node_t	*t = s->dn_list;
+	dt_node_t	*start = t->dn_list;
+	dt_ident_t	*idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_index");
+	uint64_t	off1, off2;
+
+	assert(idp != NULL);
+
+	TRACE_REGSET("    subr-index:Begin");
+
+	dt_cg_node(s, dlp, drp);
+	dt_cg_check_notnull(dlp, drp, s->dn_reg);
+	dt_cg_node(t, dlp, drp);
+	dt_cg_check_notnull(dlp, drp, t->dn_reg);
+	if (start != NULL)
+		dt_cg_node(start, dlp, drp);
+
+	if (dt_regset_xalloc_args(drp) == -1)
+		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+	emit(dlp,  BPF_MOV_REG(BPF_REG_1, s->dn_reg));
+	dt_regset_free(drp, s->dn_reg);
+	if (s->dn_tstring)
+		dt_cg_tstring_free(yypcb, s);
+	emit(dlp,  BPF_MOV_REG(BPF_REG_2, t->dn_reg));
+	dt_regset_free(drp, t->dn_reg);
+	if (t->dn_tstring)
+		dt_cg_tstring_free(yypcb, t);
+	if (start) {
+		emit(dlp,  BPF_MOV_REG(BPF_REG_3, start->dn_reg));
+		dt_regset_free(drp, start->dn_reg);
+	} else
+		emit(dlp,  BPF_MOV_IMM(BPF_REG_3, 0));
+
+	off1 = dt_cg_tstring_xalloc(yypcb);
+	off2 = dt_cg_tstring_xalloc(yypcb);
+
+	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_FP, DT_STK_DCTX));
+	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_4, DCTX_MEM));
+	emit(dlp,  BPF_MOV_REG(BPF_REG_5, BPF_REG_4));
+	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, off1));
+	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, off2));
+
+	dt_regset_xalloc(drp, BPF_REG_0);
+	emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
+	dt_regset_free_args(drp);
+
+	dt_cg_tstring_xfree(yypcb, off1);
+	dt_cg_tstring_xfree(yypcb, off2);
+
+	dnp->dn_reg = dt_regset_alloc(drp);
+	if (dnp->dn_reg == -1)
+		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+	emit(dlp,  BPF_MOV_REG(dnp->dn_reg, BPF_REG_0));
+	dt_regset_free(drp, BPF_REG_0);
+
+	TRACE_REGSET("    subr-index:End  ");
+}
+
 static void
 dt_cg_subr_speculation(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
 {
@@ -3425,7 +3486,7 @@ static dt_cg_subr_f *_dt_cg_subr[DIF_SUBR_MAX + 1] = {
 	[DIF_SUBR_STRSTR]		= NULL,
 	[DIF_SUBR_STRTOK]		= NULL,
 	[DIF_SUBR_SUBSTR]		= &dt_cg_subr_substr,
-	[DIF_SUBR_INDEX]		= NULL,
+	[DIF_SUBR_INDEX]		= &dt_cg_subr_index,
 	[DIF_SUBR_RINDEX]		= NULL,
 	[DIF_SUBR_HTONS]		= NULL,
 	[DIF_SUBR_HTONL]		= NULL,
diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
index d9836f40..1fec0147 100644
--- a/libdtrace/dt_dlibs.c
+++ b/libdtrace/dt_dlibs.c
@@ -59,6 +59,7 @@ static const dt_ident_t		dt_bpf_symbols[] = {
 	DT_BPF_SYMBOL(dt_get_string, DT_IDENT_SYMBOL),
 	DT_BPF_SYMBOL(dt_get_tvar, DT_IDENT_SYMBOL),
 	DT_BPF_SYMBOL(dt_set_tvar, DT_IDENT_SYMBOL),
+	DT_BPF_SYMBOL(dt_index, DT_IDENT_SYMBOL),
 	DT_BPF_SYMBOL(dt_strchr, DT_IDENT_SYMBOL),
 	DT_BPF_SYMBOL(dt_strcmp, DT_IDENT_SYMBOL),
 	DT_BPF_SYMBOL(dt_strjoin, DT_IDENT_SYMBOL),
diff --git a/test/unittest/dif/index2arg.d b/test/unittest/dif/index2arg.d
index 7d80538f..7eb8f15b 100644
--- a/test/unittest/dif/index2arg.d
+++ b/test/unittest/dif/index2arg.d
@@ -1,4 +1,3 @@
-/* @@xfail: dtv2 */
 BEGIN
 {
 	exit(index("BEGINNING", "G") == 2 ? 0 : 1);
diff --git a/test/unittest/dif/index3arg.d b/test/unittest/dif/index3arg.d
index d8c3db4f..886ba93e 100644
--- a/test/unittest/dif/index3arg.d
+++ b/test/unittest/dif/index3arg.d
@@ -1,4 +1,3 @@
-/* @@xfail: dtv2 */
 BEGIN
 {
 	exit(index("BEGINNING", "G", 3) == 8 ? 0 : 1);
-- 
2.18.4




More information about the DTrace-devel mailing list