[DTrace-devel] [PATCH] Add support for index() subroutine
eugene.loh at oracle.com
eugene.loh at oracle.com
Tue Sep 7 22:27:33 PDT 2021
From: Eugene Loh <eugene.loh at oracle.com>
Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
bpf/Build | 1 +
bpf/index.S | 232 ++++++++++++++++++++++++++++++++++
libdtrace/dt_cg.c | 63 ++++++++-
libdtrace/dt_dlibs.c | 1 +
test/unittest/dif/index2arg.d | 1 -
test/unittest/dif/index3arg.d | 1 -
6 files changed, 296 insertions(+), 3 deletions(-)
create mode 100644 bpf/index.S
diff --git a/bpf/Build b/bpf/Build
index 6e9ffa20..5f359293 100644
--- a/bpf/Build
+++ b/bpf/Build
@@ -25,6 +25,7 @@ bpf_dlib_SOURCES = \
agg_lqbin.c agg_qbin.c \
get_bvar.c \
get_tvar.c set_tvar.c \
+ index.S \
probe_error.c \
strchr.S \
strcmp.S \
diff --git a/bpf/index.S b/bpf/index.S
new file mode 100644
index 00000000..1c813d9f
--- /dev/null
+++ b/bpf/index.S
@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * This approach to implementing index(s, t) is rather different
+ * from what one would do in normal C programming, in which loops
+ * would exit as soon as possible to avoid unnecessary computation.
+ * The reason is that the BPF verifier tries to walk every possible
+ * instruction path and is easily overwhelmed by branching. Since
+ * the BPF verifier inherently walks the worst-case scenario, the
+ * approach here is to implement the worst case with as little
+ * branching as possible.
+ */
+
+#define DT_STRLEN_BYTES 2
+
+#define BPF_FUNC_probe_read 4
+#define BPF_FUNC_probe_read_str 45
+
+ .text
+/*
+ * For two fixed-length buffers, return:
+ * 1 if the two buffers match in every bit
+ * 0 if the two buffers mismatch in any bit
+ * Minimize branching by:
+ * - operating on 64 bits at a time
+ * - replacing conditional branching with arithmetic operations
+ * such as ^ | & etc.
+ *
+ * uint64_t dt_index_match(char *tmp1, char *tmp2, uint64_t len)
+ * {
+ * r0 = 0; // accumulate bits indicating mismatches
+ * r6 = 0; // loop counter
+ * L1:
+ * if (r6 >= len) goto L2;
+ *
+ * r4 = *((uint64_t*)&tmp1[r6]);
+ * r5 = *((uint64_t*)&tmp2[r6]);
+ * r0 |= (r4 ^ r5);
+ *
+ * r6 += 8;
+ * goto L1;
+ *
+ * L2: // value of r0
+ *
+ * // perfect match any mismatches
+ * // == 0 != 0
+ * r0 |= (r0 >> 32); // == 0 != 0
+ * r0 &= 0xffffffff; // == 0 > 0
+ * r0 -= 1; // < 0 >= 0
+ * r0 >>= 63; // == 1 == 0
+ *
+ * return r0;
+ * }
+ */
+ .align 4
+ .global dt_index_match
+dt_index_match:
+ mov %r0, 0
+ mov %r6, 0
+
+.L1:
+ jge %r6, %r3, .L2
+
+ mov %r4, %r1
+ add %r4, %r6
+ ldxdw %r4, [%r4+0]
+ mov %r5, %r2
+ add %r5, %r6
+ ldxdw %r5, [%r5+0]
+ xor %r4, %r5
+ or %r0, %r4
+
+ add %r6, 8
+ ja .L1
+
+.L2:
+ mov %r4, %r0
+ rsh %r4, 32
+ or %r0, %r4
+ and %r0, 0xffffffff
+ sub %r0, 1
+ rsh %r0, 63
+
+ exit
+/*
+ * int dt_index(const char *s, const char *t, int start, char *tmp1, char *tmp2)
+ * {
+ * uint64_t r0, tlen;
+ * uint64_t idx, nonesofar, buflen;
+ *
+ * s += DT_STRLEN_BYTES;
+ * t += DT_STRLEN_BYTES;
+ * if (start < 0) start = 0;
+ *
+ * buflen = STRSZ rounded up to multiple of 8;
+ *
+ * tlen = bpf_probe_read_str(tmp2, buflen, t);
+ * maxi = bpf_probe_read_str(tmp1, buflen, s);
+ * maxi -= tlen;
+ * tlen--; // drop terminating NULL
+ *
+ * nonesofar = 1; // no matches so far
+ * idx = 0; // matching index (value "0" is ambiguous!)
+ * Lloop:
+ * if (start >= buflen) goto Ldone;
+ *
+ * // Fill tmp1 with s, starting at the proposed index.
+ * // Fill the end of tmp1 with contents from tmp2.
+ * // That will suppress spurious mismatches.
+ * bpf_probe_read_str(tmp1, buflen, s + start);
+ * bpf_probe_read(tmp1 + tlen, buflen - tlen, tmp2 + tlen);
+ *
+ * r0 = dt_index_match(tmp1, tmp2);
+ *
+ * // if (r0 == 1 && nonesofar) idx = start;
+ * // if (r0 == 1) nonesofar = 0;
+ * idx += r0 * nonesofar * start;
+ * nonesofar *= (1 - r0);
+ *
+ * start++;
+ * goto Lloop;
+ *
+ * Ldone:
+ * // resolve the ambiguity: if nonesofar, idx==0 means nothing found
+ * idx -= nonesofar;
+ * // idx > maxi means a spurious match
+ * if (idx > maxi) idx = -1;
+ * return idx;
+ * }
+ *
+ * Note that some variables are kept in registers:
+ * r6=start r7=tmp1 r8=tmp2 r9=tlen
+ * Some are spilled to the stack:
+ * [%fp+-8]=s [%fp+-16]=idx [%fp+-24]=nonesofar [%fp+-32]=buflen [%fp+-40]=maxi
+ * And t is not needed once we have copied its contents to tmp2.
+ */
+ .align 4
+ .global dt_index
+dt_index:
+ add %r1, DT_STRLEN_BYTES /* s += DT_STRLEN_BYTES */
+ add %r2, DT_STRLEN_BYTES /* t += DT_STRLEN_BYTES */
+ jsge %r3, 0, 1
+ mov %r3, 0 /* if (start < 0) start = 0 */
+
+ lddw %r6, STRSZ
+ add %r6, 7
+ mov %r7, %r6
+ and %r7, 7
+ sub %r6, %r7
+ stxdw [%fp+-32], %r6 /* buflen = round up to multiple of 8 */
+
+ stxdw [%fp+-8], %r1
+ mov %r6, %r3
+ mov %r7, %r4
+ mov %r8, %r5
+
+ mov %r3, %r2
+ ldxdw %r2, [%fp+-32]
+ mov %r1, %r8
+ call BPF_FUNC_probe_read_str /* tlen = bpf_probe_read_str(tmp2, buflen, t) */
+ jsgt %r0, 0, 1
+ exit
+ mov %r9, %r0
+
+ mov %r1, %r7
+ ldxdw %r2, [%fp+-32]
+ ldxdw %r3, [%fp+-8]
+ call BPF_FUNC_probe_read_str /* maxi = bpf_probe_read_str(tmp1, buflen, s) */
+
+ sub %r0, %r9 /* maxi -= tlen */
+ stxdw [%fp+-40], %r0
+
+ sub %r9, 1 /* tlen--; // drop terminating NULL */
+
+ mov %r4, 1
+ stxdw [%fp+-24], %r4 /* nonesofar = 1 */
+ mov %r4, 0
+ stxdw [%fp+-16], %r4 /* idx = 0 */
+
+.Lloop:
+ ldxdw %r2, [%fp+-32]
+ jge %r6, %r2, .Ldone /* if (start >= buflen) goto Ldone */
+
+ mov %r1, %r7
+ ldxdw %r2, [%fp+-32]
+ ldxdw %r3, [%fp+-8]
+ add %r3, %r6
+ call BPF_FUNC_probe_read_str /* bpf_probe_read_str(tmp1, buflen, s + start) */
+
+ mov %r1, %r7
+ add %r1, %r9
+ ldxdw %r2, [%fp+-32]
+ sub %r2, %r9
+ mov %r3, %r8
+ add %r3, %r9
+ call BPF_FUNC_probe_read /* bpf_probe_read(tmp1 + tlen, buflen - tlen, tmp2 + tlen) */
+
+ mov %r1, %r7
+ mov %r2, %r8
+ ldxdw %r3, [%fp+-32]
+ call dt_index_match /* r0 = dt_index_match(tmp1, tmp2) */
+
+ ldxdw %r1, [%fp+-24]
+ mul %r1, %r6
+ mul %r1, %r0
+ ldxdw %r2, [%fp+-16]
+ add %r2, %r1
+ stxdw [%fp+-16], %r2 /* idx += r0 * nonesofar * start */
+
+ ldxdw %r1, [%fp+-24]
+ mov %r2, 1
+ sub %r2, %r0
+ mul %r1, %r2
+ stxdw [%fp+-24], %r1 /* nonesofar *= (1 - r0) */
+
+ add %r6, 1 /* start++ */
+ ja .Lloop /* goto Lloop */
+
+.Ldone:
+ ldxdw %r0, [%fp+-16]
+
+ ldxdw %r1, [%fp+-24]
+ sub %r0, %r1 /* idx -= nonesofar */
+
+ ldxdw %r1, [%fp+-40]
+ jsle %r0, %r1, 1 /* if (idx > maxi) idx = -1 */
+ mov %r0, -1
+
+ exit
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index d71ce6fd..7d2a26c6 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -3156,6 +3156,67 @@ dt_cg_array_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
emit(dlp, BPF_ALU64_REG((dnp->dn_flags & DT_NF_SIGNED) ? BPF_ARSH : BPF_RSH, dnp->dn_reg, n));
}
+static void
+dt_cg_subr_index(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
+{
+ dt_node_t *s = dnp->dn_args;
+ dt_node_t *t = s->dn_list;
+ dt_node_t *start = t->dn_list;
+ dt_ident_t *idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_index");
+ uint64_t off1, off2;
+
+ assert(idp != NULL);
+
+ TRACE_REGSET(" subr-index:Begin");
+
+ dt_cg_node(s, dlp, drp);
+ dt_cg_check_notnull(dlp, drp, s->dn_reg);
+ dt_cg_node(t, dlp, drp);
+ dt_cg_check_notnull(dlp, drp, t->dn_reg);
+ if (start != NULL)
+ dt_cg_node(start, dlp, drp);
+
+ if (dt_regset_xalloc_args(drp) == -1)
+ longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+ emit(dlp, BPF_MOV_REG(BPF_REG_1, s->dn_reg));
+ dt_regset_free(drp, s->dn_reg);
+ if (s->dn_tstring)
+ dt_cg_tstring_free(yypcb, s);
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, t->dn_reg));
+ dt_regset_free(drp, t->dn_reg);
+ if (t->dn_tstring)
+ dt_cg_tstring_free(yypcb, t);
+ if (start) {
+ emit(dlp, BPF_MOV_REG(BPF_REG_3, start->dn_reg));
+ dt_regset_free(drp, start->dn_reg);
+ } else
+ emit(dlp, BPF_MOV_IMM(BPF_REG_3, 0));
+
+ off1 = dt_cg_tstring_xalloc(yypcb);
+ off2 = dt_cg_tstring_xalloc(yypcb);
+
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_FP, DT_STK_DCTX));
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_4, DCTX_MEM));
+ emit(dlp, BPF_MOV_REG(BPF_REG_5, BPF_REG_4));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, off1));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, off2));
+
+ dt_regset_xalloc(drp, BPF_REG_0);
+ emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
+ dt_regset_free_args(drp);
+
+ dt_cg_tstring_xfree(yypcb, off1);
+ dt_cg_tstring_xfree(yypcb, off2);
+
+ dnp->dn_reg = dt_regset_alloc(drp);
+ if (dnp->dn_reg == -1)
+ longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+ emit(dlp, BPF_MOV_REG(dnp->dn_reg, BPF_REG_0));
+ dt_regset_free(drp, BPF_REG_0);
+
+ TRACE_REGSET(" subr-index:End ");
+}
+
static void
dt_cg_subr_speculation(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
{
@@ -3425,7 +3486,7 @@ static dt_cg_subr_f *_dt_cg_subr[DIF_SUBR_MAX + 1] = {
[DIF_SUBR_STRSTR] = NULL,
[DIF_SUBR_STRTOK] = NULL,
[DIF_SUBR_SUBSTR] = &dt_cg_subr_substr,
- [DIF_SUBR_INDEX] = NULL,
+ [DIF_SUBR_INDEX] = &dt_cg_subr_index,
[DIF_SUBR_RINDEX] = NULL,
[DIF_SUBR_HTONS] = NULL,
[DIF_SUBR_HTONL] = NULL,
diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
index d9836f40..1fec0147 100644
--- a/libdtrace/dt_dlibs.c
+++ b/libdtrace/dt_dlibs.c
@@ -59,6 +59,7 @@ static const dt_ident_t dt_bpf_symbols[] = {
DT_BPF_SYMBOL(dt_get_string, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_get_tvar, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_set_tvar, DT_IDENT_SYMBOL),
+ DT_BPF_SYMBOL(dt_index, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_strchr, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_strcmp, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_strjoin, DT_IDENT_SYMBOL),
diff --git a/test/unittest/dif/index2arg.d b/test/unittest/dif/index2arg.d
index 7d80538f..7eb8f15b 100644
--- a/test/unittest/dif/index2arg.d
+++ b/test/unittest/dif/index2arg.d
@@ -1,4 +1,3 @@
-/* @@xfail: dtv2 */
BEGIN
{
exit(index("BEGINNING", "G") == 2 ? 0 : 1);
diff --git a/test/unittest/dif/index3arg.d b/test/unittest/dif/index3arg.d
index d8c3db4f..886ba93e 100644
--- a/test/unittest/dif/index3arg.d
+++ b/test/unittest/dif/index3arg.d
@@ -1,4 +1,3 @@
-/* @@xfail: dtv2 */
BEGIN
{
exit(index("BEGINNING", "G", 3) == 8 ? 0 : 1);
--
2.18.4
More information about the DTrace-devel
mailing list