[DTrace-devel] [PATCH 3/5] Work in Progress: Add support for strchr()
eugene.loh at oracle.com
eugene.loh at oracle.com
Wed Aug 25 19:21:36 PDT 2021
From: Eugene Loh <eugene.loh at oracle.com>
This implementation tries to minimize branching and looping in BPF code.
It does so by using bpf_probe_read() to copy the string into scratch
memory. Then, the byte in question is xor'ed with every byte in scratch
memory. This means the matching byte will now be a NULL byte while all
other bytes are non-null. This means that bpf_probe_read_str() can now
give us the location of the byte in question.
The dt_cg.c changes are such that they will apply to a branch that is
still under development. The management of scratch space and some
hardwired "256" values are likewise to be reworked once some other
development has been resolved.
Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
bpf/Build | 1 +
bpf/strchr.S | 148 +++++++++++++++++++++++++++++++++++++
libdtrace/dt_cg.c | 63 +++++-----------
libdtrace/dt_dlibs.c | 1 +
test/unittest/dif/strchr.d | 1 -
5 files changed, 170 insertions(+), 44 deletions(-)
create mode 100644 bpf/strchr.S
diff --git a/bpf/Build b/bpf/Build
index 287e6f6c..e7ecf9e5 100644
--- a/bpf/Build
+++ b/bpf/Build
@@ -27,6 +27,7 @@ bpf_dlib_SOURCES = \
get_tvar.c set_tvar.c \
probe_error.c \
strcmp.S \
+ strchr.S \
strnlen.c \
substr.S \
varint.c
diff --git a/bpf/strchr.S b/bpf/strchr.S
new file mode 100644
index 00000000..3c0d734d
--- /dev/null
+++ b/bpf/strchr.S
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#define BPF_FUNC_probe_read 4
+#define BPF_FUNC_probe_read_str 45
+
+/*
+ * char *dt_strchr(char *s, uint64_t c, char *t) {
+ *
+ * c &= 0xff;
+ * c |= (c << 8);
+ * c |= (c << 16);
+ * c |= (c << 32);
+ *
+ * [%fp-8]=s
+ * [%fp-16]=c
+ * [%fp-24]=t
+ *
+ * r6 = dt_vint2int(s);
+ * r7 = dt_vint_size(r6);
+ *
+ * bpf_probe_read(t, r6, s + r7);
+ * for (r3 = 0; r3 < roundup(r6, 8); r3 += 8)
+ * ((uint64_t *)t)[r3] ^= c;
+ * t[r6] = '\0';
+ *
+ * r8 = bpf_probe_read_str(t + 256, r6 + 1, t);
+ * r8 -= 1;
+ * if (r8 >= r6) return NULL;
+ * r6 -= r8;
+ * r9 = dt_int2vint(r6, t);
+ * r7 += r8;
+ * bpf_probe_read(t + r9, r6, s + r7);
+ *
+ * r6 += r9;
+ * t[r6] = '\0';
+ *
+ * return t;
+ * }
+ */
+ .text
+ .align 4
+ .global dt_strchr
+dt_strchr :
+ and %r2, 0xff /* c &= 0xff */
+ mov %r4, %r2
+ lsh %r4, 8
+ or %r2, %r4 /* c |= (c << 8) */
+ mov %r4, %r2
+ lsh %r4, 16
+ or %r2, %r4 /* c |= (c << 16) */
+ mov %r4, %r2
+ lsh %r4, 32
+ or %r2, %r4 /* c |= (c << 32) */
+
+ stxdw [%fp+-8], %r1 /* Spill s */
+ stxdw [%fp+-16], %r2 /* Spill c */
+ stxdw [%fp+-24], %r3 /* Spill t */
+
+ call dt_vint2int /* r6 = dt_vint2int(s) */
+
+ and %r0, 0xfffffff /* bound r6 */
+ lddw %r1, STRSZ
+ jle %r0, %r1, 1
+ mov %r0, %r1
+ mov %r6, %r0
+
+ mov %r1, %r6
+ call dt_vint_size /* r7 = dt_vint_size(r6) */
+ mov %r7, %r0
+
+ ldxdw %r1, [%fp+-24]
+ mov %r2, %r6
+ ldxdw %r3, [%fp+-8]
+ add %r3, %r7
+ call BPF_FUNC_probe_read /* bpf_probe_read(t, r6, s + r7) */
+
+ ldxdw %r2, [%fp+-24]
+ ldxdw %r1, [%fp+-16]
+ mov %r3, 0
+ mov %r4, %r6 /* r4 = roundup(r6, 8) */
+ add %r4, 7
+ rsh %r4, 3
+ lsh %r4, 3
+ lddw %r5, STRSZ
+ jle %r4, %r5, 1
+ mov %r4, %r5
+.Lloop: /* for (r3 = 0; r3 < r4; r3 += 8) */
+ mov %r5, %r2
+ add %r5, %r3
+ ldxdw %r0, [%r5+0]
+ xor %r0, %r1 /* ((uint64_t *)t)[r3] ^= c; */
+ stxdw [%r5+0], %r0
+ add %r3, 8
+ jlt %r3, %r4, .Lloop
+
+ add %r2, %r6
+ mov %r0, 0
+ stxb [%r2+0], %r0 /* t[r6] = '\0' */
+
+ ldxdw %r1, [%fp+-24]
+ add %r1, 256
+ mov %r2, %r6
+ add %r2, 1
+ ldxdw %r3, [%fp+-24]
+ call BPF_FUNC_probe_read_str /* r8 = bpf_probe_read_str(t + 256, r6 + 1, t) */
+ lsh %r0, 32
+ arsh %r0, 32
+ add %r0, -1 /* r8 -= 1 */
+ mov %r8, %r0
+
+ jgt %r6, %r8, .Lfound /* if (r8 >= r6) return NULL */
+ mov %r0, 0
+ exit
+.Lfound:
+
+ sub %r6, %r8 /* r6 -= r8 */
+ jge %r6, 0, 1
+ mov %r6, 0
+ and %r6, 0xffff
+ lddw %r1, STRSZ
+ jle %r6, %r1, 1
+ mov %r6, %r1
+
+ mov %r1, %r6
+ ldxdw %r2, [%fp+-24]
+ call dt_int2vint /* r9 = dt_int2vint(r6, t) */
+ mov %r9, %r0
+
+ add %r7, %r8 /* r7 += r8 */
+
+ ldxdw %r1, [%fp+-24]
+ add %r1, %r9
+ mov %r2, %r6
+ ldxdw %r3, [%fp+-8]
+ add %r3, %r7
+ call BPF_FUNC_probe_read /* bpf_probe_read(t + r9, r6, s + r7) */
+
+ add %r6, %r9 /* r6 += r9 */
+ ldxdw %r1, [%fp+-24]
+ add %r1, %r6 /* t[r6] = '\0' */
+ mov %r2, 0
+ stxb [%r1+0], %r2
+
+ ldxdw %r0, [%fp+-24] /* return t */
+ exit
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index a4416ffb..3227c0a3 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -3089,60 +3089,37 @@ dt_cg_subr_bcopy(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
static void
dt_cg_subr_strchr(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
{
+ dt_ident_t *idp;
dt_node_t *str = dnp->dn_args;
dt_node_t *chr = str->dn_list;
- dt_ident_t *idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_memcpy");
- int reg = dt_regset_alloc(drp);
- uint_t loop = dt_irlist_label(dlp);
- uint_t out = dt_irlist_label(dlp);
- size_t maxsize = dt_node_type_size(str);
-
- assert(idp != NULL);
TRACE_REGSET(" subr-strchr:Begin");
dt_cg_node(str, dlp, drp);
+ dt_cg_check_notnull(dlp, drp, str->dn_reg);
dt_cg_node(chr, dlp, drp);
- /*
- * The str value must be not NULL.
- */
- emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, str->dn_reg, 0, yypcb->pcb_exitlbl));
+ if (dt_regset_xalloc_args(drp) == -1)
+ longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
- /*
- * Get the length of the string.
- */
- dt_cg_strlen(dlp, drp, reg, str->dn_reg);
+ emit(dlp, BPF_MOV_REG(BPF_REG_1, str->dn_reg));
+ dt_regset_free(drp, str->dn_reg);
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, chr->dn_reg));
+ dt_regset_free(drp, chr->dn_reg);
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_FP, DT_STK_DCTX));
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_3, DCTX_BUF));
+ emit(dlp, BPF_ALU64_IMM(BPF_SUB, BPF_REG_3, 512 + 8));
- /*
- * Loop over the string, looking for the first occurence of 'chr'. We
- * limit the iterations to the maximum strimg size.
- */
dt_regset_xalloc(drp, BPF_REG_0);
- emit(dlp, BPF_MOV_IMM(BPF_REG_0, 0));
-#if 0
-emit(dlp, BPF_MOV_IMM(reg, 10));
-#endif
- emit(dlp, BPF_BRANCH_IMM(BPF_JLT, reg, maxsize, loop));
-#if 0
- emit(dlp, BPF_MOV_IMM(reg, 10));
-#else
- emit(dlp, BPF_MOV_IMM(reg, maxsize));
-#endif
- emitl(dlp, loop,
- BPF_BRANCH_REG(BPF_JGE, BPF_REG_0, reg, out));
- emit(dlp, BPF_LOAD(BPF_B, BPF_REG_1, str->dn_reg, BPF_REG_0));
- emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1));
- emit(dlp, BPF_BRANCH_REG(BPF_JNE, BPF_REG_1, chr->dn_reg, loop));
- emitl(dlp, out,
- BPF_NOP());
+ idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_strchr");
+ assert(idp != NULL);
+ emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
+ dt_regset_free_args(drp);
+ dnp->dn_reg = dt_regset_alloc(drp);
+ if (dnp->dn_reg == -1)
+ longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+ emit(dlp, BPF_MOV_REG(dnp->dn_reg, BPF_REG_0));
dt_regset_free(drp, BPF_REG_0);
- dt_regset_free(drp, reg);
- dt_regset_free(drp, chr->dn_reg);
-#if 0
- dt_regset_free(drp, str->dn_reg);
-#else
- dnp->dn_reg = str->dn_reg;
-#endif
+
TRACE_REGSET(" subr-strchr:End ");
}
diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
index 547fd10f..459f2cc8 100644
--- a/libdtrace/dt_dlibs.c
+++ b/libdtrace/dt_dlibs.c
@@ -60,6 +60,7 @@ static const dt_ident_t dt_bpf_symbols[] = {
DT_BPF_SYMBOL(dt_get_tvar, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_set_tvar, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_strcmp, DT_IDENT_SYMBOL),
+ DT_BPF_SYMBOL(dt_strchr, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_strnlen, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_substr, DT_IDENT_SYMBOL),
/* BPF maps */
diff --git a/test/unittest/dif/strchr.d b/test/unittest/dif/strchr.d
index 0a8ddbbf..d42425c6 100644
--- a/test/unittest/dif/strchr.d
+++ b/test/unittest/dif/strchr.d
@@ -1,4 +1,3 @@
-/* @@xfail: dtv2 */
BEGIN
{
trace(strchr(probename, 'B'));
--
2.18.4
More information about the DTrace-devel
mailing list