[DTrace-devel] [PATCH 3/4] Add support for strchr() subroutine
eugene.loh at oracle.com
eugene.loh at oracle.com
Tue Sep 7 17:06:41 PDT 2021
From: Eugene Loh <eugene.loh at oracle.com>
This implementation tries to minimize branching and looping in BPF code.
It does so by using bpf_probe_read_str() to copy the string into scratch
memory. Then, the byte in question is xor'ed with every byte in scratch
memory. This means every matching byte will now be a NUL byte while all
other bytes are non-NUL. A second bpf_probe_read_str() call, this time on
the scratch copy, stops at the first NUL byte and so gives us the location
of the byte in question.
Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
bpf/Build | 1 +
bpf/strchr.S | 146 +++++++++++++++++++++++++++++++++++++
libdtrace/dt_cg.c | 60 ++++++++++++++-
libdtrace/dt_dlibs.c | 1 +
test/unittest/dif/strchr.d | 1 -
5 files changed, 207 insertions(+), 2 deletions(-)
create mode 100644 bpf/strchr.S
diff --git a/bpf/Build b/bpf/Build
index 34150b20..1aa9dd62 100644
--- a/bpf/Build
+++ b/bpf/Build
@@ -26,6 +26,7 @@ bpf_dlib_SOURCES = \
get_bvar.c \
get_tvar.c set_tvar.c \
probe_error.c \
+ strchr.S \
strcmp.S \
strjoin.S \
substr.S \
diff --git a/bpf/strchr.S b/bpf/strchr.S
new file mode 100644
index 00000000..8a9144a1
--- /dev/null
+++ b/bpf/strchr.S
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#define DT_STRLEN_BYTES 2
+
+#define BPF_FUNC_probe_read 4
+#define BPF_FUNC_probe_read_str 45
+
+/*
+ * uint64_t dt_strchr(char *src, uint64_t c, char *dst, char *tmp) {
+ *
+ * // make a copy of the char in every byte of the register
+ * c &= 0xff;
+ * c |= (c << 8);
+ * c |= (c << 16);
+ * c |= (c << 32);
+ *
+ * // spill arguments to stack
+ * [%fp-8]=src
+ * [%fp-16]=c
+ * [%fp-24]=dst
+ * [%fp-32]=tmp
+ *
+ * // make temporary copy of string and get string length
+ * r6 = bpf_probe_read_str(dst, STRSZ, src + DT_STRLEN_BYTES);
+ * r6--;
+ *
+ * // xor the char with every byte; a match results in NULL byte
+ * r4 = roundup(r6, 8);
+ * for (r3 = 0; r3 < r4; r3 += 8)
+ * *(uint64_t *)(dst + r3) ^= c;
+ *
+ * // look for that NULL byte (and put a safeguard in place)
+ * dst[r6] = '\0';
+ * r8 = bpf_probe_read_str(tmp, r6 + 1, dst);
+ * r8--;
+ *
+ * // determine length of output string
+ * r6 -= r8;
+ * if (r6 <= 0) return -1;
+ * dt_strlen_store(r6, dst);
+ *
+ * // write output string
+ * r8 += DT_STRLEN_BYTES;
+ * bpf_probe_read(dst + DT_STRLEN_BYTES, r6, src + r8);
+ * r6 += DT_STRLEN_BYTES;
+ * dst[r6] = '\0';
+ *
+ * return 0;
+ * }
+ */
+ .text
+ .align 4
+ .global dt_strchr
+dt_strchr :
+ and %r2, 0xff /* c &= 0xff */
+ mov %r5, %r2
+ lsh %r5, 8
+ or %r2, %r5 /* c |= (c << 8) */
+ mov %r5, %r2
+ lsh %r5, 16
+ or %r2, %r5 /* c |= (c << 16) */
+ mov %r5, %r2
+ lsh %r5, 32
+ or %r2, %r5 /* c |= (c << 32); c now fills all 8 bytes of r2 */
+
+ stxdw [%fp+-8], %r1 /* Spill src */
+ stxdw [%fp+-16], %r2 /* Spill c */
+ stxdw [%fp+-24], %r3 /* Spill dst */
+ stxdw [%fp+-32], %r4 /* Spill tmp */
+
+ ldxdw %r1, [%fp+-24] /* r1 = dst */
+ lddw %r2, STRSZ /* r2 = STRSZ */
+ ldxdw %r3, [%fp+-8] /* r3 = src */
+ add %r3, DT_STRLEN_BYTES /* r3 = src + DT_STRLEN_BYTES */
+ call BPF_FUNC_probe_read_str /* r6 = bpf_probe_read_str(dst, STRSZ, src + DT_STRLEN_BYTES) */
+ mov %r6, %r0
+
+ jsle %r6, 0, .Lerror /* if (r6 <= 0) return -1 */
+
+ sub %r6, 1 /* r6-- */
+
+ mov %r4, %r6 /* r4 = roundup(r6, 8) */
+ add %r4, 7
+ and %r4, -8
+
+ ldxdw %r1, [%fp+-16] /* r1 = c (replicated in every byte) */
+ mov %r3, 0 /* r3 = 0 (byte offset into dst) */
+.Lloop: /* do { ... r3 += 8; } while (r3 < r4); test is at the bottom, so the body runs at least once */
+ ldxdw %r5, [%fp+-24]
+ add %r5, %r3 /* r5 = dst + r3 */
+ ldxdw %r0, [%r5+0]
+ xor %r0, %r1 /* *(uint64_t *)(dst + r3) ^= c; */
+ stxdw [%r5+0], %r0
+ add %r3, 8
+ jlt %r3, %r4, .Lloop
+
+ ldxdw %r2, [%fp+-24]
+ add %r2, %r6
+ mov %r0, 0
+ stxb [%r2+0], %r0 /* dst[r6] = '\0' */
+
+ ldxdw %r1, [%fp+-32] /* r1 = tmp */
+ mov %r2, %r6
+ add %r2, 1 /* r2 = r6 + 1 */
+ ldxdw %r3, [%fp+-24] /* r3 = dst */
+ call BPF_FUNC_probe_read_str /* r8 = bpf_probe_read_str(tmp, r6 + 1, dst) */
+ jsle %r0, 0, .Lerror /* if (r0 <= 0) return -1 */
+ lsh %r0, 32
+ arsh %r0, 32 /* sign-extend the low 32 bits of r0 */
+ mov %r8, %r0
+
+ add %r8, -1 /* r8-- */
+
+ sub %r6, %r8 /* r6 -= r8 */
+
+ jsle %r6, 0, .Lerror /* if (r6 <= 0) return -1 */
+
+ mov %r1, %r6
+ ldxdw %r2, [%fp+-24]
+ call dt_strlen_store /* dt_strlen_store(r6, dst) */
+
+ add %r8, DT_STRLEN_BYTES /* r8 += DT_STRLEN_BYTES */
+
+ ldxdw %r1, [%fp+-24]
+ add %r1, DT_STRLEN_BYTES /* r1 = dst + DT_STRLEN_BYTES */
+ mov %r2, %r6 /* r2 = r6 */
+ ldxdw %r3, [%fp+-8]
+ add %r3, %r8 /* r3 = src + r8 */
+ call BPF_FUNC_probe_read /* bpf_probe_read(dst + DT_STRLEN_BYTES, r6, src + r8) */
+
+ add %r6, DT_STRLEN_BYTES /* r6 += DT_STRLEN_BYTES */
+
+ ldxdw %r1, [%fp+-24]
+ add %r1, %r6 /* dst[r6] = '\0' */
+ mov %r2, 0
+ stxb [%r1+0], %r2
+
+ mov %r0, 0 /* return 0 */
+ exit
+
+.Lerror:
+ mov %r0, -1 /* return -1 */
+ exit
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index 7132b260..bf88c862 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -3165,6 +3165,64 @@ dt_cg_subr_speculation(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
TRACE_REGSET(" subr-speculation:End ");
}
+/*
+ * Generate code for the strchr() subroutine by emitting a call to the
+ * dt_strchr BPF function.
+ *
+ * The call is set up with %r1 = string argument, %r2 = character
+ * argument, %r3 = address of the temporary string that receives the
+ * result, and %r4 = address of a second temporary string.  If the call
+ * returns 0, dnp->dn_reg is left holding the address of the result
+ * temporary string; for any other return value dnp->dn_reg is set to 0.
+ */
+static void
+dt_cg_subr_strchr(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
+{
+ dt_ident_t *idp;
+ dt_node_t *str = dnp->dn_args;
+ dt_node_t *chr = str->dn_list;
+ uint64_t off;
+ uint_t Lfound = dt_irlist_label(dlp);
+
+ TRACE_REGSET(" subr-strchr:Begin");
+ /* Generate code for both arguments; the string register gets a not-null check. */
+ dt_cg_node(str, dlp, drp);
+ dt_cg_check_notnull(dlp, drp, str->dn_reg);
+ dt_cg_node(chr, dlp, drp);
+
+ if (dt_regset_xalloc_args(drp) == -1)
+ longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+
+ /* Move the arguments into %r1 and %r2 and release their registers. */
+ emit(dlp, BPF_MOV_REG(BPF_REG_1, str->dn_reg));
+ dt_regset_free(drp, str->dn_reg);
+ /*
+ * NOTE(review): the string's temporary string is released here, before
+ * the result temporary string is allocated below.  If
+ * dt_cg_tstring_alloc() can hand back the slot just freed, the src and
+ * dst arguments of dt_strchr would refer to the same memory -- confirm.
+ */
+ if (str->dn_tstring)
+ dt_cg_tstring_free(yypcb, str);
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, chr->dn_reg));
+ dt_regset_free(drp, chr->dn_reg);
+
+ /*
+ * The result needs to be a temporary string, so we request one.
+ */
+ dnp->dn_reg = dt_regset_alloc(drp);
+ if (dnp->dn_reg == -1)
+ longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+ dt_cg_tstring_alloc(yypcb, dnp);
+
+ /*
+ * dnp->dn_reg = (value loaded via DT_STK_DCTX and DCTX_MEM) plus the
+ * result temporary string's dn_value; the same address goes in %r3.
+ */
+ emit(dlp, BPF_LOAD(BPF_DW, dnp->dn_reg, BPF_REG_FP, DT_STK_DCTX));
+ emit(dlp, BPF_LOAD(BPF_DW, dnp->dn_reg, dnp->dn_reg, DCTX_MEM));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, dnp->dn_reg, dnp->dn_tstring->dn_value));
+ emit(dlp, BPF_MOV_REG(BPF_REG_3, dnp->dn_reg));
+
+ /*
+ * %r4 is computed the same way for a second temporary string at offset
+ * off; that one is released again right after the call.
+ */
+ off = dt_cg_tstring_xalloc(yypcb);
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_FP, DT_STK_DCTX));
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_4, DCTX_MEM));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, off));
+
+ dt_regset_xalloc(drp, BPF_REG_0);
+ idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_strchr");
+ assert(idp != NULL);
+ emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
+ dt_regset_free_args(drp);
+ dt_cg_tstring_xfree(yypcb, off);
+
+ /*
+ * If %r0 == 0, skip ahead and keep the result address in dnp->dn_reg;
+ * otherwise overwrite dnp->dn_reg with 0.
+ */
+ emit (dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, Lfound));
+ emit (dlp, BPF_MOV_IMM(dnp->dn_reg, 0));
+ emitl(dlp, Lfound,
+ BPF_NOP());
+ dt_regset_free(drp, BPF_REG_0);
+
+ TRACE_REGSET(" subr-strchr:End ");
+}
+
static void
dt_cg_subr_strlen(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
{
@@ -3311,7 +3369,7 @@ static dt_cg_subr_f *_dt_cg_subr[DIF_SUBR_MAX + 1] = {
[DIF_SUBR_BASENAME] = NULL,
[DIF_SUBR_DIRNAME] = NULL,
[DIF_SUBR_CLEANPATH] = NULL,
- [DIF_SUBR_STRCHR] = NULL,
+ [DIF_SUBR_STRCHR] = dt_cg_subr_strchr,
[DIF_SUBR_STRRCHR] = NULL,
[DIF_SUBR_STRSTR] = NULL,
[DIF_SUBR_STRTOK] = NULL,
diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
index e65dc70b..c4e495e0 100644
--- a/libdtrace/dt_dlibs.c
+++ b/libdtrace/dt_dlibs.c
@@ -59,6 +59,7 @@ static const dt_ident_t dt_bpf_symbols[] = {
DT_BPF_SYMBOL(dt_get_string, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_get_tvar, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_set_tvar, DT_IDENT_SYMBOL),
+ DT_BPF_SYMBOL(dt_strchr, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_strcmp, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_strjoin, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_substr, DT_IDENT_SYMBOL),
diff --git a/test/unittest/dif/strchr.d b/test/unittest/dif/strchr.d
index 0a8ddbbf..d42425c6 100644
--- a/test/unittest/dif/strchr.d
+++ b/test/unittest/dif/strchr.d
@@ -1,4 +1,3 @@
-/* @@xfail: dtv2 */
BEGIN
{
trace(strchr(probename, 'B'));
--
2.18.4
More information about the DTrace-devel
mailing list