[DTrace-devel] [PATCH 3/4] Add support for strchr() subroutine

eugene.loh at oracle.com eugene.loh at oracle.com
Tue Sep 7 17:06:41 PDT 2021


From: Eugene Loh <eugene.loh at oracle.com>

This implementation tries to minimize branching and looping in BPF code.
It does so by using bpf_probe_read_str() to copy the string into scratch
memory.  Then, the byte in question is xor'ed with every byte in scratch
memory, so that each matching byte becomes a NUL byte while all other
bytes are non-NUL.  A second bpf_probe_read_str() call can then give us
the location of the first matching byte.

Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
 bpf/Build                  |   1 +
 bpf/strchr.S               | 146 +++++++++++++++++++++++++++++++++++++
 libdtrace/dt_cg.c          |  60 ++++++++++++++-
 libdtrace/dt_dlibs.c       |   1 +
 test/unittest/dif/strchr.d |   1 -
 5 files changed, 207 insertions(+), 2 deletions(-)
 create mode 100644 bpf/strchr.S

diff --git a/bpf/Build b/bpf/Build
index 34150b20..1aa9dd62 100644
--- a/bpf/Build
+++ b/bpf/Build
@@ -26,6 +26,7 @@ bpf_dlib_SOURCES = \
 	get_bvar.c \
 	get_tvar.c set_tvar.c \
 	probe_error.c \
+	strchr.S \
 	strcmp.S \
 	strjoin.S \
 	substr.S \
diff --git a/bpf/strchr.S b/bpf/strchr.S
new file mode 100644
index 00000000..8a9144a1
--- /dev/null
+++ b/bpf/strchr.S
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#define DT_STRLEN_BYTES		2
+
+#define BPF_FUNC_probe_read	4
+#define BPF_FUNC_probe_read_str	45
+
+/*
+ * uint64_t dt_strchr(char *src, uint64_t c, char *dst, char *tmp) {
+ *     // find first c in src; return 0 with the tail in dst, else -1
+ *     // make a copy of the char in every byte of the register
+ *     c &= 0xff;
+ *     c |= (c << 8);
+ *     c |= (c << 16);
+ *     c |= (c << 32);
+ *
+ *     // spill arguments to stack
+ *     [%fp-8]=src
+ *     [%fp-16]=c
+ *     [%fp-24]=dst
+ *     [%fp-32]=tmp
+ *
+ *     // make temporary copy of string and get string length
+ *     r6 = bpf_probe_read_str(dst, STRSZ, src + DT_STRLEN_BYTES);
+ *     if (r6 <= 0) return -1; r6--;
+ *
+ *     // xor the char with every byte;  a match results in NUL byte
+ *     r4 = roundup(r6, 8); r3 = 0;    // r3 is a byte offset into dst
+ *     do { *(uint64_t *)(dst + r3) ^= c; r3 += 8; } while (r3 < r4);
+ *
+ *     // look for the first NUL byte (and put a safeguard in place)
+ *     dst[r6] = '\0';
+ *     r8 = bpf_probe_read_str(tmp, r6 + 1, dst);
+ *     if (r8 <= 0) return -1; r8--;
+ *
+ *     // determine length of output string
+ *     r6 -= r8;
+ *     if (r6 <= 0) return -1;
+ *     dt_strlen_store(r6, dst);
+ *
+ *     // write output string
+ *     r8 += DT_STRLEN_BYTES;
+ *     bpf_probe_read(dst + DT_STRLEN_BYTES, r6, src + r8);
+ *     r6 += DT_STRLEN_BYTES;
+ *     dst[r6] = '\0';
+ *
+ *     return 0;
+ * }
+ */
+	.text
+	.align	4
+	.global	dt_strchr
+dt_strchr :
+	and	%r2, 0xff		/* c &= 0xff */
+	mov	%r5, %r2
+	lsh	%r5, 8
+	or	%r2, %r5		/* c |= (c << 8) */
+	mov	%r5, %r2
+	lsh	%r5, 16
+	or	%r2, %r5		/* c |= (c << 16) */
+	mov	%r5, %r2
+	lsh	%r5, 32
+	or	%r2, %r5		/* c |= (c << 32) */
+
+	stxdw	[%fp+-8], %r1		/* Spill src */
+	stxdw	[%fp+-16], %r2		/* Spill c */
+	stxdw	[%fp+-24], %r3		/* Spill dst */
+	stxdw	[%fp+-32], %r4		/* Spill tmp */
+
+	ldxdw	%r1, [%fp+-24]
+	lddw	%r2, STRSZ
+	ldxdw	%r3, [%fp+-8]
+	add	%r3, DT_STRLEN_BYTES
+	call	BPF_FUNC_probe_read_str	/* r6 = bpf_probe_read_str(dst, STRSZ, src + DT_STRLEN_BYTES) */
+	mov	%r6, %r0
+
+	jsle	%r6, 0, .Lerror		/* if (r6 <= 0) return -1 */
+
+	sub	%r6, 1			/* r6-- */
+
+	mov	%r4, %r6		/* r4 = roundup(r6, 8) */
+	add	%r4, 7
+	and	%r4, -8
+
+	ldxdw	%r1, [%fp+-16]		/* r1 = c (replicated in every byte) */
+	mov	%r3, 0
+.Lloop:					/* r3 = 0; do { ... } while (r3 < r4) */
+	ldxdw	%r5, [%fp+-24]
+	add	%r5, %r3
+	ldxdw	%r0, [%r5+0]
+	xor	%r0, %r1		/* 	*(uint64_t *)(dst + r3) ^= c; */
+	stxdw	[%r5+0], %r0
+	add	%r3, 8
+	jlt	%r3, %r4, .Lloop
+
+	ldxdw	%r2, [%fp+-24]
+	add	%r2, %r6
+	mov	%r0, 0
+	stxb	[%r2+0], %r0		/* dst[r6] = '\0' */
+
+	ldxdw	%r1, [%fp+-32]
+	mov	%r2, %r6
+	add	%r2, 1
+	ldxdw	%r3, [%fp+-24]
+	call	BPF_FUNC_probe_read_str	/* r8 = bpf_probe_read_str(tmp, r6 + 1, dst) */
+	jsle	%r0, 0, .Lerror		/* if (r0 <= 0) return -1 */
+	lsh	%r0, 32			/* sign-extend the low 32 bits of r0 */
+	arsh	%r0, 32
+	mov	%r8, %r0
+
+	add	%r8, -1			/* r8-- */
+
+	sub	%r6, %r8		/* r6 -= r8 */
+
+	jsle	%r6, 0, .Lerror		/* if (r6 <= 0) return -1 */
+
+	mov	%r1, %r6
+	ldxdw	%r2, [%fp+-24]
+	call	dt_strlen_store		/* dt_strlen_store(r6, dst) */
+
+	add	%r8, DT_STRLEN_BYTES	/* r8 += DT_STRLEN_BYTES */
+
+	ldxdw	%r1, [%fp+-24]
+	add	%r1, DT_STRLEN_BYTES
+	mov	%r2, %r6
+	ldxdw	%r3, [%fp+-8]
+	add	%r3, %r8
+	call	BPF_FUNC_probe_read	/* bpf_probe_read(dst + DT_STRLEN_BYTES, r6, src + r8) */
+
+	add	%r6, DT_STRLEN_BYTES	/* r6 += DT_STRLEN_BYTES */
+
+	ldxdw	%r1, [%fp+-24]
+	add	%r1, %r6		/* dst[r6] = '\0' */
+	mov	%r2, 0
+	stxb	[%r1+0], %r2
+
+	mov	%r0, 0			/* return 0 */
+	exit
+
+.Lerror:
+	mov	%r0, -1			/* return -1 */
+	exit
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index 7132b260..bf88c862 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -3165,6 +3165,64 @@ dt_cg_subr_speculation(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
 	TRACE_REGSET("    subr-speculation:End  ");
 }
 
+static void
+dt_cg_subr_strchr(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
+{
+	dt_ident_t	*idp;
+	dt_node_t	*str = dnp->dn_args;	/* first argument: the string */
+	dt_node_t	*chr = str->dn_list;	/* second argument: the char */
+	uint64_t	off;
+	uint_t		Lfound = dt_irlist_label(dlp);
+
+	TRACE_REGSET("    subr-strchr:Begin");
+	dt_cg_node(str, dlp, drp);
+	dt_cg_check_notnull(dlp, drp, str->dn_reg);
+	dt_cg_node(chr, dlp, drp);
+
+	if (dt_regset_xalloc_args(drp) == -1)
+		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+
+	emit(dlp, BPF_MOV_REG(BPF_REG_1, str->dn_reg));	/* %r1 = src */
+	dt_regset_free(drp, str->dn_reg);
+	if (str->dn_tstring)
+		dt_cg_tstring_free(yypcb, str);
+	emit(dlp, BPF_MOV_REG(BPF_REG_2, chr->dn_reg));	/* %r2 = c */
+	dt_regset_free(drp, chr->dn_reg);
+
+	/*
+	 * The result needs to be a temporary string, so we request one.
+	 */
+	dnp->dn_reg = dt_regset_alloc(drp);
+	if (dnp->dn_reg == -1)
+		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+	dt_cg_tstring_alloc(yypcb, dnp);
+
+	emit(dlp,  BPF_LOAD(BPF_DW, dnp->dn_reg, BPF_REG_FP, DT_STK_DCTX));
+	emit(dlp,  BPF_LOAD(BPF_DW, dnp->dn_reg, dnp->dn_reg, DCTX_MEM));
+	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, dnp->dn_reg, dnp->dn_tstring->dn_value));
+	emit(dlp,  BPF_MOV_REG(BPF_REG_3, dnp->dn_reg));	/* %r3 = dst */
+
+	off = dt_cg_tstring_xalloc(yypcb);	/* scratch space passed as tmp */
+	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_FP, DT_STK_DCTX));
+	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_4, DCTX_MEM));
+	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, off));	/* %r4 = tmp */
+
+	dt_regset_xalloc(drp, BPF_REG_0);
+	idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_strchr");
+	assert(idp != NULL);
+	emite(dlp,  BPF_CALL_FUNC(idp->di_id), idp);
+	dt_regset_free_args(drp);
+	dt_cg_tstring_xfree(yypcb, off);
+
+	emit (dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, Lfound));	/* %r0 == 0: keep dst */
+	emit (dlp, BPF_MOV_IMM(dnp->dn_reg, 0));	/* otherwise the result is 0 */
+	emitl(dlp, Lfound,
+		   BPF_NOP());
+	dt_regset_free(drp, BPF_REG_0);
+
+	TRACE_REGSET("    subr-strchr:End  ");
+}
+
 static void
 dt_cg_subr_strlen(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
 {
@@ -3311,7 +3369,7 @@ static dt_cg_subr_f *_dt_cg_subr[DIF_SUBR_MAX + 1] = {
 	[DIF_SUBR_BASENAME]		= NULL,
 	[DIF_SUBR_DIRNAME]		= NULL,
 	[DIF_SUBR_CLEANPATH]		= NULL,
-	[DIF_SUBR_STRCHR]		= NULL,
+	[DIF_SUBR_STRCHR]		= dt_cg_subr_strchr,
 	[DIF_SUBR_STRRCHR]		= NULL,
 	[DIF_SUBR_STRSTR]		= NULL,
 	[DIF_SUBR_STRTOK]		= NULL,
diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
index e65dc70b..c4e495e0 100644
--- a/libdtrace/dt_dlibs.c
+++ b/libdtrace/dt_dlibs.c
@@ -59,6 +59,7 @@ static const dt_ident_t		dt_bpf_symbols[] = {
 	DT_BPF_SYMBOL(dt_get_string, DT_IDENT_SYMBOL),
 	DT_BPF_SYMBOL(dt_get_tvar, DT_IDENT_SYMBOL),
 	DT_BPF_SYMBOL(dt_set_tvar, DT_IDENT_SYMBOL),
+	DT_BPF_SYMBOL(dt_strchr, DT_IDENT_SYMBOL),
 	DT_BPF_SYMBOL(dt_strcmp, DT_IDENT_SYMBOL),
 	DT_BPF_SYMBOL(dt_strjoin, DT_IDENT_SYMBOL),
 	DT_BPF_SYMBOL(dt_substr, DT_IDENT_SYMBOL),
diff --git a/test/unittest/dif/strchr.d b/test/unittest/dif/strchr.d
index 0a8ddbbf..d42425c6 100644
--- a/test/unittest/dif/strchr.d
+++ b/test/unittest/dif/strchr.d
@@ -1,4 +1,3 @@
-/* @@xfail: dtv2 */
 BEGIN
 {
 	trace(strchr(probename, 'B'));
-- 
2.18.4




More information about the DTrace-devel mailing list