[DTrace-devel] [PATCH 6/8] Add support for rindex() subroutine

eugene.loh at oracle.com eugene.loh at oracle.com
Wed Sep 29 08:13:39 PDT 2021


From: Eugene Loh <eugene.loh at oracle.com>

Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
 bpf/Build                            |   1 +
 bpf/rindex.S                         | 166 +++++++++++++++++++++++++++
 libdtrace/dt_cg.c                    |  87 +++++++++++++-
 libdtrace/dt_dlibs.c                 |   1 +
 test/stress/fbtsafety/tst.shortstr.d |   3 +-
 test/unittest/funcs/tst.rindex.d     |  60 ++++++++++
 test/unittest/funcs/tst.rindex.r     |  27 +++++
 7 files changed, 342 insertions(+), 3 deletions(-)
 create mode 100644 bpf/rindex.S
 create mode 100644 test/unittest/funcs/tst.rindex.d
 create mode 100644 test/unittest/funcs/tst.rindex.r

diff --git a/bpf/Build b/bpf/Build
index 5f359293..0b39764f 100644
--- a/bpf/Build
+++ b/bpf/Build
@@ -27,6 +27,7 @@ bpf_dlib_SOURCES = \
 	get_tvar.c set_tvar.c \
 	index.S \
 	probe_error.c \
+	rindex.S \
 	strchr.S \
 	strcmp.S \
 	strjoin.S \
diff --git a/bpf/rindex.S b/bpf/rindex.S
new file mode 100644
index 00000000..b2ad398b
--- /dev/null
+++ b/bpf/rindex.S
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * This approach to implementing rindex(s, t[, start]) mimics the approach
+ * used for index().  Notably, it uses dt_index_match().
+ *
+ * The rindex() semantics are to find the highest index where t appears in s,
+ * but at most the start value.
+ *
+ * The user semantics are that if the start is not specified, it is as though
+ * len(s)-len(t) were used.  A negative start means no match will be found.
+ * These user semantics are modified internally, within the implementation:
+ *   - In dt_cg.c, start<0 is immediately rejected.
+ *   - In the call to rindex.S, start<0 means to use the default.
+ */
+
+#define DT_STRLEN_BYTES		2
+
+#define BPF_FUNC_probe_read	4
+#define BPF_FUNC_probe_read_str	45
+
+	.text
+/*
+ * int dt_rindex(const char *s, const char *t, int start, char *tmp1, char *tmp2)
+ * {
+ *     uint64_t r0, tlen;
+ *     uint64_t buflen;
+ *
+ *     // ignore length prefix
+ *     s += DT_STRLEN_BYTES;
+ *     t += DT_STRLEN_BYTES;
+ *
+ *     // round buflen for dt_index_match()
+ *     buflen = STRSZ rounded up to multiple of 8;
+ *
+ *     // keep a copy of t in tmp2
+ *     tlen = bpf_probe_read_str(tmp2, buflen, t);
+ *
+ *     // determine actual start index
+ *     r0 = bpf_probe_read_str(tmp1, buflen, s);
+ *     r0 -= tlen;     // maximum possible index value
+ *     if (start < 0) start = r0;
+ *     if (start > r0) start = r0;
+ *
+ *     // drop terminating NULL
+ *     tlen--;
+ *
+ *     // Fill end of tmp1 with contents from tmp2
+ *     // to suppress spurious mismatches.
+ *     bpf_probe_read(tmp1 + tlen, buflen - tlen, tmp2 + tlen);
+ *
+ *     // Apparently, the BPF verifier prefers incrementing loop counts.
+ *     // So loop cnt goes from 0 to start; idx goes from start to 0.
+ *     cnt = 0;
+ *     Lloop:
+ *         // check loop
+ *         if (cnt > start) return -1;
+ *         idx = start - cnt;
+ *
+ *         // fill start of tmp1 with s, starting at the proposed index
+ *         bpf_probe_read(tmp1, tlen, s + idx);
+ *
+ *         // keep looping if not a match
+ *         r0 = dt_index_match(tmp1, tmp2, buflen);
+ *         cnt++;
+ *         if (r0 == 0) goto Lloop;
+ *
+ *      return idx;
+ * }
+ *
+ * Some variables are kept in registers or spilled to the stack:
+ *     r6 = idx          [%fp+-8] = s
+ *     r7 = tmp1         [%fp+-16] = buflen
+ *     r8 = tmp2         [%fp+-24] = start
+ *     r9 = tlen         [%fp+-32] = cnt
+ * but t is not needed once we have copied its contents to tmp2.
+ */
+	.align	4
+	.global	dt_rindex
+dt_rindex:
+	add	%r1, DT_STRLEN_BYTES		/* s += DT_STRLEN_BYTES */
+	add	%r2, DT_STRLEN_BYTES		/* t += DT_STRLEN_BYTES */
+
+	lddw	%r6, STRSZ
+	add	%r6, 7
+	and	%r6, -8
+	stxdw	[%fp+-16], %r6			/* buflen = STRSZ rounded up to multiple of 8 */
+
+	stxdw	[%fp+-8], %r1			/* stash copies of some variables */
+	stxdw	[%fp+-24], %r3
+	mov	%r7, %r4
+	mov	%r8, %r5
+
+	mov	%r3, %r2
+	ldxdw	%r2, [%fp+-16]
+	mov	%r1, %r8
+	call	BPF_FUNC_probe_read_str		/* tlen = bpf_probe_read_str(tmp2, buflen, t) */
+	jsle	%r0, 0, .Lerror
+	mov	%r9, %r0
+
+	mov	%r1, %r7
+	ldxdw	%r2, [%fp+-16]
+	ldxdw	%r3, [%fp+-8]
+	call	BPF_FUNC_probe_read_str		/* r0 = bpf_probe_read_str(tmp1, buflen, s) */
+
+	sub	%r0, %r9			/* r0 -= tlen */
+	jslt	%r0, 0, .Lerror
+	ldxdw	%r3, [%fp+-24]
+	jsge	%r3, 0, 1			/* if (start < 0) start = r0 */
+	mov	%r3, %r0
+	jle	%r3, %r0, 1			/* if (start > r0) start = r0 */
+	mov	%r3, %r0
+	stxdw	[%fp+-24], %r3
+
+	sub	%r9, 1				/* tlen-- */
+
+	mov	%r1, %r7
+	add	%r1, %r9
+	ldxdw	%r2, [%fp+-16]
+	sub	%r2, %r9
+	mov	%r3, %r8
+	add	%r3, %r9
+	call	BPF_FUNC_probe_read		/* bpf_probe_read(tmp1 + tlen, buflen - tlen, tmp2 + tlen) */
+
+	mov	%r0, 0
+	stxdw	[%fp+-32], %r0			/* cnt = 0 */
+.Lloop:
+	ldxdw	%r0, [%fp+-32]
+	ldxdw	%r6, [%fp+-24]
+	jgt	%r0, %r6, .Lerror		/* if (cnt > start) return -1 */
+
+	/* help the BPF verifier */
+	ldxdw	%r2, [%fp+-16]
+	jge	%r0, %r2, .Lerror		/* if (cnt >= buflen) return -1 */
+
+	sub	%r6, %r0			/* idx = start - cnt */
+	jlt	%r6, 0, .Lerror
+	jge	%r6, %r2, .Lerror
+
+	mov	%r1, %r7
+	mov	%r2, %r9
+	ldxdw	%r3, [%fp+-8]
+	add	%r3, %r6
+	call	BPF_FUNC_probe_read		/* bpf_probe_read(tmp1, tlen, s + idx) */
+
+	mov	%r1, %r7
+	mov	%r2, %r8
+	ldxdw	%r3, [%fp+-16]
+	call	dt_index_match			/* r0 = dt_index_match(tmp1, tmp2, buflen) */
+
+	ldxdw	%r1, [%fp+-32]
+	add	%r1, 1				/* cnt++ */
+	stxdw	[%fp+-32], %r1
+
+	jeq	%r0, 0, .Lloop			/* if (r0 == 0) goto Lloop */
+
+	/* done */
+	mov	%r0, %r6			/* return idx */
+	exit
+
+.Lerror:
+	mov	%r0, -1
+	exit
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index 49403986..5b6ec4a9 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -3231,6 +3231,91 @@ dt_cg_subr_index(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
 	TRACE_REGSET("    subr-index:End  ");
 }
 
+static void
+dt_cg_subr_rindex(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
+{
+	dt_node_t	*s = dnp->dn_args;
+	dt_node_t	*t = s->dn_list;
+	dt_node_t	*start = t->dn_list;
+	dt_ident_t	*idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_rindex");
+	uint64_t	off1, off2;
+	uint_t		Lneg_start = dt_irlist_label(dlp);
+
+	assert(idp != NULL);
+
+	TRACE_REGSET("    subr-rindex:Begin");
+
+	/* evaluate arguments to D subroutine rindex() */
+	dt_cg_node(s, dlp, drp);
+	dt_cg_check_notnull(dlp, drp, s->dn_reg);
+	dt_cg_node(t, dlp, drp);
+	dt_cg_check_notnull(dlp, drp, t->dn_reg);
+	if (start != NULL)
+		dt_cg_node(start, dlp, drp);
+
+	/* start setting up function call to BPF function dt_rindex() */
+	if (dt_regset_xalloc_args(drp) == -1)
+		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+
+	/* string s and substring t */
+	emit(dlp,  BPF_MOV_REG(BPF_REG_1, s->dn_reg));
+	dt_regset_free(drp, s->dn_reg);
+	if (s->dn_tstring)
+		dt_cg_tstring_free(yypcb, s);
+	emit(dlp,  BPF_MOV_REG(BPF_REG_2, t->dn_reg));
+	dt_regset_free(drp, t->dn_reg);
+	if (t->dn_tstring)
+		dt_cg_tstring_free(yypcb, t);
+
+	/* scratch memory */
+	off1 = dt_cg_tstring_xalloc(yypcb);
+	off2 = dt_cg_tstring_xalloc(yypcb);
+
+	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_FP, DT_STK_DCTX));
+	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_4, DCTX_MEM));
+	emit(dlp,  BPF_MOV_REG(BPF_REG_5, BPF_REG_4));
+	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, off1));
+	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, off2));
+
+	/* allocate return register */
+	dt_regset_xalloc(drp, BPF_REG_0);
+
+	/*
+	 * Now deal with the "start" argument.  If it's negative, we will
+	 * bypass the function call.  That means we set all those other
+	 * arguments up for nothing, but doing it this way simplifies
+	 * code generation and start<0 should be a rare condition.
+	 *
+	 * If there is no "start" argument, then specify -1.  That is,
+	 * we are modifying the semantics of start<0 for internal purposes.
+	 */
+	if (start) {
+		emit(dlp,  BPF_MOV_IMM(BPF_REG_0, -1));
+		emit(dlp,  BPF_BRANCH_IMM(BPF_JSLT, start->dn_reg, 0, Lneg_start));
+		emit(dlp,  BPF_MOV_REG(BPF_REG_3, start->dn_reg));
+		dt_regset_free(drp, start->dn_reg);
+	} else
+		emit(dlp,  BPF_MOV_IMM(BPF_REG_3, -1));
+
+	/* function call */
+	emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
+	emitl(dlp, Lneg_start,
+		   BPF_NOP());
+	dt_regset_free_args(drp);
+
+	/* clean up */
+	dt_cg_tstring_xfree(yypcb, off1);
+	dt_cg_tstring_xfree(yypcb, off2);
+
+	dnp->dn_reg = dt_regset_alloc(drp);
+	if (dnp->dn_reg == -1)
+		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+	emit(dlp,  BPF_MOV_REG(dnp->dn_reg, BPF_REG_0));
+	dt_regset_free(drp, BPF_REG_0);
+
+	TRACE_REGSET("    subr-rindex:End  ");
+}
+
 static void
 dt_cg_subr_speculation(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
 {
@@ -3525,7 +3610,7 @@ static dt_cg_subr_f *_dt_cg_subr[DIF_SUBR_MAX + 1] = {
 	[DIF_SUBR_STRTOK]		= NULL,
 	[DIF_SUBR_SUBSTR]		= &dt_cg_subr_substr,
 	[DIF_SUBR_INDEX]		= &dt_cg_subr_index,
-	[DIF_SUBR_RINDEX]		= NULL,
+	[DIF_SUBR_RINDEX]		= &dt_cg_subr_rindex,
 	[DIF_SUBR_HTONS]		= &dt_cg_subr_htons,
 	[DIF_SUBR_HTONL]		= &dt_cg_subr_htonl,
 	[DIF_SUBR_HTONLL]		= &dt_cg_subr_htonll,
diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
index 1fec0147..0e3ebb85 100644
--- a/libdtrace/dt_dlibs.c
+++ b/libdtrace/dt_dlibs.c
@@ -60,6 +60,7 @@ static const dt_ident_t		dt_bpf_symbols[] = {
 	DT_BPF_SYMBOL(dt_get_tvar, DT_IDENT_SYMBOL),
 	DT_BPF_SYMBOL(dt_set_tvar, DT_IDENT_SYMBOL),
 	DT_BPF_SYMBOL(dt_index, DT_IDENT_SYMBOL),
+	DT_BPF_SYMBOL(dt_rindex, DT_IDENT_SYMBOL),
 	DT_BPF_SYMBOL(dt_strchr, DT_IDENT_SYMBOL),
 	DT_BPF_SYMBOL(dt_strcmp, DT_IDENT_SYMBOL),
 	DT_BPF_SYMBOL(dt_strjoin, DT_IDENT_SYMBOL),
diff --git a/test/stress/fbtsafety/tst.shortstr.d b/test/stress/fbtsafety/tst.shortstr.d
index a7e51af6..8d07c533 100644
--- a/test/stress/fbtsafety/tst.shortstr.d
+++ b/test/stress/fbtsafety/tst.shortstr.d
@@ -4,8 +4,7 @@
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
-/* @@skip: dtv2 */
-/* @@no-xfail */
+/* @@xfail: dtv2 */
 
 #pragma D option quiet
 #pragma D option strsize=16
diff --git a/test/unittest/funcs/tst.rindex.d b/test/unittest/funcs/tst.rindex.d
new file mode 100644
index 00000000..7af96412
--- /dev/null
+++ b/test/unittest/funcs/tst.rindex.d
@@ -0,0 +1,60 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ */
+
+/*
+ * Start at the prescribed index and work backwards for the first match.
+ * The default start is len(str)-len(substr).
+ */
+
+#pragma D option quiet
+
+/* cut string size back a little to ease pressure on BPF verifier */
+#pragma D option strsize=144
+
+BEGIN {
+	x = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#";
+	y = "abcdefghijklmnopqrstuvwxyz";
+	printf(" 64 %3d\n", rindex(x, y));
+	printf(" -1 %3d\n", rindex(x, y,  -1));
+	printf("  0 %3d\n", rindex(x, y,   0));
+	printf("  0 %3d\n", rindex(x, y,   1));
+	printf(" 64 %3d\n", rindex(x, y,  70));
+	printf(" 64 %3d\n", rindex(x, y, 200));
+
+	y = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#a";
+	printf(" -1 %3d\n", rindex(x, y, -1));
+	printf("  0 %3d\n", rindex(x, y));
+	printf("  0 %3d\n", rindex(x, y, 1));
+
+	x = "";
+	y = "klmnopqrstuvw";
+	printf(" -1 %3d\n", rindex(x, y));
+	printf(" 13 %3d\n", rindex(y, x));
+
+	x = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
+	y = "klmnopqrstuvw";
+	printf("114 %3d\n", rindex(x, y));
+	printf(" -1 %3d\n", rindex(x, y,  -1));
+	printf(" -1 %3d\n", rindex(x, y,   0));
+	printf(" -1 %3d\n", rindex(x, y,   5));
+	printf(" 10 %3d\n", rindex(x, y,  10));
+	printf(" 10 %3d\n", rindex(x, y,  30));
+	printf(" 36 %3d\n", rindex(x, y,  40));
+	printf(" 36 %3d\n", rindex(x, y,  60));
+	printf(" 62 %3d\n", rindex(x, y,  70));
+	printf(" 62 %3d\n", rindex(x, y,  80));
+	printf(" 88 %3d\n", rindex(x, y,  90));
+	printf("114 %3d\n", rindex(x, y, 120));
+	printf("114 %3d\n", rindex(x, y, 200));
+
+	x = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
+	y = "aaaa";
+	printf(" 28 %3d\n", rindex(x, y));
+	printf(" -1 %3d\n", rindex(y, x));
+
+	exit(0);
+}
diff --git a/test/unittest/funcs/tst.rindex.r b/test/unittest/funcs/tst.rindex.r
new file mode 100644
index 00000000..b6aebb10
--- /dev/null
+++ b/test/unittest/funcs/tst.rindex.r
@@ -0,0 +1,27 @@
+ 64  64
+ -1  -1
+  0   0
+  0   0
+ 64  64
+ 64  64
+ -1  -1
+  0   0
+  0   0
+ -1  -1
+ 13  13
+114 114
+ -1  -1
+ -1  -1
+ -1  -1
+ 10  10
+ 10  10
+ 36  36
+ 36  36
+ 62  62
+ 62  62
+ 88  88
+114 114
+114 114
+ 28  28
+ -1  -1
+
-- 
2.18.4




More information about the DTrace-devel mailing list