[DTrace-devel] [PATCH 6/8] Add support for rindex() subroutine
Kris Van Hees
kris.van.hees at oracle.com
Thu Oct 14 21:07:29 PDT 2021
On Wed, Sep 29, 2021 at 11:13:39AM -0400, eugene.loh at oracle.com wrote:
> From: Eugene Loh <eugene.loh at oracle.com>
>
> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>
... and added to my staging dev branch. I did add the .type and .size
markers to the strchr.S file.
> ---
> bpf/Build | 1 +
> bpf/rindex.S | 166 +++++++++++++++++++++++++++
> libdtrace/dt_cg.c | 87 +++++++++++++-
> libdtrace/dt_dlibs.c | 1 +
> test/stress/fbtsafety/tst.shortstr.d | 3 +-
> test/unittest/funcs/tst.rindex.d | 60 ++++++++++
> test/unittest/funcs/tst.rindex.r | 27 +++++
> 7 files changed, 342 insertions(+), 3 deletions(-)
> create mode 100644 bpf/rindex.S
> create mode 100644 test/unittest/funcs/tst.rindex.d
> create mode 100644 test/unittest/funcs/tst.rindex.r
>
> diff --git a/bpf/Build b/bpf/Build
> index 5f359293..0b39764f 100644
> --- a/bpf/Build
> +++ b/bpf/Build
> @@ -27,6 +27,7 @@ bpf_dlib_SOURCES = \
> get_tvar.c set_tvar.c \
> index.S \
> probe_error.c \
> + rindex.S \
> strchr.S \
> strcmp.S \
> strjoin.S \
> diff --git a/bpf/rindex.S b/bpf/rindex.S
> new file mode 100644
> index 00000000..b2ad398b
> --- /dev/null
> +++ b/bpf/rindex.S
> @@ -0,0 +1,166 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
> + */
> +
> +/*
> + * This approach to implementing rindex(s, t[, start]) mimics the approach
> + * used for index(). Notably, it uses dt_index_match().
> + *
> + * The rindex() semantics are to find the highest index where t appears in s,
> + * but at most the start value.
> + *
> + * The user semantics are that if the start is not specified, it is as though
> + * len(s)-len(t) were used. A negative start means no match will be found.
> + * These user semantics are modified internally, within the implementation:
> + * - In dt_cg.c, start<0 is immediately rejected.
> + * - In the call to rindex.S, start<0 means to use the default.
> + */
> +
> +#define DT_STRLEN_BYTES 2
> +
> +#define BPF_FUNC_probe_read 4
> +#define BPF_FUNC_probe_read_str 45
> +
> + .text
> +/*
> + * int dt_rindex(const char *s, const char *t, int start, char *tmp1, char *tmp2)
> + * {
> + * uint64_t r0, tlen;
> + * uint64_t buflen;
> + *
> + * // ignore length prefix
> + * s += DT_STRLEN_BYTES;
> + * t += DT_STRLEN_BYTES;
> + *
> + * // round buflen for dt_index_match()
> + * buflen = STRSZ rounded up to multiple of 8;
> + *
> + * // keep a copy of t in tmp2
> + * tlen = bpf_probe_read_str(tmp2, buflen, t);
> + *
> + * // determine actual start index
> + * r0 = bpf_probe_read_str(tmp1, buflen, s);
> + * r0 -= tlen; // maximum possible index value
> + * if (start < 0) start = r0;
> + * if (start > r0) start = r0;
> + *
> + * // drop terminating NULL
> + * tlen--;
> + *
> + * // Fill end of tmp1 with contents from tmp2
> + * // to suppress spurious mismatches.
> + * bpf_probe_read(tmp1 + tlen, buflen - tlen, tmp2 + tlen);
> + *
> + * // Apparently, the BPF verifier prefers incrementing loop counts.
> + * // So loop cnt goes from 0 to start; idx goes from start to 0.
> + * cnt = 0;
> + * Lloop:
> + * // check loop
> + * if (cnt > start) return -1;
> + * idx = start - cnt;
> + *
> + * // fill start of tmp1 with s, starting at the proposed index
> + * bpf_probe_read(tmp1, tlen, s + idx);
> + *
> + * // keep looping if not a match
> + * r0 = dt_index_match(tmp1, tmp2, buflen);
> + * cnt++;
> + * if (r0 == 0) goto Lloop;
> + *
> + * return idx;
> + * }
> + *
> + * Some variables are kept in registers or spilled to the stack:
> + * r6 = idx [%fp+-8] = s
> + * r7 = tmp1 [%fp+-16] = buflen
> + * r8 = tmp2 [%fp+-24] = start
> + * r9 = tlen [%fp+-32] = cnt
> + * but t is not needed once we have copied its contents to tmp2.
> + */
> + .align 4
> + .global dt_rindex
> +dt_rindex:
> + add %r1, DT_STRLEN_BYTES /* s += DT_STRLEN_BYTES */
> + add %r2, DT_STRLEN_BYTES /* t += DT_STRLEN_BYTES */
> +
> + lddw %r6, STRSZ
> + add %r6, 7
> + and %r6, -8
> + stxdw [%fp+-16], %r6 /* buflen = STRSZ rounded up to multiple of 8 */
> +
> + stxdw [%fp+-8], %r1 /* stash copies of some variables */
> + stxdw [%fp+-24], %r3
> + mov %r7, %r4
> + mov %r8, %r5
> +
> + mov %r3, %r2
> + ldxdw %r2, [%fp+-16]
> + mov %r1, %r8
> + call BPF_FUNC_probe_read_str /* tlen = bpf_probe_read_str(tmp2, buflen, t) */
> + jsle %r0, 0, .Lerror
> + mov %r9, %r0
> +
> + mov %r1, %r7
> + ldxdw %r2, [%fp+-16]
> + ldxdw %r3, [%fp+-8]
> + call BPF_FUNC_probe_read_str /* r0 = bpf_probe_read_str(tmp1, buflen, s) */
> +
> + sub %r0, %r9 /* r0 -= tlen */
> + jslt %r0, 0, .Lerror
> + ldxdw %r3, [%fp+-24]
> + jsge %r3, 0, 1 /* if (start < 0) start = r0 */
> + mov %r3, %r0
> + jle %r3, %r0, 1 /* if (start > r0) start = r0 */
> + mov %r3, %r0
> + stxdw [%fp+-24], %r3
> +
> + sub %r9, 1 /* tlen-- */
> +
> + mov %r1, %r7
> + add %r1, %r9
> + ldxdw %r2, [%fp+-16]
> + sub %r2, %r9
> + mov %r3, %r8
> + add %r3, %r9
> + call BPF_FUNC_probe_read /* bpf_probe_read(tmp1 + tlen, buflen - tlen, tmp2 + tlen) */
> +
> + mov %r0, 0
> + stxdw [%fp+-32], %r0 /* cnt = 0 */
> +.Lloop:
> + ldxdw %r0, [%fp+-32]
> + ldxdw %r6, [%fp+-24]
> + jgt %r0, %r6, .Lerror /* if (cnt > start) return -1 */
> +
> + /* help the BPF verifier */
> + ldxdw %r2, [%fp+-16]
> + jge %r0, %r2, .Lerror /* if (cnt >= buflen) return -1 */
> +
> + sub %r6, %r0 /* idx = start - cnt */
> + jlt %r6, 0, .Lerror
> + jge %r6, %r2, .Lerror
> +
> + mov %r1, %r7
> + mov %r2, %r9
> + ldxdw %r3, [%fp+-8]
> + add %r3, %r6
> + call BPF_FUNC_probe_read /* bpf_probe_read(tmp1, tlen, s + idx) */
> +
> + mov %r1, %r7
> + mov %r2, %r8
> + ldxdw %r3, [%fp+-16]
> + call dt_index_match /* r0 = dt_index_match(tmp1, tmp2, buflen) */
> +
> + ldxdw %r1, [%fp+-32]
> + add %r1, 1 /* cnt++ */
> + stxdw [%fp+-32], %r1
> +
> + jeq %r0, 0, .Lloop /* if (r0 == 0) goto Lloop */
> +
> + /* done */
> + mov %r0, %r6 /* return idx */
> + exit
> +
> +.Lerror:
> + mov %r0, -1
> + exit
> diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> index 49403986..5b6ec4a9 100644
> --- a/libdtrace/dt_cg.c
> +++ b/libdtrace/dt_cg.c
> @@ -3231,6 +3231,91 @@ dt_cg_subr_index(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
> TRACE_REGSET(" subr-index:End ");
> }
>
> +static void
> +dt_cg_subr_rindex(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
> +{
> + dt_node_t *s = dnp->dn_args;
> + dt_node_t *t = s->dn_list;
> + dt_node_t *start = t->dn_list;
> + dt_ident_t *idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_rindex");
> + uint64_t off1, off2;
> + uint_t Lneg_start = dt_irlist_label(dlp);
> +
> + assert(idp != NULL);
> +
> + TRACE_REGSET(" subr-rindex:Begin");
> +
> + /* evaluate arguments to D subroutine rindex() */
> + dt_cg_node(s, dlp, drp);
> + dt_cg_check_notnull(dlp, drp, s->dn_reg);
> + dt_cg_node(t, dlp, drp);
> + dt_cg_check_notnull(dlp, drp, t->dn_reg);
> + if (start != NULL)
> + dt_cg_node(start, dlp, drp);
> +
> + /* start setting up function call to BPF function dt_rindex() */
> + if (dt_regset_xalloc_args(drp) == -1)
> + longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> +
> + /* string s and substring t */
> + emit(dlp, BPF_MOV_REG(BPF_REG_1, s->dn_reg));
> + dt_regset_free(drp, s->dn_reg);
> + if (s->dn_tstring)
> + dt_cg_tstring_free(yypcb, s);
> + emit(dlp, BPF_MOV_REG(BPF_REG_2, t->dn_reg));
> + dt_regset_free(drp, t->dn_reg);
> + if (t->dn_tstring)
> + dt_cg_tstring_free(yypcb, t);
> +
> + /* scratch memory */
> + off1 = dt_cg_tstring_xalloc(yypcb);
> + off2 = dt_cg_tstring_xalloc(yypcb);
> +
> + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_FP, DT_STK_DCTX));
> + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_4, DCTX_MEM));
> + emit(dlp, BPF_MOV_REG(BPF_REG_5, BPF_REG_4));
> + emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, off1));
> + emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, off2));
> +
> + /* allocate return register */
> + dt_regset_xalloc(drp, BPF_REG_0);
> +
> + /*
> + * Now deal with the "start" argument. If it's negative, we will
> + * bypass the function call. That means we set all those other
> + * arguments up for nothing, but doing it this way simplifies
> + * code generation and start<0 should be a rare condition.
> + *
> + * If there is no "start" argument, then specify -1. That is,
> + * we are modifying the semantics of start<0 for internal purposes.
> + */
> + if (start) {
> + emit(dlp, BPF_MOV_IMM(BPF_REG_0, -1));
> + emit(dlp, BPF_BRANCH_IMM(BPF_JSLT, start->dn_reg, 0, Lneg_start));
> + emit(dlp, BPF_MOV_REG(BPF_REG_3, start->dn_reg));
> + dt_regset_free(drp, start->dn_reg);
> + } else
> + emit(dlp, BPF_MOV_IMM(BPF_REG_3, -1));
> +
> + /* function call */
> + emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
> + emitl(dlp, Lneg_start,
> + BPF_NOP());
> + dt_regset_free_args(drp);
> +
> + /* clean up */
> + dt_cg_tstring_xfree(yypcb, off1);
> + dt_cg_tstring_xfree(yypcb, off2);
> +
> + dnp->dn_reg = dt_regset_alloc(drp);
> + if (dnp->dn_reg == -1)
> + longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> + emit(dlp, BPF_MOV_REG(dnp->dn_reg, BPF_REG_0));
> + dt_regset_free(drp, BPF_REG_0);
> +
> + TRACE_REGSET(" subr-rindex:End ");
> +}
> +
> static void
> dt_cg_subr_speculation(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
> {
> @@ -3525,7 +3610,7 @@ static dt_cg_subr_f *_dt_cg_subr[DIF_SUBR_MAX + 1] = {
> [DIF_SUBR_STRTOK] = NULL,
> [DIF_SUBR_SUBSTR] = &dt_cg_subr_substr,
> [DIF_SUBR_INDEX] = &dt_cg_subr_index,
> - [DIF_SUBR_RINDEX] = NULL,
> + [DIF_SUBR_RINDEX] = &dt_cg_subr_rindex,
> [DIF_SUBR_HTONS] = &dt_cg_subr_htons,
> [DIF_SUBR_HTONL] = &dt_cg_subr_htonl,
> [DIF_SUBR_HTONLL] = &dt_cg_subr_htonll,
> diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
> index 1fec0147..0e3ebb85 100644
> --- a/libdtrace/dt_dlibs.c
> +++ b/libdtrace/dt_dlibs.c
> @@ -60,6 +60,7 @@ static const dt_ident_t dt_bpf_symbols[] = {
> DT_BPF_SYMBOL(dt_get_tvar, DT_IDENT_SYMBOL),
> DT_BPF_SYMBOL(dt_set_tvar, DT_IDENT_SYMBOL),
> DT_BPF_SYMBOL(dt_index, DT_IDENT_SYMBOL),
> + DT_BPF_SYMBOL(dt_rindex, DT_IDENT_SYMBOL),
> DT_BPF_SYMBOL(dt_strchr, DT_IDENT_SYMBOL),
> DT_BPF_SYMBOL(dt_strcmp, DT_IDENT_SYMBOL),
> DT_BPF_SYMBOL(dt_strjoin, DT_IDENT_SYMBOL),
> diff --git a/test/stress/fbtsafety/tst.shortstr.d b/test/stress/fbtsafety/tst.shortstr.d
> index a7e51af6..8d07c533 100644
> --- a/test/stress/fbtsafety/tst.shortstr.d
> +++ b/test/stress/fbtsafety/tst.shortstr.d
> @@ -4,8 +4,7 @@
> * Licensed under the Universal Permissive License v 1.0 as shown at
> * http://oss.oracle.com/licenses/upl.
> */
> -/* @@skip: dtv2 */
> -/* @@no-xfail */
> +/* @@xfail: dtv2 */
>
> #pragma D option quiet
> #pragma D option strsize=16
> diff --git a/test/unittest/funcs/tst.rindex.d b/test/unittest/funcs/tst.rindex.d
> new file mode 100644
> index 00000000..7af96412
> --- /dev/null
> +++ b/test/unittest/funcs/tst.rindex.d
> @@ -0,0 +1,60 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +/*
> + * Start at the prescribed index and work backwards for the first match.
> + * The default start is len(str)-len(substr).
> + */
> +
> +#pragma D option quiet
> +
> +/* cut string size back a little to ease pressure on BPF verifier */
> +#pragma D option strsize=144
> +
> +BEGIN {
> + x = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#";
> + y = "abcdefghijklmnopqrstuvwxyz";
> + printf(" 64 %3d\n", rindex(x, y));
> + printf(" -1 %3d\n", rindex(x, y, -1));
> + printf(" 0 %3d\n", rindex(x, y, 0));
> + printf(" 0 %3d\n", rindex(x, y, 1));
> + printf(" 64 %3d\n", rindex(x, y, 70));
> + printf(" 64 %3d\n", rindex(x, y, 200));
> +
> + y = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#a";
> + printf(" -1 %3d\n", rindex(x, y, -1));
> + printf(" 0 %3d\n", rindex(x, y));
> + printf(" 0 %3d\n", rindex(x, y, 1));
> +
> + x = "";
> + y = "klmnopqrstuvw";
> + printf(" -1 %3d\n", rindex(x, y));
> + printf(" 13 %3d\n", rindex(y, x));
> +
> + x = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
> + y = "klmnopqrstuvw";
> + printf("114 %3d\n", rindex(x, y));
> + printf(" -1 %3d\n", rindex(x, y, -1));
> + printf(" -1 %3d\n", rindex(x, y, 0));
> + printf(" -1 %3d\n", rindex(x, y, 5));
> + printf(" 10 %3d\n", rindex(x, y, 10));
> + printf(" 10 %3d\n", rindex(x, y, 30));
> + printf(" 36 %3d\n", rindex(x, y, 40));
> + printf(" 36 %3d\n", rindex(x, y, 60));
> + printf(" 62 %3d\n", rindex(x, y, 70));
> + printf(" 62 %3d\n", rindex(x, y, 80));
> + printf(" 88 %3d\n", rindex(x, y, 90));
> + printf("114 %3d\n", rindex(x, y, 120));
> + printf("114 %3d\n", rindex(x, y, 200));
> +
> + x = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
> + y = "aaaa";
> + printf(" 28 %3d\n", rindex(x, y));
> + printf(" -1 %3d\n", rindex(y, x));
> +
> + exit(0);
> +}
> diff --git a/test/unittest/funcs/tst.rindex.r b/test/unittest/funcs/tst.rindex.r
> new file mode 100644
> index 00000000..b6aebb10
> --- /dev/null
> +++ b/test/unittest/funcs/tst.rindex.r
> @@ -0,0 +1,27 @@
> + 64 64
> + -1 -1
> + 0 0
> + 0 0
> + 64 64
> + 64 64
> + -1 -1
> + 0 0
> + 0 0
> + -1 -1
> + 13 13
> +114 114
> + -1 -1
> + -1 -1
> + -1 -1
> + 10 10
> + 10 10
> + 36 36
> + 36 36
> + 62 62
> + 62 62
> + 88 88
> +114 114
> +114 114
> + 28 28
> + -1 -1
> +
> --
> 2.18.4
>
>
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel
More information about the DTrace-devel
mailing list