[DTrace-devel] [PATCH 5/8] Add support for index() subroutine

Kris Van Hees kris.van.hees at oracle.com
Thu Oct 14 21:06:56 PDT 2021


On Wed, Sep 29, 2021 at 11:13:38AM -0400, eugene.loh at oracle.com wrote:
> From: Eugene Loh <eugene.loh at oracle.com>
> 
> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>

Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>

... and added to my staging dev branch.  I did add the .type and .size
markers to the strchr.S file.

> ---
>  bpf/Build                        |   1 +
>  bpf/index.S                      | 206 +++++++++++++++++++++++++++++++
>  libdtrace/dt_cg.c                |  63 +++++++++-
>  libdtrace/dt_dlibs.c             |   1 +
>  test/unittest/dif/index2arg.d    |   1 -
>  test/unittest/dif/index3arg.d    |   1 -
>  test/unittest/funcs/tst.index2.d |  48 +++++++
>  test/unittest/funcs/tst.index2.r |  23 ++++
>  8 files changed, 341 insertions(+), 3 deletions(-)
>  create mode 100644 bpf/index.S
>  create mode 100644 test/unittest/funcs/tst.index2.d
>  create mode 100644 test/unittest/funcs/tst.index2.r
> 
> diff --git a/bpf/Build b/bpf/Build
> index 6e9ffa20..5f359293 100644
> --- a/bpf/Build
> +++ b/bpf/Build
> @@ -25,6 +25,7 @@ bpf_dlib_SOURCES = \
>  	agg_lqbin.c agg_qbin.c \
>  	get_bvar.c \
>  	get_tvar.c set_tvar.c \
> +	index.S \
>  	probe_error.c \
>  	strchr.S \
>  	strcmp.S \
> diff --git a/bpf/index.S b/bpf/index.S
> new file mode 100644
> index 00000000..2ec9f133
> --- /dev/null
> +++ b/bpf/index.S
> @@ -0,0 +1,206 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
> + */
> +
> +#define DT_STRLEN_BYTES		2
> +
> +#define BPF_FUNC_probe_read	4
> +#define BPF_FUNC_probe_read_str	45
> +
> +	.text
> +/*
> + * For two buffers, return:
> + *     1 if the two buffers match in every bit
> + *     0 if the two buffers mismatch in any bit
> + * To help the BPF verifier, minimize branching by:
> + *   - using a len whose exact value is known to the BPF verifier
> + *   - operating on 64 bits at a time (len is a multiple of 8)
> + *   - replacing conditional branching with arithmetic operations
> + *       such as ^ | & etc.
> + *
> + * uint64_t dt_index_match(char *tmp1, char *tmp2, uint64_t len)
> + * {
> + *     r0 = 0;          // accumulate bits indicating mismatches
> + *     r6 = 0;          // loop counter
> + *     L1:
> + *         if (r6 >= len) goto L2;
> + *
> + *         r4 = *((uint64_t*)&tmp1[r6]);
> + *         r5 = *((uint64_t*)&tmp2[r6]);
> + *         r0 |= (r4 ^ r5);
> + *
> + *         r6 += 8;
> + *         goto L1;
> + *
> + *     L2:
> + *                       //            value of r0
> + *                       //   perfect match    any mismatches
> + *                       //        == 0              != 0
> + *     r0 |= (r0 >> 32); //        == 0              != 0
> + *     r0 &= 0xffffffff; //        == 0              >  0
> + *     r0 -= 1;          //        <  0              >= 0
> + *     r0 >>= 63;        //        == 1              == 0
> + *
> + *     return r0;
> + * }
> + */
> +	.align	4
> +	.global	dt_index_match
> +dt_index_match:
> +	mov	%r0, 0				/* r0 = 0: accumulated mismatch bits */
> +	mov	%r6, 0				/* r6 = 0: byte offset into both buffers */
> +
> +.L1:
> +	jge	%r6, %r3, .L2			/* if (r6 >= len) goto L2 */
> +
> +	mov	%r4, %r1
> +	add	%r4, %r6
> +	ldxdw	%r4, [%r4+0]			/* r4 = *((uint64_t *)&tmp1[r6]) */
> +	mov	%r5, %r2
> +	add	%r5, %r6
> +	ldxdw	%r5, [%r5+0]			/* r5 = *((uint64_t *)&tmp2[r6]) */
> +	xor	%r4, %r5
> +	or	%r0, %r4			/* r0 |= (r4 ^ r5) */
> +
> +	add	%r6, 8				/* step to the next 64-bit word */
> +	ja	.L1
> +
> +.L2:
> +	mov	%r4, %r0
> +	rsh	%r4, 32
> +	or	%r0, %r4			/* r0 |= (r0 >> 32) */
> +	and	%r0, 0xffffffff			/* r0 == 0 on a match, > 0 otherwise */
> +	sub	%r0, 1				/* r0 < 0 on a match, >= 0 otherwise */
> +	rsh	%r0, 63				/* r0 == 1 on a match, == 0 otherwise */
> +
> +	exit
> +
> +/*
> + * int dt_index(const char *s, const char *t, int start, char *tmp1, char *tmp2)
> + * {
> + *     uint64_t r0, tlen;
> + *     uint64_t buflen;
> + *
> + *     // ignore length prefix
> + *     s += DT_STRLEN_BYTES;
> + *     t += DT_STRLEN_BYTES;
> + *
> + *     // determine actual start index
> + *     if (start < 0) start = 0;
> + *
> + *     // round buflen for dt_index_match()
> + *     buflen = STRSZ rounded up to multiple of 8;
> + *
> + *     // keep a copy of t in tmp2
> + *     tlen = bpf_probe_read_str(tmp2, buflen, t);
> + *
> + *     // determine maximum possible index value
> + *     maxi = bpf_probe_read_str(tmp1, buflen, s);
> + *     maxi -= tlen;
> + *
> + *     // drop terminating NUL
> + *     tlen--;
> + *
> + *     // Fill end of tmp1 with contents from tmp2
> + *     // to suppress spurious mismatches.
> + *     bpf_probe_read(tmp1 + tlen, buflen - tlen, tmp2 + tlen);
> + *
> + *     Lloop:
> + *         // check loop
> + *         if (start > maxi) return -1;
> + *
> + *         // fill start of tmp1 with s, starting at the proposed index
> + *         bpf_probe_read(tmp1, tlen, s + start);
> + *
> + *         // keep looping if not a match
> + *         r0 = dt_index_match(tmp1, tmp2, buflen);
> + *         start++;
> + *         if (r0 == 0) goto Lloop;
> + *
> + *      start--;
> + *      return start;
> + * }
> + *
> + * Some variables are kept in registers or spilled to the stack:
> + *     r6 = start        [%fp+-8] = s
> + *     r7 = tmp1         [%fp+-16] = buflen
> + *     r8 = tmp2         [%fp+-24] = maxi
> + *     r9 = tlen
> + * but t is not needed once we have copied its contents to tmp2.
> + */
> +	.align	4
> +	.global	dt_index
> +dt_index:
> +	add	%r1, DT_STRLEN_BYTES		/* s += DT_STRLEN_BYTES */
> +	add	%r2, DT_STRLEN_BYTES		/* t += DT_STRLEN_BYTES */
> +
> +	jsge	%r3, 0, 1			/* start >= 0: skip the next instruction */
> +	mov	%r3, 0				/* if (start < 0) start = 0 */
> +
> +	lddw	%r6, STRSZ			/* r6 = STRSZ */
> +	add	%r6, 7
> +	and	%r6, -8				/* round r6 up to a multiple of 8 */
> +	stxdw	[%fp+-16], %r6			/* buflen = STRSZ rounded up to multiple of 8 */
> +
> +	stxdw	[%fp+-8], %r1			/* stash copies of some variables */
> +	mov	%r6, %r3			/* r6 = start */
> +	mov	%r7, %r4			/* r7 = tmp1 */
> +	mov	%r8, %r5			/* r8 = tmp2 */
> +
> +	mov	%r3, %r2			/* arg3 = t */
> +	ldxdw	%r2, [%fp+-16]			/* arg2 = buflen */
> +	mov	%r1, %r8			/* arg1 = tmp2 */
> +	call	BPF_FUNC_probe_read_str		/* tlen = bpf_probe_read_str(tmp2, buflen, t) */
> +	jsle	%r0, 0, .Lerror			/* if (tlen <= 0) goto Lerror */
> +	mov	%r9, %r0			/* r9 = tlen */
> +
> +	mov	%r1, %r7			/* arg1 = tmp1 */
> +	ldxdw	%r2, [%fp+-16]			/* arg2 = buflen */
> +	ldxdw	%r3, [%fp+-8]			/* arg3 = s */
> +	call	BPF_FUNC_probe_read_str		/* maxi = bpf_probe_read_str(tmp1, buflen, s) */
> +
> +	sub	%r0, %r9			/* maxi -= tlen */
> +	jslt	%r0, 0, .Lerror			/* if (maxi < 0) goto Lerror */
> +	stxdw	[%fp+-24], %r0			/* spill maxi to the stack */
> +
> +	sub	%r9, 1				/* tlen-- */
> +
> +	mov	%r1, %r7
> +	add	%r1, %r9			/* arg1 = tmp1 + tlen */
> +	ldxdw	%r2, [%fp+-16]
> +	sub	%r2, %r9			/* arg2 = buflen - tlen */
> +	mov	%r3, %r8
> +	add	%r3, %r9			/* arg3 = tmp2 + tlen */
> +	call	BPF_FUNC_probe_read		/* bpf_probe_read(tmp1 + tlen, buflen - tlen, tmp2 + tlen) */
> +
> +.Lloop:
> +	/* help the BPF verifier */
> +	ldxdw	%r0, [%fp+-16]			/* r0 = buflen */
> +	jge	%r6, %r0, .Lerror		/* if (start >= buflen) goto Lerror */
> +
> +	ldxdw	%r0, [%fp+-24]			/* r0 = maxi */
> +	jgt	%r6, %r0, .Lerror		/* if (start > maxi) goto Lerror */
> +
> +	mov	%r1, %r7			/* arg1 = tmp1 */
> +	mov	%r2, %r9			/* arg2 = tlen */
> +	ldxdw	%r3, [%fp+-8]
> +	add	%r3, %r6			/* arg3 = s + start */
> +	call	BPF_FUNC_probe_read		/* bpf_probe_read(tmp1, tlen, s + start) */
> +
> +	mov	%r1, %r7			/* arg1 = tmp1 */
> +	mov	%r2, %r8			/* arg2 = tmp2 */
> +	ldxdw	%r3, [%fp+-16]			/* arg3 = buflen */
> +	call	dt_index_match			/* r0 = dt_index_match(tmp1, tmp2, buflen) */
> +
> +	add	%r6, 1				/* start++ */
> +	jeq	%r0, 0, .Lloop			/* if (r0 == 0) goto Lloop */
> +
> +	/* done */
> +	sub	%r6, 1				/* start-- */
> +	mov	%r0, %r6			/* return start */
> +	exit
> +
> +.Lerror:
> +	mov	%r0, -1				/* return -1 */
> +	exit
> diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> index cb58f230..49403986 100644
> --- a/libdtrace/dt_cg.c
> +++ b/libdtrace/dt_cg.c
> @@ -3170,6 +3170,67 @@ dt_cg_array_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
>  	emit(dlp, BPF_ALU64_REG((dnp->dn_flags & DT_NF_SIGNED) ? BPF_ARSH : BPF_RSH, dnp->dn_reg, n));
>  }
>  
> +static void
> +dt_cg_subr_index(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
> +{
> +	dt_node_t	*s = dnp->dn_args;	/* 1st arg: string to search */
> +	dt_node_t	*t = s->dn_list;	/* 2nd arg: substring to find */
> +	dt_node_t	*start = t->dn_list;	/* 3rd arg: start index, NULL if omitted */
> +	dt_ident_t	*idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_index");
> +	uint64_t	off1, off2;		/* offsets added to the DCTX_MEM pointer below */
> +
> +	assert(idp != NULL);
> +
> +	TRACE_REGSET("    subr-index:Begin");
> +
> +	dt_cg_node(s, dlp, drp);		/* generate code for s, then NULL-check it */
> +	dt_cg_check_notnull(dlp, drp, s->dn_reg);
> +	dt_cg_node(t, dlp, drp);		/* generate code for t, then NULL-check it */
> +	dt_cg_check_notnull(dlp, drp, t->dn_reg);
> +	if (start != NULL)
> +		dt_cg_node(start, dlp, drp);
> +
> +	if (dt_regset_xalloc_args(drp) == -1)	/* presumably reserves the call argument registers */
> +		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> +	emit(dlp,  BPF_MOV_REG(BPF_REG_1, s->dn_reg));	/* %r1 = s */
> +	dt_regset_free(drp, s->dn_reg);
> +	if (s->dn_tstring)
> +		dt_cg_tstring_free(yypcb, s);
> +	emit(dlp,  BPF_MOV_REG(BPF_REG_2, t->dn_reg));	/* %r2 = t */
> +	dt_regset_free(drp, t->dn_reg);
> +	if (t->dn_tstring)
> +		dt_cg_tstring_free(yypcb, t);
> +	if (start) {
> +		emit(dlp,  BPF_MOV_REG(BPF_REG_3, start->dn_reg));	/* %r3 = start */
> +		dt_regset_free(drp, start->dn_reg);
> +	} else
> +		emit(dlp,  BPF_MOV_IMM(BPF_REG_3, 0));	/* %r3 = 0 when start is omitted */
> +
> +	off1 = dt_cg_tstring_xalloc(yypcb);	/* scratch area passed as tmp1 */
> +	off2 = dt_cg_tstring_xalloc(yypcb);	/* scratch area passed as tmp2 */
> +
> +	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_FP, DT_STK_DCTX));	/* %r4 = *(%fp + DT_STK_DCTX) */
> +	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_4, DCTX_MEM));	/* %r4 = *(%r4 + DCTX_MEM) */
> +	emit(dlp,  BPF_MOV_REG(BPF_REG_5, BPF_REG_4));	/* %r5 = same base pointer */
> +	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, off1));	/* %r4 = base + off1 (tmp1) */
> +	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, off2));	/* %r5 = base + off2 (tmp2) */
> +
> +	dt_regset_xalloc(drp, BPF_REG_0);
> +	emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);	/* call dt_index(s, t, start, tmp1, tmp2) */
> +	dt_regset_free_args(drp);
> +
> +	dt_cg_tstring_xfree(yypcb, off1);	/* scratch areas are only needed during the call */
> +	dt_cg_tstring_xfree(yypcb, off2);
> +
> +	dnp->dn_reg = dt_regset_alloc(drp);
> +	if (dnp->dn_reg == -1)
> +		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> +	emit(dlp,  BPF_MOV_REG(dnp->dn_reg, BPF_REG_0));	/* result register = %r0 (return value) */
> +	dt_regset_free(drp, BPF_REG_0);
> +
> +	TRACE_REGSET("    subr-index:End  ");
> +}
> +
>  static void
>  dt_cg_subr_speculation(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
>  {
> @@ -3463,7 +3524,7 @@ static dt_cg_subr_f *_dt_cg_subr[DIF_SUBR_MAX + 1] = {
>  	[DIF_SUBR_STRSTR]		= NULL,
>  	[DIF_SUBR_STRTOK]		= NULL,
>  	[DIF_SUBR_SUBSTR]		= &dt_cg_subr_substr,
> -	[DIF_SUBR_INDEX]		= NULL,
> +	[DIF_SUBR_INDEX]		= &dt_cg_subr_index,
>  	[DIF_SUBR_RINDEX]		= NULL,
>  	[DIF_SUBR_HTONS]		= &dt_cg_subr_htons,
>  	[DIF_SUBR_HTONL]		= &dt_cg_subr_htonl,
> diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
> index d9836f40..1fec0147 100644
> --- a/libdtrace/dt_dlibs.c
> +++ b/libdtrace/dt_dlibs.c
> @@ -59,6 +59,7 @@ static const dt_ident_t		dt_bpf_symbols[] = {
>  	DT_BPF_SYMBOL(dt_get_string, DT_IDENT_SYMBOL),
>  	DT_BPF_SYMBOL(dt_get_tvar, DT_IDENT_SYMBOL),
>  	DT_BPF_SYMBOL(dt_set_tvar, DT_IDENT_SYMBOL),
> +	DT_BPF_SYMBOL(dt_index, DT_IDENT_SYMBOL),
>  	DT_BPF_SYMBOL(dt_strchr, DT_IDENT_SYMBOL),
>  	DT_BPF_SYMBOL(dt_strcmp, DT_IDENT_SYMBOL),
>  	DT_BPF_SYMBOL(dt_strjoin, DT_IDENT_SYMBOL),
> diff --git a/test/unittest/dif/index2arg.d b/test/unittest/dif/index2arg.d
> index 7d80538f..7eb8f15b 100644
> --- a/test/unittest/dif/index2arg.d
> +++ b/test/unittest/dif/index2arg.d
> @@ -1,4 +1,3 @@
> -/* @@xfail: dtv2 */
>  BEGIN
>  {
>  	exit(index("BEGINNING", "G") == 2 ? 0 : 1);
> diff --git a/test/unittest/dif/index3arg.d b/test/unittest/dif/index3arg.d
> index d8c3db4f..886ba93e 100644
> --- a/test/unittest/dif/index3arg.d
> +++ b/test/unittest/dif/index3arg.d
> @@ -1,4 +1,3 @@
> -/* @@xfail: dtv2 */
>  BEGIN
>  {
>  	exit(index("BEGINNING", "G", 3) == 8 ? 0 : 1);
> diff --git a/test/unittest/funcs/tst.index2.d b/test/unittest/funcs/tst.index2.d
> new file mode 100644
> index 00000000..cf3cb92a
> --- /dev/null
> +++ b/test/unittest/funcs/tst.index2.d
> @@ -0,0 +1,48 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +#pragma D option quiet
> +
> +/* cut string size back a little to ease pressure on BPF verifier */
> +#pragma D option strsize=184
> +
> +BEGIN {
> +	x = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#";	/* a 64-char pattern, twice */
> +	y = "abcdefghijklmnopqrstuvwxyz";
> +	printf("  0 %3d\n", index(x, y));	/* each line prints the expected value, then the actual one */
> +	printf("  0 %3d\n", index(x, y,  -1));
> +	printf("  0 %3d\n", index(x, y,   0));
> +	printf(" 64 %3d\n", index(x, y,   1));
> +	printf(" -1 %3d\n", index(x, y, 100));
> +	printf(" -1 %3d\n", index(x, y, 200));
> +
> +	y = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#a";	/* 65 chars: the 64-char pattern plus "a" */
> +	printf("  0 %3d\n", index(x, y, -1));
> +	printf("  0 %3d\n", index(x, y));
> +	printf(" -1 %3d\n", index(x, y, 1));
> +
> +	x = "";	/* empty-string cases */
> +	y = "klmnopqrstuvw";
> +	printf(" -1 %3d\n", index(x, y));
> +	printf("  0 %3d\n", index(y, x));
> +
> +	x = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";	/* the 26-letter alphabet, five times */
> +	y = "klmnopqrstuvw";	/* occurs at offset 10 of every 26-char period */
> +	printf(" 10 %3d\n", index(x, y, -1));
> +	printf(" 10 %3d\n", index(x, y));
> +	printf(" 10 %3d\n", index(x, y, 10));
> +	printf(" 36 %3d\n", index(x, y, 20));
> +	printf(" 36 %3d\n", index(x, y, 30));
> +	printf(" 62 %3d\n", index(x, y, 40));
> +	printf(" 62 %3d\n", index(x, y, 50));
> +	printf(" 62 %3d\n", index(x, y, 60));
> +	printf(" 88 %3d\n", index(x, y, 70));
> +	printf(" 88 %3d\n", index(x, y, 80));
> +	printf("114 %3d\n", index(x, y, 90));
> +
> +	exit(0);
> +}
> diff --git a/test/unittest/funcs/tst.index2.r b/test/unittest/funcs/tst.index2.r
> new file mode 100644
> index 00000000..0372e9f5
> --- /dev/null
> +++ b/test/unittest/funcs/tst.index2.r
> @@ -0,0 +1,23 @@
> +  0   0
> +  0   0
> +  0   0
> + 64  64
> + -1  -1
> + -1  -1
> +  0   0
> +  0   0
> + -1  -1
> + -1  -1
> +  0   0
> + 10  10
> + 10  10
> + 10  10
> + 36  36
> + 36  36
> + 62  62
> + 62  62
> + 62  62
> + 88  88
> + 88  88
> +114 114
> +
> -- 
> 2.18.4
> 
> 
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel



More information about the DTrace-devel mailing list