[DTrace-devel] [PATCH 1/2] Add support for the basename() action

Kris Van Hees kris.van.hees at oracle.com
Fri Nov 19 00:04:31 UTC 2021


On Tue, Nov 16, 2021 at 05:47:59PM -0500, eugene.loh--- via DTrace-devel wrote:
> From: Eugene Loh <eugene.loh at oracle.com>
> 
> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
> ---
>  bpf/Build                           |   1 +
>  bpf/basename.S                      | 139 ++++++++++++++++++++++++++++
>  libdtrace/dt_cg.c                   |  55 ++++++++++-
>  libdtrace/dt_dlibs.c                |   1 +
>  test/unittest/dif/basename.d        |   1 -
>  test/unittest/funcs/tst.basename0.d |  38 ++++++++
>  test/unittest/funcs/tst.basename0.r |  21 +++++
>  7 files changed, 254 insertions(+), 2 deletions(-)
>  create mode 100644 bpf/basename.S
>  create mode 100644 test/unittest/funcs/tst.basename0.d
>  create mode 100644 test/unittest/funcs/tst.basename0.r
> 
> diff --git a/bpf/Build b/bpf/Build
> index d8279411..e66e8dc9 100644
> --- a/bpf/Build
> +++ b/bpf/Build
> @@ -23,6 +23,7 @@ bpf_dlib_DIR := $(current-dir)
>  bpf_dlib_SRCDEPS = $(objdir)/include/.dir.stamp
>  bpf_dlib_SOURCES = \
>  	agg_lqbin.c agg_qbin.c \
> +	basename.S \
>  	get_bvar.c \
>  	get_tvar.c set_tvar.c \
>  	index.S \
> diff --git a/bpf/basename.S b/bpf/basename.S
> new file mode 100644
> index 00000000..983457ad
> --- /dev/null
> +++ b/bpf/basename.S
> @@ -0,0 +1,139 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
> + */
> +
> +#define DT_STRLEN_BYTES		2
> +
> +#define BPF_FUNC_probe_read	4
> +#define BPF_FUNC_probe_read_str	45
> +
> +/*
> + * void dt_basename(char *src, char *dst);
> + */
> +	.text
> +	.align	4
> +	.global	dt_basename
> +	.type	dt_basename, @function
> +dt_basename :
> +#define SRC %r6
> +#define DST %r7
> +#define BGN %r8
> +#define END %r9
> +
> +	/* store copies of input arguments */
> +	mov	SRC, %r1
> +	mov	DST, %r2
> +
> +	/* ignore the string-length prefix */
> +	add	SRC, DT_STRLEN_BYTES
> +
> +	/* r0 = bpf_probe_read_str(dst, STRSZ + 1, src) */
> +	mov	%r1, DST
> +	lddw	%r2, STRSZ
> +	add	%r2, 1
> +	mov	%r3, SRC
> +	call	BPF_FUNC_probe_read_str
> +
> +	/* if (r0 s<= 1) goto Ldot */
> +	jsle	%r0, 1, .Ldot
> +
> +	/* end = r0 - 1 */
> +	mov	END, %r0
> +	sub	END, 1
> +
> +	/*
> +	 * Loop over end, backing it up to find a non-'/' char.
> +	 */
> +.Lend:
> +	/* end-- */
> +	sub	END, 1
> +	/* if (end s< 0) goto Lslash */
> +	jslt	END, 0, .Lslash
> +	/* if (src[end] == '/') goto Lend */
> +	mov	%r1, SRC
> +	add	%r1, END
> +	ldxb	%r1, [%r1+0]
> +	and	%r1, 0xff
> +	jeq	%r1, '/', .Lend
> +
> +	/*
> +	 * Loop over bgn, backing it up to find a '/' char.
> +	 */
> +	/* bgn = end */
> +	mov	BGN, END
> +.Lbgn:
> +	/* bgn-- */
> +	sub	BGN, 1
> +	/* if (bgn s< 0) goto Lcopy */
> +	jslt	BGN, 0, .Lcopy
> +	/* if (src[bgn] != '/') goto Lbgn */
> +	mov	%r1, SRC
> +	add	%r1, BGN
> +	ldxb	%r1, [%r1+0]
> +	and	%r1, 0xff
> +	jne	%r1, '/', .Lbgn
> +
> +.Lcopy:
> +	/*
> +	 * The output string is a copy of the designated substring.
> +	 */
> +	/* end -= bgn (and help the BPF verifier) */
> +	sub	END, BGN

Err, this kind of breaks your whole scheme of using defines for register
assignments because you just changed the meaning of END here.  It was the
index of the end and now it becomes a byte count.

> +	jsge	END, 0, 1
> +	mov	END, 0
> +	/* bgn++ */
> +	add	BGN, 1
> +	/* bpf_probe_read(dst + DT_STRLEN_BYTES, end, &src[bgn]) */
> +	mov	%r1, DST
> +	add	%r1, DT_STRLEN_BYTES
> +	mov	%r2, END
> +	mov	%r3, SRC
> +	add	%r3, BGN
> +	call	BPF_FUNC_probe_read

Why not use probe_read_str() and let that one place the terminating \0 for you
so you don't have to do it here.

> +	/* dst[DT_STRLEN_BYTES + end] = '\0\ */
> +	mov	%r1, DST
> +	add	%r1, END
> +	stb	[%r1+DT_STRLEN_BYTES], 0
> +
> +	/* dt_strlen_store(end, dst) */
> +	mov	%r1, END
> +	mov	%r2, DST
> +	call	dt_strlen_store
> +
> +	/* return */
> +	exit
> +
> +.Ldot:
> +	/*
> +	 * The output string is simply ".".
> +	 */
> +	/* dt_strlen_store(1, dst) */
> +	mov	%r1, 1
> +	mov	%r2, DST
> +	call	dt_strlen_store
> +	/* dst[DT_STRLEN_BYTES+0] = '.' */
> +	stb	[DST+DT_STRLEN_BYTES], '.'
> +	/* dst[DT_STRLEN_BYTES+1] = '\0' */
> +	stb	[DST+(DT_STRLEN_BYTES+1)], '\0'
> +	exit
> +
> +.Lslash:
> +	/*
> +	 * The output string is simply "/".
> +	 */
> +	/* dt_strlen_store(1, dst) */
> +	mov	%r1, 1
> +	mov	%r2, DST
> +	call	dt_strlen_store
> +	/* dst[DT_STRLEN_BYTES+0] = '/' */
> +	stb	[DST+DT_STRLEN_BYTES], '/'
> +	/* dst[DT_STRLEN_BYTES+1] = '\0' */
> +	stb	[DST+(DT_STRLEN_BYTES+1)], '\0'
> +	exit

I would just make these two labels store the '.' or '/' character respectively,
and assign 1 to whatever register for the string length, and then jump to a
label right before dt_strlen_store() in the main code above.

> +#undef SRC
> +#undef DST
> +#undef BGN
> +#undef END
> +	.size	dt_basename, .-dt_basename
> diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> index 537e6706..ac7ea603 100644
> --- a/libdtrace/dt_cg.c
> +++ b/libdtrace/dt_cg.c
> @@ -3294,6 +3294,59 @@ dt_cg_array_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
>  	emit(dlp, BPF_ALU64_REG((dnp->dn_flags & DT_NF_SIGNED) ? BPF_ARSH : BPF_RSH, dnp->dn_reg, n));
>  }
>  
> +/*
> + * This function implements dt_cg_subr_foo() that takes a string
> + * input and produces a tstring output.

This comment doesn't really make sense.  It does not implement st_cg_subr_foo()
at all.  I know what you are getting at but I think it makes a lot more sense
to just spell it out.  This function is a helper function for subroutines that
take a string argument and return the result of applying a given BPF function
(passed by name) on it.  Whatever way you want to word that to reflect this.

> + */
> +static void
> +dt_cg_subr_str_of_str(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp,
> +		      const char *fname)

Perhaps name it something more specific like dt_cg_subr_path_helper() or
something like that, since it is only used for pathname-related subroutines
anyway?

> +{
> +	dt_ident_t	*idp;
> +	dt_node_t	*str = dnp->dn_args;
> +
> +	TRACE_REGSET("    subr-str_of_str:Begin");
> +	dt_cg_node(str, dlp, drp);
> +	dt_cg_check_notnull(dlp, drp, str->dn_reg);
> +
> +	/*
> +	 * The result needs be be a temporary string, so we request one.
> +	 */
> +	dnp->dn_reg = dt_regset_alloc(drp);
> +	if (dnp->dn_reg == -1)
> +		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> +	dt_cg_tstring_alloc(yypcb, dnp);
> +
> +	/* function call */
> +	if (dt_regset_xalloc_args(drp) == -1)
> +		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> +
> +	emit(dlp, BPF_MOV_REG(BPF_REG_1, str->dn_reg));
> +	dt_regset_free(drp, str->dn_reg);
> +	if (str->dn_tstring)
> +		dt_cg_tstring_free(yypcb, str);
> +
> +	emit(dlp,  BPF_LOAD(BPF_DW, dnp->dn_reg, BPF_REG_FP, DT_STK_DCTX));
> +	emit(dlp,  BPF_LOAD(BPF_DW, dnp->dn_reg, dnp->dn_reg, DCTX_MEM));
> +	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, dnp->dn_reg, dnp->dn_tstring->dn_value));

The above 3 instructions should move to right after the dt_cg_tstring_alloc()
above, to keep them together.  That reflects the way it is done with most other
cases, and will make it easier when (if) we actually merge all that together
to avoid the frequent duplication.

> +	emit(dlp,  BPF_MOV_REG(BPF_REG_2, dnp->dn_reg));
> +
> +	dt_regset_xalloc(drp, BPF_REG_0);
> +	idp = dt_dlib_get_func(yypcb->pcb_hdl, fname);
> +	assert(idp != NULL);
> +	emite(dlp,  BPF_CALL_FUNC(idp->di_id), idp);
> +	dt_regset_free_args(drp);
> +	dt_regset_free(drp, BPF_REG_0);
> +
> +	TRACE_REGSET("    subr-str_of_str:End  ");
> +}
> +
> +static void
> +dt_cg_subr_basename(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
> +{
> +	dt_cg_subr_str_of_str(dnp, dlp, drp, "dt_basename");
> +}
> +
>  /*
>   * Get and return a new speculation ID.  These are unallocated entries in the
>   * specs map, obtained by calling dt_speculation().  Return zero if none is
> @@ -3986,7 +4039,7 @@ static dt_cg_subr_f *_dt_cg_subr[DIF_SUBR_MAX + 1] = {
>  	[DIF_SUBR_DDI_PATHNAME]		= NULL,
>  	[DIF_SUBR_STRJOIN]		= dt_cg_subr_strjoin,
>  	[DIF_SUBR_LLTOSTR]		= &dt_cg_subr_lltostr,
> -	[DIF_SUBR_BASENAME]		= NULL,
> +	[DIF_SUBR_BASENAME]		= &dt_cg_subr_basename,
>  	[DIF_SUBR_DIRNAME]		= NULL,
>  	[DIF_SUBR_CLEANPATH]		= NULL,
>  	[DIF_SUBR_STRCHR]		= &dt_cg_subr_strchr,
> diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
> index 50acc0dc..771eba64 100644
> --- a/libdtrace/dt_dlibs.c
> +++ b/libdtrace/dt_dlibs.c
> @@ -54,6 +54,7 @@ static const dt_ident_t		dt_bpf_symbols[] = {
>  	/* BPF library (external) functions */
>  	DT_BPF_SYMBOL(dt_agg_lqbin, DT_IDENT_SYMBOL),
>  	DT_BPF_SYMBOL(dt_agg_qbin, DT_IDENT_SYMBOL),
> +	DT_BPF_SYMBOL(dt_basename, DT_IDENT_SYMBOL),
>  	DT_BPF_SYMBOL(dt_error, DT_IDENT_SYMBOL),
>  	DT_BPF_SYMBOL(dt_get_bvar, DT_IDENT_SYMBOL),
>  	DT_BPF_SYMBOL(dt_get_string, DT_IDENT_SYMBOL),
> diff --git a/test/unittest/dif/basename.d b/test/unittest/dif/basename.d
> index a4c50d7e..5e60b96d 100644
> --- a/test/unittest/dif/basename.d
> +++ b/test/unittest/dif/basename.d
> @@ -1,4 +1,3 @@
> -/* @@xfail: dtv2 */
>  BEGIN
>  {
>  	exit(basename("/foo/bar") == "bar" ? 0 : 1);
> diff --git a/test/unittest/funcs/tst.basename0.d b/test/unittest/funcs/tst.basename0.d
> new file mode 100644
> index 00000000..0f758996
> --- /dev/null
> +++ b/test/unittest/funcs/tst.basename0.d
> @@ -0,0 +1,38 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +#pragma D option quiet
> +
> +BEGIN
> +{
> +	printf("|%s|\n", basename("/foo/bar/baz"));
> +	printf("|%s|\n", basename("/foo/bar///baz/"));
> +	printf("|%s|\n", basename("/foo/bar/baz/"));
> +	printf("|%s|\n", basename("/foo/bar/baz//"));
> +	printf("|%s|\n", basename("/foo/bar/baz/."));
> +	printf("|%s|\n", basename("/foo/bar/baz/./"));
> +	printf("|%s|\n", basename("/foo/bar/baz/.//"));
> +	printf("|%s|\n", basename("foo/bar/baz/"));
> +	printf("|%s|\n", basename("/"));
> +	printf("|%s|\n", basename("./"));
> +	printf("|%s|\n", basename("//"));
> +	printf("|%s|\n", basename("/."));
> +	printf("|%s|\n", basename("/./"));
> +	printf("|%s|\n", basename("/./."));
> +	printf("|%s|\n", basename("/.//"));
> +	printf("|%s|\n", basename("."));
> +	printf("|%s|\n", basename("f"));
> +	printf("|%s|\n", basename("f/"));
> +	printf("|%s|\n", basename("/////"));
> +	printf("|%s|\n", basename(""));
> +	exit(0);
> +}
> +
> +ERROR
> +{
> +	exit(1);
> +}
> diff --git a/test/unittest/funcs/tst.basename0.r b/test/unittest/funcs/tst.basename0.r
> new file mode 100644
> index 00000000..37a96547
> --- /dev/null
> +++ b/test/unittest/funcs/tst.basename0.r
> @@ -0,0 +1,21 @@
> +|baz|
> +|baz|
> +|baz|
> +|baz|
> +|.|
> +|.|
> +|.|
> +|baz|
> +|/|
> +|.|
> +|/|
> +|.|
> +|.|
> +|.|
> +|.|
> +|.|
> +|f|
> +|f|
> +|/|
> +|.|
> +
> -- 
> 2.18.4
> 
> 
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel



More information about the DTrace-devel mailing list