[DTrace-devel] [PATCH 1/2] Add support for the basename() action

Eugene Loh eugene.loh at oracle.com
Fri Nov 19 01:45:04 UTC 2021


On 11/18/21 7:04 PM, Kris Van Hees wrote:

> On Tue, Nov 16, 2021 at 05:47:59PM -0500, eugene.loh--- via DTrace-devel wrote:
>> From: Eugene Loh <eugene.loh at oracle.com>
>>
>> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
>> ---
>>   bpf/Build                           |   1 +
>>   bpf/basename.S                      | 139 ++++++++++++++++++++++++++++
>>   libdtrace/dt_cg.c                   |  55 ++++++++++-
>>   libdtrace/dt_dlibs.c                |   1 +
>>   test/unittest/dif/basename.d        |   1 -
>>   test/unittest/funcs/tst.basename0.d |  38 ++++++++
>>   test/unittest/funcs/tst.basename0.r |  21 +++++
>>   7 files changed, 254 insertions(+), 2 deletions(-)
>>   create mode 100644 bpf/basename.S
>>   create mode 100644 test/unittest/funcs/tst.basename0.d
>>   create mode 100644 test/unittest/funcs/tst.basename0.r
>>
>> diff --git a/bpf/Build b/bpf/Build
>> index d8279411..e66e8dc9 100644
>> --- a/bpf/Build
>> +++ b/bpf/Build
>> @@ -23,6 +23,7 @@ bpf_dlib_DIR := $(current-dir)
>>   bpf_dlib_SRCDEPS = $(objdir)/include/.dir.stamp
>>   bpf_dlib_SOURCES = \
>>   	agg_lqbin.c agg_qbin.c \
>> +	basename.S \
>>   	get_bvar.c \
>>   	get_tvar.c set_tvar.c \
>>   	index.S \
>> diff --git a/bpf/basename.S b/bpf/basename.S
>> new file mode 100644
>> index 00000000..983457ad
>> --- /dev/null
>> +++ b/bpf/basename.S
>> @@ -0,0 +1,139 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
>> + */
>> +
>> +#define DT_STRLEN_BYTES		2
>> +
>> +#define BPF_FUNC_probe_read	4
>> +#define BPF_FUNC_probe_read_str	45
>> +
>> +/*
>> + * void dt_basename(char *src, char *dst);
>> + */
>> +	.text
>> +	.align	4
>> +	.global	dt_basename
>> +	.type	dt_basename, @function
>> +dt_basename :
>> +#define SRC %r6
>> +#define DST %r7
>> +#define BGN %r8
>> +#define END %r9
>> +
>> +	/* store copies of input arguments */
>> +	mov	SRC, %r1
>> +	mov	DST, %r2
>> +
>> +	/* ignore the string-length prefix */
>> +	add	SRC, DT_STRLEN_BYTES
>> +
>> +	/* r0 = bpf_probe_read_str(dst, STRSZ + 1, src) */
>> +	mov	%r1, DST
>> +	lddw	%r2, STRSZ
>> +	add	%r2, 1
>> +	mov	%r3, SRC
>> +	call	BPF_FUNC_probe_read_str
>> +
>> +	/* if (r0 s<= 1) goto Ldot */
>> +	jsle	%r0, 1, .Ldot
>> +
>> +	/* end = r0 - 1 */
>> +	mov	END, %r0
>> +	sub	END, 1
>> +
>> +	/*
>> +	 * Loop over end, backing it up to find a non-'/' char.
>> +	 */
>> +.Lend:
>> +	/* end-- */
>> +	sub	END, 1
>> +	/* if (end s< 0) goto Lslash */
>> +	jslt	END, 0, .Lslash
>> +	/* if (src[end] == '/') goto Lend */
>> +	mov	%r1, SRC
>> +	add	%r1, END
>> +	ldxb	%r1, [%r1+0]
>> +	and	%r1, 0xff
>> +	jeq	%r1, '/', .Lend
>> +
>> +	/*
>> +	 * Loop over bgn, backing it up to find a '/' char.
>> +	 */
>> +	/* bgn = end */
>> +	mov	BGN, END
>> +.Lbgn:
>> +	/* bgn-- */
>> +	sub	BGN, 1
>> +	/* if (bgn s< 0) goto Lcopy */
>> +	jslt	BGN, 0, .Lcopy
>> +	/* if (src[bgn] != '/') goto Lbgn */
>> +	mov	%r1, SRC
>> +	add	%r1, BGN
>> +	ldxb	%r1, [%r1+0]
>> +	and	%r1, 0xff
>> +	jne	%r1, '/', .Lbgn
>> +
>> +.Lcopy:
>> +	/*
>> +	 * The output string is a copy of the designated substring.
>> +	 */
>> +	/* end -= bgn (and help the BPF verifier) */
>> +	sub	END, BGN
> Err, this kind of breaks your whole scheme of using defines for register
> assignments because you just changed the meaning of END here.  It was the
> index of the end and now it becomes a byte count.

I see what you're saying, but I don't think this any more severe than 
saying "len means STRSZ+1; oh but now it means strlen."  The pseudocode 
is short enough (a few lines) that I think it's not hard to follow.  
Would it seem better to you to call it "len"?  So, "len" originally 
means from the beginning of the old string but then it means from the 
beginning of the new string?

>> +	jsge	END, 0, 1
>> +	mov	END, 0
>> +	/* bgn++ */
>> +	add	BGN, 1
>> +	/* bpf_probe_read(dst + DT_STRLEN_BYTES, end, &src[bgn]) */
>> +	mov	%r1, DST
>> +	add	%r1, DT_STRLEN_BYTES
>> +	mov	%r2, END
>> +	mov	%r3, SRC
>> +	add	%r3, BGN
>> +	call	BPF_FUNC_probe_read
> Why not use probe_read_str() and let that one place the terminating \0 for you
> so you don't have to do it here.

Yup.  Good point.  I think that didn't dawn on me until I saw you doing 
that in your patches.

>> +	/* dst[DT_STRLEN_BYTES + end] = '\0\ */
>> +	mov	%r1, DST
>> +	add	%r1, END
>> +	stb	[%r1+DT_STRLEN_BYTES], 0
>> +
>> +	/* dt_strlen_store(end, dst) */
>> +	mov	%r1, END
>> +	mov	%r2, DST
>> +	call	dt_strlen_store
>> +
>> +	/* return */
>> +	exit
>> +
>> +.Ldot:
>> +	/*
>> +	 * The output string is simply ".".
>> +	 */
>> +	/* dt_strlen_store(1, dst) */
>> +	mov	%r1, 1
>> +	mov	%r2, DST
>> +	call	dt_strlen_store
>> +	/* dst[DT_STRLEN_BYTES+0] = '.' */
>> +	stb	[DST+DT_STRLEN_BYTES], '.'
>> +	/* dst[DT_STRLEN_BYTES+1] = '\0' */
>> +	stb	[DST+(DT_STRLEN_BYTES+1)], '\0'
>> +	exit
>> +
>> +.Lslash:
>> +	/*
>> +	 * The output string is simply "/".
>> +	 */
>> +	/* dt_strlen_store(1, dst) */
>> +	mov	%r1, 1
>> +	mov	%r2, DST
>> +	call	dt_strlen_store
>> +	/* dst[DT_STRLEN_BYTES+0] = '/' */
>> +	stb	[DST+DT_STRLEN_BYTES], '/'
>> +	/* dst[DT_STRLEN_BYTES+1] = '\0' */
>> +	stb	[DST+(DT_STRLEN_BYTES+1)], '\0'
>> +	exit
> I would just make these two labels store the '.' or '/' character respectively,
> and assign 1 to whatever register for the string length, and then jump to a
> label right before dt_strlen_store() in the main code above.

Okay.

>> +#undef SRC
>> +#undef DST
>> +#undef BGN
>> +#undef END
>> +	.size	dt_basename, .-dt_basename
>> diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
>> index 537e6706..ac7ea603 100644
>> --- a/libdtrace/dt_cg.c
>> +++ b/libdtrace/dt_cg.c
>> @@ -3294,6 +3294,59 @@ dt_cg_array_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
>>   	emit(dlp, BPF_ALU64_REG((dnp->dn_flags & DT_NF_SIGNED) ? BPF_ARSH : BPF_RSH, dnp->dn_reg, n));
>>   }
>>   
>> +/*
>> + * This function implements dt_cg_subr_foo() that takes a string
>> + * input and produces a tstring output.
> This comment doesn't really make sense.  It does not implement st_cg_subr_foo()
> at all.  I know what you are getting at but I think it makes a lot more sense
> to just spell it out.  This function is a helper function for subroutines that
> take a string argument and return the result of applying a given BPF function
> (passed by name) on it.  Whatever way you want to word that to reflect this.

Gotcha.

>> + */
>> +static void
>> +dt_cg_subr_str_of_str(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp,
>> +		      const char *fname)
> Perhaps name it something more specific like dt_cg_subr_path_helper() or
> something like that, since it is only used for pathname-related subroutines
> anyway?

Well, there may be other cases we might want to support.  E.g. 
strjoin(), strstr(), and strtok() are all string(string, string). Okay, 
bad example since they set things up so differently.  Maybe strchr() and 
strrchr(), which are both string(string, char).  I don't know if we will 
want any more of these helper functions, but I was trying to name in an 
extensible way based on type signature. E.g., what if someday, D added a 
reversestring() subroutine? Unlikely, but that's the idea.

>> +{
>> +	dt_ident_t	*idp;
>> +	dt_node_t	*str = dnp->dn_args;
>> +
>> +	TRACE_REGSET("    subr-str_of_str:Begin");
>> +	dt_cg_node(str, dlp, drp);
>> +	dt_cg_check_notnull(dlp, drp, str->dn_reg);
>> +
>> +	/*
>> +	 * The result needs be be a temporary string, so we request one.
>> +	 */
>> +	dnp->dn_reg = dt_regset_alloc(drp);
>> +	if (dnp->dn_reg == -1)
>> +		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
>> +	dt_cg_tstring_alloc(yypcb, dnp);
>> +
>> +	/* function call */
>> +	if (dt_regset_xalloc_args(drp) == -1)
>> +		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
>> +
>> +	emit(dlp, BPF_MOV_REG(BPF_REG_1, str->dn_reg));
>> +	dt_regset_free(drp, str->dn_reg);
>> +	if (str->dn_tstring)
>> +		dt_cg_tstring_free(yypcb, str);
>> +
>> +	emit(dlp,  BPF_LOAD(BPF_DW, dnp->dn_reg, BPF_REG_FP, DT_STK_DCTX));
>> +	emit(dlp,  BPF_LOAD(BPF_DW, dnp->dn_reg, dnp->dn_reg, DCTX_MEM));
>> +	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, dnp->dn_reg, dnp->dn_tstring->dn_value));
> The above 3 instructions should move to right after the dt_cg_tstring_alloc()
> above, to keep them together.  That reflects the way it is done with most other
> cases, and will make it easier when (if) we actually merge all that together
> to avoid the frequent duplication.

Yipes.  And even worse, (assuming I'm thinking about this clearly) if 
dnp->dn_reg were, say, %r5, we would spill a bogus value at 
xalloc_args(), compute dnp->dn_reg, fill with the bogus value at 
free_args(), and get garbage.  Good catch.  Thanks.  Will fix.

>> +	emit(dlp,  BPF_MOV_REG(BPF_REG_2, dnp->dn_reg));
>> +
>> +	dt_regset_xalloc(drp, BPF_REG_0);
>> +	idp = dt_dlib_get_func(yypcb->pcb_hdl, fname);
>> +	assert(idp != NULL);
>> +	emite(dlp,  BPF_CALL_FUNC(idp->di_id), idp);
>> +	dt_regset_free_args(drp);
>> +	dt_regset_free(drp, BPF_REG_0);
>> +
>> +	TRACE_REGSET("    subr-str_of_str:End  ");
>> +}
>> +
>> +static void
>> +dt_cg_subr_basename(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
>> +{
>> +	dt_cg_subr_str_of_str(dnp, dlp, drp, "dt_basename");
>> +}
>> +
>>   /*
>>    * Get and return a new speculation ID.  These are unallocated entries in the
>>    * specs map, obtained by calling dt_speculation().  Return zero if none is
>> @@ -3986,7 +4039,7 @@ static dt_cg_subr_f *_dt_cg_subr[DIF_SUBR_MAX + 1] = {
>>   	[DIF_SUBR_DDI_PATHNAME]		= NULL,
>>   	[DIF_SUBR_STRJOIN]		= dt_cg_subr_strjoin,
>>   	[DIF_SUBR_LLTOSTR]		= &dt_cg_subr_lltostr,
>> -	[DIF_SUBR_BASENAME]		= NULL,
>> +	[DIF_SUBR_BASENAME]		= &dt_cg_subr_basename,
>>   	[DIF_SUBR_DIRNAME]		= NULL,
>>   	[DIF_SUBR_CLEANPATH]		= NULL,
>>   	[DIF_SUBR_STRCHR]		= &dt_cg_subr_strchr,
>> diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
>> index 50acc0dc..771eba64 100644
>> --- a/libdtrace/dt_dlibs.c
>> +++ b/libdtrace/dt_dlibs.c
>> @@ -54,6 +54,7 @@ static const dt_ident_t		dt_bpf_symbols[] = {
>>   	/* BPF library (external) functions */
>>   	DT_BPF_SYMBOL(dt_agg_lqbin, DT_IDENT_SYMBOL),
>>   	DT_BPF_SYMBOL(dt_agg_qbin, DT_IDENT_SYMBOL),
>> +	DT_BPF_SYMBOL(dt_basename, DT_IDENT_SYMBOL),
>>   	DT_BPF_SYMBOL(dt_error, DT_IDENT_SYMBOL),
>>   	DT_BPF_SYMBOL(dt_get_bvar, DT_IDENT_SYMBOL),
>>   	DT_BPF_SYMBOL(dt_get_string, DT_IDENT_SYMBOL),
>> diff --git a/test/unittest/dif/basename.d b/test/unittest/dif/basename.d
>> index a4c50d7e..5e60b96d 100644
>> --- a/test/unittest/dif/basename.d
>> +++ b/test/unittest/dif/basename.d
>> @@ -1,4 +1,3 @@
>> -/* @@xfail: dtv2 */
>>   BEGIN
>>   {
>>   	exit(basename("/foo/bar") == "bar" ? 0 : 1);
>> diff --git a/test/unittest/funcs/tst.basename0.d b/test/unittest/funcs/tst.basename0.d
>> new file mode 100644
>> index 00000000..0f758996
>> --- /dev/null
>> +++ b/test/unittest/funcs/tst.basename0.d
>> @@ -0,0 +1,38 @@
>> +/*
>> + * Oracle Linux DTrace.
>> + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
>> + * Licensed under the Universal Permissive License v 1.0 as shown at
>> + * http://oss.oracle.com/licenses/upl.
>> + */
>> +
>> +#pragma D option quiet
>> +
>> +BEGIN
>> +{
>> +	printf("|%s|\n", basename("/foo/bar/baz"));
>> +	printf("|%s|\n", basename("/foo/bar///baz/"));
>> +	printf("|%s|\n", basename("/foo/bar/baz/"));
>> +	printf("|%s|\n", basename("/foo/bar/baz//"));
>> +	printf("|%s|\n", basename("/foo/bar/baz/."));
>> +	printf("|%s|\n", basename("/foo/bar/baz/./"));
>> +	printf("|%s|\n", basename("/foo/bar/baz/.//"));
>> +	printf("|%s|\n", basename("foo/bar/baz/"));
>> +	printf("|%s|\n", basename("/"));
>> +	printf("|%s|\n", basename("./"));
>> +	printf("|%s|\n", basename("//"));
>> +	printf("|%s|\n", basename("/."));
>> +	printf("|%s|\n", basename("/./"));
>> +	printf("|%s|\n", basename("/./."));
>> +	printf("|%s|\n", basename("/.//"));
>> +	printf("|%s|\n", basename("."));
>> +	printf("|%s|\n", basename("f"));
>> +	printf("|%s|\n", basename("f/"));
>> +	printf("|%s|\n", basename("/////"));
>> +	printf("|%s|\n", basename(""));
>> +	exit(0);
>> +}
>> +
>> +ERROR
>> +{
>> +	exit(1);
>> +}
>> diff --git a/test/unittest/funcs/tst.basename0.r b/test/unittest/funcs/tst.basename0.r
>> new file mode 100644
>> index 00000000..37a96547
>> --- /dev/null
>> +++ b/test/unittest/funcs/tst.basename0.r
>> @@ -0,0 +1,21 @@
>> +|baz|
>> +|baz|
>> +|baz|
>> +|baz|
>> +|.|
>> +|.|
>> +|.|
>> +|baz|
>> +|/|
>> +|.|
>> +|/|
>> +|.|
>> +|.|
>> +|.|
>> +|.|
>> +|.|
>> +|f|
>> +|f|
>> +|/|
>> +|.|
>> +
>> -- 
>> 2.18.4
>>
>>
>> _______________________________________________
>> DTrace-devel mailing list
>> DTrace-devel at oss.oracle.com
>> https://oss.oracle.com/mailman/listinfo/dtrace-devel



More information about the DTrace-devel mailing list