[DTrace-devel] [PATCH 1/2] Add support for strjoin() subroutine

Kris Van Hees kris.van.hees at oracle.com
Fri Sep 3 00:10:23 PDT 2021


On Thu, Sep 02, 2021 at 04:25:02PM -0400, Eugene Loh wrote:
> Reviewed-by: Eugene Loh <eugene.loh at oracle.com>
> I'm ignoring the similarly named "3/3" patch.  Anyhow, some extra 
> comments as usual...
> 
> On 9/2/21 11:33 AM, Kris Van Hees wrote:
> > diff --git a/bpf/strjoin.S b/bpf/strjoin.S
> > new file mode 100644
> > index 00000000..2a19f2eb
> > --- /dev/null
> > +++ b/bpf/strjoin.S
> > @@ -0,0 +1,55 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
> > + */
> > +
> > +#define DT_STRLEN_BYTES		2
> > +
> > +#define BPF_FUNC_probe_read_str	45
> > +
> > +/*
> > + * void dt_strjoin(char *dst, const char *s1, const char *s2)
> > + */
> > +	.text
> > +	.align	4
> > +	.global	dt_strjoin
> > +	.type	dt_strjoin, @function
> > +dt_strjoin:
> > +	mov	%r9, %r1
> > +	mov	%r7, %r2
> > +	mov	%r8, %r3
> > +
> > +	mov	%r1, %r7
> > +	call	dt_strlen
> > +	mov	%r6, %r0
> > +	add	%r7, DT_STRLEN_BYTES
> > +
> > +	mov	%r1, %r8
> > +	call	dt_strlen
> > +	add	%r6, %r0
> > +	add	%r8, DT_STRLEN_BYTES
> > +
> > +	mov	%r1, %r6
> > +	mov	%r2, %r9
> > +	call	dt_strlen_store
> > +	add	%r9, DT_STRLEN_BYTES
> > +
> > +	lddw	%r6, STRSZ
> > +	and	%r6, 0xffffffff
> 
> Okay, but why the &=-1?  We're doing a dw ld of some "small" value 
> (e.g., 256).  So the &=-1 is a nop... doesn't even help the BPF verifier.

Technically the STRSZ could be a very large  number, well beyond a 32-bit value
while bpf_probe_read_str only supports a 32-bit int as size...  So this serves
as a way to satisfy that.
> 
> > +
> > +	mov	%r1, %r9
> > +	mov	%r2, %r6
> > +	mov	%r3, %r7
> > +	call	BPF_FUNC_probe_read_str
> > +	jslt	%r0, 0, .L1
> > +	jslt	%r0, 1, .L2
> > +	add	%r9, %r0
> > +	sub	%r9, 1
> > +.L2:
> > +	mov	%r1, %r9
> > +	mov	%r2, %r6
> > +	mov	%r3, %r8
> > +	call	BPF_FUNC_probe_read_str
> > +.L1:
> > +	exit
> > +	.size	dt_strjoin, .-dt_strjoin
> > diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> > index f9710a5e..b48b9f7b 100644
> > --- a/libdtrace/dt_cg.c
> > +++ b/libdtrace/dt_cg.c
> > @@ -753,20 +753,31 @@ dt_cg_strlen(dt_irlist_t *dlp, dt_regset_t *drp, int dst, int src)
> >   	size_t		size = yypcb->pcb_hdl->dt_options[DTRACEOPT_STRSIZE];
> >   	uint_t		lbl_ok = dt_irlist_label(dlp);
> >   
> > +	TRACE_REGSET("        strlen:Begin");
> > +
> >   	assert(idp != NULL);
> >   	if (dt_regset_xalloc_args(drp) == -1)
> >   		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> >   
> > -	emit(dlp,  BPF_MOV_REG(BPF_REG_1, src));
> > -	dt_regset_xalloc(drp, BPF_REG_0);
> > +	if (src != BPF_REG_1)
> > +		emit(dlp,  BPF_MOV_REG(BPF_REG_1, src));
> > +	if (dst != BPF_REG_0)
> > +		dt_regset_xalloc(drp, BPF_REG_0);
> 
> The src!=%r1 is okay, I guess, but doesn't it make more sense to deal 
> with this sort of thing in some later optimization phase?
> 
> The dst!=%r0 is more important, but shouldn't we be worrying about this 
> sort of thing more broadly and not just right here?  Come to think of 
> it, why has dst already been allocated?  It's exerting unnecessary 
> register pressure to allocate a register before a function call when the 
> register is not needed until afterwards.

This is a side effect of how strlen was originally implemented and used in two
different forms: directly to support storing a string in the output buffer, and
also in the strlen subroutine.  I make these modifications here to be able to
use dt_cg_strlen() during the code generation for strjoin.  This will need to
be revisited and clened up more in a separate patch because it should indeed
act more like a function call..

> >   	emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
> >   	dt_regset_free_args(drp);
> >   	emit(dlp,  BPF_BRANCH_IMM(BPF_JLE, BPF_REG_0, size, lbl_ok));
> >   	emit(dlp,  BPF_MOV_IMM(BPF_REG_0, size));
> > -	emitl(dlp, lbl_ok,
> > -		   BPF_MOV_REG(dst, BPF_REG_0));
> > -	dt_regset_free(drp, BPF_REG_0);
> > +
> > +	if (dst != BPF_REG_0) {
> > +		emitl(dlp, lbl_ok,
> > +			   BPF_MOV_REG(dst, BPF_REG_0));
> > +		dt_regset_free(drp, BPF_REG_0);
> > +	} else
> > +		emitl(dlp, lbl_ok,
> > +			   BPF_NOP());
> 
> To me, it'd be cleaner to write
> 
>          emit(dlp,  BPF_BRANCH_IMM(BPF_JLE, BPF_REG_0, size, lbl_ok));
>          emit(dlp,  BPF_MOV_IMM(BPF_REG_0, size));
>          emitl(dlp, lbl_ok,
>                     BPF_NOP());
> 
>          if (dst != BPF_REG_0) {
>                  emit(dlp, BPF_MOV_REG(dst, BPF_REG_0));
>                  dt_regset_free(drp, BPF_REG_0);
>          }

Sure, though it's just a matter of preference.

> > +
> > +	TRACE_REGSET("        strlen:End  ");
> >   }
> >   
> >   static void
> > @@ -3089,6 +3100,55 @@ dt_cg_subr_strlen(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
> >   	TRACE_REGSET("    subr-strlen:End  ");
> >   }
> >   
> > +static void
> > +dt_cg_subr_strjoin(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
> > +{
> > +	dt_node_t	*s1 = dnp->dn_args;
> > +	dt_node_t	*s2 = s1->dn_list;
> > +	dt_ident_t	*idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_strjoin");
> > +
> > +	assert(idp != NULL);
> > +
> > +	TRACE_REGSET("    subr-strjoin:Begin");
> > +
> > +	dt_cg_node(s1, dlp, drp);
> > +	dt_cg_check_notnull(dlp, drp, s1->dn_reg);
> > +	dt_cg_node(s2, dlp, drp);
> > +	dt_cg_check_notnull(dlp, drp, s2->dn_reg);
> > +
> > +	/*
> > +	 * We can re-use the register from s1 for our result.  The result needs
> > +	 * be be a temporary string, so we need to allocate one.
> > +	 */
> 
> I'm confused by the "reuse s1 reg" comment.  It looks to me like we do not.

Stale comment.

> > +	dnp->dn_reg = dt_regset_alloc(drp);
> > +	if (dnp->dn_reg == -1)
> > +		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> > +	dt_node_tstring(dnp, dt_cg_tstring_alloc());
> > +
> > +	emit(dlp,  BPF_LOAD(BPF_DW, dnp->dn_reg, BPF_REG_FP, DT_STK_DCTX));
> > +	emit(dlp,  BPF_LOAD(BPF_DW, dnp->dn_reg, dnp->dn_reg, DCTX_MEM));
> > +	emit(dlp,  BPF_ALU64_IMM(BPF_ADD, dnp->dn_reg, dnp->dn_tstring->dn_value));
> > +
> > +	if (dt_regset_xalloc_args(drp) == -1)
> > +		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> > +
> > +	emit(dlp,  BPF_MOV_REG(BPF_REG_1, dnp->dn_reg));
> > +	emit(dlp,  BPF_MOV_REG(BPF_REG_2, s1->dn_reg));
> > +	dt_regset_free(drp, s1->dn_reg);
> > +	if (s1->dn_tstring)
> > +		dt_cg_tstring_free(s1->dn_tstring->dn_value);
> > +	emit(dlp,  BPF_MOV_REG(BPF_REG_3, s2->dn_reg));
> > +	dt_regset_free(drp, s2->dn_reg);
> > +	if (s2->dn_tstring)
> > +		dt_cg_tstring_free(s2->dn_tstring->dn_value);
> > +	dt_regset_xalloc(drp, BPF_REG_0);
> > +	emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
> > +	dt_regset_free_args(drp);
> > +	dt_regset_free(drp, BPF_REG_0);
> > +
> > +	TRACE_REGSET("    subr-strjoin:End  ");
> > +}
> > +
> >   typedef void dt_cg_subr_f(dt_node_t *, dt_irlist_t *, dt_regset_t *);
> >   
> >   static dt_cg_subr_f *_dt_cg_subr[DIF_SUBR_MAX + 1] = {
> > @@ -3115,7 +3175,7 @@ static dt_cg_subr_f *_dt_cg_subr[DIF_SUBR_MAX + 1] = {
> >   	[DIF_SUBR_GETMAJOR]		= NULL,
> >   	[DIF_SUBR_GETMINOR]		= NULL,
> >   	[DIF_SUBR_DDI_PATHNAME]		= NULL,
> > -	[DIF_SUBR_STRJOIN]		= NULL,
> > +	[DIF_SUBR_STRJOIN]		= dt_cg_subr_strjoin,
> >   	[DIF_SUBR_LLTOSTR]		= NULL,
> >   	[DIF_SUBR_BASENAME]		= NULL,
> >   	[DIF_SUBR_DIRNAME]		= NULL,
> > diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
> > index 04becfa6..771b7fe0 100644
> > --- a/libdtrace/dt_dlibs.c
> > +++ b/libdtrace/dt_dlibs.c
> > @@ -59,6 +59,7 @@ static const dt_ident_t		dt_bpf_symbols[] = {
> >   	DT_BPF_SYMBOL(dt_get_string, DT_IDENT_SYMBOL),
> >   	DT_BPF_SYMBOL(dt_get_tvar, DT_IDENT_SYMBOL),
> >   	DT_BPF_SYMBOL(dt_set_tvar, DT_IDENT_SYMBOL),
> > +	DT_BPF_SYMBOL(dt_strjoin, DT_IDENT_SYMBOL),
> >   	DT_BPF_SYMBOL(dt_strnlen, DT_IDENT_SYMBOL),
> >   	/* BPF maps */
> >   	DT_BPF_SYMBOL(aggs, DT_IDENT_PTR),
> > diff --git a/test/unittest/dif/strjoin.d b/test/unittest/dif/strjoin.d
> > index e7834eb9..ee70dec8 100644
> > --- a/test/unittest/dif/strjoin.d
> > +++ b/test/unittest/dif/strjoin.d
> > @@ -1,6 +1,10 @@
> > -/* @@xfail: dtv2 */
> >   BEGIN
> >   {
> >   	trace(strjoin(probeprov, probename));
> >   	exit(0);
> >   }
> > +
> > +ERROR
> > +{
> > +	exit(1);
> > +}
> > diff --git a/test/unittest/funcs/strjoin/tst.strjoin-nested.d b/test/unittest/funcs/strjoin/tst.strjoin-nested.d
> > new file mode 100644
> > index 00000000..84ba459c
> > --- /dev/null
> > +++ b/test/unittest/funcs/strjoin/tst.strjoin-nested.d
> > @@ -0,0 +1,34 @@
> > +/*
> > + * Oracle Linux DTrace.
> > + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
> > + * Licensed under the Universal Permissive License v 1.0 as shown at
> > + * http://oss.oracle.com/licenses/upl.
> > + */
> > +
> > +/*
> > + * ASSERTION: Nested strjoin() works
> > + *
> > + * SECTION: Actions and Subroutines/strjoin()
> > + */
> > +
> > +#pragma D option quiet
> > +
> > +BEGIN
> > +{
> > +	trace(strjoin(
> > +		strjoin(
> > +			strjoin(probeprov, ":"),
> > +			strjoin(probemod, ":")
> > +		),
> > +		strjoin(
> > +			strjoin(probefunc, ":"),
> > +			probename
> > +		)
> > +	      ));
> > +	exit(0);
> > +}
> > +
> > +ERROR
> > +{
> > +	exit(1);
> > +}
> > diff --git a/test/unittest/funcs/strjoin/tst.strjoin-nested.r b/test/unittest/funcs/strjoin/tst.strjoin-nested.r
> > new file mode 100644
> > index 00000000..748852fb
> > --- /dev/null
> > +++ b/test/unittest/funcs/strjoin/tst.strjoin-nested.r
> > @@ -0,0 +1 @@
> > +dtrace:::BEGIN
> > diff --git a/test/unittest/funcs/tst.strjoin.d b/test/unittest/funcs/strjoin/tst.strjoin.d
> > similarity index 80%
> > rename from test/unittest/funcs/tst.strjoin.d
> > rename to test/unittest/funcs/strjoin/tst.strjoin.d
> > index beec500c..f52443bd 100644
> > --- a/test/unittest/funcs/tst.strjoin.d
> > +++ b/test/unittest/funcs/strjoin/tst.strjoin.d
> > @@ -1,10 +1,9 @@
> >   /*
> >    * Oracle Linux DTrace.
> > - * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
> > + * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
> >    * Licensed under the Universal Permissive License v 1.0 as shown at
> >    * http://oss.oracle.com/licenses/upl.
> >    */
> > -/* @@xfail: dtv2 */
> >   
> >   #pragma D option quiet
> >   
> > diff --git a/test/unittest/funcs/tst.strjoin.r b/test/unittest/funcs/strjoin/tst.strjoin.r
> > similarity index 100%
> > rename from test/unittest/funcs/tst.strjoin.r
> > rename to test/unittest/funcs/strjoin/tst.strjoin.r
> 
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel



More information about the DTrace-devel mailing list