[DTrace-devel] [PATCH 4/4] Make sure assignment expressions work correctly with tstrings

Kris Van Hees kris.van.hees at oracle.com
Mon Nov 1 20:32:34 UTC 2021


On Mon, Nov 01, 2021 at 03:33:01PM -0400, Eugene Loh wrote:
> Reviewed-by: Eugene Loh <eugene.loh at oracle.com>
> with minor comments.
> 
> On 10/30/21 1:41 AM, Kris Van Hees wrote:
> > The node kinds that can hold a tstring has been expanded from just
> > DT_NOT_DUNC to DT_NODE_OP1, DT_NODE_OP2, DT_NODE_OP3, DT_NODE_DEXPR.
> 
> s/DT_NOT_DUNC/DT_NODE_FUNC/

Wow - not sure how I managed that typo :)  Thanks.

> > diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> > index ddc4c0d7..0196b114 100644
> > --- a/libdtrace/dt_cg.c
> > +++ b/libdtrace/dt_cg.c
> > @@ -2303,8 +2310,13 @@ dt_cg_store(dt_node_t *src, dt_irlist_t *dlp, dt_regset_t *drp, dt_node_t *dst)
> >   	}
> >   }
> >   
> > +/*
> > + * dnp = node of the assignment
> > + *   dn_left = identifier node for the destination (idp = identifier)
> > + *   dn_right = value to store
> > + */
> >   static void
> > -dt_cg_store_var(dt_node_t *src, dt_irlist_t *dlp, dt_regset_t *drp,
> > +dt_cg_store_var(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp,
> >   		dt_ident_t *idp)
> >   {
> >   	uint_t	varid;
> > @@ -2327,7 +2339,7 @@ dt_cg_store_var(dt_node_t *src, dt_irlist_t *dlp, dt_regset_t *drp,
> >   			emit(dlp, BPF_LOAD(BPF_DW, reg, reg, DCTX_GVARS));
> >   
> >   		/* store by value or by reference */
> > -		if (src->dn_flags & DT_NF_REF) {
> > +		if (dnp->dn_flags & DT_NF_REF) {
> >   			size_t		srcsz;
> >   
> >   			emit(dlp, BPF_ALU64_IMM(BPF_ADD, reg, idp->di_offset));
> > @@ -2337,23 +2349,22 @@ dt_cg_store_var(dt_node_t *src, dt_irlist_t *dlp, dt_regset_t *drp,
> >   			 * the lesser of the size of the identifier and the
> >   			 * size of the data being copied in.
> >   			 */
> > -			srcsz = dt_node_type_size(src->dn_right);
> > -			if (dt_node_is_string(src))
> > +			srcsz = dt_node_type_size(dnp->dn_right);
> > +			if (dt_node_is_string(dnp))
> >   				srcsz += DT_STRLEN_BYTES;
> >   			size = MIN(srcsz, idp->di_size);
> >   
> > -			dt_cg_memcpy(dlp, drp, reg, src->dn_reg, size);
> > +			dt_cg_memcpy(dlp, drp, reg, dnp->dn_reg, size);
> >   		} else {
> >   			size = idp->di_size;
> >   
> >   			assert(size > 0 && size <= 8 &&
> >   			       (size & (size - 1)) == 0);
> >   
> > -			emit(dlp, BPF_STORE(ldstw[size], reg, idp->di_offset, src->dn_reg));
> > +			emit(dlp, BPF_STORE(ldstw[size], reg, idp->di_offset, dnp->dn_reg));
> >   		}
> >   
> >   		dt_regset_free(drp, reg);
> > -		dt_cg_tstring_free(yypcb, src);
> >   		return;
> >   	}
> 
> Did you also want s/srcsz/dnpsz/?  (Of course "it does not matter.")

No, srcsz is fine because it *is* the size of the source data.

> > @@ -2370,7 +2381,7 @@ dt_cg_store_var(dt_node_t *src, dt_irlist_t *dlp, dt_regset_t *drp,
> >   	 * the disassembler expects this sequence in support for annotating
> >   	 * the disassembly with variables names.
> >   	 */
> > -	emit(dlp,  BPF_MOV_REG(BPF_REG_2, src->dn_reg));
> > +	emit(dlp,  BPF_MOV_REG(BPF_REG_2, dnp->dn_reg));
> >   	emit(dlp,  BPF_MOV_IMM(BPF_REG_1, varid));
> >   	dt_regset_xalloc(drp, BPF_REG_0);
> >   	emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
> > @@ -2830,6 +2841,35 @@ dt_cg_ternary_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
> >   
> >   	emitl(dlp, lbl_post,
> >   		   BPF_NOP());
> > +
> > +	/*
> > +	 * Strings complicate things a bit because dn_left and dn_right might
> > +	 * actually be temporary strings (tstring) *and* in different slots.
> > +	 * We need to allocate a new tstring to hold the result, and copy the
> > +	 * value into the new tstring (and free any tstrings in dn_left and
> > +	 * dn_right).
> > +	 */
> > +	if (dt_node_is_string(dnp)) {
> > +		/*
> > +		 * At this point, dnp->dn_reg holds a pointer to the string we
> > +		 * need to copy.  But we want to copy it into a tstring which
> > +		 * location is to be stored in dnp->dn_reg.  So, we need to
> > +		 * shuffle things a bit.
> > +		 */
> > +		emit(dlp,  BPF_MOV_REG(BPF_REG_0, dnp->dn_reg));
> > +		dt_cg_tstring_alloc(yypcb, dnp);
> > +
> > +		emit(dlp,  BPF_LOAD(BPF_DW, dnp->dn_reg, BPF_REG_FP, DT_STK_DCTX));
> > +		emit(dlp,  BPF_LOAD(BPF_DW, dnp->dn_reg, dnp->dn_reg, DCTX_MEM));
> > +		emit(dlp,  BPF_ALU64_IMM(BPF_ADD, dnp->dn_reg, dnp->dn_tstring->dn_value));
> > +
> > +		dt_cg_memcpy(dlp, drp, dnp->dn_reg, BPF_REG_0,
> > +			     DT_STRLEN_BYTES +
> > +			     yypcb->pcb_hdl->dt_options[DTRACEOPT_STRSIZE]);
> > +
> > +		dt_cg_tstring_free(yypcb, dnp->dn_left);
> > +		dt_cg_tstring_free(yypcb, dnp->dn_right);
> > +	}
> >   }
> 
> Would it not be safer to allocate a register than to hardwire %r0?

Not really - as discussed in the past, %r0 is commnly used as a scratch or
temporary register (in fact, that is how the code generator handles it) and
it is valid to use it in that fashion in short code sequences like this
where we know for certain there are no intervening code blocks.

> Should the memcpy not include an extra byte for the NULL terminator?

Excellent question!  The answer here is 'no' mainly because we don't do that in
other places either but whether that is the perfect behaviour or not is a bit
unclear.  The documentation states:

  In DTrace, strings are represented as an array of characters terminated by a
  null byte (that is, a byte whose value is zero, usually written as '\0').
  The visible part of the string is of variable length, depending on the
  location of the null byte, but DTrace stores each string in a fixed-size
  array so that each probe traces a consistent amount of data. Strings cannot
  exceed the length of the predefined string limit. However, the limit can be
  modified in your D program or on the dtrace command line by tuning the
  strsize option. 

The terminology shifts a bit near the end of the paragraph when we go from
a "fixed-size array" and "consistent amount of data" to "string limit".  The
implementation seems to be a bit inconsistent on what the interpretation ought
to be.  Whether the setting of strsize is meant to be the limit on the
"consistent amount of data" or whether it is that value - 1 to account for
the null byte is not clear.

> > diff --git a/test/unittest/codegen/tst.tstring_asgn_expr.d b/test/unittest/codegen/tst.tstring_asgn_expr.d
> > new file mode 100644
> > index 00000000..6b0d7d73
> > --- /dev/null
> > +++ b/test/unittest/codegen/tst.tstring_asgn_expr.d
> > @@ -0,0 +1,29 @@
> > +/*
> > + * Oracle Linux DTrace.
> > + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
> > + * Licensed under the Universal Permissive License v 1.0 as shown at
> > + * http://oss.oracle.com/licenses/upl.
> > + */
> > +
> > +/*
> > + * DESC
> > + */
> 
> DESC?

Ah yes, I forgot to write those, didn't I?  Fixing.

> > diff --git a/test/unittest/codegen/tst.tstring_ternary.d b/test/unittest/codegen/tst.tstring_ternary.d
> > new file mode 100644
> > index 00000000..c59d8c2e
> > --- /dev/null
> > +++ b/test/unittest/codegen/tst.tstring_ternary.d
> > @@ -0,0 +1,28 @@
> > +/*
> > + * Oracle Linux DTrace.
> > + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
> > + * Licensed under the Universal Permissive License v 1.0 as shown at
> > + * http://oss.oracle.com/licenses/upl.
> > + */
> > +
> > +/*
> > + * DESC
> > + */
> DESC?
> 
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel



More information about the DTrace-devel mailing list