[DTrace-devel] [PATCH v3 1/3] Introduce temporary string space for string manipulation functions

Kris Van Hees kris.van.hees at oracle.com
Fri Sep 3 12:52:47 PDT 2021


String functions that have a string as return value need to store that
string in a temporary location so it can be used in following code.
We need at most 4 temporary strings to support nesting string functions
in an expression.

Functions must request a temporary string slot and have it associated
with their return value using a statement like:

    dt_cg_tstring_alloc(pcb, dnp);

The address of the string location should be obtained using:

    BPF_LOAD(BPF_DW, dnp->dn_reg, BPF_REG_FP, DT_STK_DCTX)
    BPF_LOAD(BPF_DW, dnp->dn_reg, dnp->dn_reg, DCTX_MEM)
    BPF_ALU64_IMM(BPF_ADD, dnp->dn_reg, dnp->dn_tstring->dn_value)

Functions that may be receiving a temporary string as argument (which
is any function or action that accepts string arguments) must free the
temporary string once it is no longer needed.  This should be done
using code like:

    if (dnp->dn_tstring)
        dt_cg_tstring_free(pcb, dnp);

Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
Reviewed-by: Eugene Loh <eugene.loh at oracle.com>
---
 libdtrace/dt_bpf.c    | 26 ++++++------
 libdtrace/dt_cg.c     | 95 +++++++++++++++++++++++++++++++++++++++----
 libdtrace/dt_impl.h   | 18 ++++++++
 libdtrace/dt_parser.c | 12 ++++++
 libdtrace/dt_parser.h |  5 ++-
 5 files changed, 134 insertions(+), 22 deletions(-)

diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
index 8195cd07..57a6d49a 100644
--- a/libdtrace/dt_bpf.c
+++ b/libdtrace/dt_bpf.c
@@ -17,6 +17,7 @@
 #include <dt_dctx.h>
 #include <dt_probe.h>
 #include <dt_state.h>
+#include <dt_string.h>
 #include <dt_strtab.h>
 #include <dt_bpf.h>
 #include <dt_bpf_maps.h>
@@ -178,17 +179,9 @@ populate_probes_map(dtrace_hdl_t *dtp, int fd)
  *		with a singleton element (key 0).  This means that every CPU
  *		will see its own copy of this singleton element, and can use it
  *		without interference from other CPUs.  The scratch memory is
- *		used to store the DTrace context, the temporary output buffer,
- *		and temporary storage for stack traces, string manipulation,
- *		etc.
- *		The size of the map value (a byte array) is the sum of:
- *			- size of the DTrace context, rounded up to the nearest
- *			  multiple of 8
- *			- 8 bytes padding for trace buffer alignment purposes
- *			- maximum trace buffer record size, rounded up to the
- *			  multiple of 8
- *			- the greater of the maximum stack trace size and three
- *			  times the maximum string size
+ *		used to store the DTrace machine state, the temporary output
+ *		buffer, and temporary storage for stack traces, string
+ *		manipulation, etc.
  * - strtab:	String table map.  This is a global map with a singleton
  *		element (key 0) that contains the entire string table as a
  *		concatenation of all unique strings (each terminated with a
@@ -276,20 +269,25 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
 
 	/*
 	 * The size of the map value (a byte array) is the sum of:
-	 *	- size of the DTrace context, rounded up to the nearest
+	 *	- size of the DTrace machine state, rounded up to the nearest
 	 *	  multiple of 8
 	 *	- 8 bytes padding for trace buffer alignment purposes
 	 *	- maximum trace buffer record size, rounded up to the
 	 *	  multiple of 8
 	 *	- the greater of:
 	 *		+ the maximum stack trace size
-	 *		+ three times the maximum string size
+	 *		+ four times the maximum string size (incl. length)
+	 *		  plus the maximum string size (to accommodate the BPF
+	 *		  verifier)
 	 */
 	memsz = roundup(sizeof(dt_mstate_t), 8) +
 		8 +
 		roundup(dtp->dt_maxreclen, 8) +
 		MAX(sizeof(uint64_t) * dtp->dt_options[DTRACEOPT_MAXFRAMES],
-		    3 * dtp->dt_options[DTRACEOPT_STRSIZE]);
+		    DT_TSTRING_SLOTS *
+			(DT_STRLEN_BYTES + dtp->dt_options[DTRACEOPT_STRSIZE]) +
+		    dtp->dt_options[DTRACEOPT_STRSIZE]
+		);
 	if (create_gmap(dtp, "mem", BPF_MAP_TYPE_PERCPU_ARRAY,
 			sizeof(uint32_t), memsz, 1) == -1)
 		return -1;		/* dt_errno is set for us */
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index 94776677..e2af322a 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -759,6 +759,7 @@ dt_cg_strlen(dt_irlist_t *dlp, dt_regset_t *drp, int dst, int src)
 
 	emit(dlp,  BPF_MOV_REG(BPF_REG_1, src));
 	dt_regset_xalloc(drp, BPF_REG_0);
+
 	emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
 	dt_regset_free_args(drp);
 	emit(dlp,  BPF_BRANCH_IMM(BPF_JLE, BPF_REG_0, size, lbl_ok));
@@ -784,6 +785,72 @@ dt_cg_spill_load(int reg)
 	emit(dlp, BPF_LOAD(BPF_DW, reg, BPF_REG_FP, DT_STK_SPILL(reg)));
 }
 
+/*
+ * Initialize the temporary string offsets and mark all not in use.
+ */
+static void
+dt_cg_tstring_reset(dtrace_hdl_t *dtp)
+{
+	int		i;
+	dt_tstring_t	*ts;
+
+	if (dtp->dt_tstrings == NULL) {		/* not yet allocated */
+		dtp->dt_tstrings = dt_calloc(dtp, DT_TSTRING_SLOTS,
+					    sizeof(dt_tstring_t));
+		if (dtp->dt_tstrings == NULL)
+			longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
+
+		ts = dtp->dt_tstrings;		/* slot i starts at i * size */
+		for (i = 0; i < DT_TSTRING_SLOTS; i++, ts++)
+			ts->offset = i * (DT_STRLEN_BYTES +
+					  dtp->dt_options[DTRACEOPT_STRSIZE]);
+	}
+
+	ts = dtp->dt_tstrings;		/* always: mark all free */
+	for (i = 0; i < DT_TSTRING_SLOTS; i++, ts++)
+		ts->in_use = 0;
+}
+
+/*
+ * Claim the first free temporary string slot and attach it to the given node.
+ */
+static void
+dt_cg_tstring_alloc(dt_pcb_t *pcb, dt_node_t *dnp)
+{
+	int		i;
+	dt_tstring_t	*ts = pcb->pcb_hdl->dt_tstrings;
+
+	for (i = 0; i < DT_TSTRING_SLOTS; i++, ts++) {
+		if (!ts->in_use)
+			break;
+	}
+
+	assert(i < DT_TSTRING_SLOTS);		/* a free slot must exist */
+	ts->in_use = 1;
+
+	dt_node_tstring(dnp, ts->offset);	/* sets dnp->dn_tstring */
+}
+
+/*
+ * Release the temporary string of the given node (dn_tstring must be set).
+ */
+static void
+dt_cg_tstring_free(dt_pcb_t *pcb, dt_node_t *dnp)
+{
+	int		i;
+	dt_tstring_t	*ts = pcb->pcb_hdl->dt_tstrings;
+	uint64_t	offset = dnp->dn_tstring->dn_value;
+
+	for (i = 0; i < DT_TSTRING_SLOTS; i++, ts++) {
+		if (ts->offset == offset)	/* slots are found by offset */
+			break;
+	}
+
+	assert(i < DT_TSTRING_SLOTS);		/* offset must match a slot */
+
+	ts->in_use = 0;
+}
+
 static const uint_t	ldstw[] = {
 					0,
 					BPF_B,	BPF_H,	0, BPF_W,
@@ -881,6 +948,8 @@ dt_cg_store_val(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind,
 		dt_regset_free(drp, reg);
 		emit(dlp,  BPF_MOV_REG(BPF_REG_3, dnp->dn_reg));
 		dt_regset_free(drp, dnp->dn_reg);
+		if (dnp->dn_tstring)
+			dt_cg_tstring_free(pcb, dnp);
 		dt_regset_xalloc(drp, BPF_REG_0);
 		emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_probe_read));
 		dt_regset_free_args(drp);
@@ -2175,6 +2244,8 @@ dt_cg_store_var(dt_node_t *src, dt_irlist_t *dlp, dt_regset_t *drp,
 		}
 
 		dt_regset_free(drp, reg);
+		if (src->dn_kind == DT_NODE_FUNC && src->dn_tstring)
+			dt_cg_tstring_free(yypcb, src);
 		return;
 	}
 
@@ -2841,6 +2912,8 @@ dt_cg_asgn_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
 			dt_cg_arglist(idp, dnp->dn_left->dn_args, dlp, drp);
 
 		dt_cg_store_var(dnp, dlp, drp, idp);
+		if (dnp->dn_right->dn_kind == DT_NODE_FUNC && dnp->dn_right->dn_tstring)
+			dt_cg_tstring_free(yypcb, dnp->dn_right);
 	} else {
 		uint_t rbit = dnp->dn_left->dn_flags & DT_NF_REF;
 
@@ -4856,6 +4929,7 @@ dt_cg(dt_pcb_t *pcb, dt_node_t *dnp)
 	}
 
 	dt_regset_reset(pcb->pcb_regs);
+	dt_cg_tstring_reset(pcb->pcb_hdl);
 
 	dt_irlist_destroy(&pcb->pcb_ir);
 	dt_irlist_create(&pcb->pcb_ir);
@@ -4900,18 +4974,25 @@ dt_cg(dt_pcb_t *pcb, dt_node_t *dnp)
 				dt_cg_agg(pcb, act, &pcb->pcb_ir,
 					  pcb->pcb_regs);
 				break;
-			case DT_NODE_DEXPR:
-				if (act->dn_expr->dn_kind == DT_NODE_AGG)
-					dt_cg_agg(pcb, act->dn_expr,
-						  &pcb->pcb_ir, pcb->pcb_regs);
+			case DT_NODE_DEXPR: {
+				dt_node_t	*enp = act->dn_expr;
+
+				if (enp->dn_kind == DT_NODE_AGG)
+					dt_cg_agg(pcb, enp, &pcb->pcb_ir,
+						  pcb->pcb_regs);
 				else
-					dt_cg_node(act->dn_expr, &pcb->pcb_ir,
+					dt_cg_node(enp, &pcb->pcb_ir,
 						   pcb->pcb_regs);
 
-				if (act->dn_expr->dn_reg != -1)
+				if (enp->dn_reg != -1) {
 					dt_regset_free(pcb->pcb_regs,
-						       act->dn_expr->dn_reg);
+						       enp->dn_reg);
+					if (enp->dn_kind == DT_NODE_FUNC &&
+					    enp->dn_tstring)
+						dt_cg_tstring_free(pcb, enp);
+				}
 				break;
+			}
 			default:
 				dnerror(dnp, D_UNKNOWN, "internal error -- "
 					"node kind %u is not a valid "
diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
index bd8d9943..7897fb7a 100644
--- a/libdtrace/dt_impl.h
+++ b/libdtrace/dt_impl.h
@@ -188,6 +188,23 @@ typedef struct dt_ahash {
 	size_t		dtah_size;		/* size of hash table */
 } dt_ahash_t;
 
+/*
+ * Why do we need (only) 4 slots?  The maximum number of string arguments to
+ * any function is 2, and if the result is a string as well, that means we may
+ * need 3 temporary strings during code generation for that function.
+ *
+ * Since string functions can be nested, we can (at most) end up with 1 tstring
+ * (from a nested function for which we already generated code) along with a
+ * nested function being processed which needs 3 temporary strings as mentioned
+ * above.  That brings us to a total of 4.
+ */
+#define DT_TSTRING_SLOTS	4
+
+typedef struct dt_tstring {
+	uint64_t	offset;			/* Offset from dctx->mem */
+	int		in_use;			/* In use (1) or not (0) */
+} dt_tstring_t;
+
 /*
  * To provide a lock-free aggregation write mechanism for the producer,
  * two copies of each aggregation can be used.  A latch sequence number
@@ -252,6 +269,7 @@ struct dtrace_hdl {
 	uint_t dt_strlen;	/* global string table (runtime) size */
 	uint_t dt_maxreclen;	/* largest record size across programs */
 	uint_t dt_maxlvaralloc;	/* largest lvar alloc across pcbs */
+	dt_tstring_t *dt_tstrings; /* temporary string slots */
 	dt_list_t dt_modlist;	/* linked list of dt_module_t's */
 	dt_module_t **dt_mods;	/* hash table of dt_module_t's */
 	uint_t dt_modbuckets;	/* number of module hash buckets */
diff --git a/libdtrace/dt_parser.c b/libdtrace/dt_parser.c
index bac895a2..65509f08 100644
--- a/libdtrace/dt_parser.c
+++ b/libdtrace/dt_parser.c
@@ -2582,6 +2582,18 @@ dt_node_trampoline(dt_probe_t *prp)
 	return dnp;
 }
 
+dt_node_t *
+dt_node_tstring(dt_node_t *fnp, uintmax_t val)
+{
+	dt_node_t *dnp = dt_node_alloc(DT_NODE_TSTRING);
+
+	dnp->dn_value = val;		/* e.g. tstring slot offset */
+	dnp->dn_reg = fnp->dn_reg;	/* same register as fnp */
+	fnp->dn_tstring = dnp;		/* attach new node to fnp */
+
+	return dnp;
+}
+
 /*
  * This function provides the underlying implementation of cooking an
  * identifier given its node, a hash of dynamic identifiers, an identifier
diff --git a/libdtrace/dt_parser.h b/libdtrace/dt_parser.h
index 26e00aec..aec2a410 100644
--- a/libdtrace/dt_parser.h
+++ b/libdtrace/dt_parser.h
@@ -1,6 +1,6 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2007, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -99,6 +99,7 @@ typedef struct dt_node {
 #define	dn_string	dn_u._const._string	/* STRING, IDENT, TYPE */
 #define	dn_ident	dn_u._nodes._ident	/* VAR,SYM,FUN,AGG,INL,PROBE */
 #define	dn_args		dn_u._nodes._links[0]	/* DT_NODE_VAR, FUNC */
+#define	dn_tstring	dn_u._nodes._links[1]	/* DT_NODE_FUNC */
 #define	dn_child	dn_u._nodes._links[0]	/* DT_NODE_OP1 */
 #define	dn_left		dn_u._nodes._links[0]	/* DT_NODE_OP2, OP3 */
 #define	dn_right	dn_u._nodes._links[1]	/* DT_NODE_OP2, OP3 */
@@ -148,6 +149,7 @@ typedef struct dt_node {
 #define	DT_NODE_PROVIDER 20	/* provider definition */
 #define	DT_NODE_PROG	21	/* program translation unit */
 #define	DT_NODE_TRAMPOLINE 22	/* probe trampoline */
+#define DT_NODE_TSTRING	23	/* temporary string slot */
 
 #define	DT_NF_SIGNED	0x01	/* data is a signed quantity (else unsigned) */
 #define	DT_NF_COOKED	0x02	/* data is a known type (else still cooking) */
@@ -200,6 +202,7 @@ extern dt_node_t *dt_node_probe(char *, int, dt_node_t *, dt_node_t *);
 extern dt_node_t *dt_node_provider(char *, dt_node_t *);
 extern dt_node_t *dt_node_program(dt_node_t *);
 extern dt_node_t *dt_node_trampoline(struct dt_probe *);
+extern dt_node_t *dt_node_tstring(dt_node_t *, uintmax_t);
 
 extern dt_node_t *dt_node_link(dt_node_t *, dt_node_t *);
 extern dt_node_t *dt_node_cook(dt_node_t *, uint_t);
-- 
2.33.0




More information about the DTrace-devel mailing list