[DTrace-devel] [PATCH 10/47] Implement new dtrace_eprobedesc_t and dtrace_datadesc_t design

Kris Van Hees kris.van.hees at oracle.com
Sun May 3 20:16:56 PDT 2020


In DTrace v1, the dtrace_eprobedesc_t structure was used to obtain
metadata information about the probe data stream from the kernel.
Each EPID was also associated with a specific dtrace_probedesc_t,
identifying the probe for which the EPID represents an enabling.
Each EPID identified a unique dtrace_eprobedesc_t while the
dtrace_probedesc_t could be shared between multiple EPIDs.

    EPID -[1-to-1]-> dtrace_eprobedesc_t
    EPID -[n-to-1]-> dtrace_probedesc_t

In the DTrace v2 design, where everything resides in userspace, we
can create that metadata directly.  We can also make use of the fact
that when a single program is attached to multiple probes, we do not
need more than one copy of the metadata.  We also make use of the fact
that the EPID identifies a unique (probe, program) tuple.

    EPID -[n-to-1]-> dtrace_datadesc_t
    EPID -[n-to-1]-> dtrace_probedesc_t

The probe data description consists of records that each describe one
data item in the probe data.  Records are added as D code is being
compiled, and they specify the type, size, and alignment of the data
item.  The information from the records is used when consuming data
from the probe data buffer.

The dt_rec_add() function is used to register a data item as a new
record in the data description.  It will advance pcb_bufoff to the
proper alignment and call a code-generation callback function to
fill in any needed zero padding.  It returns the offset at which
the data item is to be stored in the output buffer.

This commit also disables the (incomplete) implementation of the
trace() action for string values.  Where before it would simply end
up putting bogus values into the data buffer, it may now cause actual
corruption of the data buffer.

Orabug: 31220520
Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>
Reviewed-by: Eugene Loh <eugene.loh at oracle.com>
---
 include/dtrace/metadesc.h |  41 +++---
 libdtrace/dt_aggregate.c  |  12 +-
 libdtrace/dt_bpf.c        |  14 +--
 libdtrace/dt_cc.c         |   9 +-
 libdtrace/dt_cg.c         | 209 +++++++++++++++---------------
 libdtrace/dt_consume.c    |  98 +++++++++------
 libdtrace/dt_handle.c     |  28 ++---
 libdtrace/dt_impl.h       |  14 ++-
 libdtrace/dt_map.c        | 259 +++++++++++++++++++++++++-------------
 libdtrace/dt_pcb.h        |   4 +-
 libdtrace/dt_program.c    |  18 ++-
 libdtrace/dtrace.h        |  10 +-
 12 files changed, 426 insertions(+), 290 deletions(-)

diff --git a/include/dtrace/metadesc.h b/include/dtrace/metadesc.h
index 7d800e97..a4961b08 100644
--- a/include/dtrace/metadesc.h
+++ b/include/dtrace/metadesc.h
@@ -2,7 +2,7 @@
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  *
- * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2009, 2020, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -21,16 +21,16 @@
  * DTrace separates the trace data stream from the metadata stream.  The only
  * metadata tokens placed in the data stream are enabled probe identifiers
  * (EPIDs) or (in the case of aggregations) aggregation identifiers.  In order
- * to determine the structure of the data, DTrace consumers pass the token to
- * the kernel, and receive in return a corresponding description of the enabled
- * probe (via the dtrace_eprobedesc structure) or the aggregation (via the
- * dtrace_aggdesc structure).  Both of these structures are expressed in terms
- * of record descriptions (via the dtrace_recdesc structure) that describe the
- * exact structure of the data.  Some record descriptions may also contain a
- * format identifier; this additional bit of metadata can be retrieved from the
- * kernel, for which a format description is returned via the dtrace_fmtdesc
- * structure.  Note that all four of these structures must be bitness-neutral
- * to allow for a 32-bit DTrace consumer on a 64-bit kernel.
+ * to determine the structure of the data, DTrace uses the token to perform a
+ * lookup to retrieve the corresponding description of the enabled probe (via
+ * the dtrace_datadesc structure) or the aggregation (via the dtrace_aggdesc
+ * structure).
+ *
+ * Both of these structures are expressed in terms of record descriptions (via
+ * the dtrace_recdesc structure) that describe the exact structure of the data.
+ * Some record descriptions may also contain a format identifier; this
+ * additional bit of metadata refers to a format description described via a
+ * dtrace_fmtdesc structure.
  */
 typedef struct dtrace_recdesc {
 	dtrace_actkind_t dtrd_action;		/* kind of action */
@@ -42,14 +42,13 @@ typedef struct dtrace_recdesc {
 	uint64_t dtrd_uarg;			/* user argument */
 } dtrace_recdesc_t;
 
-typedef struct dtrace_eprobedesc {
-	dtrace_epid_t dtepd_epid;		/* enabled probe ID */
-	dtrace_id_t dtepd_probeid;		/* probe ID */
-	uint64_t dtepd_uarg;			/* library argument */
-	uint32_t dtepd_size;			/* total size */
-	int dtepd_nrecs;			/* number of records */
-	dtrace_recdesc_t dtepd_rec[1];		/* recods themselves */
-} dtrace_eprobedesc_t;
+typedef struct dtrace_datadesc {
+	uint64_t dtdd_uarg;			/* library argument */
+	uint32_t dtdd_size;			/* total size */
+	int dtdd_nrecs;				/* number of records */
+	dtrace_recdesc_t *dtdd_recs;		/* records themselves */
+	int dtdd_refcnt;			/* reference count */
+} dtrace_datadesc_t;
 
 typedef struct dtrace_aggdesc {
 	DTRACE_PTR(char, dtagd_name);		/* not filled in by kernel */
@@ -70,8 +69,8 @@ typedef struct dtrace_fmtdesc {
 } dtrace_fmtdesc_t;
 
 #define DTRACE_SIZEOF_EPROBEDESC(desc)				\
-	(sizeof (dtrace_eprobedesc_t) + ((desc)->dtepd_nrecs ?  \
-	(((desc)->dtepd_nrecs - 1) * sizeof (dtrace_recdesc_t)) : 0))
+	(sizeof (dtrace_eprobedesc_t) + ((desc)->dtdd_nrecs ?  \
+	(((desc)->dtdd_nrecs - 1) * sizeof (dtrace_recdesc_t)) : 0))
 
 #define	DTRACE_SIZEOF_AGGDESC(desc)			       \
 	(sizeof (dtrace_aggdesc_t) + ((desc)->dtagd_nrecs ?     \
diff --git a/libdtrace/dt_aggregate.c b/libdtrace/dt_aggregate.c
index b545cb32..04d059b0 100644
--- a/libdtrace/dt_aggregate.c
+++ b/libdtrace/dt_aggregate.c
@@ -1,6 +1,6 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2020, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -644,8 +644,8 @@ hashnext:
 		aggdata->dtada_size = size;
 		aggdata->dtada_desc = agg;
 		aggdata->dtada_handle = dtp;
-		(void) dt_epid_lookup(dtp, agg->dtagd_epid,
-		    &aggdata->dtada_edesc, &aggdata->dtada_pdesc);
+		dt_epid_lookup(dtp, agg->dtagd_epid, &aggdata->dtada_ddesc,
+			       &aggdata->dtada_pdesc);
 		aggdata->dtada_normal = 1;
 
 		h->dtahe_hashval = hashval;
@@ -1631,9 +1631,9 @@ dtrace_aggregate_walk_joined(dtrace_hdl_t *dtp, dtrace_aggvarid_t *aggvars,
 				aggdata->dtada_size = agg->dtagd_size;
 				aggdata->dtada_desc = agg;
 				aggdata->dtada_handle = dtp;
-				(void) dt_epid_lookup(dtp, agg->dtagd_epid,
-				    &aggdata->dtada_edesc,
-				    &aggdata->dtada_pdesc);
+				dt_epid_lookup(dtp, agg->dtagd_epid,
+					       &aggdata->dtada_ddesc,
+					       &aggdata->dtada_pdesc);
 				aggdata->dtada_normal = 1;
 				zaggdata[i].dtahe_hashval = 0;
 				zaggdata[i].dtahe_size = agg->dtagd_size;
diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
index cacfa07a..2c90411b 100644
--- a/libdtrace/dt_bpf.c
+++ b/libdtrace/dt_bpf.c
@@ -249,15 +249,16 @@ dt_bpf_reloc_prog(dtrace_hdl_t *dtp, const dt_probe_t *prp,
  */
 int
 dt_bpf_load_prog(dtrace_hdl_t *dtp, const dt_probe_t *prp,
-		 const dtrace_difo_t *dp)
+		 dtrace_stmtdesc_t *sdp)
 {
 	struct bpf_load_program_attr	attr;
 	dtrace_epid_t			epid;
+	const dtrace_difo_t		*dp = sdp->dtsd_action->dtad_difo;
 	int				logsz = BPF_LOG_BUF_SIZE;
 	char				*log;
 	int				rc;
 
-	epid = dt_epid_add(dtp, prp->desc->id, 0);
+	epid = dt_epid_add(dtp, sdp->dtsd_ddesc, prp->desc->id);
 
 	/*
 	 * Check whether there are any probe-specific relocations to be
@@ -360,14 +361,7 @@ dt_bpf_stmt(dtrace_hdl_t *dtp, dtrace_prog_t *pgp, dtrace_stmtdesc_t *sdp,
 	if (!prp)
 		return dt_set_errno(dtp, ESRCH);
 
-	/*
-	 * In the new implementation, every statement has a single action,
-	 * which is the entire program.  At a future code reworking, we should
-	 * combine the action into the statement and do away with the action
-	 * list.
-	 */
-	assert(ap == sdp->dtsd_action_last);
-	fd = dt_bpf_load_prog(dtp, prp, ap->dtad_difo);
+	fd = dt_bpf_load_prog(dtp, prp, sdp);
 	if (fd < 0)
 		return fd;
 
diff --git a/libdtrace/dt_cc.c b/libdtrace/dt_cc.c
index cc299b93..e0dbe465 100644
--- a/libdtrace/dt_cc.c
+++ b/libdtrace/dt_cc.c
@@ -1,6 +1,6 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2020, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -131,6 +131,8 @@ dt_stmt_create(dtrace_hdl_t *dtp, dtrace_ecbdesc_t *edp,
 
 	assert(yypcb->pcb_stmt == NULL);
 	yypcb->pcb_stmt = sdp;
+	yypcb->pcb_nrecs = 0;
+	yypcb->pcb_maxrecs = 0;
 
 	sdp->dtsd_descattr = descattr;
 	sdp->dtsd_stmtattr = stmtattr;
@@ -259,6 +261,11 @@ dt_stmt_append(dtrace_stmtdesc_t *sdp, const dt_node_t *dnp)
 			datarec = 1;
 	}
 
+	/*
+	 * Finalize the probe data description for the statement.
+	 */
+	dt_datadesc_finalize(yypcb->pcb_hdl, sdp->dtsd_ddesc);
+
 	if (dtrace_stmt_add(yypcb->pcb_hdl, yypcb->pcb_prog, sdp) != 0)
 		longjmp(yypcb->pcb_jmpbuf, dtrace_errno(yypcb->pcb_hdl));
 
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index 4945dead..538960c9 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -30,16 +30,15 @@ static void dt_cg_node(dt_node_t *, dt_irlist_t *, dt_regset_t *);
  *	3. Call bpf_get_smp_processor_id() to determine the current CPU id
  *	4. Store CPU id in its reserved stack slot
  *	5. Retrieve the output data buffer and store the base pointer in %r9
- *	6. Store the epid and 4 padding byes at the beginning of the output
- *	   buffer
+ *	6. Store the epid and tag at the beginning of the output buffer
  *
  * Due to the fact that raw perf samples are stored as a header (multiple of 8
- * bytes) followed by a 4-byte size, we need to ensure that our trace data is
- * preceded by a 4-byte padding.  That will place the EPID (4 bytes) at a
- * 64-bit boundary, and given that it is followed by a 4-byte tag, all further
- * trace data will also start at a 64-bit boundary.
+ * bytes) followed by a 32-bit size, we need to ensure that our trace data is
+ * preceded by a 4-byte padding.  That will place the 32-bit EPID at a 64-bit
+ * boundary, and given that it is followed by a 32-bit tag, all further trace
+ * data will also start at a 64-bit boundary.
  *
- * To accomplish all this, we store a 4-byte value at the beginning of our
+ * To accomplish all this, we store a 32-bit 0 value at the beginning of our
  * output data buffer, and then store the next address location in %r9.
  *
  * In the epilogue, we then need to submit (%r9 - 4) as source address of our
@@ -108,9 +107,10 @@ dt_cg_prologue(dt_pcb_t *pcb)
 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
 
 	/*
-	 * We read epid from dctx (struct dt_bpf_context) and store it in the
-	 * first 4 bytes of the output buffer.  The next 4 bytes are padded
-	 * with 0s so that the next entry will be at a 8-byte boundary.
+	 * We read the 32-bit epid from dctx (struct dt_bpf_context) and store
+	 * it at %r9 (base address of the trace data).  Next we store the
+	 * 32-bit tag (hardcoded as 0 for now), which leaves us at an 8-byte
+	 * boundary for actual probe data to be stored.
 	 *
 	 *		lddw %r0, [%fp + DT_STK_DCTX]
 	 *		ldw %r0, [%r0 + DCTX_EPID]
@@ -126,7 +126,10 @@ dt_cg_prologue(dt_pcb_t *pcb)
 	instr = BPF_STORE_IMM(BPF_W, BPF_REG_9, 4, 0);
 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
 
-	pcb->pcb_bufoff += sizeof(uint64_t);
+	/*
+	 * Account for 32-bit epid (at offset 0) and tag 32-bit (at offset 4).
+	 */
+	pcb->pcb_bufoff += 2 * sizeof(uint32_t);
 }
 
 /*
@@ -187,26 +190,51 @@ dt_cg_epilogue(dt_pcb_t *pcb)
 	dt_irlist_append(dlp, dt_cg_node_alloc(pcb->pcb_exitlbl, instr));
 }
 
-static size_t
+/*
+ * Generate an instruction sequence to fill a gap in the output buffer with 0
+ * values.  This is used to ensure that there are no uninitialized bytes in the
+ * output buffer that could result from alignment requirements for data
+ * records.
+ */
+static void
+dt_cg_fill_gap(dt_pcb_t *pcb, int gap)
+{
+	struct bpf_insn	instr;
+	dt_irlist_t	*dlp = &pcb->pcb_ir;
+	uint_t		off = pcb->pcb_bufoff;
+
+	if (gap & 1) {
+		instr = BPF_STORE_IMM(BPF_B, BPF_REG_9, off, 0);
+		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
+		off += 1;
+	}
+	if (gap & 2) {
+		instr = BPF_STORE_IMM(BPF_H, BPF_REG_9, off, 0);
+		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
+		off += 2;
+	}
+	if (gap & 4) {
+		instr = BPF_STORE_IMM(BPF_W, BPF_REG_9, off, 0);
+		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
+	}
+}
+
+static void
 dt_cg_act_breakpoint(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
 	dnerror(dnp, D_UNKNOWN, "breakpoint() is not implemented (yet)\n");
 	/* FIXME: Needs implementation */
-
-	return 0;
 }
 
-static size_t
+static void
 dt_cg_act_chill(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
 	dt_cg_node(dnp->dn_args, &pcb->pcb_ir, pcb->pcb_regs);
 	dnerror(dnp, D_UNKNOWN, "chill() is not implemented (yet)\n");
 	/* FIXME: Needs implementation */
-
-	return 0;
 }
 
-static size_t
+static void
 dt_cg_act_clear(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
 	dt_node_t *anp;
@@ -236,8 +264,6 @@ dt_cg_act_clear(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 	 * DEPENDS ON: How aggregations are implemented using eBPF (hashmap?).
 	 * AGGID = aid->di_id
 	 */
-
-	return 0;
 }
 
 /*
@@ -247,22 +273,24 @@ dt_cg_act_clear(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
  * Stores:
  *	integer (4 bytes)		-- speculation ID
  */
-static size_t
+static void
 dt_cg_act_commit(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
 	struct bpf_insn instr;
 	dt_irlist_t *dlp = &pcb->pcb_ir;
-	uint_t off = pcb->pcb_bufoff;
+	uint_t off;
 
 	dt_cg_node(dnp->dn_args, &pcb->pcb_ir, pcb->pcb_regs);
 
+	off = dt_rec_add(pcb->pcb_hdl, dt_cg_fill_gap, DTRACEACT_COMMIT,
+			 sizeof(uint64_t), sizeof(uint64_t), NULL,
+			 DT_ACT_COMMIT);
+
 	instr = BPF_STORE(BPF_DW, BPF_REG_9, off, BPF_REG_0);	/* FIXME */
 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
-
-	return sizeof(uint64_t);
 }
 
-static size_t
+static void
 dt_cg_act_denormalize(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
 	dt_node_t *anp;
@@ -292,8 +320,6 @@ dt_cg_act_denormalize(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 	 * DEPENDS ON: How aggregations are implemented using eBPF (hashmap?).
 	 * AGGID = aid->di_id
 	 */
-
-	return 0;
 }
 
 /*
@@ -303,19 +329,21 @@ dt_cg_act_denormalize(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
  * Stores:
  *	integer (4 bytes)		-- speculation ID
  */
-static size_t
+static void
 dt_cg_act_discard(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
-	struct bpf_insn instr;
-	dt_irlist_t *dlp = &pcb->pcb_ir;
-	uint_t off = pcb->pcb_bufoff;
+	struct bpf_insn	instr;
+	dt_irlist_t	*dlp = &pcb->pcb_ir;
+	uint_t		off;
 
 	dt_cg_node(dnp->dn_args, &pcb->pcb_ir, pcb->pcb_regs);
 
+	off = dt_rec_add(pcb->pcb_hdl, dt_cg_fill_gap, DTRACEACT_DISCARD,
+			 sizeof(uint64_t), sizeof(uint64_t), NULL,
+			 DT_ACT_DISCARD);
+
 	instr = BPF_STORE(BPF_DW, BPF_REG_9, off, BPF_REG_0);	/* FIXME */
 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
-
-	return sizeof(uint64_t);
 }
 
 /*
@@ -324,83 +352,74 @@ dt_cg_act_discard(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
  * Stores:
  *	integer (4 bytes)		-- return code
  */
-static size_t
+static void
 dt_cg_act_exit(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
-	struct bpf_insn instr;
-	dt_irlist_t *dlp = &pcb->pcb_ir;
-	uint_t off = pcb->pcb_bufoff;
+	struct bpf_insn	instr;
+	dt_irlist_t	*dlp = &pcb->pcb_ir;
+	uint_t		off;
 
 	dt_cg_node(dnp->dn_args, &pcb->pcb_ir, pcb->pcb_regs);
 
+	off = dt_rec_add(pcb->pcb_hdl, dt_cg_fill_gap, DTRACEACT_EXIT,
+			 sizeof(uint64_t), sizeof(uint64_t), NULL,
+			 DT_ACT_EXIT);
+
 	instr = BPF_STORE(BPF_DW, BPF_REG_9, off, BPF_REG_0);	/* FIXME */
 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
-
-	return sizeof(uint64_t);
 }
 
-static size_t
+static void
 dt_cg_act_freopen(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
-	return 0;
 }
 
-static size_t
+static void
 dt_cg_act_ftruncate(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
-	return 0;
 }
 
-static size_t
+static void
 dt_cg_act_jstack(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
-	return 0;
 }
 
-static size_t
+static void
 dt_cg_act_normalize(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
-	return 0;
 }
 
-static size_t
+static void
 dt_cg_act_panic(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
-	return 0;
 }
 
-static size_t
+static void
 dt_cg_act_pcap(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
-	return 0;
 }
 
-static size_t
+static void
 dt_cg_act_printa(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
-	return 0;
 }
 
-static size_t
+static void
 dt_cg_act_printf(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
-	return 0;
 }
 
-static size_t
+static void
 dt_cg_act_raise(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
 	dt_cg_node(dnp->dn_args, &pcb->pcb_ir, pcb->pcb_regs);
 	dnerror(dnp, D_UNKNOWN, "raise() is not implemented (yet)\n");
 	/* FIXME: Needs implementation */
-
-	return 0;
 }
 
-static size_t
+static void
 dt_cg_act_setopt(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
-	return 0;
 }
 
 /*
@@ -410,22 +429,24 @@ dt_cg_act_setopt(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
  * Stores:
  *	integer (4 bytes)		-- speculation ID
  */
-static size_t
+static void
 dt_cg_act_speculate(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
-	struct bpf_insn instr;
-	dt_irlist_t *dlp = &pcb->pcb_ir;
-	uint_t off = pcb->pcb_bufoff;
+	struct bpf_insn	instr;
+	dt_irlist_t	*dlp = &pcb->pcb_ir;
+	uint_t		off;
 
 	dt_cg_node(dnp->dn_args, &pcb->pcb_ir, pcb->pcb_regs);
 
+	off = dt_rec_add(pcb->pcb_hdl, dt_cg_fill_gap, DTRACEACT_SPECULATE,
+			 sizeof(uint64_t), sizeof(uint64_t), NULL,
+			 DT_ACT_SPECULATE);
+
 	instr = BPF_STORE(BPF_DW, BPF_REG_9, off, BPF_REG_0);	/* FIXME */
 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
-
-	return sizeof(uint64_t);
 }
 
-static size_t
+static void
 dt_cg_act_stack(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
 	dt_node_t *arg = dnp->dn_args;
@@ -439,38 +460,32 @@ dt_cg_act_stack(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 
 		nframes = (uint32_t)arg->dn_value;
 	}
-
-	return 0; /* FIXME */
 }
 
-static size_t
+static void
 dt_cg_act_stop(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
 	dnerror(dnp, D_UNKNOWN, "stop() is not implemented (yet)\n");
 	/* FIXME: Needs implementation */
-
-	return 0;
 }
 
-static size_t
+static void
 dt_cg_act_symmod(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
-	return 0;
 }
 
-static size_t
+static void
 dt_cg_act_system(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
-	return 0;
 }
 
-static size_t
+static void
 dt_cg_act_trace(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
-	struct bpf_insn instr;
-	dt_irlist_t *dlp = &pcb->pcb_ir;
-	char n[DT_TYPE_NAMELEN];
-	uint_t off = pcb->pcb_bufoff;
+	struct bpf_insn	instr;
+	dt_irlist_t	*dlp = &pcb->pcb_ir;
+	char		n[DT_TYPE_NAMELEN];
+	uint_t		off;
 
 	if (dt_node_is_void(dnp->dn_args)) {
 		dnerror(dnp->dn_args, D_TRACE_VOID,
@@ -486,11 +501,14 @@ dt_cg_act_trace(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 	dt_cg_node(dnp->dn_args, &pcb->pcb_ir, pcb->pcb_regs);
 
 	if (dt_node_is_scalar(dnp->dn_args)) {
+		off = dt_rec_add(pcb->pcb_hdl, dt_cg_fill_gap,
+				 DTRACEACT_DIFEXPR, sizeof(uint64_t),
+				 sizeof(uint64_t), NULL, DT_ACT_TRACE);
+
 		instr = BPF_STORE(BPF_DW, BPF_REG_9, off, dnp->dn_args->dn_reg);
 		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
 		dt_regset_free(pcb->pcb_regs, dnp->dn_args->dn_reg);
-
-		return sizeof(uint64_t);
+#if 0
 	} else if (dt_node_is_string(dnp->dn_args)) {
 		size_t sz = dt_node_type_size(dnp->dn_args);
 
@@ -513,28 +531,25 @@ dt_cg_act_trace(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 		dt_regset_free(pcb->pcb_regs, dnp->dn_args->dn_reg);
 
 		return sz + sizeof(uint64_t);
+#endif
 	} else
 		dnerror(dnp->dn_args, D_PROTO_ARG,
 			"trace( ) argument #1 is incompatible with prototype:\n"
 			"\tprototype: scalar or string\n\t argument: %s\n",
 			dt_node_type_name(dnp->dn_args, n, sizeof (n)));
-
-	return 0;
 }
 
-static size_t
+static void
 dt_cg_act_tracemem(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
-	return 0;
 }
 
-static size_t
+static void
 dt_cg_act_trunc(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
-	return 0;
 }
 
-static size_t
+static void
 dt_cg_act_ustack(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
 	dt_node_t *arg0 = dnp->dn_args;
@@ -562,11 +577,9 @@ dt_cg_act_ustack(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 
 		strsize = (uint32_t)arg1->dn_value;
 	}
-
-	return 0; /* FIXME */
 }
 
-typedef size_t dt_cg_action_f(dt_pcb_t *, dt_node_t *, dtrace_actkind_t);
+typedef void dt_cg_action_f(dt_pcb_t *, dt_node_t *, dtrace_actkind_t);
 
 typedef struct dt_cg_actdesc {
 	dt_cg_action_f *fun;
@@ -2719,13 +2732,9 @@ dt_cg(dt_pcb_t *pcb, dt_node_t *dnp)
 
 				idp = act->dn_expr->dn_ident;
 				actdp = &_dt_cg_actions[DT_ACT_IDX(idp->di_id)];
-				if (actdp->fun) {
-					size_t	sz = 0;
-
-					sz = actdp->fun(pcb, act->dn_expr,
-							actdp->kind);
-					pcb->pcb_bufoff += sz;
-				}
+				if (actdp->fun)
+					actdp->fun(pcb, act->dn_expr,
+						   actdp->kind);
 			} else {
 				dt_cg_node(act->dn_expr, &pcb->pcb_ir,
 					   pcb->pcb_regs);
diff --git a/libdtrace/dt_consume.c b/libdtrace/dt_consume.c
index 62e8e9cb..68e6b1c8 100644
--- a/libdtrace/dt_consume.c
+++ b/libdtrace/dt_consume.c
@@ -1,6 +1,6 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2009, 2020, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -366,7 +366,7 @@ dt_flowindent(dtrace_hdl_t *dtp, dtrace_probedata_t *data, dtrace_epid_t last,
     dtrace_bufdesc_t *buf, size_t offs)
 {
 	dtrace_probedesc_t *pd = data->dtpda_pdesc, *npd;
-	dtrace_eprobedesc_t *epd = data->dtpda_edesc, *nepd;
+	dtrace_datadesc_t *dd = data->dtpda_ddesc, *ndd;
 	dtrace_flowkind_t flow = DTRACEFLOW_NONE;
 	const char *p = pd->prv;
 	const char *n = pd->prb;
@@ -376,7 +376,7 @@ dt_flowindent(dtrace_hdl_t *dtp, dtrace_probedata_t *data, dtrace_epid_t last,
 	static const char *r_str[2] = { " <- ", " <= " };
 	static const char *ent = "entry", *ret = "return";
 	static int entlen = 0, retlen = 0;
-	dtrace_epid_t next, id = epd->dtepd_epid;
+	dtrace_epid_t next, id = data->dtpda_epid;
 	int rval;
 
 	if (entlen == 0) {
@@ -421,7 +421,7 @@ dt_flowindent(dtrace_hdl_t *dtp, dtrace_probedata_t *data, dtrace_epid_t last,
 	 * _next_ EPID.
 	 */
 	if (flow == DTRACEFLOW_RETURN) {
-		offs += epd->dtepd_size;
+		offs += dd->dtdd_size;
 
 		do {
 			if (offs >= buf->dtbd_size) {
@@ -442,7 +442,7 @@ dt_flowindent(dtrace_hdl_t *dtp, dtrace_probedata_t *data, dtrace_epid_t last,
 				offs += sizeof (id);
 		} while (next == DTRACE_EPIDNONE);
 
-		if ((rval = dt_epid_lookup(dtp, next, &nepd, &npd)) != 0)
+		if ((rval = dt_epid_lookup(dtp, next, &ndd, &npd)) != 0)
 			return (rval);
 
 		if (next != id && npd->id == pd->id)
@@ -1867,7 +1867,7 @@ dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, int cpu, dtrace_bufdesc_t *buf,
 
 again:
 	for (offs = start; offs < end; ) {
-		dtrace_eprobedesc_t *epd;
+		dtrace_datadesc_t *dd;
 
 		/*
 		 * We're guaranteed to have an ID.
@@ -1883,14 +1883,14 @@ again:
 			continue;
 		}
 
-		if ((rval = dt_epid_lookup(dtp, id, &data.dtpda_edesc,
-		    &data.dtpda_pdesc)) != 0)
+		if ((rval = dt_epid_lookup(dtp, id, &data.dtpda_ddesc,
+					   &data.dtpda_pdesc)) != 0)
 			return (rval);
 
-		epd = data.dtpda_edesc;
+		dd = data.dtpda_ddesc;
 		data.dtpda_data = buf->dtbd_data + offs;
 
-		if (data.dtpda_edesc->dtepd_uarg != DT_ECB_DEFAULT) {
+		if (data.dtpda_ddesc->dtdd_uarg != DT_ECB_DEFAULT) {
 			rval = dt_handle(dtp, &data);
 
 			if (rval == DTRACE_CONSUME_NEXT)
@@ -1919,8 +1919,8 @@ again:
 		if (rval != DTRACE_CONSUME_THIS)
 			return (dt_set_errno(dtp, EDT_BADRVAL));
 
-		for (i = 0; i < epd->dtepd_nrecs; i++) {
-			dtrace_recdesc_t *rec = &epd->dtepd_rec[i];
+		for (i = 0; i < dd->dtdd_nrecs; i++) {
+			dtrace_recdesc_t *rec = &dd->dtdd_recs[i];
 			dtrace_actkind_t act = rec->dtrd_action;
 
 			data.dtpda_data = buf->dtbd_data + offs +
@@ -1956,7 +1956,7 @@ again:
 					continue;
 
 				case DT_ACT_NORMALIZE:
-					if (i == epd->dtepd_nrecs - 1)
+					if (i == dd->dtdd_nrecs - 1)
 						return (dt_set_errno(dtp,
 						    EDT_BADNORMAL));
 
@@ -1974,12 +1974,12 @@ again:
 					caddr_t val;
 					int rv;
 
-					if (i == epd->dtepd_nrecs - 1) {
+					if (i == dd->dtdd_nrecs - 1) {
 						return (dt_set_errno(dtp,
 						    EDT_BADSETOPT));
 					}
 
-					valrec = &epd->dtepd_rec[++i];
+					valrec = &dd->dtdd_recs[++i];
 					valsize = valrec->dtrd_size;
 
 					if (valrec->dtrd_action != act ||
@@ -2009,7 +2009,7 @@ again:
 				}
 
 				case DT_ACT_TRUNC:
-					if (i == epd->dtepd_nrecs - 1)
+					if (i == dd->dtdd_nrecs - 1)
 						return (dt_set_errno(dtp,
 						    EDT_BADTRUNC));
 
@@ -2109,7 +2109,7 @@ again:
 				}
 
 				n = (*func)(dtp, fp, fmtdata, &data,
-				    rec, epd->dtepd_nrecs - i,
+				    rec, dd->dtdd_nrecs - i,
 				    (uchar_t *)buf->dtbd_data + offs,
 				    buf->dtbd_size - offs);
 
@@ -2126,7 +2126,7 @@ nofmt:
 				dtrace_print_aggdata_t pd;
 				dtrace_aggvarid_t *aggvars;
 				int j, naggvars = 0;
-				size_t size = ((epd->dtepd_nrecs - i) *
+				size_t size = ((dd->dtdd_nrecs - i) *
 				    sizeof (dtrace_aggvarid_t));
 
 				if ((aggvars = dt_alloc(dtp, size)) == NULL)
@@ -2138,11 +2138,11 @@ nofmt:
 				 * forward through the records until we find
 				 * a record from a different statement.
 				 */
-				for (j = i; j < epd->dtepd_nrecs; j++) {
+				for (j = i; j < dd->dtdd_nrecs; j++) {
 					dtrace_recdesc_t *nrec;
 					caddr_t naddr;
 
-					nrec = &epd->dtepd_rec[j];
+					nrec = &dd->dtdd_recs[j];
 
 					if (nrec->dtrd_uarg != rec->dtrd_uarg)
 						break;
@@ -2191,7 +2191,7 @@ nofmt:
 
 			if (act == DTRACEACT_TRACEMEM) {
 				n = dt_print_tracemem(dtp, fp, rec,
-				    epd->dtepd_nrecs - i,
+				    dd->dtdd_nrecs - i,
 				    buf->dtbd_data + offs);
 
 				if (n < 0)
@@ -2253,7 +2253,7 @@ nextrec:
 		 */
 		rval = (*rfunc)(&data, NULL, arg);
 nextepid:
-		offs += epd->dtepd_size;
+		offs += dd->dtdd_size;
 		last = id;
 	}
 
@@ -2293,7 +2293,7 @@ dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, int cpu, char *buf,
 	if (hdr->type == PERF_RECORD_SAMPLE) {
 		char			*ptr = data;
 		uint32_t		size, epid, tag;
-		int			i, nrecs;
+		int			i;
 		dtrace_probedata_t	pdat;
 
 		/*
@@ -2311,6 +2311,13 @@ dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, int cpu, char *buf,
 		if (ptr > buf + hdr->size)
 			return -1;
 
+		/*
+		 * Clear the probe data, and fill in data independent fields.
+		 */
+		memset(&pdat, 0, sizeof(pdat));
+		pdat.dtpda_handle = dtp;
+		pdat.dtpda_cpu = cpu;
+
 		size = *(uint32_t *)data;
 		data += sizeof(size);
 		ptr += sizeof(size) + size;
@@ -2320,21 +2327,20 @@ dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, int cpu, char *buf,
 		data += sizeof(uint32_t);		/* skip padding */
 		size -= sizeof(uint32_t);
 
-		epid = *(uint32_t *)data;
-		data += sizeof(uint32_t);
-		size -= sizeof(uint32_t);
+		epid = ((uint32_t *)data)[0];
+		tag = ((uint32_t *)data)[1];
 
-		tag = *(uint32_t *)data;
-		data += sizeof(uint32_t);
-		size -= sizeof(uint32_t);
+		/*
+		 * Fill in the epid and address of the epid in the buffer.  We
+		 * need to pass this to the efunc.
+		 */
+		pdat.dtpda_epid = epid;
+		pdat.dtpda_data = data;
 
-		memset(&pdat, 0, sizeof(pdat));
-		pdat.dtpda_handle = dtp;
-		pdat.dtpda_cpu = cpu;
-		rval = dt_epid_lookup(dtp, epid, &pdat.dtpda_edesc,
+		rval = dt_epid_lookup(dtp, epid, &pdat.dtpda_ddesc,
 						 &pdat.dtpda_pdesc);
 		if (rval != 0)
-			return (rval);
+			return rval;
 
 #if 0
 		if (flow)
@@ -2362,14 +2368,26 @@ dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, int cpu, char *buf,
 		/*
 		 * FIXME: This code is temporary.
 		 */
-		nrecs = size / sizeof(uint64_t);
-		for (i = 0; i < nrecs; i++) {
-			int	n;
+		for (i = 0; i < pdat.dtpda_ddesc->dtdd_nrecs; i++) {
+			int			n;
+			dtrace_recdesc_t	*rec;
+
+			rec = &pdat.dtpda_ddesc->dtdd_recs[i];
+
+			pdat.dtpda_data = data + rec->dtrd_offset;
+			rval = (*rfunc)(&pdat, rec, arg);
+
+			if (rval == DTRACE_CONSUME_NEXT)
+				continue;
+
+			if (rval == DTRACE_CONSUME_ABORT)
+				return dt_set_errno(dtp, EDT_DIRABORT);
+
+			if (rval != DTRACE_CONSUME_THIS)
+				return dt_set_errno(dtp, EDT_BADRVAL);
 
 			n = dt_printf(dtp, fp, quiet ? "%lld" : " %16lld",
-				      *(uint64_t *)data);
-			data += sizeof(uint64_t);
-			size -= sizeof(uint64_t);
+				      *(int64_t *)pdat.dtpda_data);
 
 			if (n < 0)
 				return -1;
diff --git a/libdtrace/dt_handle.c b/libdtrace/dt_handle.c
index ead5e198..c678faff 100644
--- a/libdtrace/dt_handle.c
+++ b/libdtrace/dt_handle.c
@@ -1,6 +1,6 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2007, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -126,12 +126,12 @@ dtrace_handle_setopt(dtrace_hdl_t *dtp, dtrace_handle_setopt_f *hdlr,
 }
 
 #define	DT_REC(type, ndx) *((type *)((uintptr_t)data->dtpda_data + \
-    epd->dtepd_rec[(ndx)].dtrd_offset))
+    dd->dtdd_recs[(ndx)].dtrd_offset))
 
 static int
 dt_handle_err(dtrace_hdl_t *dtp, dtrace_probedata_t *data)
 {
-	dtrace_eprobedesc_t *epd = data->dtpda_edesc, *errepd;
+	dtrace_datadesc_t *dd = data->dtpda_ddesc, *errdd;
 	dtrace_probedesc_t *pd = data->dtpda_pdesc, *errpd;
 	dtrace_errdata_t err;
 	dtrace_epid_t epid;
@@ -144,9 +144,9 @@ dt_handle_err(dtrace_hdl_t *dtp, dtrace_probedata_t *data)
 	char *str;
 	int len;
 
-	assert(epd->dtepd_uarg == DT_ECB_ERROR);
+	assert(dd->dtdd_uarg == DT_ECB_ERROR);
 
-	if (epd->dtepd_nrecs != 5 || strcmp(pd->prv, "dtrace") != 0 ||
+	if (dd->dtdd_nrecs != 5 || strcmp(pd->prv, "dtrace") != 0 ||
 	    strcmp(pd->prb, "ERROR") != 0)
 		return (dt_set_errno(dtp, EDT_BADERROR));
 
@@ -156,10 +156,10 @@ dt_handle_err(dtrace_hdl_t *dtp, dtrace_probedata_t *data)
 	 */
 	epid = (uint32_t)DT_REC(uint64_t, 0);
 
-	if (dt_epid_lookup(dtp, epid, &errepd, &errpd) != 0)
+	if (dt_epid_lookup(dtp, epid, &errdd, &errpd) != 0)
 		return (dt_set_errno(dtp, EDT_BADERROR));
 
-	err.dteda_edesc = errepd;
+	err.dteda_ddesc = errdd;
 	err.dteda_pdesc = errpd;
 	err.dteda_cpu = data->dtpda_cpu;
 	err.dteda_action = (int)DT_REC(uint64_t, 1);
@@ -225,7 +225,7 @@ dt_handle_liberr(dtrace_hdl_t *dtp, const dtrace_probedata_t *data,
 	char *str;
 	int len;
 
-	err.dteda_edesc = data->dtpda_edesc;
+	err.dteda_ddesc = data->dtpda_ddesc;
 	err.dteda_pdesc = errpd;
 	err.dteda_cpu = data->dtpda_cpu;
 	err.dteda_action = -1;
@@ -238,10 +238,10 @@ dt_handle_liberr(dtrace_hdl_t *dtp, const dtrace_probedata_t *data,
 
 	str = alloca(len);
 
-	snprintf(str, len, "error on enabled probe ID %u (ID %u: %s:%s:%s:%s): "
-			   "%s\n",
-		 data->dtpda_edesc->dtepd_epid, errpd->id, errpd->prv,
-		 errpd->mod, errpd->fun, errpd->prb, faultstr);
+	snprintf(str, len,
+		 "error on enabled probe ID %u (ID %u: %s:%s:%s:%s): %s\n",
+		 data->dtpda_epid, errpd->id, errpd->prv, errpd->mod,
+		 errpd->fun, errpd->prb, faultstr);
 
 	err.dteda_msg = str;
 
@@ -443,10 +443,10 @@ dt_handle_setopt(dtrace_hdl_t *dtp, dtrace_setoptdata_t *data)
 int
 dt_handle(dtrace_hdl_t *dtp, dtrace_probedata_t *data)
 {
-	dtrace_eprobedesc_t *epd = data->dtpda_edesc;
+	dtrace_datadesc_t *dd = data->dtpda_ddesc;
 	int rval;
 
-	switch (epd->dtepd_uarg) {
+	switch (dd->dtdd_uarg) {
 	case DT_ECB_ERROR:
 		rval = dt_handle_err(dtp, data);
 		break;
diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
index c20d30c9..536b1745 100644
--- a/libdtrace/dt_impl.h
+++ b/libdtrace/dt_impl.h
@@ -290,7 +290,7 @@ struct dtrace_hdl {
 	ctf_id_t dt_type_usymaddr; /* cached CTF ident. for _usymaddr type */
 	dtrace_epid_t dt_nextepid; /* next enabled probe ID to assign */
 	size_t dt_maxprobe;	/* max enabled probe ID */
-	dtrace_eprobedesc_t **dt_edesc; /* enabled probe descriptions */
+	dtrace_datadesc_t **dt_ddesc; /* probe data descriptions */
 	dtrace_probedesc_t **dt_pdesc; /* probe descriptions for enabled prbs */
 	size_t dt_maxagg;	/* max aggregation ID */
 	dtrace_aggdesc_t **dt_aggdesc; /* aggregation descriptions */
@@ -709,10 +709,16 @@ extern int dt_aggregate_go(dtrace_hdl_t *);
 extern int dt_aggregate_init(dtrace_hdl_t *);
 extern void dt_aggregate_destroy(dtrace_hdl_t *);
 
-extern dtrace_epid_t dt_epid_add(dtrace_hdl_t *, dtrace_id_t, int);
-extern int dt_epid_lookup(dtrace_hdl_t *, dtrace_epid_t,
-    dtrace_eprobedesc_t **, dtrace_probedesc_t **);
+extern dtrace_datadesc_t *dt_datadesc_create(dtrace_hdl_t *);
+extern int dt_datadesc_finalize(dtrace_hdl_t *, dtrace_datadesc_t *);
+extern dtrace_epid_t dt_epid_add(dtrace_hdl_t *, dtrace_datadesc_t *,
+				 dtrace_id_t);
+extern int dt_epid_lookup(dtrace_hdl_t *, dtrace_epid_t, dtrace_datadesc_t **,
+			  dtrace_probedesc_t **);
 extern void dt_epid_destroy(dtrace_hdl_t *);
+typedef void (*dt_cg_gap_f)(dt_pcb_t *, int);
+extern uint32_t dt_rec_add(dtrace_hdl_t *, dt_cg_gap_f, dtrace_actkind_t,
+			   uint32_t, uint16_t, const char *, uint64_t);
 extern int dt_aggid_lookup(dtrace_hdl_t *, dtrace_aggid_t, dtrace_aggdesc_t **);
 extern void dt_aggid_destroy(dtrace_hdl_t *);
 
diff --git a/libdtrace/dt_map.c b/libdtrace/dt_map.c
index 48ffa15d..58933776 100644
--- a/libdtrace/dt_map.c
+++ b/libdtrace/dt_map.c
@@ -1,6 +1,6 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -11,32 +11,95 @@
 #include <assert.h>
 
 #include <dt_impl.h>
+#include <dt_pcb.h>
 #include <dt_probe.h>
 #include <dt_printf.h>
 
+static void
+dt_datadesc_hold(dtrace_datadesc_t *ddp)
+{
+	ddp->dtdd_refcnt++;
+}
+
+static void
+dt_datadesc_release(dtrace_hdl_t *dtp, dtrace_datadesc_t *ddp)
+{
+	if (--ddp->dtdd_refcnt > 0)
+		return;
+
+	dt_free(dtp, ddp->dtdd_recs);
+	dt_free(dtp, ddp);
+}
+
+dtrace_datadesc_t *
+dt_datadesc_create(dtrace_hdl_t *dtp)
+{
+	dtrace_datadesc_t	*ddp;
+
+	ddp = dt_zalloc(dtp, sizeof(dtrace_datadesc_t));
+	if (ddp == NULL) {
+		dt_set_errno(dtp, EDT_NOMEM);
+		return NULL;
+	}
+
+	dt_datadesc_hold(ddp);
+
+	return ddp;
+}
+
+int
+dt_datadesc_finalize(dtrace_hdl_t *dtp, dtrace_datadesc_t *ddp)
+{
+	dt_pcb_t	*pcb = dtp->dt_pcb;
+
+	/*
+	 * If the number of allocated data records is greater than the actual
+	 * number needed, we create a copy with the right number of records and
+	 * free the oversized one.
+	 */
+	if (pcb->pcb_nrecs < pcb->pcb_maxrecs) {
+		dtrace_recdesc_t	*nrecs;
+
+		nrecs = dt_calloc(dtp, pcb->pcb_nrecs,
+				  sizeof(dtrace_recdesc_t));
+		if (nrecs == NULL)
+			return dt_set_errno(dtp, EDT_NOMEM);
+
+		memcpy(nrecs, ddp->dtdd_recs,
+		       pcb->pcb_nrecs * sizeof(dtrace_recdesc_t));
+		dt_free(dtp, ddp->dtdd_recs);
+		ddp->dtdd_recs = nrecs;
+		pcb->pcb_maxrecs = pcb->pcb_nrecs;
+	}
+
+	ddp->dtdd_nrecs = pcb->pcb_nrecs;
+
+	return 0;
+}
+
 #if 0
 static int
 dt_epid_add(dtrace_hdl_t *dtp, dtrace_epid_t id)
 {
 	dtrace_id_t max;
 	int rval, i, maxformat;
-	dtrace_eprobedesc_t *enabled, *nenabled;
+	dtrace_datadesc_t *dd, *ndd;
 	dtrace_probedesc_t *probe;
 
 	while (id >= (max = dtp->dt_maxprobe) || dtp->dt_pdesc == NULL) {
 		dtrace_id_t new_max = max ? (max << 1) : 1;
 		size_t nsize = new_max * sizeof (void *);
 		dtrace_probedesc_t **new_pdesc;
-		dtrace_eprobedesc_t **new_edesc;
+		dtrace_datadesc_t **new_ddesc;
 
 		if ((new_pdesc = malloc(nsize)) == NULL ||
-		    (new_edesc = malloc(nsize)) == NULL) {
+		    (new_ddesc = malloc(nsize)) == NULL) {
 			free(new_pdesc);
 			return (dt_set_errno(dtp, EDT_NOMEM));
 		}
 
 		memset(new_pdesc, 0, nsize);
-		memset(new_edesc, 0, nsize);
+		memset(new_ddesc, 0, nsize);
 
 		if (dtp->dt_pdesc != NULL) {
 			size_t osize = max * sizeof (void *);
@@ -44,69 +107,68 @@ dt_epid_add(dtrace_hdl_t *dtp, dtrace_epid_t id)
 			memcpy(new_pdesc, dtp->dt_pdesc, osize);
 			free(dtp->dt_pdesc);
 
-			memcpy(new_edesc, dtp->dt_edesc, osize);
-			free(dtp->dt_edesc);
+			memcpy(new_ddesc, dtp->dt_ddesc, osize);
+			free(dtp->dt_ddesc);
 		}
 
 		dtp->dt_pdesc = new_pdesc;
-		dtp->dt_edesc = new_edesc;
+		dtp->dt_ddesc = new_ddesc;
 		dtp->dt_maxprobe = new_max;
 	}
 
 	if (dtp->dt_pdesc[id] != NULL)
 		return (0);
 
-	if ((enabled = malloc(sizeof (dtrace_eprobedesc_t))) == NULL)
+	if ((dd = malloc(sizeof (dtrace_datadesc_t))) == NULL)
 		return (dt_set_errno(dtp, EDT_NOMEM));
 
-	memset(enabled, 0, sizeof (dtrace_eprobedesc_t));
-	enabled->dtepd_epid = id;
-	enabled->dtepd_nrecs = 1;
+	memset(dd, 0, sizeof (dtrace_datadesc_t));
+	dd->dtdd_epid = id;
+	dd->dtdd_nrecs = 1;
 
-	if (dt_ioctl(dtp, DTRACEIOC_EPROBE, enabled) == -1) {
+	if (dt_ioctl(dtp, DTRACEIOC_EPROBE, dd) == -1) {
 		rval = dt_set_errno(dtp, errno);
-		free(enabled);
+		free(dd);
 		return (rval);
 	}
 
-	if (DTRACE_SIZEOF_EPROBEDESC(enabled) != sizeof (*enabled)) {
+	if (DTRACE_SIZEOF_EPROBEDESC(dd) != sizeof (*dd)) {
 		/*
 		 * There must be more than one action.  Allocate the
 		 * appropriate amount of space and try again.
 		 */
-		if ((nenabled =
-		    malloc(DTRACE_SIZEOF_EPROBEDESC(enabled))) != NULL)
-			memcpy(nenabled, enabled, sizeof (*enabled));
+		if ((ndd = malloc(DTRACE_SIZEOF_EPROBEDESC(dd))) != NULL)
+			memcpy(ndd, dd, sizeof (*dd));
 
-		free(enabled);
+		free(dd);
 
-		if ((enabled = nenabled) == NULL)
+		if ((dd = ndd) == NULL)
 			return (dt_set_errno(dtp, EDT_NOMEM));
 
-		rval = dt_ioctl(dtp, DTRACEIOC_EPROBE, enabled);
+		rval = dt_ioctl(dtp, DTRACEIOC_EPROBE, dd);
 
 		if (rval == -1) {
 			rval = dt_set_errno(dtp, errno);
-			free(enabled);
+			free(dd);
 			return (rval);
 		}
 	}
 
 	if ((probe = malloc(sizeof (dtrace_probedesc_t))) == NULL) {
-		free(enabled);
+		free(dd);
 		return (dt_set_errno(dtp, EDT_NOMEM));
 	}
 
-	probe->id = enabled->dtepd_probeid;
+	probe->id = dd->dtdd_probeid;
 
 	if (dt_ioctl(dtp, DTRACEIOC_PROBES, probe) == -1) {
 		rval = dt_set_errno(dtp, errno);
 		goto err;
 	}
 
-	for (i = 0; i < enabled->dtepd_nrecs; i++) {
+	for (i = 0; i < dd->dtdd_nrecs; i++) {
 		dtrace_fmtdesc_t fmt;
-		dtrace_recdesc_t *rec = &enabled->dtepd_rec[i];
+		dtrace_recdesc_t *rec = &dd->dtdd_rec[i];
 
 		if (!DTRACEACT_ISPRINTFLIKE(rec->dtrd_action))
 			continue;
@@ -173,7 +235,7 @@ dt_epid_add(dtrace_hdl_t *dtp, dtrace_epid_t id)
 	}
 
 	dtp->dt_pdesc[id] = probe;
-	dtp->dt_edesc[id] = enabled;
+	dtp->dt_ddesc[id] = dd;
 
 	return (0);
 
@@ -184,70 +246,56 @@ err:
 	 * we have already allocated.  This is okay; these formats are
 	 * hanging off of dt_formats and will therefore not be leaked.
 	 */
-	free(enabled);
+	free(dd);
 	free(probe);
 	return (rval);
 }
 #else
+/*
+ * Associate a probe data description and probe description with an enabled
+ * probe ID.  This means that the given ID refers to the program matching the
+ * probe data description being attached to the probe that matches the probe
+ * description.
+ */
 dtrace_epid_t
-dt_epid_add(dtrace_hdl_t *dtp, dtrace_id_t prid, int nrecs)
+dt_epid_add(dtrace_hdl_t *dtp, dtrace_datadesc_t *ddp, dtrace_id_t prid)
 {
-	dtrace_id_t		max;
+	dtrace_id_t		max = dtp->dt_maxprobe;
 	dtrace_epid_t		epid;
-	dtrace_eprobedesc_t	*eprobe;
 
 	epid = dtp->dt_nextepid++;
-	max = dtp->dt_maxprobe;
-	if (epid >= max || dtp->dt_edesc == NULL) {
+	if (epid >= max || dtp->dt_ddesc == NULL) {
 		dtrace_id_t		nmax = max ? (max << 1) : 2;
-		dtrace_eprobedesc_t	**nedesc;
+		dtrace_datadesc_t	**nddesc;
 		dtrace_probedesc_t	**npdesc;
 
-		nedesc = dt_calloc(dtp, nmax, sizeof(void *));
+		nddesc = dt_calloc(dtp, nmax, sizeof(void *));
 		npdesc = dt_calloc(dtp, nmax, sizeof(void *));
-		if (nedesc == NULL || npdesc == NULL) {
-			dt_free(dtp, nedesc);
+		if (nddesc == NULL || npdesc == NULL) {
+			dt_free(dtp, nddesc);
 			dt_free(dtp, npdesc);
 			return dt_set_errno(dtp, EDT_NOMEM);
 		}
 
-		if (dtp->dt_edesc != NULL) {
+		if (dtp->dt_ddesc != NULL) {
 			size_t	osize = max * sizeof(void *);
 
-			memcpy(nedesc, dtp->dt_edesc, osize);
-			dt_free(dtp, dtp->dt_edesc);
+			memcpy(nddesc, dtp->dt_ddesc, osize);
+			dt_free(dtp, dtp->dt_ddesc);
 			memcpy(npdesc, dtp->dt_pdesc, osize);
 			dt_free(dtp, dtp->dt_pdesc);
 		}
 
-		dtp->dt_edesc = nedesc;
+		dtp->dt_ddesc = nddesc;
 		dtp->dt_pdesc = npdesc;
 		dtp->dt_maxprobe = nmax;
 	}
 
-	if (dtp->dt_edesc[epid] != NULL)
+	if (dtp->dt_ddesc[epid] != NULL)
 		return epid;
 
-	/*
-	 * This is a bit odd, but we want to make sure that nrecs is not a
-	 * negative value while also taking into consideration that there is
-	 * always rooms for one record in dtrace_eprobedesc_t, so for our
-	 * allocation purposes we account for that default record slot.
-	 */
-	assert(nrecs >= 0);
-	if (nrecs == 0)
-		nrecs = 1;
-
-	eprobe = dt_zalloc(dtp, sizeof(dtrace_eprobedesc_t) +
-				(nrecs - 1) * sizeof(dtrace_recdesc_t));
-	if (eprobe == NULL)
-		return dt_set_errno(dtp, EDT_NOMEM);
-
-	eprobe->dtepd_epid = epid;
-	eprobe->dtepd_probeid = prid;
-	eprobe->dtepd_nrecs = nrecs;
-
-	dtp->dt_edesc[epid] = eprobe;
+	dt_datadesc_hold(ddp);
+	dtp->dt_ddesc[epid] = ddp;
 	dtp->dt_pdesc[epid] = (dtrace_probedesc_t *)dtp->dt_probes[prid]->desc;
 
 	return epid;
@@ -255,22 +303,14 @@ dt_epid_add(dtrace_hdl_t *dtp, dtrace_id_t prid, int nrecs)
 #endif
 
 int
-dt_epid_lookup(dtrace_hdl_t *dtp, dtrace_epid_t epid,
-    dtrace_eprobedesc_t **epdp, dtrace_probedesc_t **pdp)
+dt_epid_lookup(dtrace_hdl_t *dtp, dtrace_epid_t epid, dtrace_datadesc_t **ddp,
+	       dtrace_probedesc_t **pdp)
 {
-	int rval;
-
-#if 0
-	if (epid >= dtp->dt_maxprobe || dtp->dt_pdesc[epid] == NULL) {
-		if ((rval = dt_epid_add(dtp, epid)) != 0)
-			return (rval);
-	}
-#endif
-
 	assert(epid < dtp->dt_maxprobe);
-	assert(dtp->dt_edesc[epid] != NULL);
+	assert(dtp->dt_ddesc[epid] != NULL);
 	assert(dtp->dt_pdesc[epid] != NULL);
-	*epdp = dtp->dt_edesc[epid];
+
+	*ddp = dtp->dt_ddesc[epid];
 	*pdp = dtp->dt_pdesc[epid];
 
 	return (0);
@@ -281,33 +321,84 @@ dt_epid_destroy(dtrace_hdl_t *dtp)
 {
 	size_t i;
 
-	assert((dtp->dt_pdesc != NULL && dtp->dt_edesc != NULL &&
+	assert((dtp->dt_pdesc != NULL && dtp->dt_ddesc != NULL &&
 	    dtp->dt_maxprobe > 0) || (dtp->dt_pdesc == NULL &&
-	    dtp->dt_edesc == NULL && dtp->dt_maxprobe == 0));
+	    dtp->dt_ddesc == NULL && dtp->dt_maxprobe == 0));
 
 	if (dtp->dt_pdesc == NULL)
 		return;
 
 	for (i = 0; i < dtp->dt_maxprobe; i++) {
-		if (dtp->dt_edesc[i] == NULL) {
+		if (dtp->dt_ddesc[i] == NULL) {
 			assert(dtp->dt_pdesc[i] == NULL);
 			continue;
 		}
 
+		dt_datadesc_release(dtp, dtp->dt_ddesc[i]);
 		assert(dtp->dt_pdesc[i] != NULL);
-		free(dtp->dt_edesc[i]);
-		free(dtp->dt_pdesc[i]);
 	}
 
 	free(dtp->dt_pdesc);
 	dtp->dt_pdesc = NULL;
 
-	free(dtp->dt_edesc);
-	dtp->dt_edesc = NULL;
+	free(dtp->dt_ddesc);
+	dtp->dt_ddesc = NULL;
 	dtp->dt_nextepid = 0;
 	dtp->dt_maxprobe = 0;
 }
 
+uint32_t
+dt_rec_add(dtrace_hdl_t *dtp, dt_cg_gap_f gapf, dtrace_actkind_t kind,
+	   uint32_t size, uint16_t alignment, const char *fmt, uint64_t arg)
+{
+	dt_pcb_t		*pcb = dtp->dt_pcb;
+	uint32_t		off;
+	uint32_t		gap;
+	dtrace_datadesc_t	*ddp = pcb->pcb_stmt->dtsd_ddesc;
+	dtrace_recdesc_t	*rec;
+	int			cnt, max;
+
+	assert(gapf);
+
+	cnt = pcb->pcb_nrecs + 1;
+	max = pcb->pcb_maxrecs;
+	if (cnt >= max) {
+		int			nmax = max ? (max << 1) : cnt;
+		dtrace_recdesc_t	*nrecs;
+
+		nrecs = dt_calloc(dtp, nmax, sizeof(dtrace_recdesc_t));
+		if (nrecs == NULL)
+			longjmp(pcb->pcb_jmpbuf, EDT_NOMEM);
+
+		if (ddp->dtdd_recs != NULL) {
+			size_t	osize = max * sizeof(dtrace_recdesc_t);
+
+			memcpy(nrecs, ddp->dtdd_recs, osize);
+			dt_free(dtp, ddp->dtdd_recs);
+		}
+
+		ddp->dtdd_recs = nrecs;
+		pcb->pcb_maxrecs = nmax;
+	}
+
+	rec = &ddp->dtdd_recs[pcb->pcb_nrecs++];
+	off = (pcb->pcb_bufoff + (alignment - 1)) & ~(alignment - 1);
+	rec->dtrd_action = kind;
+	rec->dtrd_size = size;
+	rec->dtrd_offset = off;
+	rec->dtrd_alignment = alignment;
+	rec->dtrd_format = 0;	/* FIXME */
+	rec->dtrd_arg = arg;
+
+	gap = off - pcb->pcb_bufoff;
+	if (gap > 0)
+		(*gapf)(pcb, gap);
+
+	pcb->pcb_bufoff = off + size;
+
+	return off;
+}
+
 void *
 dt_format_lookup(dtrace_hdl_t *dtp, int format)
 {
@@ -338,7 +429,6 @@ static int
 dt_aggid_add(dtrace_hdl_t *dtp, dtrace_aggid_t id)
 {
 	dtrace_id_t max;
-	dtrace_epid_t epid;
 	int rval;
 
 	while (id >= (max = dtp->dt_maxagg) || dtp->dt_aggdesc == NULL) {
@@ -438,8 +528,7 @@ dt_aggid_add(dtrace_hdl_t *dtp, dtrace_aggid_t id)
 }
 
 int
-dt_aggid_lookup(dtrace_hdl_t *dtp, dtrace_aggid_t aggid,
-    dtrace_aggdesc_t **adp)
+dt_aggid_lookup(dtrace_hdl_t *dtp, dtrace_aggid_t aggid, dtrace_aggdesc_t **adp)
 {
 	int rval;
 
diff --git a/libdtrace/dt_pcb.h b/libdtrace/dt_pcb.h
index 8a73e7da..95d2fc8d 100644
--- a/libdtrace/dt_pcb.h
+++ b/libdtrace/dt_pcb.h
@@ -41,7 +41,7 @@ typedef struct dt_pcb {
 	dt_idhash_t *pcb_idents; /* current hash table of ambiguous idents */
 	dt_idhash_t *pcb_pragmas; /* current hash table of pending pragmas */
 	dt_regset_t *pcb_regs;	/* register set for code generation */
-	uint_t pcb_bufoff;	/* output buffer offset (for DFUNCs) */
+	uint32_t pcb_bufoff;	/* output buffer offset (for DFUNCs) */
 	dt_irlist_t pcb_ir;	/* list of unrelocated IR instructions */
 	uint_t pcb_exitlbl;	/* label for exit of program */
 	uint_t pcb_asvidx;	/* assembler vartab index (see dt_as.c) */
@@ -50,6 +50,8 @@ typedef struct dt_pcb {
 	const dtrace_probedesc_t *pcb_pdesc; /* probedesc for current context */
 	struct dt_probe *pcb_probe; /* probe associated with current context */
 	dtrace_probeinfo_t pcb_pinfo; /* info associated with current context */
+	int pcb_nrecs;		/* number of data record descriptions */
+	int pcb_maxrecs;	/* alloc'd number of data record descriptions */
 	dtrace_attribute_t pcb_amin; /* stability minimum for compilation */
 	dt_node_t *pcb_dret;	/* node containing return type for assembler */
 	dtrace_difo_t *pcb_difo; /* intermediate DIF object made by assembler */
diff --git a/libdtrace/dt_program.c b/libdtrace/dt_program.c
index 5555b176..b154668d 100644
--- a/libdtrace/dt_program.c
+++ b/libdtrace/dt_program.c
@@ -220,17 +220,26 @@ dt_ecbdesc_create(dtrace_hdl_t *dtp, const dtrace_probedesc_t *pdp)
 dtrace_stmtdesc_t *
 dtrace_stmt_create(dtrace_hdl_t *dtp, dtrace_ecbdesc_t *edp)
 {
-	dtrace_stmtdesc_t *sdp;
+	dtrace_stmtdesc_t	*sdp;
+	dtrace_datadesc_t	*ddp;
 
-	if ((sdp = dt_zalloc(dtp, sizeof (dtrace_stmtdesc_t))) == NULL)
-		return (NULL);
+	sdp = dt_zalloc(dtp, sizeof(dtrace_stmtdesc_t));
+	if (sdp == NULL)
+		return NULL;
+
+	ddp = dt_datadesc_create(dtp);
+	if (ddp == NULL) {
+		dt_free(dtp, sdp);
+		return NULL;
+	}
 
 	dt_ecbdesc_hold(edp);
 	sdp->dtsd_ecbdesc = edp;
+	sdp->dtsd_ddesc = ddp;
 	sdp->dtsd_descattr = _dtrace_defattr;
 	sdp->dtsd_stmtattr = _dtrace_defattr;
 
-	return (sdp);
+	return sdp;
 }
 
 dtrace_actdesc_t *
@@ -346,6 +355,7 @@ dtrace_stmt_destroy(dtrace_hdl_t *dtp, dtrace_stmtdesc_t *sdp)
 		dt_printf_destroy(sdp->dtsd_fmtdata);
 
 	dt_ecbdesc_release(dtp, sdp->dtsd_ecbdesc);
+	dt_free(dtp, sdp->dtsd_ddesc);
 	dt_free(dtp, sdp);
 }
 
diff --git a/libdtrace/dtrace.h b/libdtrace/dtrace.h
index 13844324..89d08813 100644
--- a/libdtrace/dtrace.h
+++ b/libdtrace/dtrace.h
@@ -1,6 +1,6 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2007, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -135,6 +135,7 @@ typedef struct dtrace_stmtdesc {
 	dtrace_ecbdesc_t *dtsd_ecbdesc;		/* ECB description */
 	dtrace_actdesc_t *dtsd_action;		/* action list */
 	dtrace_actdesc_t *dtsd_action_last;	/* last action in action list */
+	dtrace_datadesc_t *dtsd_ddesc;		/* probe data description */
 	void *dtsd_aggdata;			/* aggregation data */
 	void *dtsd_fmtdata;			/* type-specific output data */
 	void (*dtsd_callback)();		/* callback function for EPID */
@@ -173,7 +174,8 @@ typedef enum {
 
 typedef struct dtrace_probedata {
 	dtrace_hdl_t *dtpda_handle;		/* handle to DTrace library */
-	dtrace_eprobedesc_t *dtpda_edesc;	/* enabled probe description */
+	dtrace_epid_t dtpda_epid;		/* enabled probe ID */
+	dtrace_datadesc_t *dtpda_ddesc;		/* probe data description */
 	dtrace_probedesc_t *dtpda_pdesc;	/* probe description */
 	unsigned int dtpda_cpu;			/* CPU for data */
 	caddr_t dtpda_data;			/* pointer to raw data */
@@ -262,7 +264,7 @@ extern dtrace_workstatus_t dtrace_work(dtrace_hdl_t *dtp, FILE *fp,
 
 typedef struct dtrace_errdata {
 	dtrace_hdl_t *dteda_handle;		/* handle to DTrace library */
-	dtrace_eprobedesc_t *dteda_edesc;	/* enabled probe inducing err */
+	dtrace_datadesc_t *dteda_ddesc;		/* probe data inducing err */
 	dtrace_probedesc_t *dteda_pdesc;	/* probe inducing error */
 	unsigned int dteda_cpu;			/* CPU of error */
 	int dteda_action;			/* action inducing error */
@@ -355,7 +357,7 @@ extern int dtrace_handle_setopt(dtrace_hdl_t *dtp,
 struct dtrace_aggdata {
 	dtrace_hdl_t *dtada_handle;		/* handle to DTrace library */
 	dtrace_aggdesc_t *dtada_desc;		/* aggregation description */
-	dtrace_eprobedesc_t *dtada_edesc;	/* enabled probe description */
+	dtrace_datadesc_t *dtada_ddesc;		/* probe data description */
 	dtrace_probedesc_t *dtada_pdesc;	/* probe description */
 	caddr_t dtada_data;			/* pointer to raw data */
 	uint64_t dtada_normal;			/* the normal -- 1 for denorm */
-- 
2.26.0




More information about the DTrace-devel mailing list