[DTrace-devel] [PATCH] Implementation of the exit() action

Kris Van Hees kris.van.hees at oracle.com
Wed Apr 8 08:00:16 PDT 2020


The exit() action is quite instrumental to the use of DTrace because it
allows a tracing script to terminate the tracing session.  In order to
implement it, various improvements needed to be made to the existing
code.

1. Support is being added for tracing data elements that are less than
   8 bytes.  E.g. the exit() action passes a 32-bit value as argument,
   to be used as return value for dtrace.  To ensure that data items are
   aligned properly in the data buffer, appropriate padding is inserted
   into the buffer between the previously stored item and the one we try
   to store.  E.g. if a 32-bit value was stored at offset 16, and we want
   to store a 64-bit value next, code will be generated to store a 4-byte
   zero value as padding at offset 20 so that the 64-bit value will be
   stored at offset 24 (which is properly aligned) for 8-byte data items.

2. The dt_rec_add() function has been modified to update the pcb_bufoff
   value to be the next offset to store data at, and it returns the offset
   to store the current data item at.  We now also pass in a gap fill
   function to be used to insert the proper number of padding bytes (all
   zero).  This means that dt_rec_add() must be called prior to generating
   the instructions to actually store the data.

3. The dtrace_probedata_t struct passed to data processing/output
   functions provided by the consumer was not passing the correct value
   for dtpda_data, i.e. the address of the data item in the trace data
   buffer.  We now make sure it is set to the address of the location of
   the EPID when the efunc is called, and we set it to the address of
   each data item for the rfunc calls.

4. The rfunc record processor must be passed one of the API defined
   action constants (DTRACEACT_*) to enable it to make the right
   decisions about each specific probe data item.  We also provide the
   libdtrace internal ST_ACT_* as optional library specific argument.

5. Because exit() must be able to signal that trace data processing is
   done, dt_consume_one(), dt_consume_cpu(), and dt_consume() have been
   updated to ret8rn a dtrace_workstatus_t value to indicate whether
   processing of trace data is to continue (DTRACE_WORKSTATUS_OKAY),
   is done (DTRACE_WORKSTATUS_DONE), or whether an error occurred
   (DTRACE_WORKSTATUS_ERROR).

Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
---
 libdtrace/dt_cg.c      | 59 ++++++++++++++++++++++++++-----
 libdtrace/dt_consume.c | 79 +++++++++++++++++++++++++++++-------------
 libdtrace/dt_impl.h    |  5 +--
 libdtrace/dt_map.c     | 26 ++++++++++----
 libdtrace/dt_pcb.h     |  2 +-
 libdtrace/dt_work.c    |  9 +----
 6 files changed, 128 insertions(+), 52 deletions(-)

diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index 17074661..03fea709 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -126,7 +126,10 @@ dt_cg_prologue(dt_pcb_t *pcb)
 	instr = BPF_STORE_IMM(BPF_W, BPF_REG_9, 4, 0);
 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
 
-	pcb->pcb_bufoff += sizeof(uint64_t);
+	/*
+	 * Account for epid (at offset 0) and 4-byte padding (at offset 4).
+	 */
+	pcb->pcb_bufoff += 2 * sizeof(uint32_t);
 }
 
 /*
@@ -187,6 +190,38 @@ dt_cg_epilogue(dt_pcb_t *pcb)
 	dt_irlist_append(dlp, dt_cg_node_alloc(pcb->pcb_exitlbl, instr));
 }
 
+/*
+ * Generate an instruction sequence to fill a gap in the output buffer with 0
+ * values.  This is used to ensure that there are no uninitialized bytes in the
+ * output buffer that could result from alignment requirements for data
+ * records.
+ */
+static void
+dt_cg_fill_gap(dt_pcb_t *pcb, int gap)
+{
+	struct bpf_insn instr;
+	dt_irlist_t *dlp = &pcb->pcb_ir;
+	uint_t off = pcb->pcb_bufoff;
+
+	if (gap == 0)
+		return;
+
+	if (gap & 1) {
+		instr = BPF_STORE_IMM(BPF_B, BPF_REG_9, off, 0);
+		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
+		off += 1;
+	}
+	if (gap & 2) {
+		instr = BPF_STORE_IMM(BPF_H, BPF_REG_9, off, 0);
+		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
+		off += 2;
+	}
+	if (gap & 4) {
+		instr = BPF_STORE_IMM(BPF_W, BPF_REG_9, off, 0);
+		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
+	}
+}
+
 static size_t
 dt_cg_act_breakpoint(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
@@ -329,14 +364,18 @@ dt_cg_act_exit(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 {
 	struct bpf_insn instr;
 	dt_irlist_t *dlp = &pcb->pcb_ir;
-	uint_t off = pcb->pcb_bufoff;
+	uint_t off;
 
 	dt_cg_node(dnp->dn_args, &pcb->pcb_ir, pcb->pcb_regs);
 
-	instr = BPF_STORE(BPF_DW, BPF_REG_9, off, BPF_REG_0);	/* FIXME */
+	off = dt_rec_add(pcb->pcb_hdl, dt_cg_fill_gap, DTRACEACT_EXIT,
+			 sizeof(uint32_t), sizeof(uint32_t), NULL, DT_ACT_EXIT);
+
+	instr = BPF_STORE(BPF_W, BPF_REG_9, off, dnp->dn_args->dn_reg);
 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
+	dt_regset_free(pcb->pcb_regs, dnp->dn_args->dn_reg);
 
-	return sizeof(uint64_t);
+	return sizeof(uint32_t);
 }
 
 static size_t
@@ -470,7 +509,7 @@ dt_cg_act_trace(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 	struct bpf_insn instr;
 	dt_irlist_t *dlp = &pcb->pcb_ir;
 	char n[DT_TYPE_NAMELEN];
-	uint_t off = pcb->pcb_bufoff;
+	uint_t off;
 
 	if (dt_node_is_void(dnp->dn_args)) {
 		dnerror(dnp->dn_args, D_TRACE_VOID,
@@ -486,14 +525,16 @@ dt_cg_act_trace(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 	dt_cg_node(dnp->dn_args, &pcb->pcb_ir, pcb->pcb_regs);
 
 	if (dt_node_is_scalar(dnp->dn_args)) {
+		off = dt_rec_add(pcb->pcb_hdl, dt_cg_fill_gap,
+				 DTRACEACT_DIFEXPR, sizeof(uint64_t),
+				 sizeof(uint64_t), NULL, DT_ACT_TRACE);
+
 		instr = BPF_STORE(BPF_DW, BPF_REG_9, off, dnp->dn_args->dn_reg);
 		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
 		dt_regset_free(pcb->pcb_regs, dnp->dn_args->dn_reg);
 
-		dt_rec_add(pcb->pcb_hdl, DT_ACT_TRACE, sizeof(uint64_t), off,
-			   sizeof(uint64_t), NULL, 0); 
-
 		return sizeof(uint64_t);
+#if 0
 	} else if (dt_node_is_string(dnp->dn_args)) {
 		size_t sz = dt_node_type_size(dnp->dn_args);
 
@@ -516,6 +557,7 @@ dt_cg_act_trace(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 		dt_regset_free(pcb->pcb_regs, dnp->dn_args->dn_reg);
 
 		return sz + sizeof(uint64_t);
+#endif
 	} else
 		dnerror(dnp->dn_args, D_PROTO_ARG,
 			"trace( ) argument #1 is incompatible with prototype:\n"
@@ -2727,7 +2769,6 @@ dt_cg(dt_pcb_t *pcb, dt_node_t *dnp)
 
 					sz = actdp->fun(pcb, act->dn_expr,
 							actdp->kind);
-					pcb->pcb_bufoff += sz;
 				}
 			} else {
 				dt_cg_node(act->dn_expr, &pcb->pcb_ir,
diff --git a/libdtrace/dt_consume.c b/libdtrace/dt_consume.c
index 752a71e5..2b946018 100644
--- a/libdtrace/dt_consume.c
+++ b/libdtrace/dt_consume.c
@@ -2254,10 +2254,10 @@ nextepid:
 	return (dt_handle_cpudrop(dtp, cpu, DTRACEDROP_PRINCIPAL, drops));
 }
 #else
-int
+dtrace_workstatus_t
 dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, int cpu, char *buf,
 	       dtrace_consume_probe_f *efunc, dtrace_consume_rec_f *rfunc,
-	       int flow, int quiet, dtrace_epid_t last, void *arg)
+	       int flow, int quiet, dtrace_epid_t *last, void *arg)
 {
 	char				*data = buf;
 	struct perf_event_header	*hdr;
@@ -2270,6 +2270,7 @@ dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, int cpu, char *buf,
 		char			*ptr = data;
 		uint32_t		size, epid, tag;
 		int			i;
+		int			done = 0;
 		dtrace_probedata_t	pdat;
 
 		/*
@@ -2287,6 +2288,13 @@ dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, int cpu, char *buf,
 		if (ptr > buf + hdr->size)
 			return -1;
 
+		/*
+		 * Clear the probe data, and fill in data independent fields.
+		 */
+		memset(&pdat, 0, sizeof(pdat));
+		pdat.dtpda_handle = dtp;
+		pdat.dtpda_cpu = cpu;
+
 		size = *(uint32_t *)data;
 		data += sizeof(size);
 		ptr += sizeof(size) + size;
@@ -2296,25 +2304,23 @@ dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, int cpu, char *buf,
 		data += sizeof(uint32_t);		/* skip padding */
 		size -= sizeof(uint32_t);
 
-		epid = *(uint32_t *)data;
-		data += sizeof(uint32_t);
-		size -= sizeof(uint32_t);
-
-		tag = *(uint32_t *)data;
-		data += sizeof(uint32_t);
-		size -= sizeof(uint32_t);
+		epid = ((uint32_t *)data)[0];
+		tag = ((uint32_t *)data)[1];
 
-		memset(&pdat, 0, sizeof(pdat));
+		/*
+		 * Fill in the epid and address of the epid in the buffer.  We
+		 * need to pass this to the efunc.
+		 */
 		pdat.dtpda_epid = epid;
-		pdat.dtpda_handle = dtp;
-		pdat.dtpda_cpu = cpu;
+		pdat.dtpda_data = data;
+
 		rval = dt_epid_lookup(dtp, epid, &pdat.dtpda_ddesc,
 						 &pdat.dtpda_pdesc);
 		if (rval != 0)
 			return rval;
 
 		if (flow)
-			dt_flowindent(dtp, &pdat, last, DTRACE_EPIDNONE);
+			dt_flowindent(dtp, &pdat, *last, DTRACE_EPIDNONE);
 
 		rval = (*efunc)(&pdat, arg);
 
@@ -2336,12 +2342,27 @@ dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, int cpu, char *buf,
 		 * FIXME: This code is temporary.
 		 */
 		for (i = 0; i < pdat.dtpda_ddesc->dtdd_nrecs; i++) {
-			int	n;
+			int			n;
+			dtrace_recdesc_t	*rec;
+
+			rec = &pdat.dtpda_ddesc->dtdd_recs[i];
+			if (rec->dtrd_action == DTRACEACT_EXIT)
+				done = 1;
+
+			pdat.dtpda_data = data + rec->dtrd_offset;
+			rval = (*rfunc)(&pdat, rec, arg);
+
+			if (rval == DTRACE_CONSUME_NEXT)
+				continue;
+
+			if (rval == DTRACE_CONSUME_ABORT)
+				return dt_set_errno(dtp, EDT_DIRABORT);
+
+			if (rval != DTRACE_CONSUME_THIS)
+				return dt_set_errno(dtp, EDT_BADRVAL);
 
 			n = dt_printf(dtp, fp, quiet ? "%lld" : " %16lld",
-				      *(int64_t *)data);
-			data += sizeof(uint64_t);
-			size -= sizeof(uint64_t);
+				      *(int64_t *)pdat.dtpda_data);
 
 			if (n < 0)
 				return -1;
@@ -2353,7 +2374,9 @@ dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, int cpu, char *buf,
 		 */
 		rval = (*rfunc)(&pdat, NULL, arg);
 
-		return epid;
+		*last = epid;
+
+		return done ? DTRACE_WORKSTATUS_DONE : DTRACE_WORKSTATUS_OKAY;
 	} else if (hdr->type == PERF_RECORD_LOST) {
 		uint64_t	lost;
 
@@ -2388,6 +2411,7 @@ dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, int cpu, dt_peb_t *peb,
 	dt_pebset_t			*pebset = dtp->dt_pebset;
 	uint64_t			data_size = pebset->data_size;
 	int				flow, quiet;
+	dtrace_workstatus_t		rval = DTRACE_WORKSTATUS_OKAY;
 
 	flow = (dtp->dt_options[DTRACEOPT_FLOWINDENT] != DTRACEOPT_UNSET);
 	quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
@@ -2432,15 +2456,18 @@ dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, int cpu, dt_peb_t *peb,
 				event = dst;
 			}
 
-			last = dt_consume_one(dtp, fp, cpu, event, efunc, rfunc,
-					      flow, quiet, last, arg);
+			rval = dt_consume_one(dtp, fp, cpu, event, efunc, rfunc,
+					      flow, quiet, &last, arg);
+			if (rval != DTRACE_WORKSTATUS_OKAY)
+				return rval;
+
 			tail += hdr->size;
 		} while (tail != head);
 
 		ring_buffer_write_tail(rb_page, tail);
 	}
 
-	return 0;
+	return DTRACE_WORKSTATUS_OKAY;
 }
 #endif
 
@@ -2648,7 +2675,7 @@ dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_bufdesc_t *buf,
 	return (rval);
 }
 
-int
+dtrace_workstatus_t
 dtrace_consume(dtrace_hdl_t *dtp, FILE *fp,
     dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
 {
@@ -2761,8 +2788,10 @@ dtrace_consume(dtrace_hdl_t *dtp, FILE *fp,
 	timeout /= NANOSEC / MILLISEC;
 	cnt = epoll_wait(dtp->dt_poll_fd, events, dtp->dt_conf.numcpus,
 			 timeout);
-	if (cnt < 0)
-		return dt_set_errno(dtp, errno);
+	if (cnt < 0) {
+		dt_set_errno(dtp, errno);
+		return DTRACE_WORKSTATUS_ERROR;
+	}
 
 	/*
 	 * Loop over the buffers that have data available, and process them one
@@ -2777,6 +2806,6 @@ dtrace_consume(dtrace_hdl_t *dtp, FILE *fp,
 			return rval;
 	}
 
-	return 0;
+	return DTRACE_WORKSTATUS_OKAY;
 #endif
 }
diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
index 8bb5c003..536b1745 100644
--- a/libdtrace/dt_impl.h
+++ b/libdtrace/dt_impl.h
@@ -716,8 +716,9 @@ extern dtrace_epid_t dt_epid_add(dtrace_hdl_t *, dtrace_datadesc_t *,
 extern int dt_epid_lookup(dtrace_hdl_t *, dtrace_epid_t, dtrace_datadesc_t **,
 			  dtrace_probedesc_t **);
 extern void dt_epid_destroy(dtrace_hdl_t *);
-extern int dt_rec_add(dtrace_hdl_t *, dtrace_actkind_t, uint32_t, uint32_t,
-		      uint16_t, const char *, uint64_t);
+typedef void (*dt_cg_gap_f)(dt_pcb_t *, int);
+extern uint32_t dt_rec_add(dtrace_hdl_t *, dt_cg_gap_f, dtrace_actkind_t,
+			   uint32_t, uint16_t, const char *, uint64_t);
 extern int dt_aggid_lookup(dtrace_hdl_t *, dtrace_aggid_t, dtrace_aggdesc_t **);
 extern void dt_aggid_destroy(dtrace_hdl_t *);
 
diff --git a/libdtrace/dt_map.c b/libdtrace/dt_map.c
index 048789f7..a6cf0c58 100644
--- a/libdtrace/dt_map.c
+++ b/libdtrace/dt_map.c
@@ -65,7 +65,8 @@ dt_datadesc_finalize(dtrace_hdl_t *dtp, dtrace_datadesc_t *ddp)
 		if (nrecs == NULL)
 			return dt_set_errno(dtp, EDT_NOMEM);
 
-		memcpy(nrecs, ddp->dtdd_recs, pcb->pcb_nrecs);
+		memcpy(nrecs, ddp->dtdd_recs,
+		       pcb->pcb_nrecs * sizeof(dtrace_recdesc_t));
 		dt_free(dtp, ddp->dtdd_recs);
 		ddp->dtdd_recs = nrecs;
 		pcb->pcb_maxrecs = pcb->pcb_nrecs;
@@ -346,15 +347,19 @@ dt_epid_destroy(dtrace_hdl_t *dtp)
 	dtp->dt_maxprobe = 0;
 }
 
-int
-dt_rec_add(dtrace_hdl_t *dtp, dtrace_actkind_t kind, uint32_t size,
-	   uint32_t offset, uint16_t alignment, const char *fmt, uint64_t arg)
+uint32_t
+dt_rec_add(dtrace_hdl_t *dtp, dt_cg_gap_f gapf, dtrace_actkind_t kind,
+	   uint32_t size, uint16_t alignment, const char *fmt, uint64_t arg)
 {
 	dt_pcb_t		*pcb = dtp->dt_pcb;
+	uint32_t		off = pcb->pcb_bufoff;
+	uint32_t		gap;
 	dtrace_datadesc_t	*ddp = pcb->pcb_stmt->dtsd_ddesc;
 	dtrace_recdesc_t	*rec;
 	int			cnt, max;
 
+	assert(gapf);
+
 	cnt = pcb->pcb_nrecs + 1;
 	max = pcb->pcb_maxrecs;
 	if (cnt >= max) {
@@ -363,7 +368,7 @@ dt_rec_add(dtrace_hdl_t *dtp, dtrace_actkind_t kind, uint32_t size,
 
 		nrecs = dt_calloc(dtp, cnt, sizeof(dtrace_recdesc_t));
 		if (nrecs == NULL)
-			return dt_set_errno(dtp, EDT_NOMEM);
+			longjmp(pcb->pcb_jmpbuf, EDT_NOMEM);
 
 		if (ddp->dtdd_recs != NULL) {
 			size_t	osize = max * sizeof(dtrace_recdesc_t);
@@ -377,14 +382,21 @@ dt_rec_add(dtrace_hdl_t *dtp, dtrace_actkind_t kind, uint32_t size,
 	}
 
 	rec = &ddp->dtdd_recs[pcb->pcb_nrecs++];
+	off = (pcb->pcb_bufoff + (alignment - 1)) & ~(alignment - 1);
 	rec->dtrd_action = kind;
 	rec->dtrd_size = size;
-	rec->dtrd_offset = offset;
+	rec->dtrd_offset = off;
 	rec->dtrd_alignment = alignment;
 	rec->dtrd_format = 0;	/* FIXME */
 	rec->dtrd_arg = arg;
 
-	return 0;
+	gap = off - pcb->pcb_bufoff;
+	if (gap > 0)
+		(*gapf)(pcb, gap);
+
+	pcb->pcb_bufoff = off + size;
+
+	return off;
 }
 
 void *
diff --git a/libdtrace/dt_pcb.h b/libdtrace/dt_pcb.h
index da35d03e..95d2fc8d 100644
--- a/libdtrace/dt_pcb.h
+++ b/libdtrace/dt_pcb.h
@@ -41,7 +41,7 @@ typedef struct dt_pcb {
 	dt_idhash_t *pcb_idents; /* current hash table of ambiguous idents */
 	dt_idhash_t *pcb_pragmas; /* current hash table of pending pragmas */
 	dt_regset_t *pcb_regs;	/* register set for code generation */
-	uint_t pcb_bufoff;	/* output buffer offset (for DFUNCs) */
+	uint32_t pcb_bufoff;	/* output buffer offset (for DFUNCs) */
 	dt_irlist_t pcb_ir;	/* list of unrelocated IR instructions */
 	uint_t pcb_exitlbl;	/* label for exit of program */
 	uint_t pcb_asvidx;	/* assembler vartab index (see dt_as.c) */
diff --git a/libdtrace/dt_work.c b/libdtrace/dt_work.c
index 7889ba78..df7f8f5c 100644
--- a/libdtrace/dt_work.c
+++ b/libdtrace/dt_work.c
@@ -337,13 +337,6 @@ dtrace_workstatus_t
 dtrace_work(dtrace_hdl_t *dtp, FILE *fp, dtrace_consume_probe_f *pfunc,
 	    dtrace_consume_rec_f *rfunc, void *arg)
 {
-	dtrace_workstatus_t	rval;
-
-	rval = DTRACE_WORKSTATUS_OKAY;
-
-	if (dtrace_consume(dtp, fp, pfunc, rfunc, arg) == -1)
-		return DTRACE_WORKSTATUS_ERROR;
-
-	return rval;
+	return dtrace_consume(dtp, fp, pfunc, rfunc, arg);
 }
 #endif
-- 
2.26.0




More information about the DTrace-devel mailing list