[DTrace-devel] [PATCH 14/14] Have the consumer get the PRID from the output buffer

eugene.loh at oracle.com eugene.loh at oracle.com
Tue Jun 4 18:11:13 UTC 2024


From: Eugene Loh <eugene.loh at oracle.com>

WIP

Each output record has a EPID associated with it, allowing the consumer
to get both a data description and a PRID (so it can report PRID and
probe function and name).  We want to support uprobes that trigger for
multiple PRIDs, however, basically breaking this scheme.

So have the producer write the PRID into the output buffer;  it does so
easily by having the clause prologue copy mst->prid to the output buffer.
There was an unused pad in the output buffer anyhow.

Then, the consumer reads the PRID from the output buffer.

If this approach is adopted, further work is required for this patch:

- shift references to the output buffer
    so that we are not reading from negative offsets

- get rid of the EPID->PRID mapping (relocation, etc.)
    dt_link_construct management of PRID, calling dt_epid_add(), etc.

- clean up the hardwired PRID=3 for the ERROR probe

Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
 bpf/probe_error.c      |  4 ++++
 libdtrace/dt_cg.c      | 12 +++++++++---
 libdtrace/dt_consume.c | 21 ++++++++++++++-------
 libdtrace/dt_dctx.h    |  3 +++
 4 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/bpf/probe_error.c b/bpf/probe_error.c
index c8ddcdfa..df546665 100644
--- a/bpf/probe_error.c
+++ b/bpf/probe_error.c
@@ -26,6 +26,7 @@ noinline void dt_probe_error(const dt_dctx_t *dctx, uint64_t pc, uint64_t fault,
 			     uint64_t illval)
 {
 	dt_mstate_t	*mst = dctx->mst;
+int oldprid;
 
 	mst->argv[0] = 0;
 	mst->argv[1] = mst->epid;
@@ -34,7 +35,10 @@ noinline void dt_probe_error(const dt_dctx_t *dctx, uint64_t pc, uint64_t fault,
 	mst->argv[4] = fault;
 	mst->argv[5] = illval;
 
+oldprid = mst->prid;
+mst->prid = 3;           // FIXME all we are doing is forcing the PRID to be 3, which is usually what ERROR is; the clause prologues will then use this
 	dt_error(dctx);
+mst->prid = oldprid;
 
 	mst->fault = fault;
 }
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index 4fd2d359..8ec917be 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -1057,9 +1057,10 @@ dt_cg_tramp_error(dt_pcb_t *pcb)
  *
  *	1. Store the base pointer to the output data buffer in %r9.
  *	2. Initialize the machine state (dctx->mst).
- *	3. Store the epid at [%r9 + DBUF_EPID].
- *	4. Store 0 to indicate no active speculation at [%r9 + DBUF_SPECID].
- *	5. Evaluate the predicate expression and return if false.
+ *	3. Copy dctx->mst->prid to [%r9 + DBUF_PRID].
+ *	4. Store the epid at [%r9 + DBUF_EPID].
+ *	5. Store 0 to indicate no active speculation at [%r9 + DBUF_SPECID].
+ *	6. Evaluate the predicate expression and return if false.
  *
  * The dt_program() function will always return 0.
  */
@@ -1105,6 +1106,9 @@ dt_cg_prologue(dt_pcb_t *pcb, dt_node_t *pred)
 	 *	dctx->mst->specsize = 0;// stdw [%r0 + DMST_SPECSIZE], 0
 	 *	dctx->mst->epid = EPID;	// stw [%r0 + DMST_EPID], EPID
 	 *	dctx->mst->clid = CLID;	// stw [%r0 + DMST_CLID], CLID
+	 *	*((uint32_t *)&buf[DBUF_PRID]) = dctx->mst->prid;
+	 *				// ld %r1, [%r0 + DMST_PRID]
+	 *				// stw [%r9 + DBUF_PRID], %r1
 	 *	*((uint32_t *)&buf[DBUF_EPID]) = EPID;
 	 *				// stw [%r9 + DBUF_EPID], EPID
 	 */
@@ -1114,6 +1118,8 @@ dt_cg_prologue(dt_pcb_t *pcb, dt_node_t *pred)
 	emit(dlp,  BPF_STORE_IMM(BPF_DW, BPF_REG_0, DMST_SPECSIZE, 0));
 	emite(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_0, DMST_EPID, -1), epid);
 	emite(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_0, DMST_CLID, -1), clid);
+	emit(dlp,  BPF_LOAD(BPF_W, BPF_REG_1, BPF_REG_0, DMST_PRID));
+	emit(dlp,  BPF_STORE(BPF_W, BPF_REG_9, DBUF_PRID, BPF_REG_1));
 	emite(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_9, DBUF_EPID, -1), epid);
 
 	/*
diff --git a/libdtrace/dt_consume.c b/libdtrace/dt_consume.c
index dec2314b..21ba7ce7 100644
--- a/libdtrace/dt_consume.c
+++ b/libdtrace/dt_consume.c
@@ -17,6 +17,7 @@
 #include <dt_module.h>
 #include <dt_pcap.h>
 #include <dt_peb.h>
+#include <dt_probe.h>
 #include <dt_state.h>
 #include <dt_string.h>
 #include <libproc.h>
@@ -463,7 +464,7 @@ dt_flowindent(dtrace_hdl_t *dtp, dtrace_probedata_t *data, dtrace_epid_t last,
 	 */
 	if (flow == DTRACEFLOW_ENTRY) {
 		if (last != DTRACE_EPIDNONE && id != last &&
-		    pd->id == dtp->dt_pdesc[last]->id)
+		    pd->id == dtp->dt_pdesc[last]->id)                  // FIXME will this work if we do not get a pdesc from an epid?  or is it even a bad idea to access dtp->dt_pdesc[] directly anyhow?
 			flow = DTRACEFLOW_NONE;
 	}
 
@@ -2087,13 +2088,13 @@ dt_spec_buf_add_data(dtrace_hdl_t *dtp, dt_spec_buf_t *dtsb,
 		goto oom;
 
 	dsbd->dsbd_cpu = cpu;
-	dsbd->dsbd_size = size;
-	dsbd->dsbd_data = dt_alloc(dtp, size);
+	dsbd->dsbd_size = size;                      // FIXME should also change this, or wait?
+	dsbd->dsbd_data = dt_alloc(dtp, size + 8);   // FIXME OMG hack, need to account that the prid is in the previously ignored preceding 4-byte pad, also we want to preserve 8-byte alignment
 	if (!dsbd->dsbd_data)
 		goto oom;
 
 	dtsb->dtsb_size += size;
-	memcpy(dsbd->dsbd_data, data, size);
+	memcpy(dsbd->dsbd_data + 4, data - 4, size + 4);  // FIXME OMG, we want to copy starting at preceding 4-byte pad;  pad destination 4 bytes to preserve 8-byte alignment
 
 	dt_list_append(&dtsb->dtsb_dsbd_list, dsbd);
 	return dsbd;
@@ -2184,7 +2185,7 @@ dt_commit_one_spec(dtrace_hdl_t *dtp, FILE *fp, dt_spec_buf_t *dtsb,
 
 		memcpy(&specpdat, pdat, sizeof(dtrace_probedata_t));
 		specpdat.dtpda_cpu = dsbd->dsbd_cpu;
-		ret = dt_consume_one_probe(dtp, fp, dsbd->dsbd_data,
+		ret = dt_consume_one_probe(dtp, fp, dsbd->dsbd_data + 8,   // FIXME OMG; points to where data[0] used to be
 					   dsbd->dsbd_size, &specpdat,
 					   efunc, rfunc, flow, quiet,
 					   peekflags, last, 1, arg);
@@ -2227,7 +2228,12 @@ dt_consume_one_probe(dtrace_hdl_t *dtp, FILE *fp, char *data, uint32_t size,
 					 &pdat->dtpda_pdesc);
 	if (rval != 0)
 		return dt_set_errno(dtp, EDT_BADEPID);
-
+{
+// overwrite the (prid and) probe description we get from the EPID, using instead the prid we read from the output buffer
+uint32_t prid;
+prid = ((uint32_t *)data)[-1];           // should check prid > 0???
+pdat->dtpda_pdesc = (dtrace_probedesc_t *)dtp->dt_probes[prid]->desc;
+}
 	epd = pdat->dtpda_ddesc;
 	if (epd->dtdd_uarg != DT_ECB_DEFAULT) {
 		rval = dt_handle(dtp, pdat);
@@ -2661,7 +2667,7 @@ dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, char *buf,
 		 * struct {
 		 *	struct perf_event_header	header;
 		 *	uint32_t			size;
-		 *	uint32_t			pad;
+		 *	uint32_t			prid;
 		 *	uint32_t			epid;
 		 *	uint32_t			specid;
 		 *	uint64_t			data[n];
@@ -2678,6 +2684,7 @@ dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, char *buf,
 		if (ptr != buf + hdr->size)
 			return dt_set_errno(dtp, EDT_DSIZE);
 
+// FIXME get rid of this later
 		data += sizeof(uint32_t);		/* skip padding */
 		size -= sizeof(uint32_t);
 
diff --git a/libdtrace/dt_dctx.h b/libdtrace/dt_dctx.h
index 1422ad24..ebb83a98 100644
--- a/libdtrace/dt_dctx.h
+++ b/libdtrace/dt_dctx.h
@@ -82,6 +82,8 @@ typedef struct dt_dctx {
  * The dctx->buf pointer references a block of memory that contains:
  *
  *                       +----------------+
+ *                 -4 -> | PRID           |
+ *                       +----------------+
  *                  0 -> | EPID           |
  *                       +----------------+
  *		    4 -> | Speculation ID |
@@ -90,6 +92,7 @@ typedef struct dt_dctx {
  *                       |      ...       |
  *                       +----------------+
  */
+#define DBUF_PRID	-4
 #define DBUF_EPID	0
 #define DBUF_SPECID	4
 
-- 
2.18.4




More information about the DTrace-devel mailing list