[DTrace-devel] [PATCH 4/4] spec: correct semantics of specsize overflow

Kris Van Hees kris.van.hees at oracle.com
Tue May 9 07:19:54 UTC 2023


The speculations implementation (commit 7fe6ce9a "consume, cg: implement
speculations") did not detect speculation data exceeding the specsize
limit until the consumer added data fragments to a speculation buffer.
As a result, all statements in clauses were always executed.

The legacy behaviour is to abort clause execution once a data recording
action causes the speculation buffer to overflow.  That causes any
further statementa in the clause to be skipped.  The testsuite contains
an explicit test for this: tst.SpecSizeVariations4.d.  Unfortunately,
the test (and expected results) were amended to match the (incorrect)
new semantics.

This patch restores the legacy behaviour by tracking the size of the
speculation across clauses that contribute to a given speculation, so
that we can detect speculation buffer overflows as they occur due to
a specific data recording action.

Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
---
 bpf/speculation.c                             |  13 ++-
 libdtrace/dt_bpf_maps.h                       |   1 +
 libdtrace/dt_cg.c                             | 105 ++++++++++++++----
 libdtrace/dt_dctx.h                           |  19 +++-
 .../speculation/tst.SpecSizeVariations4.d     |   9 +-
 .../speculation/tst.SpecSizeVariations4.r     |   2 +-
 6 files changed, 119 insertions(+), 30 deletions(-)

diff --git a/bpf/speculation.c b/bpf/speculation.c
index 9f3c0e6c..04b1d3f0 100644
--- a/bpf/speculation.c
+++ b/bpf/speculation.c
@@ -105,29 +105,32 @@ noinline uint32_t dt_speculation(void)
  * Begin a speculation given an already-assigned ID.
  *
  * We consider a speculation ID usable only if it exists in the speculation map
- * (indicating that speculation() has returned it) with a a zero read value
+ * (indicating that speculation() has returned it) and it is not being drained
  * (indicating that neither commit() nor discard() have been called for it).
  * We bump the written value by one to indicate that another speculative buffer
  * is (or will soon be, once this clause terminates and its epilogue runs)
  * available for draining.
+ *
+ * We return the speculation entry in the map or 0.
  */
-noinline int32_t
+noinline dt_bpf_specs_t *
 dt_speculation_speculate(uint32_t id)
 {
 	dt_bpf_specs_t *spec;
 
 	if ((spec = bpf_map_lookup_elem(&specs, &id)) == NULL)
-		return -1;
+		return 0;
 
 	/*
 	 * Spec already being drained: do not continue to emit new
 	 * data into it.
 	 */
 	if (spec->draining)
-		return -1;
+		return 0;
 
 	atomic_add(&spec->written, 1);
-	return 0;
+
+	return spec;
 }
 
 /*
diff --git a/libdtrace/dt_bpf_maps.h b/libdtrace/dt_bpf_maps.h
index 074a95db..68b34b82 100644
--- a/libdtrace/dt_bpf_maps.h
+++ b/libdtrace/dt_bpf_maps.h
@@ -26,6 +26,7 @@ struct dt_bpf_probe {
 typedef struct dt_bpf_specs	dt_bpf_specs_t;
 struct dt_bpf_specs {
 	uint64_t	written;	/* number of spec buffers written */
+	uint64_t	size;		/* data size */
 	uint32_t	draining;	/* 1 if userspace has been asked to
 					 * drain this buffer */
 };
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index 403ec4bb..d308fe18 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -775,25 +775,27 @@ dt_cg_prologue(dt_pcb_t *pcb, dt_node_t *pred)
 	 *	dctx->mst->fault = 0;	// lddw %r0, [%r0 + DCTX_MST]
 	 *				// stdw [%r0 + DMST_FAULT], 0
 	 *	dctx->mst->tstamp = 0;	// stdw [%r0 + DMST_TSTAMP], 0
+	 *	dctx->mst->specsize = 0;// stdw [%r0 + DMST_SPECSIZE], 0
 	 *	dctx->mst->epid = EPID;	// stw [%r0 + DMST_EPID], EPID
 	 *	dctx->mst->clid = CLID;	// stw [%r0 + DMST_CLID], CLID
-	 *	*((uint32_t *)&buf[0]) = EPID;
-	 *				// stw [%r9 + 0], EPID
+	 *	*((uint32_t *)&buf[DBUF_EPID]) = EPID;
+	 *				// stw [%r9 + DBUF_EPID], EPID
 	 */
 	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_0, DCTX_MST));
 	emit(dlp,  BPF_STORE_IMM(BPF_DW, BPF_REG_0, DMST_FAULT, 0));
 	emit(dlp,  BPF_STORE_IMM(BPF_DW, BPF_REG_0, DMST_TSTAMP, 0));
+	emit(dlp,  BPF_STORE_IMM(BPF_DW, BPF_REG_0, DMST_SPECSIZE, 0));
 	emite(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_0, DMST_EPID, -1), epid);
 	emite(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_0, DMST_CLID, -1), clid);
-	emite(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_9, 0, -1), epid);
+	emite(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_9, DBUF_EPID, -1), epid);
 
 	/*
 	 *	Set the speculation ID field to zero to indicate no active
 	 *	speculation.
-	 *	*((uint32_t *)&buf[4]) = 0;
-	 *				// stw [%r9 + 4], 0
+	 *	*((uint32_t *)&buf[DBUF_SPECID]) = 0;
+	 *				// stw [%r9 + DBUF_SPECID], 0
 	 */
-	emit(dlp,  BPF_STORE_IMM(BPF_W, BPF_REG_9, 4, 0));
+	emit(dlp,  BPF_STORE_IMM(BPF_W, BPF_REG_9, DBUF_SPECID, 0));
 
 	/*
 	 * If there is a predicate:
@@ -813,7 +815,8 @@ dt_cg_prologue(dt_pcb_t *pcb, dt_node_t *pred)
 	TRACE_REGSET("Prologue: End  ");
 
 	/*
-	 * Account for 32-bit epid (at offset 0) and 32-bit tag (at offset 4).
+	 * Account for 32-BIt EPID (at offset 0) and 32-bit speculation ID (at
+	 * offset 4).
 	 */
 	pcb->pcb_bufoff += 2 * sizeof(uint32_t);
 }
@@ -888,6 +891,21 @@ dt_cg_epilogue(dt_pcb_t *pcb)
 			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, pcb->pcb_exitlbl));
 			emit(dlp, BPF_MOV_IMM(BPF_REG_1, 1));
 			emit(dlp, BPF_XADD_REG(BPF_W, BPF_REG_0, 0, BPF_REG_1));
+
+			/*
+			 * Add the size of the trace data to the overall count
+			 * for the current speculation.
+			 */
+			idp = dt_dlib_get_map(dtp, "specs");
+			assert(idp != NULL);
+			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_2, BPF_REG_FP, DT_STK_SP));
+			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_9, DBUF_SPECID));
+			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_2, 0, BPF_REG_0));
+			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, pcb->pcb_exitlbl));
+			emit(dlp, BPF_MOV_IMM(BPF_REG_1, pcb->pcb_bufoff));
+			emit(dlp, BPF_XADD_REG(BPF_DW, BPF_REG_0, offsetof(dt_bpf_specs_t, size), BPF_REG_1));
 		} else {
 			idp = dt_dlib_get_map(dtp, "cpuinfo");
 			assert(idp != NULL);
@@ -994,8 +1012,6 @@ dt_cg_check_fault(dt_pcb_t *pcb)
 	emit(dlp,  BPF_LOAD(BPF_DW, reg, reg, DCTX_MST));
 	emit(dlp,  BPF_LOAD(BPF_DW, reg, reg, DMST_FAULT));
 	emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, reg, 0, lbl_ok));
-	emit(dlp,  BPF_MOV_IMM(BPF_REG_0, 0));
-	emit(dlp,  BPF_RETURN());
 	emitl(dlp, lbl_ok,
 		   BPF_NOP());
 	dt_regset_free(drp, reg);
@@ -1287,6 +1303,7 @@ dt_cg_store_val(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind,
 	uint_t			off;
 	size_t			size;
 	int			not_null = 0;
+	int			cflags = pcb->pcb_stmt->dtsd_clauseflags;
 
 	/*
 	 * Special case for aggregations: we store the aggregation id.  We
@@ -1337,7 +1354,7 @@ dt_cg_store_val(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind,
 		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_9, off + 8, dnp->dn_reg));
 		dt_regset_free(drp, dnp->dn_reg);
 
-		return 0;
+		goto ok;
 	}
 
 	if (dt_node_is_scalar(dnp) || dt_node_is_float(dnp) ||
@@ -1350,15 +1367,15 @@ dt_cg_store_val(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind,
 		emit(dlp, BPF_STORE(ldstw[size], BPF_REG_9, off, dnp->dn_reg));
 		dt_regset_free(drp, dnp->dn_reg);
 
-		return 0;
+		goto ok;
 	} else if (dt_node_is_string(dnp)) {
-		size_t	strsize = pcb->pcb_hdl->dt_options[DTRACEOPT_STRSIZE];
+		size_t	strsize = dtp->dt_options[DTRACEOPT_STRSIZE];
 
 		if (!not_null)
 			dt_cg_check_ptr_arg(dlp, drp, dnp, NULL);
 
 		TRACE_REGSET("store_val(): Begin ");
-		off = dt_rec_add(pcb->pcb_hdl, dt_cg_fill_gap, kind, size + 1,
+		off = dt_rec_add(dtp, dt_cg_fill_gap, kind, size + 1,
 				 1, pfp, arg);
 
 		/*
@@ -1381,7 +1398,7 @@ dt_cg_store_val(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind,
 		dt_regset_free(drp, BPF_REG_0);
 		TRACE_REGSET("store_val(): End   ");
 
-		return 0;
+		goto ok;
 	}
 
 	/* Handle tracing of by-ref values (arrays, struct, union). */
@@ -1407,10 +1424,48 @@ dt_cg_store_val(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind,
 		dt_regset_free(drp, BPF_REG_0);
 		TRACE_REGSET("store_val(): End   ");
 
-		return 0;
+		goto ok;
 	}
 
 	return -1;
+
+ok:
+	/*
+	 * If this clause contains a speculate() statement, we need to
+	 * generate code to verify that the amount of data recorded for this
+	 * speculation (incl. what was just added) does not exceed the
+	 * specsize limit.  If it does, further execution of this clause
+	 * should be aborted.
+	 */
+	if (cflags & DT_CLSFLAG_SPECULATE) {
+		uint_t	lbl_ok = dt_irlist_label(dlp);
+		size_t	specsize = dtp->dt_options[DTRACEOPT_SPECSIZE];
+
+		/*
+		 * if (*((uint32_t *)&buf[DBUF_SPECID]) != 0) {
+		 *     if (dctx->dmst->specsize + off + size >
+		 *	   dtp->dt_options[DTRACEOPT_SPECSIZE])
+		 *         return;
+		 * }
+		 */
+		dt_regset_xalloc(drp, BPF_REG_0);
+		emit(dlp,  BPF_LOAD(BPF_W, BPF_REG_0, BPF_REG_9, DBUF_SPECID));
+		emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_ok));
+
+		emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_FP, DT_STK_DCTX));
+		emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_0, DCTX_MST));
+		emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_0, DMST_SPECSIZE));
+		emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, off + size));
+		emit(dlp,  BPF_BRANCH_IMM(BPF_JLE, BPF_REG_0, specsize, lbl_ok));
+		emit(dlp,  BPF_MOV_IMM(BPF_REG_0, 0));
+		emit(dlp,  BPF_RETURN());
+		dt_regset_free(drp, BPF_REG_0);
+
+		emitl(dlp, lbl_ok,
+			   BPF_NOP());
+	}
+
+	return 0;
 }
 
 static void
@@ -1953,7 +2008,8 @@ dt_cg_act_setopt(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
  * back pending a commit() or discard() for the speculation with the given id.
  *
  * Updates the specid in the output buffer header, rather than emitting a new
- * record into it.
+ * record into it.  The dctx->dmst->specsize value is initialized with the size
+ * of the data thus far recorded for this speculation.
  */
 static void
 dt_cg_act_speculate(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
@@ -1973,14 +2029,16 @@ dt_cg_act_speculate(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
 
 	/*
-	 *	rc = dt_speculation_speculate(spec);
+	 *	dt_bpf_specs_t *spec = dt_speculation_speculate(specid);
 	 *				// lddw %r1, %dn_reg
 	 *				// call dt_speculation_speculate
 	 *				//     (%r1 ... %r5 clobbered)
 	 *				//     (%r0 = error return)
-	 *	if (rc != 0)		// jne %r0, 0, lbl_exit
+	 *	if (spec == 0)		// jne %r0, 0, lbl_exit
 	 *		goto exit;
-	 *	*((uint32_t *)&buf[4]) = spec;	// mov [%r9 + 4], %dn_reg
+	 *	*((uint32_t *)&buf[DBUF_SPECID]) = specid;
+	 *				// mov [%r9 + DBUF_SPECID], %dn_reg
+	 *	dctx->dmst->specsize = spec->size;
 	 *	exit:			// nop
 	 */
 
@@ -1990,8 +2048,13 @@ dt_cg_act_speculate(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 	dt_regset_xalloc(drp, BPF_REG_0);
 	emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
 	dt_regset_free_args(drp);
-	emit(dlp, BPF_BRANCH_IMM(BPF_JNE, BPF_REG_0, 0, lbl_exit));
-	emit(dlp, BPF_STORE(BPF_W, BPF_REG_9, 4, dnp->dn_reg));
+	emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_exit));
+	emit(dlp, BPF_STORE(BPF_W, BPF_REG_9, DBUF_SPECID, dnp->dn_reg));
+	/* We need a register, so we re-use dnp->dn_reg. */
+	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_0, offsetof(dt_bpf_specs_t, size)));
+	emit(dlp,  BPF_LOAD(BPF_DW, dnp->dn_reg, BPF_REG_FP, DT_STK_DCTX));
+	emit(dlp,  BPF_LOAD(BPF_DW, dnp->dn_reg, dnp->dn_reg, DCTX_MST));
+	emit(dlp,  BPF_STORE(BPF_DW, dnp->dn_reg, DMST_SPECSIZE, BPF_REG_0));
 	dt_regset_free(drp, BPF_REG_0);
 	dt_regset_free(drp, dnp->dn_reg);
 }
diff --git a/libdtrace/dt_dctx.h b/libdtrace/dt_dctx.h
index dcf111fb..315bdc37 100644
--- a/libdtrace/dt_dctx.h
+++ b/libdtrace/dt_dctx.h
@@ -1,6 +1,6 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -24,6 +24,7 @@ typedef struct dt_mstate {
 	uint32_t	tag;		/* Tag (for future use) */
 	uint32_t	scratch_top;	/* Current top of scratch space */
 	int32_t		syscall_errno;	/* syscall errno */
+	uint64_t	specsize;	/* speculation size */
 	uint64_t	scalarizer;	/* used to scalarize pointers */
 	uint64_t	fault;		/* DTrace fault flags */
 	uint64_t	tstamp;		/* cached timestamp value */
@@ -38,6 +39,7 @@ typedef struct dt_mstate {
 #define DMST_TAG		offsetof(dt_mstate_t, tag)
 #define DMST_SCRATCH_TOP	offsetof(dt_mstate_t, scratch_top)
 #define DMST_ERRNO		offsetof(dt_mstate_t, syscall_errno)
+#define DMST_SPECSIZE		offsetof(dt_mstate_t, specsize)
 #define DMST_SCALARIZER		offsetof(dt_mstate_t, scalarizer)
 #define DMST_FAULT		offsetof(dt_mstate_t, fault)
 #define DMST_TSTAMP		offsetof(dt_mstate_t, tstamp)
@@ -74,6 +76,21 @@ typedef struct dt_dctx {
 
 #define DCTX_SIZE	((int16_t)sizeof(dt_dctx_t))
 
+/*
+ * The dctx->buf pointer references a block of memory that contains:
+ *
+ *                       +----------------+
+ *                  0 -> | EPID           |
+ *                       +----------------+
+ *		    4 -> | Speculation ID |
+ *                       +----------------+
+ *                       | Trace Data     |
+ *                       |      ...       |
+ *                       +----------------+
+ */
+#define DBUF_EPID	0
+#define DBUF_SPECID	4
+
 /*
  * The dctx->mem pointer references a block of memory that contains:
  *
diff --git a/test/unittest/speculation/tst.SpecSizeVariations4.d b/test/unittest/speculation/tst.SpecSizeVariations4.d
index 569b426a..4221c89e 100644
--- a/test/unittest/speculation/tst.SpecSizeVariations4.d
+++ b/test/unittest/speculation/tst.SpecSizeVariations4.d
@@ -1,6 +1,6 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2023, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -50,12 +50,17 @@ BEGIN
 /1 <= self->commitFlag/
 {
 	printf("Statement was executed\n");
-	exit(0);
+	exit(1);
 }
 
 BEGIN
 /1 > self->commitFlag/
 {
 	printf("Statement wasn't executed\n");
+	exit(0);
+}
+
+ERROR
+{
 	exit(1);
 }
diff --git a/test/unittest/speculation/tst.SpecSizeVariations4.r b/test/unittest/speculation/tst.SpecSizeVariations4.r
index 719e2441..41ccfc14 100644
--- a/test/unittest/speculation/tst.SpecSizeVariations4.r
+++ b/test/unittest/speculation/tst.SpecSizeVariations4.r
@@ -1,3 +1,3 @@
 Speculative buffer ID: 1
-Statement was executed
+Statement wasn't executed
 
-- 
2.40.1




More information about the DTrace-devel mailing list