[DTrace-devel] [PATCH 3/4] Drop counters support

Kris Van Hees kris.van.hees at oracle.com
Tue May 9 07:19:04 UTC 2023


DTrace provides a mechanism to report dropped events.  A drop occurs
when trace data cannot be recorded for a particular reason.  This
patch contains the full implementation of 4 categories of drops,
primarily because they are all closely related:

- principal buffer drops: reported when the producer failed to add
  trace data to the principal buffer
- aggregation buffer drops: reported when the producer failed to
  allocate an aggregation and therefore failed to record data
- speculation drops: reported when something goes wrong with the
  recording of speculative tracing data
    + regular drops: reported when speculative data could not be
      written to a speculation buffer
    + busy drops: reported when a speculation could not be created
      because all buffers are busy being committed or discarded
    + unavailable drops: reported when no available speculation
      buffers were found
- dynamic variable drops: reported when a dynamic variable (or
  associative array element) could not be allocated

Two mechanisms for reporting drops are needed:

(1) Per-CPU reporting: used for principal and aggregation buffer
    drops (stored in the cpuinfo structures)
(2) Global reporting: used for speculation and dynamic variable
    drops (stored in the state BPF map)

Detection of drops (and subsequent reporting to the user) is done
through frequent retrieval of status data.  The handling of status
data (and the use of statusrate) is being re-introduced with this
patch.

The drop count for speculations is a bit more complex than the other
ones because drops can occur both in the producer (when data cannot
be written to the trace output buffer) *and* in the consumer (when
data cannot be recorded in a speculation buffer).  These separate
counts are combined whenever status processing takes place to ensure
the correct drop count is presented to the user.

Various tests have updated expected results because drops are now
being reported correctly.

Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
---
 bpf/get_agg.c                                 |  25 +++-
 bpf/get_dvar.c                                |  27 ++++-
 bpf/speculation.c                             |  35 +++++-
 libdtrace/dt_aggregate.c                      |  21 ++--
 libdtrace/dt_bpf.c                            |  15 ++-
 libdtrace/dt_cg.c                             |  56 +++++++--
 libdtrace/dt_consume.c                        |  28 +++--
 libdtrace/dt_handle.c                         |   3 +
 libdtrace/dt_impl.h                           |  12 ++
 libdtrace/dt_open.c                           |   7 ++
 libdtrace/dt_state.h                          |   6 +-
 libdtrace/dt_work.c                           | 109 +++++++++++++++++-
 libdtrace/dtrace.h                            |   1 +
 test/unittest/assocs/tst.store_zero_deletes.d |   9 +-
 .../drops/drp.DTRACEDROP_AGGREGATION.d        |   7 +-
 .../drops/drp.DTRACEDROP_AGGREGATION.r        |   3 +-
 test/unittest/drops/drp.DTRACEDROP_DYNAMIC.d  |   7 +-
 test/unittest/drops/drp.DTRACEDROP_DYNAMIC.r  |   2 +-
 .../unittest/drops/drp.DTRACEDROP_PRINCIPAL.d |  11 +-
 .../drops/drp.DTRACEDROP_PRINCIPAL.end.d      |  11 +-
 .../drops/drp.DTRACEDROP_PRINCIPAL.end.r      |   4 +-
 .../unittest/drops/drp.DTRACEDROP_PRINCIPAL.r |   4 +-
 test/unittest/speculation/tst.NoSpecBuffer.r  |   2 +
 .../unittest/speculation/tst.TwoSpecBuffers.r |   2 +
 24 files changed, 333 insertions(+), 74 deletions(-)

diff --git a/bpf/get_agg.c b/bpf/get_agg.c
index b72127d1..0f26ed34 100644
--- a/bpf/get_agg.c
+++ b/bpf/get_agg.c
@@ -5,6 +5,8 @@
 #include <linux/bpf.h>
 #include <stdint.h>
 #include <bpf-helpers.h>
+#include <bpf-lib.h>
+#include <dt_bpf_maps.h>
 #include <dt_dctx.h>
 
 #ifndef noinline
@@ -12,6 +14,23 @@
 #endif
 
 extern struct bpf_map_def agggen;
+extern struct bpf_map_def cpuinfo;
+
+/*
+ * Register an aggregation drop.
+ */
+noinline uint64_t *dt_no_agg(void)
+{
+	uint32_t		key = 0;
+	dt_bpf_cpuinfo_t	*ci;
+
+	ci = bpf_map_lookup_elem(&cpuinfo, &key);
+	if (ci == 0)
+		return 0;
+
+	atomic_add(&ci->agg_drops, 1);
+	return 0;
+}
 
 /*
  * Get a pointer to the data storage for an aggregation.  Regular aggregations
@@ -29,7 +48,7 @@ noinline uint64_t *dt_get_agg(const dt_dctx_t *dctx, uint32_t id,
 	/* get the gen value */
 	genp = bpf_map_lookup_elem(&agggen, &id);
 	if (genp == 0)
-		return 0;
+		return dt_no_agg();
 
 	/* place the variable ID at the beginning of the key */
 	*(uint32_t *)key = id;
@@ -41,11 +60,11 @@ noinline uint64_t *dt_get_agg(const dt_dctx_t *dctx, uint32_t id,
 	if (valp == 0 || valp[0] < *genp) {
 		/* start with all zeroes */
 		if (bpf_map_update_elem(dctx->agg, key, dflt, BPF_ANY) < 0)
-			return 0;
+			return dt_no_agg();
 
 		valp = bpf_map_lookup_elem(dctx->agg, key);
 		if (valp == 0)
-			return 0;
+			return dt_no_agg();
 
 		/* ival is nonzero only for min() and max() */
 		if (ival)
diff --git a/bpf/get_dvar.c b/bpf/get_dvar.c
index d11182df..b7e49d4c 100644
--- a/bpf/get_dvar.c
+++ b/bpf/get_dvar.c
@@ -1,19 +1,38 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
  */
 #include <linux/bpf.h>
 #include <stdint.h>
 #include <bpf-helpers.h>
+#include <bpf-lib.h>
+#include <dt_dctx.h>
 
 #ifndef noinline
 # define noinline	__attribute__((noinline))
 #endif
 
 extern struct bpf_map_def dvars;
+extern struct bpf_map_def state;
 extern struct bpf_map_def tuples;
 extern uint64_t NCPUS;
 
+/*
+ * Register a dynamic variable drop.
+ */
+noinline void *dt_no_dvar(void)
+{
+	uint32_t	kind = DT_STATE_DYNVAR_DROPS;
+	uint32_t	*valp;
+
+	valp = bpf_map_lookup_elem(&state, &kind);
+	if (valp == 0)
+		return 0;
+
+	atomic_add32(valp, 1);
+	return 0;
+}
+
 /*
  * Dynamic variables are identified using a unique 64-bit key.  Three different
  * categories of dynamic variables are supported in DTrace:
@@ -80,7 +99,7 @@ noinline void *dt_get_dvar(uint64_t key, uint64_t store, uint64_t nval,
 	 * with the default value.
 	 */
 	if (bpf_map_update_elem(&dvars, &key, dflt, BPF_ANY) < 0)
-		return 0;
+		return dt_no_dvar();
 
 	val = bpf_map_lookup_elem(&dvars, &key);
 	if (val != 0)
@@ -126,13 +145,13 @@ noinline void *dt_get_assoc(uint32_t id, const char *tuple, uint64_t store,
 		 * actual value.
 		 */
 		if (bpf_map_update_elem(&tuples, tuple, &dflt_val, BPF_ANY) < 0)
-			return 0;
+			return dt_no_dvar();
 		valp = bpf_map_lookup_elem(&tuples, tuple);
 		if (valp == 0)
 			return 0;
 		*valp = (uint64_t)valp;
 		if (bpf_map_update_elem(&tuples, tuple, valp, BPF_ANY) < 0)
-			return 0;
+			return dt_no_dvar();
 
 		val = *valp;
 	} else {
diff --git a/bpf/speculation.c b/bpf/speculation.c
index 0a19ac33..9f3c0e6c 100644
--- a/bpf/speculation.c
+++ b/bpf/speculation.c
@@ -19,18 +19,36 @@
 #endif
 
 extern struct bpf_map_def specs;
+extern struct bpf_map_def state;
 extern uint64_t NSPEC;
 
+/*
+ * Register a speculation drop (of the given kind).
+ */
+noinline uint32_t dt_no_spec(uint32_t kind)
+{
+	uint32_t	*valp;
+
+	valp = bpf_map_lookup_elem(&state, &kind);
+	if (valp == 0)
+		return 0;
+
+	atomic_add32(valp, 1);
+	return 0;
+}
+
 /*
  * Assign a speculation ID.
  */
 noinline uint32_t dt_speculation(void)
 {
-	uint32_t id;
-	dt_bpf_specs_t zero;
+	uint32_t	id, busy;
+	dt_bpf_specs_t	zero;
+	dt_bpf_specs_t	*spec;
 
 	__builtin_memset(&zero, 0, sizeof (dt_bpf_specs_t));
 
+	busy = 0;
 #if 1 /* Loops are broken in BPF right now */
 #define SEARCH(n)							\
 	do {								\
@@ -40,6 +58,11 @@ noinline uint32_t dt_speculation(void)
 		if (bpf_map_update_elem(&specs, &id, &zero,		\
 			BPF_NOEXIST) == 0)				\
 			return id;					\
+		spec = bpf_map_lookup_elem(&specs, &id);		\
+		if (spec != 0 && spec->draining > 0) {			\
+			busy++;						\
+			break;						\
+		}							\
 	} while (0);
 
 	SEARCH(1);
@@ -66,10 +89,16 @@ noinline uint32_t dt_speculation(void)
 		if (bpf_map_update_elem(&specs, &id, &zero,
 					BPF_NOEXIST) == 0)
 			return id;
+
+		spec = bpf_map_lookup_elem(&specs, &id);
+		if (spec != 0 && spec->draining > 0)  {
+			busy++;
+			break;
+		}
 	}
 #endif
 
-	return 0;
+	return dt_no_spec(busy ? DT_STATE_SPEC_BUSY : DT_STATE_SPEC_UNAVAIL);
 }
 
 /*
diff --git a/libdtrace/dt_aggregate.c b/libdtrace/dt_aggregate.c
index 0bd91ba3..7c09908d 100644
--- a/libdtrace/dt_aggregate.c
+++ b/libdtrace/dt_aggregate.c
@@ -605,21 +605,24 @@ hashnext:
 static int
 dt_aggregate_snap_cpu(dtrace_hdl_t *dtp, processorid_t cpu, int fd)
 {
-	dt_aggregate_t	*agp = &dtp->dt_aggregate;
-	size_t		ksize = dtp->dt_maxtuplesize;
-	char		*key = agp->dtat_key;
-	char		*data = agp->dtat_buf;
-	char		*nxt = agp->dtat_nextkey;
-	uint32_t	*aggidp = (uint32_t *)key;
-	int		rval;
-
+	dt_aggregate_t		*agp = &dtp->dt_aggregate;
+	size_t			ksize = dtp->dt_maxtuplesize;
+	char			*key = agp->dtat_key;
+	char			*data = agp->dtat_buf;
+	char			*nxt = agp->dtat_nextkey;
+	uint32_t		*aggidp = (uint32_t *)key;
+	int			rval;
 
 	*aggidp = DTRACE_AGGIDNONE;
 	while (dt_bpf_map_next_key(fd, key, nxt) == 0) {
+		rval = dt_check_cpudrops(dtp, cpu, DTRACEDROP_AGGREGATION);
+		if (rval != 0)
+			return rval;
+
 		memcpy(key, nxt, ksize);
 
 		if (dt_bpf_map_lookup(fd, key, data) == -1)
-			return -1; /* FIXME: dt_set_errno() */
+			return dt_set_errno(dtp, EDT_BPF);
 
 		rval = dt_aggregate_snap_one(dtp, *aggidp, cpu, key, data);
 		if (rval != 0)
diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
index 3cf8ef89..54086077 100644
--- a/libdtrace/dt_bpf.c
+++ b/libdtrace/dt_bpf.c
@@ -604,26 +604,29 @@ gmap_create_buffers(dtrace_hdl_t *dtp)
 static int
 gmap_create_cpuinfo(dtrace_hdl_t *dtp)
 {
-	int			i, fd, rc;
+	int			i, rc;
 	uint32_t		key = 0;
 	dtrace_conf_t		*conf = &dtp->dt_conf;
 	size_t			ncpus = conf->max_cpuid + 1;
 	dt_bpf_cpuinfo_t	*data;
 	cpuinfo_t		*ci;
 
-	data = dt_zalloc(dtp, ncpus * sizeof(dt_bpf_cpuinfo_t));
+	data = dt_calloc(dtp, dtp->dt_conf.num_possible_cpus,
+			 sizeof(dt_bpf_cpuinfo_t));
 	if (data == NULL)
 		return dt_set_errno(dtp, EDT_NOMEM);
 
 	for (i = 0, ci = &conf->cpus[0]; i < ncpus; i++, ci++)
 		memcpy(&data[ci->cpu_id].ci, ci, sizeof(cpuinfo_t));
 
-	fd = create_gmap(dtp, "cpuinfo", BPF_MAP_TYPE_PERCPU_ARRAY,
-			 sizeof(uint32_t), sizeof(dt_bpf_cpuinfo_t), 1);
-	if (fd == -1)
+	dtp->dt_cpumap_fd = create_gmap(dtp, "cpuinfo",
+					BPF_MAP_TYPE_PERCPU_ARRAY,
+					sizeof(uint32_t),
+					sizeof(dt_bpf_cpuinfo_t), 1);
+	if (dtp->dt_cpumap_fd == -1)
 		return -1;
 
-	rc = dt_bpf_map_update(fd, &key, data);
+	rc = dt_bpf_map_update(dtp->dt_cpumap_fd, &key, data);
 	dt_free(dtp, data);
 	if (rc == -1)
 		return dt_bpf_error(dtp,
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index 14ae21f7..403ec4bb 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -632,6 +632,7 @@ void
 dt_cg_tramp_epilogue(dt_pcb_t *pcb)
 {
 	dt_cg_tramp_call_clauses(pcb, pcb->pcb_probe, DT_ACTIVITY_ACTIVE);
+
 	/*
 	 * For each dependent probe (if any):
 	 *	1.1 Call dt_cg_tramp_save_args()
@@ -827,6 +828,7 @@ dt_cg_prologue(dt_pcb_t *pcb, dt_node_t *pred)
 static void
 dt_cg_epilogue(dt_pcb_t *pcb)
 {
+	dtrace_hdl_t	*dtp = pcb->pcb_hdl;
 	dt_irlist_t	*dlp = &pcb->pcb_ir;
 
 	TRACE_REGSET("Epilogue: Begin");
@@ -839,14 +841,16 @@ dt_cg_epilogue(dt_pcb_t *pcb)
 	 */
 	if (pcb->pcb_stmt->dtsd_clauseflags & DT_CLSFLAG_DATAREC ||
 	    pcb->pcb_stmt->dtsd_clauseflags & DT_CLSFLAG_COMMIT_DISCARD) {
-		dt_ident_t *buffers = dt_dlib_get_map(pcb->pcb_hdl, "buffers");
+		dt_ident_t	*buffers = dt_dlib_get_map(dtp, "buffers");
+		dt_ident_t	*idp;
+		int		cflags = pcb->pcb_stmt->dtsd_clauseflags;
 
 		assert(buffers != NULL);
 
 		/*
-		 *	bpf_perf_event_output(dctx->ctx, &buffers,
-		 *			      BPF_F_CURRENT_CPU,
-		 *			      buf - 4, bufoff + 4);
+		 *	rc = bpf_perf_event_output(dctx->ctx, &buffers,
+		 *				   BPF_F_CURRENT_CPU,
+		 *				   buf - 4, bufoff + 4);
 		 *				// lddw %r1, [%fp + DT_STK_DCTX]
 		 *				// lddw %r1, [%r1 + DCTX_CTX]
 		 *				// lddw %r2, &buffers
@@ -856,7 +860,6 @@ dt_cg_epilogue(dt_pcb_t *pcb)
 		 *				// mov %r5, pcb->pcb_bufoff
 		 *				// add %r5, 4
 		 *				// call bpf_perf_event_output
-		 *
 		 */
 		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_FP, DT_STK_DCTX));
 		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_1, DCTX_CTX));
@@ -867,6 +870,36 @@ dt_cg_epilogue(dt_pcb_t *pcb)
 		emit(dlp, BPF_MOV_IMM(BPF_REG_5, pcb->pcb_bufoff));
 		emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 4));
 		emit(dlp, BPF_CALL_HELPER(BPF_FUNC_perf_event_output));
+
+		/*
+		 * If writing the trace data to the output buffer failed,
+		 * increment the drop count for the principal buffer (no
+		 * speculate() in the clause) or for speculations (speculate()
+		 * in the clause).
+		 */
+		emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, pcb->pcb_exitlbl));
+		if (cflags & DT_CLSFLAG_SPECULATE) {
+			idp = dt_dlib_get_map(dtp, "state");
+			assert(idp != NULL);
+			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_2, BPF_REG_FP, DT_STK_SP));
+			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, DT_STATE_SPEC_DROPS));
+			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, pcb->pcb_exitlbl));
+			emit(dlp, BPF_MOV_IMM(BPF_REG_1, 1));
+			emit(dlp, BPF_XADD_REG(BPF_W, BPF_REG_0, 0, BPF_REG_1));
+		} else {
+			idp = dt_dlib_get_map(dtp, "cpuinfo");
+			assert(idp != NULL);
+			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_2, BPF_REG_FP, DT_STK_SP));
+			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, pcb->pcb_exitlbl));
+			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, buf_drops)));
+			emit(dlp, BPF_MOV_IMM(BPF_REG_1, 1));
+			emit(dlp, BPF_XADD_REG(BPF_DW, BPF_REG_0, 0, BPF_REG_1));
+		}
 	}
 
 	/*
@@ -3150,6 +3183,7 @@ dt_cg_store_var(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp,
 		dt_regset_free_args(drp);
 		lbl_done = dt_irlist_label(dlp);
 		emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, dnp->dn_reg, 0, lbl_done));
+		emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_done));
 
 		if ((reg = dt_regset_alloc(drp)) == -1)
 			longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
@@ -3157,7 +3191,9 @@ dt_cg_store_var(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp,
 		emit(dlp,  BPF_MOV_REG(reg, BPF_REG_0));
 		dt_regset_free(drp, BPF_REG_0);
 
+#if 0
 		dt_cg_check_notnull(dlp, drp, reg);
+#endif
 
 		if (dnp->dn_flags & DT_NF_REF) {
 			size_t	srcsz;
@@ -7228,7 +7264,7 @@ dt_cg_agg(dt_pcb_t *pcb, dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
 					dnp->dn_ctfp,
 					dtp->dt_type_void,
 				 };
-	uint_t		Lgot_agg = dt_irlist_label(dlp);
+	uint_t		Lno_agg = dt_irlist_label(dlp);
 
 	assert(idp != NULL);
 
@@ -7284,11 +7320,8 @@ dt_cg_agg(dt_pcb_t *pcb, dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
 	emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
 	dt_regset_free_args(drp);
 
-	emit(dlp,  BPF_BRANCH_IMM(BPF_JNE, BPF_REG_0, 0, Lgot_agg));
+	emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, Lno_agg));
 	dt_regset_free(drp, BPF_REG_0);
-	dt_cg_probe_error(yypcb, DTRACEFLT_ILLOP, DT_ISIMM, 0);
-	emitl(dlp, Lgot_agg,
-		   BPF_NOP());
 
 	/* Push the agg data pointer onto the stack. */
 	dt_cg_push_stack(BPF_REG_0, dlp, drp);
@@ -7326,6 +7359,9 @@ dt_cg_agg(dt_pcb_t *pcb, dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
 			dt_aggid_rec_add(dtp, aid->di_id, size, alignment);
 		}
 	}
+
+	emitl(dlp, Lno_agg,
+		   BPF_NOP());
 }
 
 void
diff --git a/libdtrace/dt_consume.c b/libdtrace/dt_consume.c
index d628ff21..cca38137 100644
--- a/libdtrace/dt_consume.c
+++ b/libdtrace/dt_consume.c
@@ -2226,17 +2226,21 @@ dt_consume_one_probe(dtrace_hdl_t *dtp, FILE *fp, char *data, uint32_t size,
 		if (dtsb)
 			cursz = dtsb->dtsb_size;
 
-		if (cursz + size > dtp->dt_options[DTRACEOPT_SPECSIZE])
+		if (cursz + size > dtp->dt_options[DTRACEOPT_SPECSIZE]) {
+			dtp->dt_specdrops++;
 			return DTRACE_WORKSTATUS_OKAY;
+		}
 
 		if (!dtsb) {
-			if ((dtsb = dt_spec_buf_create(dtp, specid)) == NULL)
-				return -1;
+			if ((dtsb = dt_spec_buf_create(dtp, specid)) == NULL) {
+				dtp->dt_specdrops++;
+				return DTRACE_WORKSTATUS_OKAY;
+			}
 		}
 
 		if (dt_spec_buf_add_data(dtp, dtsb, epid, pdat->dtpda_cpu, epd,
 					 data, size) == NULL)
-			return -1;
+			dtp->dt_specdrops++;
 
 		return DTRACE_WORKSTATUS_OKAY;
 	}
@@ -2613,8 +2617,8 @@ dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, char *buf,
 					    rfunc, flow, quiet, peekflags,
 					    last, 0, arg);
 	} else if (hdr->type == PERF_RECORD_LOST) {
-#ifdef FIXME
-		uint64_t	lost;
+#if 0
+		uint64_t	drops;
 
 		/*
 		 * struct {
@@ -2624,11 +2628,12 @@ dt_consume_one(dtrace_hdl_t *dtp, FILE *fp, char *buf,
 		 * }
 		 * and data points to the 'id' member at this point.
 		 */
-		lost = *(uint64_t *)(data + sizeof(uint64_t));
+		drops = *(uint64_t *)(data + sizeof(uint64_t));
+		return dt_handle_cpudrop(dtp, pdat->dtpda_cpu,
+					 DTRACEDROP_PRINCIPAL, drops);
+#else
+		return DTRACE_WORKSTATUS_OKAY;
 #endif
-
-		/* FIXME: To be implemented */
-		return DTRACE_WORKSTATUS_ERROR;
 	} else
 		return DTRACE_WORKSTATUS_ERROR;
 }
@@ -2670,6 +2675,9 @@ dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, dt_peb_t *peb,
 	flow = (dtp->dt_options[DTRACEOPT_FLOWINDENT] != DTRACEOPT_UNSET);
 	quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
 
+	if (dt_check_cpudrops(dtp, peb->cpu, DTRACEDROP_PRINCIPAL) != 0)
+		return DTRACE_WORKSTATUS_ERROR;
+
 	/*
 	 * Clear the probe data, and fill in data independent fields.
 	 *
diff --git a/libdtrace/dt_handle.c b/libdtrace/dt_handle.c
index 75b01908..4c9b9413 100644
--- a/libdtrace/dt_handle.c
+++ b/libdtrace/dt_handle.c
@@ -313,6 +313,9 @@ dt_handle_cpudrop(dtrace_hdl_t *dtp, processorid_t cpu,
 
 	assert(what == DTRACEDROP_PRINCIPAL || what == DTRACEDROP_AGGREGATION);
 
+	if (howmany == 0)
+		return 0;
+
 	memset(&drop, 0, sizeof(drop));
 	drop.dtdda_handle = dtp;
 	drop.dtdda_cpu = cpu;
diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
index e240cef0..33c2dec7 100644
--- a/libdtrace/dt_impl.h
+++ b/libdtrace/dt_impl.h
@@ -248,6 +248,11 @@ typedef struct dt_spec_buf {
 	struct dt_hentry dtsb_he;	/* htab links */
 } dt_spec_buf_t;
 
+typedef struct dt_percpu_drops {
+	uint64_t	buf;		/* principal buffer drops */
+	uint64_t	agg;		/* aggregate buffer drops */
+} dt_percpu_drops_t;
+
 /*
  * This will be raised much higher in future: right now it is nailed low
  * because the search-for-free-speculation code is unrolled rather than being a
@@ -381,6 +386,7 @@ struct dtrace_hdl {
 	int dt_stmap_fd;	/* file descriptor for the 'state' BPF map */
 	int dt_aggmap_fd;	/* file descriptor for the 'aggs' BPF map */
 	int dt_genmap_fd;	/* file descriptor for the 'agggen' BPF map */
+	int dt_cpumap_fd;	/* file descriptor for the 'cpuinfo' BPF map */
 	dtrace_handle_err_f *dt_errhdlr; /* error handler, if any */
 	void *dt_errarg;	/* error handler argument */
 	dtrace_handle_drop_f *dt_drophdlr; /* drop handler, if any */
@@ -390,7 +396,10 @@ struct dtrace_hdl {
 	dtrace_handle_setopt_f *dt_setopthdlr; /* setopt handler, if any */
 	void *dt_setoptarg;	/* setopt handler argument */
 	dtrace_status_t dt_status[2]; /* status cache */
+	dt_percpu_drops_t *dt_drops; /* per-CPU drop counters cache */
+	uint64_t dt_specdrops;	/* consumer-side spec drops counter */
 	int dt_statusgen;	/* current status generation */
+	hrtime_t dt_laststatus;	/* last status */
 	hrtime_t dt_lastswitch;	/* last switch of buffer data */
 	hrtime_t dt_lastagg;	/* last snapshot of aggregation data */
 	dt_list_t dt_spec_bufs_draining; /* List of spec bufs being drained */
@@ -696,6 +705,9 @@ extern int dt_set_errno(dtrace_hdl_t *, int);
 extern void dt_set_errmsg(dtrace_hdl_t *, const char *, const char *,
     const char *, int, const char *, va_list);
 
+extern void dt_get_status(dtrace_hdl_t *dtp);
+extern int dt_check_cpudrops(dtrace_hdl_t *dtp, processorid_t cpu,
+			     dtrace_dropkind_t what);
 extern int dt_ioctl(dtrace_hdl_t *, unsigned long int, void *);
 extern int dt_cpu_status(dtrace_hdl_t *, int);
 extern long dt_sysconf(dtrace_hdl_t *, int);
diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
index 4818f61d..52e23fe5 100644
--- a/libdtrace/dt_open.c
+++ b/libdtrace/dt_open.c
@@ -802,6 +802,7 @@ dt_vopen(int version, int flags, int *errp,
 	/*
 	 * Set the default data rates.
 	 */
+	dtp->dt_options[DTRACEOPT_STATUSRATE] = NANOSEC;	/* 1s */
 	dtp->dt_options[DTRACEOPT_SWITCHRATE] = 0;
 	dtp->dt_options[DTRACEOPT_AGGRATE] = 0;
 
@@ -827,6 +828,11 @@ dt_vopen(int version, int flags, int *errp,
 		  dtp->dt_conf.num_online_cpus, dtp->dt_conf.num_possible_cpus,
 		  dtp->dt_conf.max_cpuid);
 
+	dtp->dt_drops = calloc((dtp->dt_conf.max_cpuid + 1),
+			       sizeof(dt_percpu_drops_t));
+	if (dtp->dt_drops == NULL)
+		return set_open_errno(dtp, errp, EDT_NOMEM);
+
 	if (flags & DTRACE_O_LP64)
 		dtp->dt_conf.dtc_ctfmodel = CTF_MODEL_LP64;
 	else if (flags & DTRACE_O_ILP32)
@@ -1296,6 +1302,7 @@ dtrace_close(dtrace_hdl_t *dtp)
 	free(dtp->dt_sprintf_buf);
 	pthread_mutex_destroy(&dtp->dt_sprintf_lock);
 
+	free(dtp->dt_drops);
 	free(dtp->dt_module_path);
 	free(dtp);
 
diff --git a/libdtrace/dt_state.h b/libdtrace/dt_state.h
index 9a82f994..010ffb61 100644
--- a/libdtrace/dt_state.h
+++ b/libdtrace/dt_state.h
@@ -2,7 +2,7 @@
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  *
- * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef _DT_STATE_H
@@ -21,6 +21,10 @@ typedef enum dt_state_elem {
 	DT_STATE_ACTIVITY = 0,		/* activity state of the session */
 	DT_STATE_BEGANON,		/* cpu BEGIN probe executed on */
 	DT_STATE_ENDEDON,		/* cpu END probe executed on */
+	DT_STATE_SPEC_DROPS,		/* speculation buffer drops */
+	DT_STATE_SPEC_BUSY,		/* all speculation buffers are busy */
+	DT_STATE_SPEC_UNAVAIL,		/* no available speculation buffer */
+	DT_STATE_DYNVAR_DROPS,		/* dynamic variable drops */
 	DT_STATE_NUM_ELEMS
 } dt_state_elem_t;
 
diff --git a/libdtrace/dt_work.c b/libdtrace/dt_work.c
index 179dbd51..d28e5e5a 100644
--- a/libdtrace/dt_work.c
+++ b/libdtrace/dt_work.c
@@ -9,6 +9,7 @@
 #include <dt_peb.h>
 #include <dt_probe.h>
 #include <dt_bpf.h>
+#include <dt_bpf_maps.h>
 #include <dt_state.h>
 #include <stddef.h>
 #include <errno.h>
@@ -30,15 +31,96 @@ END_probe(void)
 {
 }
 
+int
+dt_check_cpudrops(dtrace_hdl_t *dtp, processorid_t cpu, dtrace_dropkind_t what)
+{
+	dt_bpf_cpuinfo_t	*ci;
+	uint32_t		cikey = 0;
+	uint64_t		cnt;
+	int			rval = 0;
+
+	assert(what == DTRACEDROP_PRINCIPAL || what == DTRACEDROP_AGGREGATION);
+
+	ci = dt_calloc(dtp, dtp->dt_conf.num_possible_cpus,
+		       sizeof(dt_bpf_cpuinfo_t));
+	if (ci == NULL)
+		return dt_set_errno(dtp, EDT_NOMEM);
+
+	if (dt_bpf_map_lookup(dtp->dt_cpumap_fd, &cikey, ci) == -1) {
+		rval = dt_set_errno(dtp, EDT_BPF);
+		goto fail;
+	}
+
+	if (what == DTRACEDROP_PRINCIPAL) {
+		cnt = ci[cpu].buf_drops - dtp->dt_drops[cpu].buf;
+		dtp->dt_drops[cpu].buf = ci[cpu].buf_drops;
+	} else {
+		cnt = ci[cpu].agg_drops - dtp->dt_drops[cpu].agg;
+		dtp->dt_drops[cpu].agg = ci[cpu].agg_drops;
+	}
+
+	rval = dt_handle_cpudrop(dtp, cpu, what, cnt);
+
+fail:
+	dt_free(dtp, ci);
+	return rval;
+}
+
+static void
+dt_add_local_status(dtrace_hdl_t *dtp)
+{
+	/*
+	 * We work on the most recently retrieved status, which is
+	 * (dt_statusgen ^ 1) because the dt_get_status() function moves
+	 * dt_statusgen after data retrieval *and* we get called after that
+	 * data retrieval.
+	 */
+	dtrace_status_t	*st = &dtp->dt_status[dtp->dt_statusgen ^ 1];
+
+	st->dtst_specdrops += dtp->dt_specdrops;
+}
+
+void
+dt_get_status(dtrace_hdl_t *dtp)
+{
+	dtrace_status_t	*st = &dtp->dt_status[dtp->dt_statusgen];
+
+	st->dtst_specdrops = dt_state_get(dtp, DT_STATE_SPEC_DROPS);
+	st->dtst_specdrops_busy = dt_state_get(dtp, DT_STATE_SPEC_BUSY);
+	st->dtst_specdrops_unavail = dt_state_get(dtp, DT_STATE_SPEC_UNAVAIL);
+	st->dtst_dyndrops = dt_state_get(dtp, DT_STATE_DYNVAR_DROPS);
+	dtp->dt_statusgen ^= 1;
+
+	dt_add_local_status(dtp);
+}
+
 int
 dtrace_status(dtrace_hdl_t *dtp)
 {
+	dtrace_optval_t	interval = dtp->dt_options[DTRACEOPT_STATUSRATE];
+	hrtime_t	now = gethrtime();
+	int		gen;
+
 	if (!dtp->dt_active)
 		return DTRACE_STATUS_NONE;
 
 	if (dtp->dt_stopped)
 		return DTRACE_STATUS_STOPPED;
 
+	if (dtp->dt_laststatus != 0) {
+		if (now - dtp->dt_laststatus < interval)
+			return DTRACE_STATUS_NONE;
+
+		dtp->dt_laststatus += interval;
+	} else
+		dtp->dt_laststatus = now;
+
+	dt_get_status(dtp);
+	gen = dtp->dt_statusgen;
+	if (dt_handle_status(dtp, &dtp->dt_status[gen],
+			     &dtp->dt_status[gen ^ 1]) == -1)
+                return DTRACE_STATUS_ERROR;
+
 	if (dt_state_get_activity(dtp) == DT_ACTIVITY_DRAINING) {
 		if (!dtp->dt_stopped)
 			dtrace_stop(dtp);
@@ -141,8 +223,6 @@ dtrace_go(dtrace_hdl_t *dtp, uint_t cflags)
 int
 dtrace_stop(dtrace_hdl_t *dtp)
 {
-	int		gen = dtp->dt_statusgen;
-
 	if (dtp->dt_stopped)
 		return 0;
 
@@ -157,10 +237,6 @@ dtrace_stop(dtrace_hdl_t *dtp)
 	dtp->dt_stopped = 1;
 	dtp->dt_endedon = dt_state_get_endedon(dtp);
 
-	if (dt_handle_status(dtp, &dtp->dt_status[gen ^ 1],
-	    &dtp->dt_status[gen]) == -1)
-		return -1;
-
 	return 0;
 }
 
@@ -169,6 +245,7 @@ dtrace_work(dtrace_hdl_t *dtp, FILE *fp, dtrace_consume_probe_f *pfunc,
 	    dtrace_consume_rec_f *rfunc, void *arg)
 {
 	dtrace_workstatus_t	rval;
+	int			gen;
 
 	switch (dtrace_status(dtp)) {
 	case DTRACE_STATUS_EXITED:
@@ -189,5 +266,25 @@ dtrace_work(dtrace_hdl_t *dtp, FILE *fp, dtrace_consume_probe_f *pfunc,
 	    DTRACE_WORKSTATUS_ERROR)
 		return DTRACE_WORKSTATUS_ERROR;
 
+	/*
+	 * If we are stopped, we use dt_get_status() to get any potential
+	 * pending speculation drops because we want to ensure that old and new
+	 * counts for other drops are identical (lest they be reported more
+	 * than once).
+	 *
+	 * If we are not stopped, we use dt_add_local_status() to adjust the
+	 * current drop counters without retrieving producer counts (since they
+	 * might have changed and we do not want to report them yet).
+	 */
+	if (!dtp->dt_stopped) {
+		dt_add_local_status(dtp);
+		gen = dtp->dt_statusgen ^ 1;
+	} else {
+		dt_get_status(dtp);
+		gen = dtp->dt_statusgen;
+	}
+
+	dt_handle_status(dtp, &dtp->dt_status[gen], &dtp->dt_status[gen ^ 1]);
+
 	return rval;
 }
diff --git a/libdtrace/dtrace.h b/libdtrace/dtrace.h
index 51c8e081..0568355c 100644
--- a/libdtrace/dtrace.h
+++ b/libdtrace/dtrace.h
@@ -199,6 +199,7 @@ typedef int dtrace_consume_rec_f(const dtrace_probedata_t *data,
 extern int dtrace_consume(dtrace_hdl_t *dtp, FILE *fp,
     dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg);
 
+#define	DTRACE_STATUS_ERROR	-1	/* error occured getting status */
 #define	DTRACE_STATUS_NONE	0	/* no status; not yet time */
 #define	DTRACE_STATUS_OKAY	1	/* status okay */
 #define	DTRACE_STATUS_EXITED	2	/* exit() was called; tracing stopped */
diff --git a/test/unittest/assocs/tst.store_zero_deletes.d b/test/unittest/assocs/tst.store_zero_deletes.d
index 2db9c6c0..739d4e5d 100644
--- a/test/unittest/assocs/tst.store_zero_deletes.d
+++ b/test/unittest/assocs/tst.store_zero_deletes.d
@@ -1,6 +1,6 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -12,8 +12,13 @@
  * SECTION: Variables/Thread-Local Variables
  */
 
+/*
+ * We use a dynvarsize that guarantees that we can only store 4 values without
+ * causing a drop.  Since we use an associative array indexed by an int and
+ * storing an int value, each element will use 20 bytes.
+ */
+#pragma D option dynvarsize=80
 #pragma D option quiet
-#pragma D option dynvarsize=15
 
 BEGIN
 {
diff --git a/test/unittest/drops/drp.DTRACEDROP_AGGREGATION.d b/test/unittest/drops/drp.DTRACEDROP_AGGREGATION.d
index f37a7077..d5b6bf99 100644
--- a/test/unittest/drops/drp.DTRACEDROP_AGGREGATION.d
+++ b/test/unittest/drops/drp.DTRACEDROP_AGGREGATION.d
@@ -1,14 +1,13 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2023, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
-/* @@xfail: dtv2 */
 
 #pragma D option quiet
-#pragma D option strsize=1024
-#pragma D option aggsize=512
+#pragma D option strsize=512
+#pragma D option aggsize=1024
 
 BEGIN
 {
diff --git a/test/unittest/drops/drp.DTRACEDROP_AGGREGATION.r b/test/unittest/drops/drp.DTRACEDROP_AGGREGATION.r
index 99a09470..cdf3c8b3 100644
--- a/test/unittest/drops/drp.DTRACEDROP_AGGREGATION.r
+++ b/test/unittest/drops/drp.DTRACEDROP_AGGREGATION.r
@@ -1,3 +1,4 @@
 
+  Harding                                                           1
 -- @@stderr --
-dtrace: [DTRACEDROP_AGGREGATION] 4 aggregation drops on CPU #
+dtrace: [DTRACEDROP_AGGREGATION] 3 aggregation drops on CPU #
diff --git a/test/unittest/drops/drp.DTRACEDROP_DYNAMIC.d b/test/unittest/drops/drp.DTRACEDROP_DYNAMIC.d
index a009b136..7b252f6e 100644
--- a/test/unittest/drops/drp.DTRACEDROP_DYNAMIC.d
+++ b/test/unittest/drops/drp.DTRACEDROP_DYNAMIC.d
@@ -1,13 +1,12 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2023, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
-/* @@xfail: dtv2 */
 
-#pragma D option strsize=1024
-#pragma D option dynvarsize=512
+#pragma D option strsize=512
+#pragma D option dynvarsize=1024
 
 BEGIN
 {
diff --git a/test/unittest/drops/drp.DTRACEDROP_DYNAMIC.r b/test/unittest/drops/drp.DTRACEDROP_DYNAMIC.r
index 8e3fd30e..982bf557 100644
--- a/test/unittest/drops/drp.DTRACEDROP_DYNAMIC.r
+++ b/test/unittest/drops/drp.DTRACEDROP_DYNAMIC.r
@@ -3,4 +3,4 @@
 
 -- @@stderr --
 dtrace: script 'test/unittest/drops/drp.DTRACEDROP_DYNAMIC.d' matched 2 probes
-dtrace: [DTRACEDROP_DYNAMIC] 1 dynamic variable drop
+dtrace: [DTRACEDROP_DYNAMIC] 3 dynamic variable drops
diff --git a/test/unittest/drops/drp.DTRACEDROP_PRINCIPAL.d b/test/unittest/drops/drp.DTRACEDROP_PRINCIPAL.d
index c88164b4..e385292b 100644
--- a/test/unittest/drops/drp.DTRACEDROP_PRINCIPAL.d
+++ b/test/unittest/drops/drp.DTRACEDROP_PRINCIPAL.d
@@ -1,18 +1,21 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2023, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
-/* @@xfail: dtv2 */
 
-#pragma D option strsize=1024
-#pragma D option bufsize=512
+#pragma D option strsize=1k
+#pragma D option bufsize=3k
 
 BEGIN
 {
 	trace("Harding");
 	trace("Hoover");
+}
+
+BEGIN
+{
 	trace("Nixon");
 	trace("Bush");
 }
diff --git a/test/unittest/drops/drp.DTRACEDROP_PRINCIPAL.end.d b/test/unittest/drops/drp.DTRACEDROP_PRINCIPAL.end.d
index 23ba18ab..a57ff6e4 100644
--- a/test/unittest/drops/drp.DTRACEDROP_PRINCIPAL.end.d
+++ b/test/unittest/drops/drp.DTRACEDROP_PRINCIPAL.end.d
@@ -1,13 +1,12 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2023, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
-/* @@xfail: dtv2 */
 
-#pragma D option strsize=1024
-#pragma D option bufsize=512
+#pragma D option strsize=1k
+#pragma D option bufsize=3k
 
 BEGIN
 {
@@ -18,6 +17,10 @@ END
 {
 	trace("Harding");
 	trace("Hoover");
+}
+
+END
+{
 	trace("Nixon");
 	trace("Bush");
 }
diff --git a/test/unittest/drops/drp.DTRACEDROP_PRINCIPAL.end.r b/test/unittest/drops/drp.DTRACEDROP_PRINCIPAL.end.r
index 25877767..ea21c890 100644
--- a/test/unittest/drops/drp.DTRACEDROP_PRINCIPAL.end.r
+++ b/test/unittest/drops/drp.DTRACEDROP_PRINCIPAL.end.r
@@ -1,6 +1,8 @@
                    FUNCTION:NAME
                           :BEGIN 
+                            :END   Harding                            Hoover                           
 
 -- @@stderr --
-dtrace: script 'test/unittest/drops/drp.DTRACEDROP_PRINCIPAL.end.d' matched 2 probes
+dtrace: script 'test/unittest/drops/drp.DTRACEDROP_PRINCIPAL.end.d' matched 3 probes
+bufsize increased to 4096
 dtrace: [DTRACEDROP_PRINCIPAL] 1 drop on CPU #
diff --git a/test/unittest/drops/drp.DTRACEDROP_PRINCIPAL.r b/test/unittest/drops/drp.DTRACEDROP_PRINCIPAL.r
index a8f4a208..4f239371 100644
--- a/test/unittest/drops/drp.DTRACEDROP_PRINCIPAL.r
+++ b/test/unittest/drops/drp.DTRACEDROP_PRINCIPAL.r
@@ -1,6 +1,8 @@
                    FUNCTION:NAME
+                          :BEGIN   Harding                            Hoover                           
                           :BEGIN 
 
 -- @@stderr --
-dtrace: script 'test/unittest/drops/drp.DTRACEDROP_PRINCIPAL.d' matched 2 probes
+dtrace: script 'test/unittest/drops/drp.DTRACEDROP_PRINCIPAL.d' matched 3 probes
+bufsize increased to 4096
 dtrace: [DTRACEDROP_PRINCIPAL] 1 drop on CPU #
diff --git a/test/unittest/speculation/tst.NoSpecBuffer.r b/test/unittest/speculation/tst.NoSpecBuffer.r
index 88d21d7c..8d6f9d2c 100644
--- a/test/unittest/speculation/tst.NoSpecBuffer.r
+++ b/test/unittest/speculation/tst.NoSpecBuffer.r
@@ -1,3 +1,5 @@
 Speculative buffer ID: 1
 Speculative buffer ID: 0
 i: 2	self->spec: 0
+-- @@stderr --
+dtrace: 1 failed speculation (no speculative buffer available)
diff --git a/test/unittest/speculation/tst.TwoSpecBuffers.r b/test/unittest/speculation/tst.TwoSpecBuffers.r
index 0c6bd3d4..c5b354f3 100644
--- a/test/unittest/speculation/tst.TwoSpecBuffers.r
+++ b/test/unittest/speculation/tst.TwoSpecBuffers.r
@@ -2,3 +2,5 @@ Speculation ID: 1
 Speculation ID: 2
 Speculation ID: 0
 Successfully got two speculative buffers
+-- @@stderr --
+dtrace: 1 failed speculation (no speculative buffer available)
-- 
2.40.1




More information about the DTrace-devel mailing list