[DTrace-devel] [PATCH v3 01/08] Ensure that BEGIN is the first probe and END the last one

Kris Van Hees kris.van.hees at oracle.com
Wed Sep 16 14:51:41 PDT 2020


DTrace semantics require that the BEGIN probe is the first one to be
recorded in the trace buffers, and that the END probe is the last one
to be recorded in the trace buffers.  This is implemented using the
activity member of the DTrace state that is shared using a BPF map.

The default activity state is INACTIVE which means that no probe will
processed except for the BEGIN probe.  The BEGIN probe will only be
processed when the state is INACTIVE.

Once the BEGIN probe processing has completed, the activity state is
advanced to ACTIVE.  All probes other than BEGIN require the activity
state to be ACTIVE in order for them to be processed.

When the END probe has been processed, the activity state is advanced
to DRAINING.

The BEGIN and END probes are therefore always added to the list of
enablings, even if no clauses are specified for them.  In other words,
the BEGIN and END probes will always be enabled to ensure that the
activity state is updated correctly.

Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
---
 libdtrace/dt_bpf.c                    |   1 -
 libdtrace/dt_cg.c                     | 124 +++++++++++++++++++++++---
 libdtrace/dt_impl.h                   |   8 +-
 libdtrace/dt_prov_dtrace.c            |  43 ++++++++-
 libdtrace/dt_state.h                  |  10 +++
 test/unittest/begin/tst.begin-first.d |  24 +++++
 6 files changed, 189 insertions(+), 21 deletions(-)
 create mode 100644 test/unittest/begin/tst.begin-first.d

diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
index a381a86e..7adcf70d 100644
--- a/libdtrace/dt_bpf.c
+++ b/libdtrace/dt_bpf.c
@@ -14,7 +14,6 @@
 #include <dtrace.h>
 #include <dt_impl.h>
 #include <dt_probe.h>
-#include <dt_state.h>
 #include <dt_bpf.h>
 #include <port.h>
 
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index 2dcf8023..d798752f 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -36,13 +36,14 @@ static void dt_cg_node(dt_node_t *, dt_irlist_t *, dt_regset_t *);
  * The trampoline prologue will populate the dt_dctx_t structure on the stack.
  *
  * The caller should NOT depend on any register values that exist at the end of
- * the trampiline prologue.
+ * the trampoline prologue.
  */
 void
-dt_cg_tramp_prologue(dt_pcb_t *pcb, uint_t lbl_exit)
+dt_cg_tramp_prologue_act(dt_pcb_t *pcb, uint_t lbl_exit, dt_activity_t act)
 {
 	dt_irlist_t	*dlp = &pcb->pcb_ir;
 	dt_ident_t	*mem = dt_dlib_get_map(pcb->pcb_hdl, "mem");
+	dt_ident_t	*state = dt_dlib_get_map(pcb->pcb_hdl, "state");
 	struct bpf_insn	instr;
 
 	assert(mem != NULL);
@@ -63,7 +64,39 @@ dt_cg_tramp_prologue(dt_pcb_t *pcb, uint_t lbl_exit)
 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
 
 	/*
-	 *	key = 0;                // stw [%fp + DCTX_FP(DCTX_MST)], 0
+	 *	key = DT_STATE_ACTIVITY;// stw [%fp + DCTX_FP(DCTX_MST)],
+	 *				//		DT_STATE_ACTIVITY
+	 *	rc = bpf_map_lookup_elem(&state, &key);
+	 *				// lddw %r1, &state
+	 *				// mov %r2, %fp
+	 *				// add %r2, DCTX_FP(DCTX_MST)
+	 *				// call bpf_map_lookup_elem
+	 *				//     (%r1 ... %r5 clobbered)
+	 *				//     (%r0 = map value)
+	 *	if (rc == 0)		// jeq %r0, 0, lbl_exit
+	 *		goto exit;
+	 *	if (*rc == act)		// ldw %r0, [%r0 + 0]
+	 *		goto exit;	// jne %r0, act, lbl_exit
+	 */
+	instr = BPF_STORE_IMM(BPF_W, BPF_REG_FP, DCTX_FP(DCTX_MST),
+			      DT_STATE_ACTIVITY);
+	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
+	dt_cg_xsetx(dlp, state, DT_LBL_NONE, BPF_REG_1, state->di_id);
+	instr = BPF_MOV_REG(BPF_REG_2, BPF_REG_FP);
+	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
+	instr = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DCTX_FP(DCTX_MST));
+	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
+	instr = BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem);
+	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
+	instr = BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_exit);
+	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
+	instr = BPF_LOAD(BPF_W, BPF_REG_0, BPF_REG_0, 0);
+	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
+	instr = BPF_BRANCH_IMM(BPF_JNE, BPF_REG_0, act, lbl_exit);
+	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
+
+	/*
+	 *	key = 0;		// stw [%fp + DCTX_FP(DCTX_MST)], 0
 	 *	rc = bpf_map_lookup_elem(&mem, &key);
 	 *				// lddw %r1, &mem
 	 *				// mov %r2, %fp
@@ -71,10 +104,10 @@ dt_cg_tramp_prologue(dt_pcb_t *pcb, uint_t lbl_exit)
 	 *				// call bpf_map_lookup_elem
 	 *				//     (%r1 ... %r5 clobbered)
 	 *				//     (%r0 = 'mem' BPF map value)
-	 *	if (rc == 0)            // jeq %r0, 0, lbl_exit
+	 *	if (rc == 0)		// jeq %r0, 0, lbl_exit
 	 *		goto exit;
 	 *				//     (%r0 = pointer to dt_mstate_t)
-	 *	dctx.mst = rc;          // stdw [%fp + DCTX_FP(DCTX_MST)], %r0
+	 *	dctx.mst = rc;		// stdw [%fp + DCTX_FP(DCTX_MST)], %r0
 	 */
 	instr = BPF_STORE_IMM(BPF_W, BPF_REG_FP, DCTX_FP(DCTX_MST), 0);
 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
@@ -91,14 +124,14 @@ dt_cg_tramp_prologue(dt_pcb_t *pcb, uint_t lbl_exit)
 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
 
 	/*
-	 *      buf = rc + roundup(sizeof(dt_mstate_t), 8);
+	 *	buf = rc + roundup(sizeof(dt_mstate_t), 8);
 	 *				// add %r0, roundup(
 	 *						sizeof(dt_mstate_t), 8)
-	 *      *((uint64_t *)&buf[0]) = 0;
+	 *	*((uint64_t *)&buf[0]) = 0;
 	 *				// stdw [%r0 + 0], 0
-	 *      buf += 8;		// add %r0, 8
+	 *	buf += 8;		// add %r0, 8
 	 *				//     (%r0 = pointer to buffer space)
-	 *      dctx.buf = buf;		// stdw [%fp + DCTX_FP(DCTX_BUF)], %r0
+	 *	dctx.buf = buf;		// stdw [%fp + DCTX_FP(DCTX_BUF)], %r0
 	 */
 	instr = BPF_ALU64_IMM(BPF_ADD, BPF_REG_0,
 			      roundup(sizeof(dt_mstate_t), 8));
@@ -111,6 +144,12 @@ dt_cg_tramp_prologue(dt_pcb_t *pcb, uint_t lbl_exit)
 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
 }
 
+void
+dt_cg_tramp_prologue(dt_pcb_t *pcb, uint_t lbl_exit)
+{
+	dt_cg_tramp_prologue_act(pcb, lbl_exit, DT_ACTIVITY_ACTIVE);
+}
+
 static int
 dt_cg_call_clause(dtrace_hdl_t *dtp, dt_ident_t *idp, dt_irlist_t *dlp)
 {
@@ -132,17 +171,23 @@ dt_cg_call_clause(dtrace_hdl_t *dtp, dt_ident_t *idp, dt_irlist_t *dlp)
 	return 0;
 }
 
-void
-dt_cg_tramp_epilogue(dt_pcb_t *pcb, uint_t lbl_exit)
+static void
+dt_cg_tramp_call_clauses(dt_pcb_t *pcb)
 {
 	dt_irlist_t	*dlp = &pcb->pcb_ir;
-	struct bpf_insn	instr;
 
 	/*
 	 *	dt_clause(dctx);	//     (repeated for each clause)
 	 */
 	dt_probe_clause_iter(pcb->pcb_hdl, pcb->pcb_probe,
 			     (dt_clause_f *)dt_cg_call_clause, dlp);
+}
+
+static void
+dt_cg_tramp_return(dt_pcb_t *pcb, uint_t lbl_exit)
+{
+	dt_irlist_t	*dlp = &pcb->pcb_ir;
+	struct bpf_insn	instr;
 
 	/*
 	 * exit:
@@ -156,6 +201,57 @@ dt_cg_tramp_epilogue(dt_pcb_t *pcb, uint_t lbl_exit)
 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
 }
 
+void
+dt_cg_tramp_epilogue(dt_pcb_t *pcb, uint_t lbl_exit)
+{
+	dt_cg_tramp_call_clauses(pcb);
+	dt_cg_tramp_return(pcb, lbl_exit);
+}
+
+void
+dt_cg_tramp_epilogue_advance(dt_pcb_t *pcb, uint_t lbl_exit)
+{
+	dt_irlist_t	*dlp = &pcb->pcb_ir;
+	dt_ident_t	*state = dt_dlib_get_map(pcb->pcb_hdl, "state");
+	struct bpf_insn	instr;
+
+	dt_cg_tramp_call_clauses(pcb);
+
+	/*
+	 *	key = DT_STATE_ACTIVITY;// stw [%fp + DCTX_FP(DCTX_MST)],
+	 *				//		DT_STATE_ACTIVITY
+	 *	rc = bpf_map_lookup_elem(&state, &key);
+	 *				// lddw %r1, &state
+	 *				// mov %r2, %fp
+	 *				// add %r2, DCTX_FP(DCTX_MST)
+	 *				// call bpf_map_lookup_elem
+	 *				//     (%r1 ... %r5 clobbered)
+	 *				//     (%r0 = map value)
+	 *	if (rc == 0)		// jeq %r0, 0, lbl_exit
+	 *		goto exit;
+	 *	(*rc)++;		// mov %r1, 1
+	 *				// xadd [%r0 + 0], %r1
+	 */
+	instr = BPF_STORE_IMM(BPF_W, BPF_REG_FP, DCTX_FP(DCTX_MST),
+			      DT_STATE_ACTIVITY);
+	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
+	dt_cg_xsetx(dlp, state, DT_LBL_NONE, BPF_REG_1, state->di_id);
+	instr = BPF_MOV_REG(BPF_REG_2, BPF_REG_FP);
+	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
+	instr = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DCTX_FP(DCTX_MST));
+	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
+	instr = BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem);
+	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
+	instr = BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_exit);
+	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
+	instr = BPF_MOV_IMM(BPF_REG_1, 1);
+	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
+	instr = BPF_XADD_REG(BPF_W, BPF_REG_0, 0, BPF_REG_1);
+	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
+
+	dt_cg_tramp_return(pcb, lbl_exit);
+}
+
 /*
  * Generate the function prologue.
  *
@@ -485,7 +581,7 @@ dt_cg_act_clear(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 	}
 
 	aid = anp->dn_ident;
-        if (aid->di_gen == pcb->pcb_hdl->dt_gen &&
+	if (aid->di_gen == pcb->pcb_hdl->dt_gen &&
 	    !(aid->di_flags & DT_IDFLG_MOD)) {
 		dnerror(dnp, D_CLEAR_AGGBAD,
 			"undefined aggregation: @%s\n", aid->di_name);
@@ -541,7 +637,7 @@ dt_cg_act_denormalize(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind)
 	}
 
 	aid = anp->dn_ident;
-        if (aid->di_gen == pcb->pcb_hdl->dt_gen &&
+	if (aid->di_gen == pcb->pcb_hdl->dt_gen &&
 	    !(aid->di_flags & DT_IDFLG_MOD)) {
 		dnerror(dnp, D_NORMALIZE_AGGBAD,
 			"undefined aggregation: @%s\n", aid->di_name);
diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
index 60095a80..f062aa9b 100644
--- a/libdtrace/dt_impl.h
+++ b/libdtrace/dt_impl.h
@@ -50,6 +50,7 @@ extern "C" {
 #include <dt_pt_regs.h>
 #include <dt_printf.h>
 #include <dt_dctx.h>
+#include <dt_state.h>
 #include <dt_debug.h>
 #include <dt_version.h>
 
@@ -776,8 +777,11 @@ extern void dt_pragma(dt_node_t *);
 extern int dt_reduce(dtrace_hdl_t *, dt_version_t);
 extern void dt_cg(dt_pcb_t *, dt_node_t *);
 extern dt_irnode_t *dt_cg_node_alloc(uint_t, struct bpf_insn);
-extern void dt_cg_tramp_prologue(dt_pcb_t *dtp, uint_t lbl_exit);
-extern void dt_cg_tramp_epilogue(dt_pcb_t *dtp, uint_t lbl_exit);
+extern void dt_cg_tramp_prologue_act(dt_pcb_t *pcb, uint_t lbl_exit,
+				     dt_activity_t act);
+extern void dt_cg_tramp_prologue(dt_pcb_t *pcb, uint_t lbl_exit);
+extern void dt_cg_tramp_epilogue(dt_pcb_t *pcb, uint_t lbl_exit);
+extern void dt_cg_tramp_epilogue_advance(dt_pcb_t *pcb, uint_t lbl_exit);
 extern dtrace_difo_t *dt_as(dt_pcb_t *);
 extern void dt_dis_program(dtrace_hdl_t *dtp, dtrace_prog_t *pgp, FILE *fp);
 extern void dt_dis_difo(const dtrace_difo_t *dp, FILE *fp);
diff --git a/libdtrace/dt_prov_dtrace.c b/libdtrace/dt_prov_dtrace.c
index 76e60d87..94d9de10 100644
--- a/libdtrace/dt_prov_dtrace.c
+++ b/libdtrace/dt_prov_dtrace.c
@@ -34,16 +34,23 @@ static const dtrace_pattr_t	pattr = {
 static int populate(dtrace_hdl_t *dtp)
 {
 	dt_provider_t	*prv;
+	dt_probe_t	*prp;
 	int		n = 0;
 
 	prv = dt_provider_create(dtp, prvname, &dt_dtrace, &pattr);
 	if (prv == NULL)
 		return 0;
 
-	if (tp_probe_insert(dtp, prv, prvname, modname, funname, "BEGIN"))
+	prp = tp_probe_insert(dtp, prv, prvname, modname, funname, "BEGIN");
+	if (prp) {
 		n++;
-	if (tp_probe_insert(dtp, prv, prvname, modname, funname, "END"))
+		dt_list_append(&dtp->dt_enablings, prp);
+	}
+	prp = tp_probe_insert(dtp, prv, prvname, modname, funname, "END");
+	if (prp) {
 		n++;
+		dt_list_append(&dtp->dt_enablings, prp);
+	}
 	if (tp_probe_insert(dtp, prv, prvname, modname, funname, "ERROR"))
 		n++;
 
@@ -67,8 +74,33 @@ static void trampoline(dt_pcb_t *pcb)
 	dt_irlist_t	*dlp = &pcb->pcb_ir;
 	struct bpf_insn	instr;
 	uint_t		lbl_exit = dt_irlist_label(dlp);
+	dt_activity_t	act;
+	int		adv_act;
+
+	/*
+	 * The BEGIN probe should only run when the activity state is INACTIVE.
+	 * At the end of the trampoline (after executing any clauses), the
+	 * state must be advanced to the next state (INACTIVE -> ACTIVE).
+	 *
+	 * The END probe should only run when the activity state is ACTIVE.
+	 * At the end of the trampoline (after executing any clauses), the
+	 * state must be advanced to the next state (ACTIVE -> DRAINING).
+	 *
+	 * Any other probe requires the state tp be ACTIVE, and does not change
+	 * the state.
+	 */
+	if (strcmp(pcb->pcb_probe->desc->prb, "BEGIN") == 0) {
+		act = DT_ACTIVITY_INACTIVE;
+		adv_act = 1;
+	} else if (strcmp(pcb->pcb_probe->desc->prb, "END") == 0) {
+		act = DT_ACTIVITY_ACTIVE;
+		adv_act = 1;
+	} else {
+		act = DT_ACTIVITY_ACTIVE;
+		adv_act = 0;
+	}
 
-	dt_cg_tramp_prologue(pcb, lbl_exit);
+	dt_cg_tramp_prologue_act(pcb, lbl_exit, act);
 
 	/*
 	 * We cannot assume anything about the state of any registers so set up
@@ -154,7 +186,10 @@ static void trampoline(dt_pcb_t *pcb)
 		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
 	}
 
-	dt_cg_tramp_epilogue(pcb, lbl_exit);
+	if (adv_act)
+		dt_cg_tramp_epilogue_advance(pcb, lbl_exit);
+	else
+		dt_cg_tramp_epilogue(pcb, lbl_exit);
 }
 
 static char *uprobe_spec(dtrace_hdl_t *dtp, const char *prb)
diff --git a/libdtrace/dt_state.h b/libdtrace/dt_state.h
index 5c24d47d..219a00fc 100644
--- a/libdtrace/dt_state.h
+++ b/libdtrace/dt_state.h
@@ -24,4 +24,14 @@ typedef enum dt_state_elem {
 	DT_STATE_NUM_ELEMS
 } dt_state_elem_t;
 
+/*
+ * DTrace activity state
+ */
+typedef enum dt_activity {
+	DT_ACTIVITY_INACTIVE = 0,	/* tracing not started */
+	DT_ACTIVITY_ACTIVE,		/* tracing in progress */
+	DT_ACTIVITY_DRAINING,		/* tracing being stopped */
+	DT_ACTIVITY_STOPPED		/* tracing stopped */
+} dt_activity_t;
+
 #endif /* _DT_STATE_H */
diff --git a/test/unittest/begin/tst.begin-first.d b/test/unittest/begin/tst.begin-first.d
new file mode 100644
index 00000000..a24d07d9
--- /dev/null
+++ b/test/unittest/begin/tst.begin-first.d
@@ -0,0 +1,24 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ */
+
+/*
+ * ASSERTION: BEGIN probe first before all other probes.
+ *
+ * SECTION: dtrace Provider
+ */
+
+#pragma D option quiet
+
+BEGIN
+{
+	exit(0);
+}
+
+profile-200us
+{
+	exit(1);
+}
-- 
2.28.0




More information about the DTrace-devel mailing list