[DTrace-devel] [PATCH 7/7] io: Use private TLS variable for submit_bio_wait bio pointer

eugene.loh at oracle.com eugene.loh at oracle.com
Tue Aug 22 21:51:14 UTC 2023


From: Eugene Loh <eugene.loh at oracle.com>

One of the problems with the wait-done probe is that it needs
fbt::submit_bio_wait:return and needs the (struct bio *) that
is available at submit_bio_wait:entry.  Further, multiple such
calls may be pending concurrently on a CPU.  So, we use TLS to
make sure we pick up the right bio pointer.

This means that the io-provider trampoline generation has to
access the io-private TLS variable.

Ideally, it would insert this private variable into the TLS hash
table.  Unfortunately, io trampoline generation takes place after
the BPF maps, including the TLS table, have been created.

For now, just stick the private-variable insertion into some
random spot in the parser.  This is extremely crude, but for
now it gets the job done.

Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
 libdtrace/dt_parser.c  |   3 +-
 libdtrace/dt_prov_io.c | 119 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 121 insertions(+), 1 deletion(-)

diff --git a/libdtrace/dt_parser.c b/libdtrace/dt_parser.c
index 870faaa9..b13941d9 100644
--- a/libdtrace/dt_parser.c
+++ b/libdtrace/dt_parser.c
@@ -4782,6 +4782,7 @@ static dt_node_t *(*dt_cook_funcs[])(dt_node_t *, uint_t) = {
 	dt_cook_none		/* DT_NODE_TRAMPOLINE */
 };
 
+uint_t get_id_TLS(dtrace_hdl_t *dtp);           /* FIXME: Ugly hack.  See dt_prov_io.c. */
 /*
  * Recursively cook the parse tree starting at the specified node.  The idflags
  * parameter is used to indicate the type of reference (r/w) and is applied to
@@ -4791,7 +4792,7 @@ dt_node_t *
 dt_node_cook(dt_node_t *dnp, uint_t idflags)
 {
 	int oldlineno = yylineno;
-
+get_id_TLS(yypcb->pcb_hdl);           /* FIXME: Ugly hack.  See dt_prov_io.c. */
 	yylineno = dnp->dn_line;
 
 	dnp = dt_cook_funcs[dnp->dn_kind](dnp, idflags);
diff --git a/libdtrace/dt_prov_io.c b/libdtrace/dt_prov_io.c
index ebca3520..bae289ba 100644
--- a/libdtrace/dt_prov_io.c
+++ b/libdtrace/dt_prov_io.c
@@ -440,6 +440,58 @@ static void io_xfs_args(dtrace_hdl_t *dtp, dt_irlist_t *dlp, uint_t exitlbl, con
 	emit(dlp, BPF_STORE(BPF_width(siz), BPF_REG_6, off, BPF_REG_0));
 }
 
+/*
+ * The io provider uses a special, private TLS variable.  Here, we look
+ * up its id, or insert such a variable if it does not already exist.
+ *
+ * This code mimics insertion in either:
+ *   - dt_node_decl()
+ *   - dt_xcook_ident(...)
+ */
+#if 0
+static uint_t get_id_TLS(dtrace_hdl_t *dtp)
+#else
+/*
+ * FIXME:
+ * It is natural to create the TLS variable while io trampoline code is generated.
+ * That is when we know that the variable is needed.  Unfortunately, this is
+ * during dt_bpf_load_progs().  Meanwhile, the BPF map is created (or not) during
+ * dt_bpf_gmap_create(), which comes earlier.  So io-provider trampoline code
+ * generation is too late to impact the BPF map.
+ *
+ * Further, set_storage() gets dtp from yypcb, so it has to be called at special
+ * times.
+ *
+ * For now, a get_id_TLS() call is issued from the parser just to get the job done,
+ * however awkwardly.
+ */
+uint_t get_id_TLS(dtrace_hdl_t *dtp)
+#endif
+{
+	dt_idhash_t *dhp = dtp->dt_tls;
+	const char name[] = "private TLS variable for io provider";
+	dt_ident_t *idp = dt_idhash_lookup(dhp, name);
+
+	if (idp) {
+		/* If it already exists, use its di_id. */
+		return idp->di_id;
+	} else {
+		/* Otherwise, insert it.  Its flags and attributes hardly matter. */
+		uint_t id = 0;
+
+		if (dt_idhash_nextid(dhp, &id) == -1)
+			xyerror(D_ID_OFLOW, "cannot create %s: limit on number of %s variables exceeded\n", name, dt_idhash_name(dhp));
+
+		idp = dt_idhash_insert(dhp, name, 0, DT_IDFLG_TLS, id, _dtrace_defattr, 0, NULL, NULL, 0);
+		if (idp == NULL)
+			longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
+
+		dt_ident_set_storage(idp, 8, 8);
+
+		return id;
+	}
+}
+
 /*
  * Generate a BPF trampoline for a SDT probe.
  *
@@ -542,6 +594,73 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
 			/* Save %r0 as the first arg. */
 			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
 		}
+	} else if (strcmp(prp->desc->prb, "wait-done") == 0 && strcmp(uprp->desc->fun, "submit_bio_wait") == 0) {
+		/*
+		 * For wait-done, we need to instrument submit_bio_wait(struct bio *).
+		 * Upon entry, we store the bio pointer into a special TLS location.
+		 * Upon return, we retrieve the pointer (and store a 0 back to the
+		 * TLS variable).
+		 */
+		uint_t varid = get_id_TLS(dtp) - DIF_VAR_OTHER_UBASE;
+		dt_ident_t *fnp = dt_dlib_get_func(dtp, "dt_get_tvar");
+		dt_ident_t *zero_off = dt_dlib_get_var(dtp, "ZERO_OFF");
+
+		assert(fnp);
+		assert(zero_off);
+
+		/* If this is the return probe, retrieve the bio pointer from TLS. */
+		if (strcmp(uprp->desc->prb, "return") == 0) {
+			uint_t Lnull = dt_irlist_label(dlp);
+
+			/* Call dt_get_tvar() for our private io-provider TLS variable. */
+			emit(dlp,  BPF_MOV_IMM(BPF_REG_1, varid));
+			emit(dlp,  BPF_MOV_IMM(BPF_REG_2, 0));
+			emit(dlp,  BPF_MOV_IMM(BPF_REG_3, 0));
+			emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_9, DCTX_STRTAB));
+			emite(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -1), zero_off);
+			emite(dlp, BPF_CALL_FUNC(fnp->di_id), fnp);
+
+			/* If we got a nonzero address, load from it. */
+			emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, Lnull));
+			emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_0, 0));
+			emitl(dlp, Lnull,
+				   BPF_NOP());
+
+			/* Store the retrieved value (bio pointer) as arg0. */
+			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
+		}
+
+		/*
+		 * Store (update) the TLS copy of the bio pointer:
+		 *   - return probe: store 0 (clear TLS, freeing storage)
+		 *   - entry probe: store arg0 (for the return probe to use later)
+		 */
+		emit(dlp,  BPF_MOV_IMM(BPF_REG_1, varid));
+		emit(dlp,  BPF_MOV_IMM(BPF_REG_2, 1));
+		if (strcmp(uprp->desc->prb, "return") == 0)
+			emit(dlp,  BPF_MOV_IMM(BPF_REG_3, 0));
+		else
+			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
+		emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_9, DCTX_STRTAB));
+		emite(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -1), zero_off);
+		emite(dlp, BPF_CALL_FUNC(fnp->di_id), fnp);
+
+		/*
+		 * At this point, the entry probe only has the TLS variable
+		 * address.  It has yet actually to store arg0 there,
+		 * provided the address is nonzero.
+		 */
+		if (strcmp(uprp->desc->prb, "entry") == 0) {
+			uint_t Lnull = dt_irlist_label(dlp);
+
+			emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, Lnull));
+			emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
+			emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_0, 0, BPF_REG_1));
+			emitl(dlp, Lnull,
+				   BPF_NOP());
+
+			return 1;
+		}
 	} else {
 		get_cpuinfo(dtp, dlp, exitlbl);
 
-- 
2.18.4




More information about the DTrace-devel mailing list