[DTrace-devel] [PATCH] io: adjust io provider for NFS tracepoint variants

Kris Van Hees kris.van.hees at oracle.com
Tue Jan 7 22:44:23 UTC 2025


Kernels prior to 5.6.0 pass 3 arguments (derived from the NFS hdr)
to the nfs_initiate_read raw tracepoint, whereas kernels as of 5.6.0
pass just the NFS hdr.

Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
---
 libdtrace/dt_prov_io.c | 127 +++++++++++++++++++++++++++++++++++------
 1 file changed, 108 insertions(+), 19 deletions(-)

diff --git a/libdtrace/dt_prov_io.c b/libdtrace/dt_prov_io.c
index 385dc792..45cb875a 100644
--- a/libdtrace/dt_prov_io.c
+++ b/libdtrace/dt_prov_io.c
@@ -63,17 +63,9 @@ static probe_dep_t	probes[] = {
 	{ "start",
 	  DTRACE_PROBESPEC_NAME,	"rawtp:block::block_bio_queue" },
 	{ "start",
-	  DTRACE_PROBESPEC_NAME,	"rawtp:nfs::nfs_initiate_read",
-	  DT_VERSION_NUMBER(5, 6, 0), },
+	  DTRACE_PROBESPEC_NAME,	"rawtp:nfs::nfs_initiate_read" },
 	{ "start",
-	  DTRACE_PROBESPEC_NAME,	"fbt:nfs:nfs_initiate_read:entry",
-	  0, DT_VERSION_NUMBER(5, 5, 19) },
-	{ "start",
-	  DTRACE_PROBESPEC_NAME,	"rawtp:nfs::nfs_initiate_write",
-	  DT_VERSION_NUMBER(5, 6, 0), },
-	{ "start",
-	  DTRACE_PROBESPEC_NAME,	"fbt:nfs:nfs_initiate_write:entry",
-	  0, DT_VERSION_NUMBER(5, 5, 19) },
+	  DTRACE_PROBESPEC_NAME,	"rawtp:nfs::nfs_initiate_write" },
 	{ NULL, }
 };
 
@@ -155,12 +147,109 @@ static void deref_r3(dt_irlist_t *dlp, uint_t exitlbl, int addend, int width,
 	emit(dlp, BPF_LOAD(width, reg, BPF_REG_FP, DT_TRAMP_SP_SLOT(0)));
 }
 
+/*
+ * For NFS events, we have to construct a fake struct bio, which we have to
+ * populate from the inode (arg0) and hdr->good_bytes (arg2) arguments the
+ * underlying probe provides.
+ */
+static void io_nfs_args_v1(dt_pcb_t *pcb, dt_irlist_t *dlp, uint_t exitlbl,
+			   const char *prb, const char *uprb)
+{
+	int	off;
+	size_t	siz;
+
+	/*
+	 * Determine the various sizes and offsets we want.
+	 *
+	 *     // Access these fields relative to &bio.
+	 *     struct bio bio = {
+	 *         .bi_opf = ...,
+	 *         .bi_iter.bi_size = ...,      // struct bvec_iter bi_iter
+	 *         .bi_iter.bi_sector = ...,
+	 *         .bi_bdev = 0,		// -or- .bi_disk = 0
+	 *     };
+	 *
+	 *     // Access these fields relative to hdr.
+	 *     struct nfs_pgio_header *hdr;
+	 *     ... = hdr->res.count;            // struct nfs_pgio_res  res
+	 */
+
+	/*
+	 * Declare the -io-bio variable and store its address in %r6.
+	 */
+	dt_cg_tramp_decl_var(pcb, &v_bio);
+	dt_cg_tramp_get_var(pcb, "this->-io-bio", 1, BPF_REG_6);
+
+	/* Fill in bi_opf */
+	off = dt_cg_ctf_offsetof("struct bio", "bi_opf", &siz, 0);
+	siz = bpf_ldst_size(siz, 1);
+	if (strstr(uprb, "read"))
+		emit(dlp, BPF_STORE_IMM(siz, BPF_REG_6, off, REQ_OP_READ));
+	else
+		emit(dlp, BPF_STORE_IMM(siz, BPF_REG_6, off, REQ_OP_WRITE));
+
+	/*
+	 * bio.bi_iter.bi_size = hdr->foo.count;
+	 *
+	 * For the 'start' probe, count is arg2
+	 * For the 'done' probe, count is hdr->res.count (hdr in arg1)
+	 */
+	if (strcmp(prb, "start") == 0) {
+		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(2)));
+	} else {
+		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(1)));
+		off = dt_cg_ctf_offsetof("struct nfs_pgio_header", "res", NULL, 0)
+		    + dt_cg_ctf_offsetof("struct nfs_pgio_res", "count", &siz, 0);
+		deref_r3(dlp, exitlbl, off, siz, BPF_REG_0);
+	}
+
+	off = dt_cg_ctf_offsetof("struct bio", "bi_iter", NULL, 0)
+	    + dt_cg_ctf_offsetof("struct bvec_iter", "bi_size", &siz, 0);
+	siz = bpf_ldst_size(siz, 1);
+	emit(dlp, BPF_STORE(siz, BPF_REG_6, off, BPF_REG_0));
+
+	/*
+	 * bio.bi_iter.bi_sector = inode;
+	 */
+	if (strcmp(prb, "start") == 0) {
+		/* inode is arg0 */
+		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
+	} else {
+		/* use hdr->inode, hdr is arg1 */
+		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(1)));
+
+		off = dt_cg_ctf_offsetof("struct nfs_pgio_header", "inode", &siz, 0);
+		deref_r3(dlp, exitlbl, off, siz, BPF_REG_3);
+	}
+
+	off = dt_cg_ctf_offsetof("struct nfs_inode", "fileid", &siz, 0)
+	    - dt_cg_ctf_offsetof("struct nfs_inode", "vfs_inode", NULL, 0);
+	deref_r3(dlp, exitlbl, off, siz, BPF_REG_0);
+
+	off = dt_cg_ctf_offsetof("struct bio", "bi_iter", NULL, 0)
+	    + dt_cg_ctf_offsetof("struct bvec_iter", "bi_sector", &siz, 0);
+	siz = bpf_ldst_size(siz, 1);
+	emit(dlp, BPF_STORE(siz, BPF_REG_6, off, BPF_REG_0));
+
+	/*
+	 * bio.bi_bdev = 0;
+	 */
+	off = dt_cg_ctf_offsetof("struct bio", "bi_bdev", &siz, 1);
+	if (off == -1)
+		off = dt_cg_ctf_offsetof("struct bio", "bi_disk", &siz, 0);
+	siz = bpf_ldst_size(siz, 1);
+	emit(dlp, BPF_STORE_IMM(siz, BPF_REG_6, off, 0));
+
+	/* Store a pointer to the fake bio in arg0. */
+	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_6));
+}
+
 /*
  * For NFS events, we have to construct a fake struct bio, which we have to
  * populate from the nfs_pgio_header argument the underlying probe provides.
  */
-static void io_nfs_args(dt_pcb_t *pcb, dt_irlist_t *dlp, uint_t exitlbl,
-			const char *prb, const char *uprb)
+static void io_nfs_args_v2(dt_pcb_t *pcb, dt_irlist_t *dlp, uint_t exitlbl,
+			   const char *prb, const char *uprb)
 {
 	int	off;
 	size_t	siz;
@@ -411,6 +500,7 @@ static void io_xfs_args(dt_pcb_t *pcb, dt_irlist_t *dlp, uint_t exitlbl)
  */
 static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
 {
+	dtrace_hdl_t	*dtp = pcb->pcb_hdl;
 	dt_irlist_t	*dlp = &pcb->pcb_ir;
 	dt_probe_t	*prp = pcb->pcb_probe;
 	dt_probe_t	*uprp = pcb->pcb_parent_probe;
@@ -420,13 +510,12 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
 	 * we need to synthesize one.
 	 */
 	if (strcmp(uprp->desc->mod, "nfs") == 0) {
-		/*
-		 * If the underlying probe is an FBT probe, we pass function
-		 * name.  Otherwise, pass probe name.
-		 */
-		io_nfs_args(pcb, dlp, exitlbl, prp->desc->prb,
-			    strcmp(uprp->desc->prb, "entry") == 0
-				? uprp->desc->fun : uprp->desc->prb);
+		if (dtp->dt_kernver < DT_VERSION_NUMBER(5, 6, 0))
+			io_nfs_args_v1(pcb, dlp, exitlbl, prp->desc->prb,
+				       uprp->desc->prb);
+		else
+			io_nfs_args_v2(pcb, dlp, exitlbl, prp->desc->prb,
+				       uprp->desc->prb);
 		goto done;
 	} else if (strcmp(uprp->desc->mod, "xfs") == 0) {
 		io_xfs_args(pcb, dlp, exitlbl);
-- 
2.45.2




More information about the DTrace-devel mailing list