[DTrace-devel] [PATCH v2 12/12] Implement the io provider
Kris Van Hees
kris.van.hees at oracle.com
Fri Jan 5 05:33:56 UTC 2024
Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
---
libdtrace/Build | 4 +-
libdtrace/dt_open.c | 1 +
libdtrace/dt_prov_io.c | 540 ++++++++++++++++++++++++
libdtrace/dt_provider.h | 3 +-
test/demo/io/applicat.d | 1 -
test/demo/io/iocpu.d | 1 -
test/demo/io/iothrough.d | 1 -
test/demo/io/whoio.d | 1 -
test/unittest/io/check_io_probe_args.sh | 273 ++++++++++++
test/unittest/io/dump_io_probe_args.d | 47 +++
test/unittest/io/tst.local.sh | 3 +-
test/unittest/io/tst.local2.sh | 100 +++++
test/unittest/io/tst.lv-done.r | 17 +
test/unittest/io/tst.lv-done.r.p | 5 +
test/unittest/io/tst.lv-done.sh | 11 +
test/unittest/io/tst.lv-start.r | 17 +
test/unittest/io/tst.lv-start.r.p | 1 +
test/unittest/io/tst.lv-start.sh | 11 +
test/unittest/io/tst.lv-wait-done.r | 17 +
test/unittest/io/tst.lv-wait-done.r.p | 1 +
test/unittest/io/tst.lv-wait-done.sh | 11 +
test/unittest/io/tst.lv-wait-start.r | 17 +
test/unittest/io/tst.lv-wait-start.r.p | 1 +
test/unittest/io/tst.lv-wait-start.sh | 11 +
test/unittest/io/tst.nfs.sh | 4 +-
test/unittest/io/tst.nfs2.sh | 102 +++++
test/unittest/io/tst.wait.sh | 3 +-
27 files changed, 1192 insertions(+), 12 deletions(-)
create mode 100644 libdtrace/dt_prov_io.c
create mode 100755 test/unittest/io/check_io_probe_args.sh
create mode 100644 test/unittest/io/dump_io_probe_args.d
create mode 100755 test/unittest/io/tst.local2.sh
create mode 100644 test/unittest/io/tst.lv-done.r
create mode 100755 test/unittest/io/tst.lv-done.r.p
create mode 100755 test/unittest/io/tst.lv-done.sh
create mode 100644 test/unittest/io/tst.lv-start.r
create mode 120000 test/unittest/io/tst.lv-start.r.p
create mode 100755 test/unittest/io/tst.lv-start.sh
create mode 100644 test/unittest/io/tst.lv-wait-done.r
create mode 120000 test/unittest/io/tst.lv-wait-done.r.p
create mode 100755 test/unittest/io/tst.lv-wait-done.sh
create mode 100644 test/unittest/io/tst.lv-wait-start.r
create mode 120000 test/unittest/io/tst.lv-wait-start.r.p
create mode 100755 test/unittest/io/tst.lv-wait-start.sh
create mode 100755 test/unittest/io/tst.nfs2.sh
diff --git a/libdtrace/Build b/libdtrace/Build
index cbeefe44..85d0c783 100644
--- a/libdtrace/Build
+++ b/libdtrace/Build
@@ -1,5 +1,5 @@
# Oracle Linux DTrace.
-# Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at
# http://oss.oracle.com/licenses/upl.
@@ -49,6 +49,7 @@ libdtrace-build_SOURCES = dt_aggregate.c \
dt_prov_cpc.c \
dt_prov_dtrace.c \
dt_prov_fbt.c \
+ dt_prov_io.c \
dt_prov_ip.c \
dt_prov_lockstat.c \
dt_prov_proc.c \
@@ -101,6 +102,7 @@ dt_proc.c_CFLAGS := -Wno-pedantic
dt_prov_cpc.c_CFLAGS := -Wno-pedantic
dt_prov_dtrace.c_CFLAGS := -Wno-pedantic
dt_prov_fbt.c_CFLAGS := -Wno-pedantic
+dt_prov_io.c_CFLAGS := -Wno-pedantic
dt_prov_ip.c_CFLAGS := -Wno-pedantic
dt_prov_lockstat.c_CFLAGS := -Wno-pedantic
dt_prov_proc.c_CFLAGS := -Wno-pedantic
diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
index 0be0ce9d..254cf3c3 100644
--- a/libdtrace/dt_open.c
+++ b/libdtrace/dt_open.c
@@ -68,6 +68,7 @@ static const dt_provimpl_t *dt_providers[] = {
&dt_dtrace, /* list dt_dtrace first */
&dt_cpc,
&dt_fbt,
+ &dt_io,
&dt_ip,
&dt_lockstat,
&dt_proc,
diff --git a/libdtrace/dt_prov_io.c b/libdtrace/dt_prov_io.c
new file mode 100644
index 00000000..37d08937
--- /dev/null
+++ b/libdtrace/dt_prov_io.c
@@ -0,0 +1,540 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ *
+ * The 'io' SDT provider for DTrace-specific probes.
+ *
+ * These io::: probes all provide three probe arguments:
+ * (bufinfo_t *, devinfo_t *, fileinfo_t *)
+ * where the first two are populated by translators based on a 'struct bio *'
+ * argument provided by the trampoline code. The third probe argument is
+ * always 0 on Linux.
+ *
+ * Most underlying probes provide the bio pointer as an argument.
+ *
+ * The nfs_* and xfs_* underlying probes do not provide a bio pointer. For
+ * them, we construct a "fake" struct bio in the -io-bio TLS variable based on
+ * the implementation specific arguments.
+ *
+ * For the submit_bio_wait-based probe, we store the bio pointer in the
+ * -io-bio-ptr TLS variable at function entry, and retrieve it at function
+ * return.
+ */
+#include <assert.h>
+#include <errno.h>
+
+#include "dt_dctx.h"
+#include "dt_cg.h"
+#include "dt_provider_sdt.h"
+#include "dt_probe.h"
+
+/* Defined in include/linux/blk_types.h */
+#define REQ_OP_READ 0
+#define REQ_OP_WRITE 1
+/* Defined in fs/xfs/xfs_buf.h */
+#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
+
+static const char prvname[] = "io";
+static const char modname[] = "vmlinux"; // FIXME: Really? Or blank?
+
+/*
+ * If the set of functions in the fbt probes changes,
+ * update the list in test/unittest/io/tst.fbt_probes.r.
+ */
+static probe_dep_t probes[] = { /* io probe name -> underlying probe it is built on */
+ { "wait-start",
+ DTRACE_PROBESPEC_NAME, "fbt::submit_bio_wait:entry" },
+ { "wait-start",
+ DTRACE_PROBESPEC_NAME, "rawtp:xfs::xfs_buf_iowait" },
+ { "wait-done",
+ DTRACE_PROBESPEC_FUNC, "fbt::submit_bio_wait" }, /* no probe name: the trampoline handles "entry" and the other probe differently */
+ { "wait-done",
+ DTRACE_PROBESPEC_NAME, "rawtp:xfs::xfs_buf_iowait_done" },
+ { "done",
+ DTRACE_PROBESPEC_NAME, "rawtp:block::block_bio_complete" },
+ { "done",
+ DTRACE_PROBESPEC_NAME, "rawtp:block::block_rq_complete" },
+ { "done",
+ DTRACE_PROBESPEC_NAME, "rawtp:nfs::nfs_readpage_done" },
+ { "done",
+ DTRACE_PROBESPEC_NAME, "rawtp:nfs::nfs_writeback_done" },
+ { "start",
+ DTRACE_PROBESPEC_NAME, "rawtp:block::block_bio_queue" },
+ { "start",
+ DTRACE_PROBESPEC_NAME, "rawtp:nfs::nfs_initiate_read",
+ DT_VERSION_NUMBER(5, 6, 0), }, /* NOTE(review): presumably the minimum kernel version -- confirm */
+ { "start",
+ DTRACE_PROBESPEC_NAME, "fbt:nfs:nfs_initiate_read:entry",
+ 0, DT_VERSION_NUMBER(5, 5, 19) }, /* NOTE(review): presumably the maximum kernel version -- confirm */
+ { "start",
+ DTRACE_PROBESPEC_NAME, "rawtp:nfs::nfs_initiate_write",
+ DT_VERSION_NUMBER(5, 6, 0), },
+ { "start",
+ DTRACE_PROBESPEC_NAME, "fbt:nfs:nfs_initiate_write:entry",
+ 0, DT_VERSION_NUMBER(5, 5, 19) },
+ { NULL, } /* end-of-table marker */
+};
+
+/*
+ * All four probes have three probe args. The first two will be extracted
+ * by a translator from the (struct bio *) we supply. The (struct file *)
+ * we supply will be 0 in all cases.
+ */
+static probe_arg_t probe_args[] = { /* { io probe, args[] index, { native arg index, flags (assumed), native type, translated type } } -- layout inferred from the entries, confirm */
+ { "start", 0, { 0, 0, "struct bio *", "bufinfo_t *" } },
+ { "start", 1, { 0, 0, "struct bio *", "devinfo_t *" } }, /* same native arg 0 as args[0] */
+ { "start", 2, { 1, 0, "struct file *", "fileinfo_t *", } }, /* native arg 1; the trampoline stores 0 there */
+ { "done", 0, { 0, 0, "struct bio *", "bufinfo_t *" } },
+ { "done", 1, { 0, 0, "struct bio *", "devinfo_t *" } },
+ { "done", 2, { 1, 0, "struct file *", "fileinfo_t *", } },
+ { "wait-start", 0, { 0, 0, "struct bio *", "bufinfo_t *" } },
+ { "wait-start", 1, { 0, 0, "struct bio *", "devinfo_t *" } },
+ { "wait-start", 2, { 1, 0, "struct file *", "fileinfo_t *", } },
+ { "wait-done", 0, { 0, 0, "struct bio *", "bufinfo_t *" } },
+ { "wait-done", 1, { 0, 0, "struct bio *", "devinfo_t *" } },
+ { "wait-done", 2, { 1, 0, "struct file *", "fileinfo_t *", } },
+ { NULL, } /* end-of-table marker */
+};
+
+/* List of provider-specific TLS variables that need to be added. */
+static dt_ident_t tls_vars[] = {
+ { "-io-bio", DT_IDENT_SCALAR, DT_IDFLG_TLS | DT_IDFLG_WRITE, 0,
+ DT_ATTR_STABCMN, DT_VERS_2_0, &dt_idops_type, "struct bio" }, /* fake bio built for the nfs and xfs probes */
+ { "-io-bio-ptr", DT_IDENT_SCALAR, DT_IDFLG_TLS | DT_IDFLG_WRITE, 0,
+ DT_ATTR_STABCMN, DT_VERS_2_0, &dt_idops_type, "struct bio *" }, /* bio pointer saved at submit_bio_wait entry */
+ { NULL, } /* end-of-table marker; populate() stops at a NULL di_name */
+};
+
+static const dtrace_pattr_t pattr = { /* NOTE(review): rows presumably follow dtrace_pattr_t member order (provider, module, function, name, args) -- confirm */
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+};
+
+/*
+ * Provide all the "io" SDT probes.
+ */
+static int populate(dtrace_hdl_t *dtp)
+{
+ dt_idhash_t *tls = dtp->dt_tls;
+ dt_ident_t *tmpl;
+
+ /* Register every provider-private TLS variable not yet present. */
+ for (tmpl = tls_vars; tmpl->di_name != NULL; tmpl++) {
+ dtrace_typeinfo_t tinfo;
+ dt_ident_t *var;
+ uint_t varid;
+
+ if (dt_idhash_lookup(tls, tmpl->di_name) != NULL)
+ continue;
+
+ /* Allocate an id, then create the identifier from the template. */
+ if (dt_idhash_nextid(tls, &varid) == -1)
+ return dt_set_errno(dtp, EDT_ERRABORT);
+ if (dt_idhash_insert(tls, tmpl->di_name, tmpl->di_kind,
+ tmpl->di_flags, varid, tmpl->di_attr,
+ tmpl->di_vers,
+ tmpl->di_ops != NULL ? tmpl->di_ops
+ : &dt_idops_thaw,
+ tmpl->di_iarg, 0) == NULL)
+ return dt_set_errno(dtp, EDT_NOMEM);
+
+ /* Resolve the type named by di_iarg and attach it to the variable. */
+ var = dt_idhash_lookup(tls, tmpl->di_name);
+ if (dtrace_lookup_by_type(dtp, DTRACE_OBJ_EVERY, var->di_iarg,
+ &tinfo) == -1)
+ return dt_set_errno(dtp, EDT_CTF);
+ dt_ident_type_assign(var, tinfo.dtt_ctfp, tinfo.dtt_type);
+ }
+
+ return dt_sdt_populate(dtp, prvname, modname, &dt_io, &pattr,
+ probe_args, probes);
+}
+
+/*
+ * Generate BPF instructions to dereference the pointer in %r3 (after applying
+ * an optional addend) and read a value of the given 'width'. The result is
+ * stored in register 'reg' (where BPF_REG_0 <= reg <= BPF_REG_5).
+ *
+ * Registers %r0-%r5 will be clobbered. Register 'reg' holds the value.
+ */
+static void deref_r3(dt_irlist_t *dlp, uint_t exitlbl, int addend, int width,
+ int reg)
+{
+ int ldsz;
+
+ assert(reg >= BPF_REG_0 && reg <= BPF_REG_5);
+
+ /* %r1 = address of stack slot 0, which serves as the bounce buffer. */
+ emit(dlp, BPF_MOV_REG(BPF_REG_1, BPF_REG_FP));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, DT_TRAMP_SP_SLOT(0)));
+
+ /* %r2 = number of bytes to read; %r3 = source address (+ addend). */
+ emit(dlp, BPF_MOV_IMM(BPF_REG_2, width));
+ if (addend != 0)
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, addend));
+
+ /* Copy the data; a negative helper result branches to exitlbl. */
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_probe_read));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JSLT, BPF_REG_0, 0, exitlbl));
+
+ /* Fetch the value from the bounce buffer into the target register. */
+ ldsz = bpf_ldst_size(width, 0);
+ emit(dlp, BPF_LOAD(ldsz, reg, BPF_REG_FP, DT_TRAMP_SP_SLOT(0)));
+}
+
+/*
+ * For NFS events, we have to construct a fake struct bio, which we have to
+ * populate from the nfs_pgio_header argument the underlying probe provides.
+ */
+static void io_nfs_args(dt_pcb_t *pcb, dt_irlist_t *dlp, uint_t exitlbl,
+ const char *prb, const char *uprb)
+{
+ int off;
+ size_t siz;
+
+ /*
+ * Determine the various sizes and offsets we want.
+ *
+ * // Access these fields relative to &bio.
+ * struct bio bio = {
+ * .bi_opf = ...,
+ * .bi_iter.bi_size = ..., // struct bvec_iter bi_iter
+ * .bi_iter.bi_sector = ...,
+ * };
+ *
+ * // Access these fields relative to hdr.
+ * struct nfs_pgio_header *hdr;
+ * ... = hdr->args.count; // struct nfs_pgio_args args
+ * ... = hdr->res.count; // struct nfs_pgio_res res
+ */
+
+ /*
+ * Prepare the -io-bio TLS variable for construction and store its
+ * address in %r6.
+ */
+ dt_cg_tramp_get_var(pcb, "self->-io-bio", 1, BPF_REG_6);
+
+ /* Fill in bi_opf: a read if the underlying probe name contains "read". */
+ off = dt_cg_ctf_offsetof("struct bio", "bi_opf", &siz, 0);
+ siz = bpf_ldst_size(siz, 1);
+ if (strstr(uprb, "read"))
+ emit(dlp, BPF_STORE_IMM(siz, BPF_REG_6, off, REQ_OP_READ));
+ else
+ emit(dlp, BPF_STORE_IMM(siz, BPF_REG_6, off, REQ_OP_WRITE));
+
+ /*
+ * bio.bi_iter.bi_size = hdr->foo.count;
+ *
+ * hdr is:
+ * - arg0 for start (foo is args)
+ * - arg1 for done (foo is res)
+ */
+ if (strcmp(prb, "start") == 0) {
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
+ off = dt_cg_ctf_offsetof("struct nfs_pgio_header", "args", NULL, 0)
+ + dt_cg_ctf_offsetof("struct nfs_pgio_args", "count", &siz, 0);
+ } else {
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(1)));
+ off = dt_cg_ctf_offsetof("struct nfs_pgio_header", "res", NULL, 0)
+ + dt_cg_ctf_offsetof("struct nfs_pgio_res", "count", &siz, 0);
+ }
+ deref_r3(dlp, exitlbl, off, siz, BPF_REG_0);
+ off = dt_cg_ctf_offsetof("struct bio", "bi_iter", NULL, 0)
+ + dt_cg_ctf_offsetof("struct bvec_iter", "bi_size", &siz, 0);
+ siz = bpf_ldst_size(siz, 1);
+ emit(dlp, BPF_STORE(siz, BPF_REG_6, off, BPF_REG_0));
+
+ /*
+ * bio.bi_iter.bi_sector = fileid of the nfs_inode embedding hdr->inode;
+ */
+ /* get hdr (deref_r3 clobbered %r3, so it must be reloaded) */
+ if (strcmp(prb, "start") == 0)
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
+ else
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(1)));
+
+ off = dt_cg_ctf_offsetof("struct nfs_pgio_header", "inode", &siz, 0);
+ deref_r3(dlp, exitlbl, off, siz, BPF_REG_3); /* %r3 = hdr->inode */
+
+ off = dt_cg_ctf_offsetof("struct nfs_inode", "fileid", &siz, 0) /* offset of fileid relative to the embedded vfs_inode */
+ - dt_cg_ctf_offsetof("struct nfs_inode", "vfs_inode", NULL, 0);
+
+ deref_r3(dlp, exitlbl, off, siz, BPF_REG_0); /* %r0 = fileid */
+
+ off = dt_cg_ctf_offsetof("struct bio", "bi_iter", NULL, 0)
+ + dt_cg_ctf_offsetof("struct bvec_iter", "bi_sector", &siz, 0);
+ siz = bpf_ldst_size(siz, 1);
+ emit(dlp, BPF_STORE(siz, BPF_REG_6, off, BPF_REG_0));
+
+ /* Store a pointer to the fake bio in arg0. */
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_6));
+}
+
+/*
+ * For XFS events, we have to construct a fake struct bio, which we have to
+ * populate from the xfs_buf argument the underlying probe provides.
+ */
+static void io_xfs_args(dt_pcb_t *pcb, dt_irlist_t *dlp, uint_t exitlbl)
+{
+ int off;
+ size_t siz;
+
+ /*
+ * Determine the various sizes and offsets we want.
+ *
+ * // Access these fields relative to &bio.
+ * struct bio bio = {
+ * .bi_opf = ...,
+ * .bi_iter.bi_size = ..., // struct bvec_iter bi_iter
+ * .bi_iter.bi_sector = ...,
+ * .bi_bdev = ..., // -or- .bi_disk = ...
+ * // and .bi_partno = ...
+ * };
+ *
+ * // Access these fields relative to bp.
+ * struct xfs_buf *bp;
+ * ... = (bp)->b_flags;
+ * ... = xfs_buf_daddr(bp);
+ * ... = (bp)->b_length;
+ * ... = (bp)->b_target->bt_bdev; // struct xfs_buftarg *b_target;
+ */
+
+ /*
+ * Prepare the -io-bio TLS variable for construction and store its
+ * address in %r6.
+ */
+ dt_cg_tramp_get_var(pcb, "self->-io-bio", 1, BPF_REG_6);
+
+ /* bio.bi_opf = (bp->b_flags & XBF_WRITE) ? REQ_OP_WRITE : REQ_OP_READ; */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
+ off = dt_cg_ctf_offsetof("struct xfs_buf", "b_flags", &siz, 0);
+ deref_r3(dlp, exitlbl, off, siz, BPF_REG_0);
+ emit(dlp, BPF_ALU64_IMM(BPF_AND, BPF_REG_0, XBF_WRITE));
+ {
+ uint_t Lzero = dt_irlist_label(dlp);
+ uint_t Ldone = dt_irlist_label(dlp);
+
+ off = dt_cg_ctf_offsetof("struct bio", "bi_opf", &siz, 0);
+ siz = bpf_ldst_size(siz, 1);
+
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, Lzero));
+ emit(dlp, BPF_STORE_IMM(siz, BPF_REG_6, off, REQ_OP_WRITE));
+ emit(dlp, BPF_JUMP(Ldone));
+ emitl(dlp, Lzero,
+ BPF_NOP());
+ emit(dlp, BPF_STORE_IMM(siz, BPF_REG_6, off, REQ_OP_READ));
+ emitl(dlp, Ldone,
+ BPF_NOP());
+ }
+
+ /*
+ * bio.bi_iter.bi_size = bp->b_length;
+ */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
+ off = dt_cg_ctf_offsetof("struct xfs_buf", "b_length", &siz, 0);
+ deref_r3(dlp, exitlbl, off, siz, BPF_REG_0);
+ off = dt_cg_ctf_offsetof("struct bio", "bi_iter", NULL, 0)
+ + dt_cg_ctf_offsetof("struct bvec_iter", "bi_size", &siz, 0);
+ siz = bpf_ldst_size(siz, 1);
+ emit(dlp, BPF_STORE(siz, BPF_REG_6, off, BPF_REG_0));
+
+ /*
+ * bio.bi_iter.bi_sector = xfs_buf_daddr(bp);
+ *
+ * In fs/xfs/xfs_buf.h, we have
+ *
+ * xfs_daddr_t xfs_buf_daddr(struct xfs_buf *bp)
+ * {
+ * return bp->b_maps[0].bm_bn;
+ * }
+ *
+ * So that gives
+ * bio.bi_iter.bi_sector = bp->b_maps->bm_bn;
+ *
+ * include/linux/blk_types.h
+ * struct bio {
+ * [...]
+ * struct bvec_iter bi_iter;
+ * [...]
+ * }
+ * include/linux/bvec.h
+ * struct bvec_iter {
+ * sector_t bi_sector;
+ * [...]
+ * };
+ * fs/xfs/xfs_buf.h
+ * struct xfs_buf_map {
+ * xfs_daddr_t bm_bn;
+ * [...]
+ * };
+ * struct xfs_buf {
+ * [...]
+ * struct xfs_buf_map *b_maps;
+ * [...]
+ * }
+ */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
+ off = dt_cg_ctf_offsetof("struct xfs_buf", "b_maps", &siz, 0);
+ deref_r3(dlp, exitlbl, off, siz, BPF_REG_3);
+ off = dt_cg_ctf_offsetof("struct xfs_buf_map", "bm_bn", &siz, 0);
+ deref_r3(dlp, exitlbl, off, siz, BPF_REG_0);
+ off = dt_cg_ctf_offsetof("struct bio", "bi_iter", NULL, 0)
+ + dt_cg_ctf_offsetof("struct bvec_iter", "bi_sector", &siz, 0);
+ siz = bpf_ldst_size(siz, 1);
+ emit(dlp, BPF_STORE(siz, BPF_REG_6, off, BPF_REG_0));
+
+ /*
+ * bio.bi_bdev = (bp)->b_target->bt_bdev (the value is read into %r3)
+ */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
+ off = dt_cg_ctf_offsetof("struct xfs_buf", "b_target", &siz, 0);
+ assert(siz == sizeof(void *));
+ deref_r3(dlp, exitlbl, off, 8, BPF_REG_3);
+ off = dt_cg_ctf_offsetof("struct xfs_buftarg", "bt_bdev", &siz, 0);
+ deref_r3(dlp, exitlbl, off, siz, BPF_REG_3);
+ off = dt_cg_ctf_offsetof("struct bio", "bi_bdev", &siz, 1);
+ if (off == -1)
+ off = dt_cg_ctf_offsetof("struct bio", "bi_disk", &siz, 0);
+ siz = bpf_ldst_size(siz, 1);
+ emit(dlp, BPF_STORE(siz, BPF_REG_6, off, BPF_REG_3)); /* %r0 only holds the probe_read status here */
+
+ /* Populate bi_partno if it exists (%r3 still holds the bdev pointer). */
+ off = dt_cg_ctf_offsetof("struct bio", "bi_partno", &siz, 1);
+ if (off >= 0) {
+ int poff;
+ size_t psiz;
+
+ poff = dt_cg_ctf_offsetof("struct block_device", "bd_partno", &psiz, 0);
+ siz = bpf_ldst_size(siz, 1);
+ deref_r3(dlp, exitlbl, poff, psiz, BPF_REG_0);
+ emit(dlp, BPF_STORE(siz, BPF_REG_6, off, BPF_REG_0));
+ }
+
+ /* Store a pointer to the fake bio in arg0. */
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_6));
+}
+
+/*
+ * Generate a BPF trampoline for a SDT probe.
+ *
+ * The trampoline function is called when a SDT probe triggers, and it must
+ * satisfy the following prototype:
+ *
+ * int dt_io(void *data)
+ *
+ * The trampoline will populate a dt_dctx_t struct and then call the function
+ * that implements the compiled D clause. It returns the value that it gets
+ * back from that function.
+ */
+static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
+{
+ dt_irlist_t *dlp = &pcb->pcb_ir;
+ dt_probe_t *prp = pcb->pcb_probe; /* the io probe */
+ dt_probe_t *uprp = pcb->pcb_parent_probe; /* the underlying probe */
+
+ /*
+ * The nfs_* and xfs_* probes do not pass a bio argument, and therefore
+ * we need to synthesize one.
+ */
+ if (strcmp(uprp->desc->mod, "nfs") == 0) {
+ /*
+ * If the underlying probe is an FBT probe, we pass function
+ * name. Otherwise, pass probe name.
+ */
+ io_nfs_args(pcb, dlp, exitlbl, prp->desc->prb,
+ strcmp(uprp->desc->prb, "entry") == 0
+ ? uprp->desc->fun : uprp->desc->prb);
+ goto done;
+ } else if (strcmp(uprp->desc->mod, "xfs") == 0) {
+ io_xfs_args(pcb, dlp, exitlbl);
+ goto done;
+ }
+
+ /* Handle the start and done probes (non-XFS, non-NFS). */
+ if (strcmp(prp->desc->prb, "start") == 0) {
+ /*
+ * Older kernels pass 2 arguments to block_bio_queue, and bio
+ * is in arg1. Newer kernels have bio in arg0 already.
+ */
+ if (uprp->nargc == 2) {
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(1)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
+ }
+
+ goto done;
+ } else if (strcmp(prp->desc->prb, "done") == 0) {
+ if (strcmp(uprp->desc->prb, "block_bio_complete") == 0) {
+ /* Move the bio from arg1 to arg0. */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(1)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
+ } else {
+ size_t size;
+ size_t off;
+
+ /* arg0 is a struct request; fetch its bio member. */
+ off = dt_cg_ctf_offsetof("struct request", "bio", &size, 0);
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
+ deref_r3(dlp, exitlbl, off, size, BPF_REG_0);
+
+ /*
+ * The bio member of the request might be NULL. In
+ * that case it is to be ignored.
+ */
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
+ }
+
+ goto done;
+ }
+
+ /*
+ * The non-XFS wait-start flavor already has the bio in arg0, so there
+ * is nothing left to be done.
+ */
+ if (strcmp(prp->desc->prb, "wait-start") == 0)
+ goto done;
+
+ /* Handle the non-XFS wait-done flavor. */
+ if (strcmp(prp->desc->prb, "wait-done") == 0) {
+ /*
+ * We need to instrument submit_bio_wait(struct bio *):
+ * - on entry, store bio in a TLS var
+ * - on return, get bio (and store 0 to delete the TLS var)
+ * We use a TLS var to distinguish among possible concurrent
+ * submit_bio_wait() on the CPU.
+ */
+ if (strcmp(uprp->desc->prb, "entry") == 0) {
+ dt_cg_tramp_get_var(pcb, "self->-io-bio-ptr", 1, BPF_REG_3);
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(0)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_3, 0, BPF_REG_0));
+ return 1; /* NOTE(review): presumably tells the caller not to run the clause for the entry probe -- confirm */
+ } else {
+ dt_cg_tramp_get_var(pcb, "self->-io-bio-ptr", 0, BPF_REG_0);
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
+ dt_cg_tramp_del_var(pcb, "self->-io-bio-ptr");
+ }
+
+ }
+
+done:
+ /*
+ * Note: DTrace does not currently support the use of fileinfo_t with
+ * io probes. In Oracle Linux, there is no information about the file
+ * where the I/O request originated at the point where the io probes
+ * fire.
+ */
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(1), 0)); /* native arg1 is the source of args[2] in probe_args */
+
+ return 0;
+}
+
+dt_provimpl_t dt_io = { /* provider implementation listed in dt_providers[] (dt_open.c) */
+ .name = prvname, /* "io" */
+ .prog_type = BPF_PROG_TYPE_UNSPEC,
+ .populate = &populate, /* registers the TLS variables, then the SDT probes */
+ .enable = &dt_sdt_enable, /* shared SDT helper, not defined in this file */
+ .trampoline = &trampoline, /* rewrites the native args so arg0 is a struct bio * */
+ .probe_info = &dt_sdt_probe_info, /* shared SDT helper, not defined in this file */
+};
diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
index 31ad028d..4d77a799 100644
--- a/libdtrace/dt_provider.h
+++ b/libdtrace/dt_provider.h
@@ -1,6 +1,6 @@
/*
* Oracle Linux DTrace.
- * Copyright (c) 2006, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2024, Oracle and/or its affiliates. All rights reserved.
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/
@@ -70,6 +70,7 @@ typedef struct dt_provimpl {
extern dt_provimpl_t dt_dtrace;
extern dt_provimpl_t dt_cpc;
extern dt_provimpl_t dt_fbt;
+extern dt_provimpl_t dt_io;
extern dt_provimpl_t dt_ip;
extern dt_provimpl_t dt_lockstat;
extern dt_provimpl_t dt_proc;
diff --git a/test/demo/io/applicat.d b/test/demo/io/applicat.d
index 64cdb6af..14170145 100644
--- a/test/demo/io/applicat.d
+++ b/test/demo/io/applicat.d
@@ -4,7 +4,6 @@
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/
-/* @@xfail: dtv2 */
io:::start
/execname == "soffice.bin" && args[2]->fi_name == "applicat.rdb"/
diff --git a/test/demo/io/iocpu.d b/test/demo/io/iocpu.d
index 7d29637d..c7b847e7 100644
--- a/test/demo/io/iocpu.d
+++ b/test/demo/io/iocpu.d
@@ -4,7 +4,6 @@
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/
-/* @@xfail: dtv2 */
#pragma D option quiet
diff --git a/test/demo/io/iothrough.d b/test/demo/io/iothrough.d
index 0290b12e..57fda7f0 100644
--- a/test/demo/io/iothrough.d
+++ b/test/demo/io/iothrough.d
@@ -4,7 +4,6 @@
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/
-/* @@xfail: dtv2 */
#pragma D option quiet
diff --git a/test/demo/io/whoio.d b/test/demo/io/whoio.d
index 17f7db54..d5fc444f 100644
--- a/test/demo/io/whoio.d
+++ b/test/demo/io/whoio.d
@@ -4,7 +4,6 @@
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/
-/* @@xfail: dtv2 */
#pragma D option quiet
diff --git a/test/unittest/io/check_io_probe_args.sh b/test/unittest/io/check_io_probe_args.sh
new file mode 100755
index 00000000..1c3c88d1
--- /dev/null
+++ b/test/unittest/io/check_io_probe_args.sh
@@ -0,0 +1,273 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+
+#
+# @@skip: not used directly by the test harness; called by other scripts
+#
+
+infile=$1
+retval=0
+
+echo check_io_probe_args $infile
+
+#
+# Start with some basic checks on the io probe args.
+#
+
+gawk '
+BEGIN {
+ err = 0; # set to 1 if we encounter any errors
+ nrecs = 0;
+}
+
+NF == 0 { next } # skip empty lines
+
+NF != 23 { err = 1; print "garbled input: " $0; next }
+
+{
+ nrecs++;
+
+ myprobeprov = $1
+ myprobemod = $2
+ myprobefunc = $3
+ myprobename = $4
+ myarg2 = $5
+ myb_flags = $6
+ myb_bcount = $7
+ myb_bufsize = $8
+ myb_addr = $9
+ myb_resid = $10
+ myb_error = $11
+ myb_lblkno = $12
+ myb_blkno = $13
+ myb_iodone = $14
+ myb_edev = $15
+ myb_major = $16
+ myb_minor = $17
+ mydev_major = $18
+ mydev_minor = $19
+ mydev_instance = $20
+ mydev_name = $21
+ mydev_statname = $22
+ mydev_pathname = $23
+
+ # Check probe description.
+
+ if (myprobeprov != "io:") { err = 1; print "provider is not io, got", myprobeprov }
+ if (myprobemod != "vmlinux:") { err = 1; print "module is not vmlinux, got", myprobemod }
+ if (myprobefunc != ":") { err = 1; print "function is not blank, got", myprobefunc }
+ if (myprobename != "wait-start" &&
+ myprobename != "wait-done" &&
+ myprobename != "start" &&
+ myprobename != "done") { err = 1; print "name is unrecognized", myprobename }
+
+ # Check that args[2] is 0.
+ if (myarg2 != 0) { err = 1; print "args[2] should be 0, got", myarg2 }
+
+ # Check for a legal set of flags.
+ # myb_flags is a hex string without 0x prefix, so report it with %s.
+ {
+ B_PAGEIO = 0x000010;
+ B_PHYS = 0x000020;
+ B_READ = 0x000040;
+ B_WRITE = 0x000100;
+ B_ASYNC = 0x000400;
+ tmp = strtonum("0x"myb_flags);
+
+ # B_ASYNC may be set.
+ if (and(tmp, B_ASYNC) != 0) tmp -= B_ASYNC;
+
+ # B_WRITE or else B_READ must be set.
+ nflags = 0;
+ if (and(tmp, B_WRITE) != 0) {
+ tmp -= B_WRITE;
+ nflags++;
+ }
+ if (and(tmp, B_READ) != 0) {
+ tmp -= B_READ;
+ nflags++;
+ }
+ if (nflags != 1) {
+ printf "flags %s must be read or else write\n", myb_flags;
+ err = 1;
+ }
+
+ # B_PAGEIO or else B_PHYS must be set.
+ nflags = 0;
+ if (and(tmp, B_PAGEIO) != 0) {
+ tmp -= B_PAGEIO;
+ nflags++;
+ }
+ if (and(tmp, B_PHYS) != 0) {
+ tmp -= B_PHYS;
+ nflags++;
+ }
+ if (nflags != 1) {
+ printf "flags %s must be pageio or else phys\n", myb_flags;
+ err = 1;
+ }
+
+ # Check for any other flags.
+ if (tmp != 0) {
+ printf "flags %s has some unexpected flags %x set\n", myb_flags, tmp;
+ err = 1;
+ }
+ }
+
+ # FIXME: can we add a check for myb_bcount?
+
+ if (myb_bufsize != myb_bcount) { err = 1; print "bcount and bufsize do not match", myb_bcount, myb_bufsize }
+
+ if (myb_addr != "0") { err = 1; print "b_addr is not 0:", myb_addr }
+ if (myb_resid != "0") { err = 1; print "b_resid is not 0:", myb_resid }
+ if (myb_error != "0") { err = 1; print "b_error is not 0:", myb_error }
+
+ # FIXME: can we add a check for myb_lblkno?
+
+ if (myb_blkno != myb_lblkno) { err = 1; print "lblkno and blkno do not match", myb_lblkno, myb_blkno }
+
+ # FIXME: can we add a check for myb_iodone?
+ # FIXME: can we add a check for myb_edev?
+
+ if ( myb_major != rshift(myb_edev, 20)) { err = 1; print "b_major inconsistent with edev", myb_major, myb_edev }
+ if ( myb_minor != and(myb_edev, 0xfffff)) { err = 1; print "b_minor inconsistent with edev", myb_minor, myb_edev }
+
+ if (mydev_major != myb_major) { err = 1; print "b_major and dev_major do not match", myb_major, mydev_major }
+ if (mydev_minor != myb_minor) { err = 1; print "b_minor and dev_minor do not match", myb_minor, mydev_minor }
+
+ if (mydev_instance != 0) { err = 1; print "dev_instance is not 0", mydev_instance }
+
+ # FIXME: can we add a check for mydev_name?
+ # FIXME: can we add a check for mydev_statname?
+ # FIXME: can we add a check for mydev_pathname?
+}
+END {
+ if (nrecs == 0) { err = 1; print "no records found" }
+ exit(err);
+}
+' $infile
+if [ $? -ne 0 ]; then
+ retval=1
+ cat $infile
+ exit $retval
+fi
+
+#
+# Check that all iodone PCs are 0 or else correspond to end*io functions.
+#
+
+if [ $UID -ne 0 ]; then
+ echo skipping iodone check since must be root to read PCs in kallmodsyms
+ # A skipped check is not a failure, so retval is left untouched.
+else
+ for pc in `gawk 'NF == 23 { print $14 }' $infile | grep -wv 0 | sort | uniq`; do
+ gawk '$1 == "'$pc'" && /end.*io/ { found = 1; exit }
+ END { exit(found) }' /proc/kallmodsyms
+ if [ $? -eq 0 ]; then
+ echo $pc, " is not an end-io function"
+ grep $pc /proc/kallmodsyms
+ retval=1
+ fi
+ done
+fi
+
+#
+# For each statname, check that the reported major/minor numbers agree with "ls -l".
+#
+
+while read mymajor myminor mystatname; do
+ read mymajor0 myminor0 <<< "$(ls -l /dev | gawk '$NF == "'$mystatname'" { print $(NF-5), $(NF-4) }' | tr ',' ' ')"
+
+ if [ "x$mymajor0" == "x" ]; then
+ mymajor0="0"
+ fi
+ if [ "x$myminor0" == "x" ]; then
+ myminor0="0"
+ fi
+
+ if [ "$mymajor" != "$mymajor0" ] || [ "$myminor" != "$myminor0" ]; then
+ echo ERROR: for $mystatname expect device major minor $mymajor $myminor but got $mymajor0 $myminor0
+ retval=1
+ fi
+done <<< "$(gawk 'NF == 23 { print $16, $17, $22 }' $infile | sort | uniq)"
+
+#
+# For each major number, check name.
+#
+
+while read mymajor myname; do
+ $dtrace $dt_flags -qn '
+ BEGIN {
+ trace(stringof(`major_names['$mymajor' % 255]->name));
+ exit(0);
+ }
+ ERROR { trace("nfs"); exit(0); }' -o chkmajnam.txt >& /dev/null
+ myname0=`cat chkmajnam.txt`
+ rm -f chkmajnam.txt
+ echo check major $mymajor is $myname and $myname0
+ if [ "$myname" != "$myname0" ]; then
+ echo ERROR: for $mymajor expect name $myname0 but got $myname
+ retval=1
+ fi
+done <<< "$(gawk 'NF == 23 { print $16, $21 }' $infile | sort | uniq)"
+
+#
+# For name: Expect pathname:
+#
+# == "nfs" "<nfs>"
+# != "nfs" "<unknown>" # FIXME: "<unknown>"? Really?
+#
+
+# gawk exits non-zero as soon as one record has an unexpected pathname.
+if ! gawk '
+BEGIN { bad = 0 }
+NF == 23 {
+ # Only the nfs name has a dedicated pathname.
+ want = ($21 == "nfs") ? "<nfs>" : "<unknown>";
+ if ($23 != want) {
+ print "ERROR: for name " $21 " expect " want " but got " $23;
+ bad = 1;
+ }
+}
+END { exit(bad) }
+' $infile; then
+ retval=1
+fi
+
+#
+# Check that for each name, there is a distinct major number.
+# This does not guarantee that the mapping is correct, but it
+# is a partial correctness check and we already checked the
+# statname mapping to edev numbers against "ls -l /dev".
+#
+
+gawk 'NF == 23 { print $21, $16 }' $infile | sort | uniq > map-name-to-major.txt
+nmaps=$(wc -l < map-name-to-major.txt)
+nnames=$(awk '{print $1}' map-name-to-major.txt | sort | uniq | wc -l)
+nmajor=$(awk '{print $2}' map-name-to-major.txt | sort | uniq | wc -l)
+if [ $nnames -ne $nmaps ] || [ $nmajor -ne $nmaps ]; then
+ echo "ERROR: name-to-major-number is not a one-to-one mapping"
+ cat map-name-to-major.txt
+ retval=1
+fi
+
+#
+# If the name is "nfs", the edev should be 0. FIXME: is this correct?
+#
+
+if ! gawk '
+BEGIN { bad = 0 }
+$21 == "nfs" && $15 != 0 { print "ERROR: name is nfs but edev is nonzero"; bad = 1 }
+END { exit(bad) }' $infile; then
+ # At least one nfs record carried a nonzero edev.
+ retval=1
+fi
+
+#
+# Exit.
+#
+
+exit $retval
diff --git a/test/unittest/io/dump_io_probe_args.d b/test/unittest/io/dump_io_probe_args.d
new file mode 100644
index 00000000..fb4c702c
--- /dev/null
+++ b/test/unittest/io/dump_io_probe_args.d
@@ -0,0 +1,47 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ */
+/* @@skip: not used directly by the test harness; called by other scripts */
+
+/*
+ * For all io::: probes, dump "all" probe arguments (and their interesting members).
+ * It would be nice just to say "io:::", but our use of args[] forces us to
+ * enumerate the probes.
+ */
+io:::wait-start,
+io:::wait-done,
+io:::start,
+io:::done
+{
+ printf("%s: %s: %s: %11s %d %3x %9d %9d %p %d %d %5d %5d %p %d %d %d %d %d %d %s %s %s\n", /* 23 whitespace-separated fields; check_io_probe_args.sh rejects any other count */
+ probeprov, probemod, probefunc, probename,
+ arg1, /* raw native arg 1, the source of args[2]; the checker expects 0 */
+
+ args[0]->b_flags,
+
+ args[0]->b_bcount,
+ args[0]->b_bufsize,
+
+ args[0]->b_addr,
+ args[0]->b_resid,
+ args[0]->b_error,
+
+ args[0]->b_lblkno,
+ args[0]->b_blkno,
+
+ args[0]->b_iodone,
+
+ args[0]->b_edev,
+ getmajor(args[0]->b_edev),
+ getminor(args[0]->b_edev),
+
+ args[1]->dev_major,
+ args[1]->dev_minor,
+ args[1]->dev_instance,
+ args[1]->dev_name,
+ args[1]->dev_statname,
+ args[1]->dev_pathname);
+}
diff --git a/test/unittest/io/tst.local.sh b/test/unittest/io/tst.local.sh
index b6061fd6..bb3a9033 100755
--- a/test/unittest/io/tst.local.sh
+++ b/test/unittest/io/tst.local.sh
@@ -10,7 +10,6 @@
# Test the io:::start probe for write and read operations by creating
# a file and reading it back after clearing the caches.
#
-# @@xfail: dtv2
dtrace=$1
nblocks=1024
@@ -25,7 +24,7 @@ tempfile=`mktemp -u -p $iodir`
trap "umount $iodir; rmdir $iodir; rm -f $iodir.img" QUIT EXIT
# create loopback file system
-dd if=/dev/zero of=$iodir.img bs=1024 count=$((16*$nblocks)) status=none
+dd if=/dev/zero of=$iodir.img bs=1024 count=$((300*$nblocks)) status=none
mkfs.$fstype $iodir.img > /dev/null
mkdir $iodir
test/triggers/io-mount-local.sh $iodir $fstype $fsoptions
diff --git a/test/unittest/io/tst.local2.sh b/test/unittest/io/tst.local2.sh
new file mode 100755
index 00000000..62cc06b4
--- /dev/null
+++ b/test/unittest/io/tst.local2.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+
+#
+# Dump the arguments of all io probes while a file on a loopback XFS file
+# system is written out and read back, then validate the dumped records.
+#
+
+rundt="$1 $dt_flags -qs $PWD/test/unittest/io/dump_io_probe_args.d -c"  # dtrace command prefix; the command to trace is appended after -c
+check_args=$PWD/test/unittest/io/check_io_probe_args.sh  # checker script run on each log below
+retval=0  # set to 1 as soon as any check fails
+
+DIRNAME="$tmpdir/io-local2.$$.$RANDOM"  # per-run scratch directory
+mkdir -p $DIRNAME
+cd $DIRNAME  # log.write, log.read and awk.txt are created relative to this directory
+
+filesize=$((512*1024))  # size of the test file in bytes
+
+fsoptions="loop,defaults,atime,diratime,nosuid,nodev"  # options passed to mount -o
+iodir=`mktemp -u`  # -u only generates a name; the mount point is created by mkdir below
+tempfile=`mktemp -u -p $iodir`  # name of the test file inside the mount point
+
+trap "rm -f $tempfile; umount $iodir; rmdir $iodir; rm -f $iodir.img" QUIT  # NOTE(review): only QUIT is trapped here; tst.local.sh traps "QUIT EXIT"
+
+dd if=/dev/zero of=$iodir.img bs=1024 count=$((300*1024)) status=none  # zero-filled image file of 300*1024 KiB
+mkfs.xfs $iodir.img > /dev/null  # create an XFS file system in the image
+ mkdir $iodir
+ mount -t xfs -o $fsoptions $iodir.img $iodir
+ devnam=`losetup -j $iodir.img | awk 'BEGIN { FS = ":" } ; {print $1}'`  # text before the first ":" of the losetup -j line (the loop device path)
+ statname=`basename $devnam`  # compared against field 22 of the dump in the awk check
+
+ dd if=/dev/urandom of=$tempfile count=$filesize bs=1 status=none  # fill the test file with $filesize random bytes
+ $rundt "umount $iodir" -o log.write  # trace while umount runs (presumably this is when the data is written out); output in log.write
+ mount -t xfs -o $fsoptions $iodir.img $iodir  # mount again before reading (presumably so the read is not served from cache)
+ $rundt "sum $tempfile" -o log.read  # trace while the file is read back; output in log.read
+ rm -f $tempfile
+ umount $iodir
+ rmdir $iodir
+rm -f $iodir.img
+
+# check the DTrace output
+
+$check_args log.write  # run check_io_probe_args.sh on the write log
+if [ $? -ne 0 ]; then
+ retval=1
+fi
+$check_args log.read  # run check_io_probe_args.sh on the read log
+if [ $? -ne 0 ]; then
+ retval=1
+fi
+
+cat > awk.txt << EOF  # unquoted EOF: the shell expands $statname and $filesize now; each escaped \$N reaches awk as a field reference
+# initialize
+BEGIN { err = 0; bytes = 0; nrec = 0 }
+
+# skip over uninteresting records
+NF == 0 { next }
+\$14 != myiodone { next }
+\$22 != "$statname" { next }
+
+# check
+\$4 != "start" &&
+\$4 != "done" { print "probe name should be start or done"; err = 1 }
+\$6 != myflags { print "flags are wrong"; err = 1 }
+\$4 == "start" { bytes += \$7; nrec++ }
+\$21 != "loop" { print "name is wrong"; err = 1 }
+END {
+ if (bytes != $filesize) {
+ print "total bytes should match filesize", bytes, $filesize;
+ err = 1;
+ }
+ if (nrecflag == 1 && nrec != 1) {
+ print "expected one record";
+ err = 1;
+ }
+ exit(err);
+}
+EOF
+
+myaddr=`awk '$4 == "xfs_end_bio" {print $1}' /proc/kallmodsyms`  # first column of the xfs_end_bio line; matched against field 14 (b_iodone)
+awk -v myflags=520 -v nrecflag=1 -v myiodone=$myaddr -f awk.txt log.write  # nrecflag=1: exactly one "start" record is required
+if [ $? -ne 0 ]; then
+ echo post-processing error log.write
+ cat log.write  # show the raw log to help diagnose the failure
+ retval=1
+fi
+
+myaddr=`awk '$4 == "iomap_read_end_io" {print $1}' /proc/kallmodsyms`  # first column of the iomap_read_end_io line; matched against field 14 (b_iodone)
+awk -v myflags=460 -v nrecflag=2 -v myiodone=$myaddr -f awk.txt log.read  # nrecflag=2: the record-count check is not applied
+if [ $? -ne 0 ]; then
+ echo post-processing error log.read
+ cat log.read  # show the raw log to help diagnose the failure
+ retval=1
+fi
+
+exit $retval
diff --git a/test/unittest/io/tst.lv-done.r b/test/unittest/io/tst.lv-done.r
new file mode 100644
index 00000000..35f539da
--- /dev/null
+++ b/test/unittest/io/tst.lv-done.r
@@ -0,0 +1,17 @@
+PROBE io vmlinux done
+
+ Probe Description Attributes
+ Identifier Names: Private
+ Data Semantics: Private
+ Dependency Class: Unknown
+
+ Argument Attributes
+ Identifier Names: Evolving
+ Data Semantics: Evolving
+ Dependency Class: ISA
+
+ Argument Types
+ args[0]: bufinfo_t *
+ args[1]: devinfo_t *
+ args[2]: fileinfo_t *
+
diff --git a/test/unittest/io/tst.lv-done.r.p b/test/unittest/io/tst.lv-done.r.p
new file mode 100755
index 00000000..c538e345
--- /dev/null
+++ b/test/unittest/io/tst.lv-done.r.p
@@ -0,0 +1,5 @@
+#!/usr/bin/awk -f
+NR == 1 { next; }  # drop the first input line (presumably the column header of the dtrace listing)
+NR == 2 { print "PROBE", $2, $3, $NF; next; }  # keep fields 2, 3 and the last; field 1 (presumably the probe ID) is dropped
+/^ *[0-9]+/ { exit; }  # stop at the first later line that starts with a number
+{ print; }  # every other line is passed through unchanged
diff --git a/test/unittest/io/tst.lv-done.sh b/test/unittest/io/tst.lv-done.sh
new file mode 100755
index 00000000..33948324
--- /dev/null
+++ b/test/unittest/io/tst.lv-done.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+
+dtrace=$1  # first argument: the dtrace command to run
+
+$dtrace $dt_flags -lvn io:::done  # list the io:::done probe with -l -v (verbose listing)
+exit $?  # propagate dtrace's exit status
diff --git a/test/unittest/io/tst.lv-start.r b/test/unittest/io/tst.lv-start.r
new file mode 100644
index 00000000..d2ee9666
--- /dev/null
+++ b/test/unittest/io/tst.lv-start.r
@@ -0,0 +1,17 @@
+PROBE io vmlinux start
+
+ Probe Description Attributes
+ Identifier Names: Private
+ Data Semantics: Private
+ Dependency Class: Unknown
+
+ Argument Attributes
+ Identifier Names: Evolving
+ Data Semantics: Evolving
+ Dependency Class: ISA
+
+ Argument Types
+ args[0]: bufinfo_t *
+ args[1]: devinfo_t *
+ args[2]: fileinfo_t *
+
diff --git a/test/unittest/io/tst.lv-start.r.p b/test/unittest/io/tst.lv-start.r.p
new file mode 120000
index 00000000..4a56a9d3
--- /dev/null
+++ b/test/unittest/io/tst.lv-start.r.p
@@ -0,0 +1 @@
+tst.lv-done.r.p
\ No newline at end of file
diff --git a/test/unittest/io/tst.lv-start.sh b/test/unittest/io/tst.lv-start.sh
new file mode 100755
index 00000000..4b8f1248
--- /dev/null
+++ b/test/unittest/io/tst.lv-start.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+
+dtrace=$1  # first argument: the dtrace command to run
+
+$dtrace $dt_flags -lvn io:::start  # list the io:::start probe with -l -v (verbose listing)
+exit $?  # propagate dtrace's exit status
diff --git a/test/unittest/io/tst.lv-wait-done.r b/test/unittest/io/tst.lv-wait-done.r
new file mode 100644
index 00000000..77f05e9f
--- /dev/null
+++ b/test/unittest/io/tst.lv-wait-done.r
@@ -0,0 +1,17 @@
+PROBE io vmlinux wait-done
+
+ Probe Description Attributes
+ Identifier Names: Private
+ Data Semantics: Private
+ Dependency Class: Unknown
+
+ Argument Attributes
+ Identifier Names: Evolving
+ Data Semantics: Evolving
+ Dependency Class: ISA
+
+ Argument Types
+ args[0]: bufinfo_t *
+ args[1]: devinfo_t *
+ args[2]: fileinfo_t *
+
diff --git a/test/unittest/io/tst.lv-wait-done.r.p b/test/unittest/io/tst.lv-wait-done.r.p
new file mode 120000
index 00000000..4a56a9d3
--- /dev/null
+++ b/test/unittest/io/tst.lv-wait-done.r.p
@@ -0,0 +1 @@
+tst.lv-done.r.p
\ No newline at end of file
diff --git a/test/unittest/io/tst.lv-wait-done.sh b/test/unittest/io/tst.lv-wait-done.sh
new file mode 100755
index 00000000..2187fa1f
--- /dev/null
+++ b/test/unittest/io/tst.lv-wait-done.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+
+dtrace=$1  # first argument: the dtrace command to run
+
+$dtrace $dt_flags -lvn io:::wait-done  # list the io:::wait-done probe with -l -v (verbose listing)
+exit $?  # propagate dtrace's exit status
diff --git a/test/unittest/io/tst.lv-wait-start.r b/test/unittest/io/tst.lv-wait-start.r
new file mode 100644
index 00000000..56f1b607
--- /dev/null
+++ b/test/unittest/io/tst.lv-wait-start.r
@@ -0,0 +1,17 @@
+PROBE io vmlinux wait-start
+
+ Probe Description Attributes
+ Identifier Names: Private
+ Data Semantics: Private
+ Dependency Class: Unknown
+
+ Argument Attributes
+ Identifier Names: Evolving
+ Data Semantics: Evolving
+ Dependency Class: ISA
+
+ Argument Types
+ args[0]: bufinfo_t *
+ args[1]: devinfo_t *
+ args[2]: fileinfo_t *
+
diff --git a/test/unittest/io/tst.lv-wait-start.r.p b/test/unittest/io/tst.lv-wait-start.r.p
new file mode 120000
index 00000000..4a56a9d3
--- /dev/null
+++ b/test/unittest/io/tst.lv-wait-start.r.p
@@ -0,0 +1 @@
+tst.lv-done.r.p
\ No newline at end of file
diff --git a/test/unittest/io/tst.lv-wait-start.sh b/test/unittest/io/tst.lv-wait-start.sh
new file mode 100755
index 00000000..b6b8e84b
--- /dev/null
+++ b/test/unittest/io/tst.lv-wait-start.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+
+dtrace=$1  # first argument: the dtrace command to run
+
+$dtrace $dt_flags -lvn io:::wait-start  # list the io:::wait-start probe with -l -v (verbose listing)
+exit $?  # propagate dtrace's exit status
diff --git a/test/unittest/io/tst.nfs.sh b/test/unittest/io/tst.nfs.sh
index f9222ff6..4e368244 100755
--- a/test/unittest/io/tst.nfs.sh
+++ b/test/unittest/io/tst.nfs.sh
@@ -9,7 +9,6 @@
# Test the io:::start probe for write and read operations by creating
# a file and reading it back after clearing the caches.
#
-# @@xfail: dtv2
dtrace=$1
filesize=$((1024*1024))
@@ -23,7 +22,8 @@ statname="nfs"
trap "rm -f $tempfile; umount $clientpath; rmdir $clientpath; exportfs -u 127.0.0.1:$serverpath; rmdir $serverpath" QUIT EXIT
# setup NFS server
-service nfs start > /dev/null 2>&1
+#service nfs start > /dev/null 2>&1
+systemctl enable --now nfs-server > /dev/null 2>&1
mkdir $serverpath
exportfs -i -v -o "rw,sync,no_root_squash,insecure,fsid=8434437287" 127.0.0.1:$serverpath > /dev/null
diff --git a/test/unittest/io/tst.nfs2.sh b/test/unittest/io/tst.nfs2.sh
new file mode 100755
index 00000000..273ecef4
--- /dev/null
+++ b/test/unittest/io/tst.nfs2.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+
+#
+# Dump the arguments of all io probes while a file on an NFS mount of a
+# local export is written and read back, then validate the dumped records.
+#
+
+rundt="$1 $dt_flags -qs $PWD/test/unittest/io/dump_io_probe_args.d -c"  # dtrace command prefix; the command to trace is appended after -c
+check_args=$PWD/test/unittest/io/check_io_probe_args.sh  # checker script run on each log below
+retval=0  # set to 1 as soon as any check fails
+
+DIRNAME="$tmpdir/io-nfs2.$$.$RANDOM"  # per-run scratch directory
+mkdir -p $DIRNAME
+cd $DIRNAME  # log.write, log.read and awk.txt are created relative to this directory
+
+filesize=$((1024*1024))  # size of the test file in bytes
+
+exdir=`mktemp -u`  # name of the directory to export; created by mkdir below
+iodir=`mktemp -u`  # name of the client mount point; created by mkdir below
+tempfile=`mktemp -u -p $iodir`  # name of the test file inside the mount point
+
+trap "rm -f $tempfile; umount $iodir; rmdir $iodir; exportfs -u 127.0.0.1:$exdir; rmdir $exdir" QUIT  # NOTE(review): only QUIT is trapped here; tst.nfs.sh traps "QUIT EXIT"
+
+systemctl enable --now nfs-server > /dev/null 2>&1  # NOTE(review): "enable --now" also enables the unit at boot, not just for this run; confirm that is intended
+
+mkdir $exdir
+ # what is the fsid?
+ exportfs -i -v -o "rw,sync,no_root_squash,insecure,fsid=8434437287" 127.0.0.1:$exdir > /dev/null  # export the directory to the local host
+ mkdir $iodir
+ mount -t nfs -o nfsvers=3 127.0.0.1:$exdir $iodir  # mount the export over NFS version 3
+ $rundt "dd if=/dev/urandom of=$tempfile count=$filesize bs=1 status=none" -o log.write  # trace while the file is written; output in log.write
+ myinode=`stat $tempfile | awk '/ Inode: / {print $4}'`  # inode number of the test file; compared against field 12 in the awk check
+ umount $iodir
+ # remount so that data is not cached
+ mount -t nfs -o nfsvers=3 127.0.0.1:$exdir $iodir
+ $rundt "sum $tempfile" -o log.read  # trace while the file is read back; output in log.read
+ rm -f $tempfile
+ umount $iodir
+ rmdir $iodir
+ exportfs -u 127.0.0.1:$exdir
+rmdir $exdir
+
+# check the DTrace output
+
+$check_args log.write  # run check_io_probe_args.sh on the write log
+if [ $? -ne 0 ]; then
+ retval=1
+fi
+$check_args log.read  # run check_io_probe_args.sh on the read log
+if [ $? -ne 0 ]; then
+ retval=1
+fi
+# NOTE(review): field 12 of the dump is b_lblkno; the "blknode" message below presumably refers to it - confirm the wording
+cat > awk.txt << EOF  # unquoted EOF: the shell expands $myinode and $filesize now; each escaped \$N reaches awk as a field reference
+# initialize
+BEGIN { err = 0; bytes = 0; nrec = 0 }
+
+# skip over uninteresting records
+NF == 0 { next }
+\$6 != myflags { next }
+\$22 != "nfs" { next }
+
+# check
+\$4 != "start" &&
+\$4 != "done" { print "probe name should be start or done"; err = 1 }
+\$4 == "start" { bytes += \$7; nrec++ }
+\$12 != "$myinode" { print "blknode should be inode"; err = 1 }
+\$14 != 0 { print "iodone should be 0"; err = 1 }
+\$21 != "nfs" { print "name should be nfs"; err = 1 }
+END {
+ if (bytes != $filesize) {
+ print "total bytes should match filesize", bytes, $filesize;
+ err = 1;
+ }
+ if (nrecflag == 1 && nrec != 1) {
+ print "expected one record";
+ err = 1;
+ }
+ exit(err);
+}
+EOF
+
+awk -v myflags=520 -v nrecflag=1 -f awk.txt log.write  # only records whose field 6 equals 520 are examined; exactly one "start" record is required
+if [ $? -ne 0 ]; then
+ echo post-processing error log.write
+ cat log.write  # show the raw log to help diagnose the failure
+ retval=1
+fi
+
+awk -v myflags=460 -v nrecflag=2 -f awk.txt log.read  # only records whose field 6 equals 460 are examined; the record-count check is not applied
+if [ $? -ne 0 ]; then
+ echo post-processing error log.read
+ cat log.read  # show the raw log to help diagnose the failure
+ retval=1
+fi
+
+exit $retval
diff --git a/test/unittest/io/tst.wait.sh b/test/unittest/io/tst.wait.sh
index 7ef0abae..3765a3b9 100755
--- a/test/unittest/io/tst.wait.sh
+++ b/test/unittest/io/tst.wait.sh
@@ -8,7 +8,6 @@
#
# Test the io:::wait-start and io:::wait-done probes.
#
-# @@xfail: dtv2
dtrace=$1
nblocks=1024
@@ -22,7 +21,7 @@ tempfile=`mktemp -u -p $iodir`
trap "umount $iodir; rmdir $iodir; rm -f $iodir.img" QUIT EXIT
# create loopback file system
-dd if=/dev/zero of=$iodir.img bs=1024 count=$((16*$nblocks)) status=none
+dd if=/dev/zero of=$iodir.img bs=1024 count=$((300*$nblocks)) status=none
mkfs.$fstype $iodir.img > /dev/null
mkdir $iodir
test/triggers/io-mount-local.sh $iodir $fstype $fsoptions
--
2.42.0
More information about the DTrace-devel
mailing list