[DTrace-devel] [PATCH v2 4/7] Implement the io provider
eugene.loh at oracle.com
eugene.loh at oracle.com
Thu Aug 24 23:22:39 UTC 2023
From: Eugene Loh <eugene.loh at oracle.com>
Two problems remain.
First, the io:::start probe is not fully implemented on UEK6.
One of its instrumentation points is in submit_bio_checks(),
but neither it nor its caller __submit_bio() is in
/sys/kernel/debug/tracing/available_filter_functions on UEK6.
Going higher, this call stack is inside a loop, meaning that a
higher-level fbt:::entry probe and the intended submit_bio_checks
entry will no longer be one-for-one. For the time being, the
implementation does not fully support io:::start on UEK6.
There also appear to be some test failures on OL9 ARM.
Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
libdtrace/Build | 2 +
libdtrace/dt_bpf_maps.h | 7 +
libdtrace/dt_open.c | 1 +
libdtrace/dt_prov_io.c | 696 ++++++++++++++++++++++++
libdtrace/dt_provider.h | 1 +
test/demo/io/applicat.d | 1 -
test/demo/io/iocpu.d | 1 -
test/demo/io/iothrough.d | 1 -
test/demo/io/whoio.d | 1 -
test/unittest/io/check_io_probe_args.sh | 273 ++++++++++
test/unittest/io/dump_io_probe_args.d | 47 ++
test/unittest/io/tst.fbt_probes.r | 8 +
test/unittest/io/tst.fbt_probes.sh | 20 +
test/unittest/io/tst.fbt_probes.x | 18 +
test/unittest/io/tst.local.sh | 1 -
test/unittest/io/tst.local.x | 1 +
test/unittest/io/tst.local2.sh | 100 ++++
test/unittest/io/tst.local2.x | 1 +
test/unittest/io/tst.lv-done.r | 17 +
test/unittest/io/tst.lv-done.r.p | 5 +
test/unittest/io/tst.lv-done.sh | 11 +
test/unittest/io/tst.lv-start.r | 17 +
test/unittest/io/tst.lv-start.r.p | 1 +
test/unittest/io/tst.lv-start.sh | 11 +
test/unittest/io/tst.lv-wait-done.r | 17 +
test/unittest/io/tst.lv-wait-done.r.p | 1 +
test/unittest/io/tst.lv-wait-done.sh | 11 +
test/unittest/io/tst.lv-wait-start.r | 17 +
test/unittest/io/tst.lv-wait-start.r.p | 1 +
test/unittest/io/tst.lv-wait-start.sh | 11 +
test/unittest/io/tst.nfs.sh | 4 +-
test/unittest/io/tst.nfs2.sh | 102 ++++
test/unittest/io/tst.nfs2.x | 1 +
test/unittest/io/tst.wait.sh | 1 -
test/unittest/io/tst.wait.x | 1 +
35 files changed, 1401 insertions(+), 8 deletions(-)
create mode 100644 libdtrace/dt_prov_io.c
create mode 100755 test/unittest/io/check_io_probe_args.sh
create mode 100644 test/unittest/io/dump_io_probe_args.d
create mode 100644 test/unittest/io/tst.fbt_probes.r
create mode 100755 test/unittest/io/tst.fbt_probes.sh
create mode 100755 test/unittest/io/tst.fbt_probes.x
create mode 120000 test/unittest/io/tst.local.x
create mode 100755 test/unittest/io/tst.local2.sh
create mode 120000 test/unittest/io/tst.local2.x
create mode 100644 test/unittest/io/tst.lv-done.r
create mode 100755 test/unittest/io/tst.lv-done.r.p
create mode 100755 test/unittest/io/tst.lv-done.sh
create mode 100644 test/unittest/io/tst.lv-start.r
create mode 120000 test/unittest/io/tst.lv-start.r.p
create mode 100755 test/unittest/io/tst.lv-start.sh
create mode 100644 test/unittest/io/tst.lv-wait-done.r
create mode 120000 test/unittest/io/tst.lv-wait-done.r.p
create mode 100755 test/unittest/io/tst.lv-wait-done.sh
create mode 100644 test/unittest/io/tst.lv-wait-start.r
create mode 120000 test/unittest/io/tst.lv-wait-start.r.p
create mode 100755 test/unittest/io/tst.lv-wait-start.sh
create mode 100755 test/unittest/io/tst.nfs2.sh
create mode 120000 test/unittest/io/tst.nfs2.x
create mode 120000 test/unittest/io/tst.wait.x
diff --git a/libdtrace/Build b/libdtrace/Build
index 7dc2d5d6..cc75d3c9 100644
--- a/libdtrace/Build
+++ b/libdtrace/Build
@@ -49,6 +49,7 @@ libdtrace-build_SOURCES = dt_aggregate.c \
dt_prov_cpc.c \
dt_prov_dtrace.c \
dt_prov_fbt.c \
+ dt_prov_io.c \
dt_prov_ip.c \
dt_prov_lockstat.c \
dt_prov_proc.c \
@@ -97,6 +98,7 @@ dt_proc.c_CFLAGS := -Wno-pedantic
dt_prov_cpc.c_CFLAGS := -Wno-pedantic
dt_prov_dtrace.c_CFLAGS := -Wno-pedantic
dt_prov_fbt.c_CFLAGS := -Wno-pedantic
+dt_prov_io.c_CFLAGS := -Wno-pedantic
dt_prov_ip.c_CFLAGS := -Wno-pedantic
dt_prov_lockstat.c_CFLAGS := -Wno-pedantic
dt_prov_proc.c_CFLAGS := -Wno-pedantic
diff --git a/libdtrace/dt_bpf_maps.h b/libdtrace/dt_bpf_maps.h
index 0dd36b16..80b497c3 100644
--- a/libdtrace/dt_bpf_maps.h
+++ b/libdtrace/dt_bpf_maps.h
@@ -31,6 +31,8 @@ struct dt_bpf_specs {
* drain this buffer */
};
+#define IO_BIO_SIZ 256
+#define IO_BIO_STK 4
typedef struct dt_bpf_cpuinfo dt_bpf_cpuinfo_t;
struct dt_bpf_cpuinfo {
cpuinfo_t ci;
@@ -40,6 +42,11 @@ struct dt_bpf_cpuinfo {
uint64_t lockstat_bfrom; /* lockstat: block time start */
uint64_t lockstat_btime; /* lockstat: block time */
uint64_t lockstat_stime; /* lockstat: spin time */
+ uint64_t io_bio_ptr_wait; /* io: bio pointer (submit_bio_wait) */
+ uint64_t io_bio_ptr_checks; /* io: bio pointer (submit_bio_checks) */
+ uint64_t io_bio_ptr_endio[IO_BIO_STK]; /* io: bio pointer (bio_endio), to be followed immediately by io_bio_stk_n */
+ uint64_t io_bio_stk_n; /* io: bio pointer stack index */
+ char io_bio_fake[IO_BIO_SIZ]; /* io: bio fake struct */
};
#ifdef __cplusplus
diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
index 6d0a29f8..32059b33 100644
--- a/libdtrace/dt_open.c
+++ b/libdtrace/dt_open.c
@@ -68,6 +68,7 @@ static const dt_provimpl_t *dt_providers[] = {
&dt_dtrace, /* list dt_dtrace first */
&dt_cpc,
&dt_fbt,
+ &dt_io,
&dt_ip,
&dt_lockstat,
&dt_proc,
diff --git a/libdtrace/dt_prov_io.c b/libdtrace/dt_prov_io.c
new file mode 100644
index 00000000..1aa8846c
--- /dev/null
+++ b/libdtrace/dt_prov_io.c
@@ -0,0 +1,696 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ *
+ * The 'io' SDT provider for DTrace-specific probes.
+ *
+ * These io::: probes mimic the instrumentation in legacy DTrace.
+ * Specifically, all probes have three probe args:
+ * bufinfo_t *
+ * devinfo_t *
+ * fileinfo_t *
+ * but the trampoline really only needs to supply a bio pointer,
+ * from which translators will make the first two args. The fileinfo_t *
+ * is 0 on Linux.
+ *
+ * The bio pointer is passed into some functions and is easily captured
+ * if we are using an fbt:::entry probe on such a function. See
+ * DTRACE_IO() sites in the legacy implementation.
+ *
+ * The fbt:::entry probes on nfs_ and xfs_ functions, however, receive no
+ * bio pointer: they get only a hdr arg (nfs_) or a bp arg (xfs_). For them,
+ * we have a "fake struct bio", which the trampoline populates from that
+ * arg. See DTRACE_IO_NFS() and DTRACE_IO_XFS() sites in the legacy
+ * implementation.
+ *
+ * In some cases, we have to use fbt:::return probes, for which we no
+ * longer have the function's arguments. So, these cases rely on the
+ * corresponding entry probe to cache the bio pointer (or populate the
+ * fake bio), which the return probe can then retrieve.
+ *
+ * Unfortunately, more than one function may be active at any time (on a
+ * CPU). So the return function needs to know which bio pointer or fake
+ * bio to use. These rules are used:
+ *
+ * - For nfs_ and xfs_ functions, just use the fake bio.
+ *
+ * - For most other functions, use the bio pointer for that
+ * function.
+ *
+ * - For bio_endio, which might re-enter itself, keep a
+ * stack of bio pointers.
+ */
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+
+#include "dt_dctx.h"
+#include "dt_cg.h"
+#include "dt_provider_sdt.h"
+#include "dt_probe.h"
+
+/*
+ * Request opcodes stored into the fake bio's bi_opf field.
+ * Defined in include/linux/blk_types.h
+ */
+#define REQ_OP_READ 0
+#define REQ_OP_WRITE 1
+/*
+ * Flag tested in xfs_buf.b_flags to choose between the two opcodes above.
+ * Defined in fs/xfs/xfs_buf.h
+ */
+#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
+
+/* Provider and module names handed to dt_sdt_populate() by populate(). */
+static const char prvname[] = "io";
+static const char modname[] = "vmlinux"; // FIXME: Really? Or blank?
+
+/*
+ * Dependencies of the io probes on underlying fbt probes.
+ *
+ * Each entry gives the io probe name, the kind of probe specification, and
+ * the fbt specification itself. DTRACE_PROBESPEC_NAME entries spell out a
+ * complete probe description (always an :entry probe here).
+ * DTRACE_PROBESPEC_FUNC entries stop at the function name; trampoline()
+ * handles both the "entry" and the "return" probe of those functions.
+ * The table is terminated by an entry with a NULL name.
+ *
+ * If the set of functions in the fbt probes changes,
+ * update the list in test/unittest/io/tst.fbt_probes.r.
+ */
+static probe_dep_t probes[] = {
+ /* wait-start: fires on entry, using the function's own argument */
+ { "wait-start",
+ DTRACE_PROBESPEC_NAME, "fbt::submit_bio_wait:entry" },
+ { "wait-start",
+ DTRACE_PROBESPEC_NAME, "fbt::xfs_buf_iowait:entry" },
+ /* wait-done: entry caches state, return retrieves it */
+ { "wait-done",
+ DTRACE_PROBESPEC_FUNC, "fbt::submit_bio_wait" },
+ { "wait-done",
+ DTRACE_PROBESPEC_FUNC, "fbt::xfs_buf_iowait" },
+ /* done */
+ { "done",
+ DTRACE_PROBESPEC_FUNC, "fbt::bio_endio" },
+ { "done",
+ DTRACE_PROBESPEC_NAME, "fbt::nfs_readpage_done:entry" },
+ { "done",
+ DTRACE_PROBESPEC_NAME, "fbt::nfs_writeback_done:entry" },
+ /* start */
+ { "start",
+ DTRACE_PROBESPEC_FUNC, "fbt::submit_bio_checks" },
+ { "start",
+ DTRACE_PROBESPEC_NAME, "fbt::nfs_initiate_read:entry" },
+ { "start",
+ DTRACE_PROBESPEC_NAME, "fbt::nfs_initiate_write:entry" }, /* or return? */
+ { NULL, }
+};
+
+/*
+ * All four probes have three probe args. The first two will be extracted
+ * by a translator from the (struct bio *) we supply. The (struct file *)
+ * we supply will be 0 in all cases.
+ *
+ * Each entry gives the probe name, the probe argument index, and a
+ * description of that argument ending in its native type and its
+ * translated type.
+ * NOTE(review): the first number inside the inner braces is presumably the
+ * native argument the probe argument maps to (args 0 and 1 both use 0, the
+ * bio pointer; arg 2 uses 1) -- confirm against probe_arg_t, and see the
+ * FIXME about this mapping in trampoline().
+ * The table is terminated by an entry with a NULL name.
+ */
+static probe_arg_t probe_args[] = {
+ { "start", 0, { 0, 0, "struct bio *", "bufinfo_t *" } },
+ { "start", 1, { 0, 0, "struct bio *", "devinfo_t *" } },
+ { "start", 2, { 1, 0, "struct file *", "fileinfo_t *", } },
+ { "done", 0, { 0, 0, "struct bio *", "bufinfo_t *" } },
+ { "done", 1, { 0, 0, "struct bio *", "devinfo_t *" } },
+ { "done", 2, { 1, 0, "struct file *", "fileinfo_t *", } },
+ { "wait-start", 0, { 0, 0, "struct bio *", "bufinfo_t *" } },
+ { "wait-start", 1, { 0, 0, "struct bio *", "devinfo_t *" } },
+ { "wait-start", 2, { 1, 0, "struct file *", "fileinfo_t *", } },
+ { "wait-done", 0, { 0, 0, "struct bio *", "bufinfo_t *" } },
+ { "wait-done", 1, { 0, 0, "struct bio *", "devinfo_t *" } },
+ { "wait-done", 2, { 1, 0, "struct file *", "fileinfo_t *", } },
+ { NULL, }
+};
+
+/*
+ * Stability attributes of the io provider, handed to dt_sdt_populate().
+ * NOTE(review): the five rows presumably follow the dtrace_pattr_t field
+ * order (provider, module, function, name, args) -- confirm against the
+ * dtrace_pattr_t declaration.
+ */
+static const dtrace_pattr_t pattr = {
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+};
+
+/*
+ * Register every "io" SDT probe with the SDT machinery.
+ *
+ * The provider name, module name, stability attributes, probe argument
+ * table, and fbt dependency table defined in this file are handed to
+ * dt_sdt_populate(); its result is returned unchanged.
+ */
+static int populate(dtrace_hdl_t *dtp)
+{
+	int	rc;
+
+	rc = dt_sdt_populate(dtp, prvname, modname, &dt_io, &pattr,
+			     probe_args, probes);
+
+	return rc;
+}
+
+/*
+ * Get a reference to the cpuinfo structure for the current CPU.
+ *
+ * Emits a lookup of key 0 in the "cpuinfo" map. If the lookup yields a
+ * null pointer, the generated code branches to exitlbl.
+ *
+ * Arguments are:
+ *
+ * dtp: DTrace handle, used to find the "cpuinfo" map identifier
+ * dlp: instruction list to append to
+ * exitlbl: label to branch to when the map lookup fails
+ *
+ * Clobbers %r0 through %r5
+ * Stores pointer to cpuinfo struct in %r6
+ */
+static void get_cpuinfo(dtrace_hdl_t *dtp, dt_irlist_t *dlp, uint_t exitlbl)
+{
+ dt_ident_t *idp = dt_dlib_get_map(dtp, "cpuinfo");
+
+ assert(idp != NULL);
+ /* %r1 = the cpuinfo map */
+ dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+ /* %r2 = address of the key, kept at DT_TRAMP_SP_BASE; the key is 0 */
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+ /* %r0 = address of the map value, or 0 if the lookup failed */
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+ /* Keep the pointer in %r6 for the caller. */
+ emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
+}
+
+/*
+ * Translate a scalar size in bytes into the BPF access-size code used by
+ * the load and store instruction macros.
+ *
+ * sz must be 1, 2, 4, or 8. Any other value is a programming error: it
+ * trips the assertion, and in builds where assertions are compiled out
+ * (NDEBUG) the process is aborted rather than letting control run off the
+ * end of this non-void function, which would be undefined behavior.
+ *
+ * Returns BPF_B, BPF_H, BPF_W, or BPF_DW.
+ */
+static int BPF_width(size_t sz)
+{
+	switch(sz) {
+	case 1: return BPF_B;
+	case 2: return BPF_H;
+	case 4: return BPF_W;
+	case 8: return BPF_DW;
+	default:
+		assert(0);
+		/* Not reached unless NDEBUG is defined. */
+		abort();
+	}
+}
+
+/*
+ * Generate BPF instructions to dereference the pointer in %r3.
+ *
+ * We often have to dereference a pointer. However, the pointer might
+ * not look safe to the BPF verifier. So we use bpf_probe_read() to
+ * copy to a safe location (use slot 0) and then load from there.
+ *
+ * Since we will use bpf_probe_read(), this code generation will assume
+ * that the source pointer is already in %r3. Nonetheless, we will allow
+ * a scalar offset to be added to it.
+ *
+ * Arguments are:
+ *
+ * dlp: instruction list to append to
+ * exitlbl: label to branch to if bpf_probe_read() returns a
+ * negative value
+ * ptr_off: scalar offset to add to %r3 before dereferencing
+ * read_width: width of the scalar being read;
+ * it must be 1, 2, 4, or 8
+ * out_reg: register where the read scalar will be placed;
+ * BPF_REG_0 <= out_reg <= BPF_REG_5
+ *
+ * Registers %r0-%r5 will be clobbered, with the loaded value
+ * appearing in out_reg. Passing BPF_REG_3 as out_reg lets callers chain
+ * dereferences: the loaded pointer becomes the source of the next call.
+ */
+static void deref_reg3(dt_irlist_t *dlp, uint_t exitlbl,
+ int ptr_off, int read_width, int out_reg)
+{
+ assert(out_reg >= BPF_REG_0 && out_reg <= BPF_REG_5);
+
+ /* Use slot 0 as temporary storage. */
+ emit(dlp, BPF_MOV_REG(BPF_REG_1, BPF_REG_FP));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, DT_TRAMP_SP_SLOT(0)));
+
+ /* Specify the width of the scalar. */
+ emit(dlp, BPF_MOV_IMM(BPF_REG_2, read_width));
+
+ /* The source address is already in %r3, but add offset, if any. */
+ if (ptr_off)
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, ptr_off));
+
+ /* Perform the copy and check for success. */
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_probe_read));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JSLT, BPF_REG_0, 0, exitlbl));
+
+ /* Load the result into the specified register. */
+ emit(dlp, BPF_LOAD(BPF_width(read_width), out_reg, BPF_REG_FP, DT_TRAMP_SP_SLOT(0)));
+}
+
+/*
+ * Zero out the entire fake struct bio area.
+ * We assume %r6 already points to the area.
+ *
+ * The number of bytes cleared is the size of "struct bio" as reported by
+ * the shared CTF data. Compilation is abandoned (longjmp with EDT_NOCTF)
+ * if there is no shared CTF, if "struct bio" cannot be found in it, or if
+ * the reported size exceeds IO_BIO_SIZ.
+ *
+ * The generated code calls bpf_probe_read(), so %r0-%r5 should be
+ * considered clobbered. The helper's return value is not checked.
+ */
+static void io_zero_bio(dtrace_hdl_t *dtp, dt_irlist_t *dlp)
+{
+ ctf_file_t *cfp = dtp->dt_shared_ctf;
+ ctf_id_t type;
+ size_t sz;
+
+ if (!cfp)
+ longjmp(yypcb->pcb_jmpbuf, EDT_NOCTF);
+
+ type = ctf_lookup_by_name(cfp, "struct bio");
+ if (type == CTF_ERR)
+ longjmp(yypcb->pcb_jmpbuf, EDT_NOCTF);
+
+ sz = ctf_type_size(cfp, type);
+ if (sz > IO_BIO_SIZ)
+ longjmp(yypcb->pcb_jmpbuf, EDT_NOCTF); /* FIXME change EDT_NOCTF */
+
+ /* %r1 = destination (the fake bio), %r2 = number of bytes */
+ emit(dlp, BPF_MOV_REG(BPF_REG_1, BPF_REG_6));
+ emit(dlp, BPF_MOV_IMM(BPF_REG_2, sz));
+ /*
+  * %r3 = source: the string table pointer from the dctx, advanced by
+  * ZERO_OFF. NOTE(review): the -1 immediate is presumably a placeholder
+  * that is resolved to the ZERO_OFF value later -- confirm.
+  */
+ emit(dlp, BPF_MOV_REG(BPF_REG_3, BPF_REG_9)); /* in trampoline, dctx is in %r9 */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_3, DCTX_STRTAB));
+ emite(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -1), dt_dlib_get_var(dtp, "ZERO_OFF"));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_probe_read));
+}
+
+/*
+ * For NFS events, we have to construct a fake struct bio,
+ * which we have to populate.
+ *
+ * Arguments are:
+ *
+ * dtp: DTrace handle
+ * dlp: instruction list to append to
+ * exitlbl: label the generated code branches to on failure
+ * ufunc: name of the underlying nfs_ function; it selects the
+ * opcode (names containing "read" are reads, all others are
+ * writes) and which argument holds hdr
+ *
+ * We end up with a pointer to the fake struct in %r6, and the generated
+ * code also stores that pointer as arg0.
+ */
+static void io_nfs_args(dtrace_hdl_t *dtp, dt_irlist_t *dlp, uint_t exitlbl, const char *ufunc)
+{
+ int off;
+ size_t siz;
+
+ /*
+ * Determine the various sizes and offsets we want.
+ *
+ * // Access these fields relative to &bio.
+ * struct bio bio = {
+ * .bi_opf = ...,
+ * .bi_iter.bi_size = ..., // struct bvec_iter bi_iter
+ * .bi_iter.bi_sector = ...,
+ * };
+ *
+ * // Access these fields relative to hdr.
+ * struct nfs_pgio_header *hdr;
+ * ... = hdr->args.count; // struct nfs_pgio_args args
+ * ... = hdr->res.count; // struct nfs_pgio_res res
+ */
+
+ /* Put pointer to the fake struct bio area in %r6. */
+ get_cpuinfo(dtp, dlp, exitlbl);
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, io_bio_fake)));
+
+ /* Zero out the entire fake struct bio area, pointed to by %r6. */
+ io_zero_bio(dtp, dlp);
+
+ /* Fill in bi_opf */
+ off = dt_cg_ctf_offsetof(dtp, "struct bio", "bi_opf", &siz);
+ if (strstr(ufunc, "read"))
+ emit(dlp, BPF_STORE_IMM(BPF_width(siz), BPF_REG_6, off, REQ_OP_READ));
+ else
+ emit(dlp, BPF_STORE_IMM(BPF_width(siz), BPF_REG_6, off, REQ_OP_WRITE));
+
+ /*
+ * bio.bi_iter.bi_size = hdr->foo.count;
+ *
+ * hdr is:
+ * - arg0 for nfs_initiate_[read|write]()
+ * - arg1 for nfs_[readpage|writeback]_done()
+ *
+ * and foo is "args" for the former, "res" for the latter.
+ */
+ if (strncmp(ufunc, "nfs_initiate_", 13) == 0) {
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
+ off = dt_cg_ctf_offsetof(dtp, "struct nfs_pgio_header", "args", NULL)
+ + dt_cg_ctf_offsetof(dtp, "struct nfs_pgio_args", "count", &siz);
+ } else {
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(1)));
+ off = dt_cg_ctf_offsetof(dtp, "struct nfs_pgio_header", "res", NULL)
+ + dt_cg_ctf_offsetof(dtp, "struct nfs_pgio_res", "count", &siz);
+ }
+ deref_reg3(dlp, exitlbl, off, siz, BPF_REG_0);
+ off = dt_cg_ctf_offsetof(dtp, "struct bio", "bi_iter", NULL)
+ + dt_cg_ctf_offsetof(dtp, "struct bvec_iter", "bi_size", &siz);
+ emit(dlp, BPF_STORE(BPF_width(siz), BPF_REG_6, off, BPF_REG_0));
+
+ /*
+ * bio.bi_iter.bi_sector = fileid of the struct nfs_inode that
+ * contains hdr->inode (as its vfs_inode member).
+ */
+ /* get hdr */
+ if (strncmp(ufunc, "nfs_initiate_", 13) == 0)
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
+ else
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(1)));
+
+ /* %r3 = hdr->inode */
+ off = dt_cg_ctf_offsetof(dtp, "struct nfs_pgio_header", "inode", &siz);
+ deref_reg3(dlp, exitlbl, off, siz, BPF_REG_3);
+
+ /*
+  * hdr->inode points at the vfs_inode member, so the offset of fileid
+  * is taken relative to vfs_inode rather than to the start of
+  * struct nfs_inode.
+  */
+ off = dt_cg_ctf_offsetof(dtp, "struct nfs_inode", "fileid", &siz)
+ - dt_cg_ctf_offsetof(dtp, "struct nfs_inode", "vfs_inode", NULL);
+
+ deref_reg3(dlp, exitlbl, off, siz, BPF_REG_0);
+
+ off = dt_cg_ctf_offsetof(dtp, "struct bio", "bi_iter", NULL)
+ + dt_cg_ctf_offsetof(dtp, "struct bvec_iter", "bi_sector", &siz);
+ emit(dlp, BPF_STORE(BPF_width(siz), BPF_REG_6, off, BPF_REG_0));
+
+ /* Pass a pointer to the space */
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_6));
+}
+
+/*
+ * For XFS events, we have to construct a fake struct bio,
+ * which we have to populate.
+ *
+ * Arguments are:
+ *
+ * dtp: DTrace handle
+ * dlp: instruction list to append to
+ * exitlbl: label the generated code branches to on failure
+ * ufunc: name of the underlying xfs_ function (not used here)
+ *
+ * The struct xfs_buf pointer (bp) is taken from arg0 of the underlying
+ * probe. Unlike io_nfs_args(), this function does not store the fake bio
+ * pointer as arg0; the caller does that when needed.
+ *
+ * We end up with a pointer to the fake struct in %r6.
+ */
+static void io_xfs_args(dtrace_hdl_t *dtp, dt_irlist_t *dlp, uint_t exitlbl, const char *ufunc)
+{
+ int off;
+ size_t siz;
+
+ /*
+ * Determine the various sizes and offsets we want.
+ *
+ * // Access these fields relative to &bio.
+ * struct bio bio = {
+ * .bi_opf = ...,
+ * .bi_iter.bi_size = ..., // struct bvec_iter bi_iter
+ * .bi_iter.bi_sector = ...,
+ * .bi_bdev = ...,
+ * };
+ *
+ * // Access these fields relative to bp.
+ * struct xfs_buf *bp;
+ * ... = (bp)->b_flags;
+ * ... = xfs_buf_daddr(bp);
+ * ... = (bp)->b_length;
+ * ... = (bp)->b_target->bt_bdev; // struct xfs_buftarg *b_target;
+ */
+
+ /* Put pointer to the fake struct bio area in %r6. */
+ get_cpuinfo(dtp, dlp, exitlbl);
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, io_bio_fake)));
+
+ /* Zero out the entire fake struct bio area, pointed to by %r6. */
+ io_zero_bio(dtp, dlp);
+
+ /* bio.bi_opf = (bp->b_flags & XBF_WRITE) ? REQ_OP_WRITE : REQ_OP_READ; */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
+ off = dt_cg_ctf_offsetof(dtp, "struct xfs_buf", "b_flags", &siz);
+ deref_reg3(dlp, exitlbl, off, siz, BPF_REG_0);
+ emit(dlp, BPF_ALU64_IMM(BPF_AND, BPF_REG_0, XBF_WRITE));
+ {
+ uint_t Lzero = dt_irlist_label(dlp);
+ uint_t Ldone = dt_irlist_label(dlp);
+
+ off = dt_cg_ctf_offsetof(dtp, "struct bio", "bi_opf", &siz);
+
+ /* Flag clear: go store READ. Flag set: store WRITE and skip. */
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, Lzero));
+ emit(dlp, BPF_STORE_IMM(BPF_width(siz), BPF_REG_6, off, REQ_OP_WRITE));
+ emit(dlp, BPF_JUMP(Ldone));
+ emitl(dlp, Lzero,
+ BPF_NOP());
+ emit(dlp, BPF_STORE_IMM(BPF_width(siz), BPF_REG_6, off, REQ_OP_READ));
+ emitl(dlp, Ldone,
+ BPF_NOP());
+ }
+
+ /*
+ * bio.bi_iter.bi_size = bp->b_length;
+ */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
+ off = dt_cg_ctf_offsetof(dtp, "struct xfs_buf", "b_length", &siz);
+ deref_reg3(dlp, exitlbl, off, siz, BPF_REG_0);
+ off = dt_cg_ctf_offsetof(dtp, "struct bio", "bi_iter", NULL)
+ + dt_cg_ctf_offsetof(dtp, "struct bvec_iter", "bi_size", &siz);
+ emit(dlp, BPF_STORE(BPF_width(siz), BPF_REG_6, off, BPF_REG_0));
+
+ /*
+ * bio.bi_iter.bi_sector = xfs_buf_daddr(bp);
+ *
+ * In fs/xfs/xfs_buf.h, we have
+ *
+ * xfs_daddr_t xfs_buf_daddr(struct xfs_buf *bp)
+ * {
+ * return bp->b_maps[0].bm_bn;
+ * }
+ *
+ * So that gives
+ * bio.bi_iter.bi_sector = bp->b_maps->bm_bn;
+ *
+ * include/linux/blk_types.h
+ * struct bio {
+ * [...]
+ * struct bvec_iter bi_iter;
+ * [...]
+ * }
+ * include/linux/bvec.h
+ * struct bvec_iter {
+ * sector_t bi_sector;
+ * [...]
+ * };
+ * fs/xfs/xfs_buf.h
+ * struct xfs_buf_map {
+ * xfs_daddr_t bm_bn;
+ * [...]
+ * };
+ * struct xfs_buf {
+ * [...]
+ * struct xfs_buf_map *b_maps;
+ * [...]
+ * }
+ */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
+ off = dt_cg_ctf_offsetof(dtp, "struct xfs_buf", "b_maps", &siz);
+ /* %r3 = bp->b_maps, then read bm_bn through it */
+ deref_reg3(dlp, exitlbl, off, siz, BPF_REG_3);
+ off = dt_cg_ctf_offsetof(dtp, "struct xfs_buf_map", "bm_bn", &siz);
+ deref_reg3(dlp, exitlbl, off, siz, BPF_REG_0);
+ off = dt_cg_ctf_offsetof(dtp, "struct bio", "bi_iter", NULL)
+ + dt_cg_ctf_offsetof(dtp, "struct bvec_iter", "bi_sector", &siz);
+ emit(dlp, BPF_STORE(BPF_width(siz), BPF_REG_6, off, BPF_REG_0));
+
+ /*
+ * bio.bi_bdev = (bp)->b_target->bt_bdev
+ */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
+ off = dt_cg_ctf_offsetof(dtp, "struct xfs_buf", "b_target", &siz);
+ assert(siz == sizeof(void *));
+ /* %r3 = bp->b_target; the read width is hard-coded to 8 bytes */
+ deref_reg3(dlp, exitlbl, off, 8, BPF_REG_3);
+ off = dt_cg_ctf_offsetof(dtp, "struct xfs_buftarg", "bt_bdev", &siz);
+ deref_reg3(dlp, exitlbl, off, siz, BPF_REG_0);
+ off = dt_cg_ctf_offsetof(dtp, "struct bio", "bi_bdev", &siz);
+ emit(dlp, BPF_STORE(BPF_width(siz), BPF_REG_6, off, BPF_REG_0));
+}
+
+/*
+ * Return the id of the special, private TLS variable that the io provider
+ * uses, creating the variable the first time it is asked for.
+ *
+ * The variable lives in the handle's TLS identifier hash under a name
+ * that contains spaces. It is given 8 bytes of storage with 8-byte
+ * alignment.
+ *
+ * Running out of variable ids is reported through xyerror(); failure to
+ * insert the identifier abandons compilation with EDT_NOMEM.
+ *
+ * This code mimics insertion in either:
+ * - dt_node_decl()
+ * - dt_xcook_ident(...)
+ */
+static uint_t get_id_TLS(dtrace_hdl_t *dtp)
+{
+	const char	name[] = "private TLS variable for io provider";
+	dt_idhash_t	*tls = dtp->dt_tls;
+	dt_ident_t	*ident;
+	uint_t		newid = 0;
+
+	/* An earlier call may already have created it: reuse its di_id. */
+	ident = dt_idhash_lookup(tls, name);
+	if (ident != NULL)
+		return ident->di_id;
+
+	/* Not there yet, so insert it. Flags and attributes hardly matter. */
+	if (dt_idhash_nextid(tls, &newid) == -1)
+		xyerror(D_ID_OFLOW, "cannot create %s: limit on number of %s variables exceeded\n", name, dt_idhash_name(tls));
+
+	ident = dt_idhash_insert(tls, name, 0, DT_IDFLG_TLS, newid, _dtrace_defattr, 0, NULL, NULL, 0);
+	if (ident == NULL)
+		longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
+
+	dt_ident_set_storage(ident, 8, 8);
+
+	return newid;
+}
+
+/*
+ * Generate a BPF trampoline for a SDT probe.
+ *
+ * The trampoline function is called when a SDT probe triggers, and it must
+ * satisfy the following prototype:
+ *
+ * int dt_io(void *data)
+ *
+ * The trampoline will populate a dt_dctx_t struct and then call the function
+ * that implements the compiled D clause. It returns the value that it gets
+ * back from that function.
+ *
+ * This function only emits the io-specific part: based on the io probe
+ * (pcb_probe) and the underlying fbt probe (pcb_parent_probe), it arranges
+ * for arg0 to hold a pointer to a real or fake struct bio and for arg2 to
+ * be 0. Probe arguments are accessed relative to %r7 via DMST_ARG().
+ *
+ * Arguments are:
+ *
+ * pcb: compilation control block supplying the handle, the
+ * instruction list, and the two probes
+ * exitlbl: label the generated code branches to when it must give up
+ *
+ * Returns 1 on the paths where the underlying probe only records state
+ * for a later probe to use (or does not apply to this io probe), and 0
+ * once the io probe arguments have been set up.
+ * NOTE(review): the caller presumably treats a nonzero return as "do not
+ * fire the io probe here" -- confirm against the dt_sdt code.
+ */
+static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
+{
+ dtrace_hdl_t *dtp = pcb->pcb_hdl;
+ dt_irlist_t *dlp = &pcb->pcb_ir;
+ dt_probe_t *prp = pcb->pcb_probe;
+ dt_probe_t *uprp = pcb->pcb_parent_probe;
+ size_t bio_ptr_off = 0;
+
+ /* Figure out the offset to the bio pointer we want to use. */
+
+ if (strcmp(uprp->desc->fun, "submit_bio_wait") == 0)
+ bio_ptr_off = offsetof(dt_bpf_cpuinfo_t, io_bio_ptr_wait);
+ else if (strcmp(uprp->desc->fun, "submit_bio_checks") == 0)
+ bio_ptr_off = offsetof(dt_bpf_cpuinfo_t, io_bio_ptr_checks);
+ else if (strcmp(uprp->desc->fun, "bio_endio") == 0)
+ bio_ptr_off = offsetof(dt_bpf_cpuinfo_t, io_bio_ptr_endio);
+
+ /* Handle different probe cases. */
+
+ if (strncmp(uprp->desc->fun, "nfs_", 4) == 0) {
+ /* NFS: build the fake bio; io_nfs_args() also sets arg0. */
+ io_nfs_args(dtp, dlp, exitlbl, uprp->desc->fun);
+ } else if (strncmp(uprp->desc->fun, "xfs_", 4) == 0 && strcmp(uprp->desc->prb, "entry") == 0) {
+ /* XFS entry: always (re)build the fake bio. */
+ io_xfs_args(dtp, dlp, exitlbl, uprp->desc->fun);
+
+ /* For any io probe other than wait-start, populating it is all we do. */
+ if (strcmp(prp->desc->prb, "wait-start") != 0)
+ return 1;
+
+ /* Pass a pointer to the space */
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_6));
+ } else if (strncmp(uprp->desc->fun, "xfs_", 4) == 0) {
+ /* wait done */
+
+ /* Put pointer to the fake struct bio area in %r6 */
+ get_cpuinfo(dtp, dlp, exitlbl);
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, io_bio_fake)));
+
+ /* Pass a pointer to the space */
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_6));
+ } else if (strcmp(prp->desc->prb, "wait-start") == 0) {
+ /* arg0 of the underlying probe is used as is. */
+#if 0
+ /* arg0 = arg0 is a no-op, this code is not needed */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(0)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
+#endif
+ } else if (strcmp(uprp->desc->fun, "bio_endio") == 0) {
+ /*
+  * bio_endio: entry pushes the bio pointer onto a small per-CPU
+  * stack, return pops it. The stack size is kept in bytes.
+  */
+ /* Determine stack bounds. */
+ size_t stk_n = offsetof(dt_bpf_cpuinfo_t, io_bio_stk_n);
+ int stk_min = 0;
+ int stk_max = stk_n - bio_ptr_off;
+
+ get_cpuinfo(dtp, dlp, exitlbl);
+
+ /* %r1 is the current stack size. */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, stk_n));
+
+ /*
+  * Check stack size: a push needs room for one more entry, a pop
+  * needs at least one entry. Otherwise, branch to exitlbl.
+  */
+ if (strcmp(uprp->desc->prb, "entry") == 0)
+ stk_max -= sizeof(uint64_t);
+ else
+ stk_min += sizeof(uint64_t);
+ emit(dlp, BPF_BRANCH_IMM(BPF_JLT, BPF_REG_1, stk_min, exitlbl));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JGT, BPF_REG_1, stk_max, exitlbl));
+
+ if (strcmp(uprp->desc->prb, "entry") == 0) {
+ /* %r0 = bio (first arg) */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(0)));
+
+ /* Push %r0 onto the stack. */
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_6));
+ emit(dlp, BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_2, bio_ptr_off, BPF_REG_0));
+
+ /* Update and save the stack size. */
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, sizeof(uint64_t)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, stk_n, BPF_REG_1));
+
+ return 1;
+ } else {
+ /* Update and save the stack size. */
+ emit(dlp, BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, sizeof(uint64_t)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, stk_n, BPF_REG_1));
+
+ /* Pop the stack into %r0. */
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_6));
+ emit(dlp, BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1));
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_2, bio_ptr_off));
+
+ /* Save %r0 as the first arg. */
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
+ }
+ } else if (strcmp(prp->desc->prb, "wait-done") == 0 && strcmp(uprp->desc->fun, "submit_bio_wait") == 0) {
+ /*
+ * For wait-done, we need to instrument submit_bio_wait(struct bio *).
+ * Upon entry, we store the bio pointer into a special TLS location.
+ * Upon return, we retrieve the pointer (and store a 0 back to the
+ * TLS variable). We use a TLS variable to distinguish among several
+ * submit_bio_wait() calls that may be pending concurrently on a CPU.
+ */
+ uint_t varid = get_id_TLS(dtp) - DIF_VAR_OTHER_UBASE;
+ dt_ident_t *fnp = dt_dlib_get_func(dtp, "dt_get_tvar");
+ dt_ident_t *zero_off = dt_dlib_get_var(dtp, "ZERO_OFF");
+
+ assert(fnp);
+ assert(zero_off);
+
+ /* If this is the return probe, retrieve the bio pointer from TLS. */
+ if (strcmp(uprp->desc->prb, "return") == 0) {
+ uint_t Lnull = dt_irlist_label(dlp);
+
+ /* Call dt_get_tvar() for our private io-provider TLS variable. */
+ emit(dlp, BPF_MOV_IMM(BPF_REG_1, varid));
+ emit(dlp, BPF_MOV_IMM(BPF_REG_2, 0));
+ emit(dlp, BPF_MOV_IMM(BPF_REG_3, 0));
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_9, DCTX_STRTAB));
+ emite(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -1), zero_off);
+ emite(dlp, BPF_CALL_FUNC(fnp->di_id), fnp);
+
+ /* If we got a nonzero address, load from it. */
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, Lnull));
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_0, 0));
+ emitl(dlp, Lnull,
+ BPF_NOP());
+
+ /* Store the retrieved value (bio pointer) as arg0. */
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
+ }
+
+ /*
+ * Store (update) the TLS copy of the bio pointer:
+ * - return probe: store 0 (clear TLS, freeing storage)
+ * - entry probe: store arg0 (for the return probe to use later)
+ */
+ emit(dlp, BPF_MOV_IMM(BPF_REG_1, varid));
+ emit(dlp, BPF_MOV_IMM(BPF_REG_2, 1));
+ if (strcmp(uprp->desc->prb, "return") == 0)
+ emit(dlp, BPF_MOV_IMM(BPF_REG_3, 0));
+ else
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_9, DCTX_STRTAB));
+ emite(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -1), zero_off);
+ emite(dlp, BPF_CALL_FUNC(fnp->di_id), fnp);
+
+ /*
+ * At this point, the entry probe only has the TLS variable
+ * address. It has yet actually to store arg0 there,
+ * provided the address is nonzero.
+ */
+ if (strcmp(uprp->desc->prb, "entry") == 0) {
+ uint_t Lnull = dt_irlist_label(dlp);
+
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, Lnull));
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, 0, BPF_REG_1));
+ emitl(dlp, Lnull,
+ BPF_NOP());
+
+ return 1;
+ }
+ } else {
+ /*
+  * Everything else: cache the bio pointer in the per-CPU cpuinfo
+  * slot selected above (bio_ptr_off) on entry, and fetch it back
+  * on return.
+  */
+ get_cpuinfo(dtp, dlp, exitlbl);
+
+ if (strcmp(uprp->desc->prb, "entry") == 0) {
+ /*
+ * Store the bio pointer in arg0 into the per-CPU cpuinfo
+ * structure referenced by %r6.
+ */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, bio_ptr_off, BPF_REG_1));
+
+ return 1;
+ } else {
+ /*
+ * Load the bio pointer from the per-CPU cpuinfo structure
+ * referenced by %r6 into arg0.
+ */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, bio_ptr_off));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
+
+ /*
+ * Reset the source location to 0.
+ */
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_6, bio_ptr_off, 0));
+ }
+ }
+
+ /*
+ * Note: DTrace does not currently support the use of fileinfo_t with io probes.
+ * In Oracle Linux, no information is readily accessible at the level
+ * where the io probes fire about the file where an I/O request originated.
+ */
+ /*
+ * FIXME: Given the "mapping"s in probe_args[] (above), I would have thought
+ * that to set arg2 here, we should write to arg1 (which is mapped to arg2).
+ * But apparently the correct thing to do is to write to arg2. Weird.
+ */
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(2), 0));
+
+ return 0;
+}
+
+/*
+ * Provider implementation record for the io provider.
+ *
+ * Probe population and the trampoline are specific to this file; enabling
+ * and probe info are delegated to the generic dt_sdt_* routines.
+ * NOTE(review): prog_type is BPF_PROG_TYPE_UNSPEC presumably because the
+ * BPF program type comes from the underlying probe -- confirm.
+ */
+dt_provimpl_t dt_io = {
+ .name = prvname,
+ .prog_type = BPF_PROG_TYPE_UNSPEC,
+ .populate = &populate,
+ .enable = &dt_sdt_enable,
+ .trampoline = &trampoline,
+ .probe_info = &dt_sdt_probe_info,
+};
diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
index 31ad028d..a50a2cf9 100644
--- a/libdtrace/dt_provider.h
+++ b/libdtrace/dt_provider.h
@@ -70,6 +70,7 @@ typedef struct dt_provimpl {
extern dt_provimpl_t dt_dtrace;
extern dt_provimpl_t dt_cpc;
extern dt_provimpl_t dt_fbt;
+extern dt_provimpl_t dt_io;
extern dt_provimpl_t dt_ip;
extern dt_provimpl_t dt_lockstat;
extern dt_provimpl_t dt_proc;
diff --git a/test/demo/io/applicat.d b/test/demo/io/applicat.d
index 64cdb6af..14170145 100644
--- a/test/demo/io/applicat.d
+++ b/test/demo/io/applicat.d
@@ -4,7 +4,6 @@
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/
-/* @@xfail: dtv2 */
io:::start
/execname == "soffice.bin" && args[2]->fi_name == "applicat.rdb"/
diff --git a/test/demo/io/iocpu.d b/test/demo/io/iocpu.d
index 7d29637d..c7b847e7 100644
--- a/test/demo/io/iocpu.d
+++ b/test/demo/io/iocpu.d
@@ -4,7 +4,6 @@
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/
-/* @@xfail: dtv2 */
#pragma D option quiet
diff --git a/test/demo/io/iothrough.d b/test/demo/io/iothrough.d
index 0290b12e..57fda7f0 100644
--- a/test/demo/io/iothrough.d
+++ b/test/demo/io/iothrough.d
@@ -4,7 +4,6 @@
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/
-/* @@xfail: dtv2 */
#pragma D option quiet
diff --git a/test/demo/io/whoio.d b/test/demo/io/whoio.d
index 17f7db54..d5fc444f 100644
--- a/test/demo/io/whoio.d
+++ b/test/demo/io/whoio.d
@@ -4,7 +4,6 @@
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/
-/* @@xfail: dtv2 */
#pragma D option quiet
diff --git a/test/unittest/io/check_io_probe_args.sh b/test/unittest/io/check_io_probe_args.sh
new file mode 100755
index 00000000..d8e6d264
--- /dev/null
+++ b/test/unittest/io/check_io_probe_args.sh
@@ -0,0 +1,273 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+
+#
+# @@skip: not used directly by the test harness; called by other scripts
+#
+
+infile=$1
+retval=0
+
+echo check_io_probe_args $infile
+
+#
+# Start with some basic checks on the io probe args.
+#
+
+gawk '
+BEGIN {
+ err = 0; # set to 1 if we encounter any errors
+ nrecs = 0;
+}
+
+NF == 0 { next } # skip empty lines
+
+NF != 23 { err = 1; print "garbled input: " $0; next }
+
+{
+ nrecs++;
+
+ myprobeprov = $1
+ myprobemod = $2
+ myprobefunc = $3
+ myprobename = $4
+ myarg2 = $5
+ myb_flags = $6
+ myb_bcount = $7
+ myb_bufsize = $8
+ myb_addr = $9
+ myb_resid = $10
+ myb_error = $11
+ myb_lblkno = $12
+ myb_blkno = $13
+ myb_iodone = $14
+ myb_edev = $15
+ myb_major = $16
+ myb_minor = $17
+ mydev_major = $18
+ mydev_minor = $19
+ mydev_instance = $20
+ mydev_name = $21
+ mydev_statname = $22
+ mydev_pathname = $23
+
+ # Check probe description.
+
+ if (myprobeprov != "io:") { err = 1; print "provider is not io, got", myprobeprov }
+ if (myprobemod != "vmlinux:") { err = 1; print "module is not vmlinux, got", myprobemod }
+ if (myprobefunc != ":") { err = 1; print "function is not blank, got", myprobefunc }
+ if (myprobename != "wait-start" &&
+ myprobename != "wait-done" &&
+ myprobename != "start" &&
+ myprobename != "done") { err = 1; print "name is unrecognized", myprobename }
+
+ # Check that arg2 is 0.
+ if (myarg2 != 0) { err = 1; print "arg2 should be 0, got", myarg2 }
+
+ # Check for a legal set of flags (myb_flags is a hex string, hence %s in messages).
+ {
+ B_PAGEIO = 0x000010;
+ B_PHYS = 0x000020;
+ B_READ = 0x000040;
+ B_WRITE = 0x000100;
+ B_ASYNC = 0x000400;
+ tmp = strtonum("0x"myb_flags);
+
+ # B_ASYNC may be set.
+ if (and(tmp, B_ASYNC) != 0) tmp -= B_ASYNC;
+
+ # B_WRITE or else B_READ must be set.
+ nflags = 0;
+ if (and(tmp, B_WRITE) != 0) {
+ tmp -= B_WRITE;
+ nflags++;
+ }
+ if (and(tmp, B_READ) != 0) {
+ tmp -= B_READ;
+ nflags++;
+ }
+ if (nflags != 1) {
+ printf "flags %s must be read or else write\n", myb_flags;
+ err = 1;
+ }
+
+ # B_PAGEIO or else B_PHYS must be set.
+ nflags = 0;
+ if (and(tmp, B_PAGEIO) != 0) {
+ tmp -= B_PAGEIO;
+ nflags++;
+ }
+ if (and(tmp, B_PHYS) != 0) {
+ tmp -= B_PHYS;
+ nflags++;
+ }
+ if (nflags != 1) {
+ printf "flags %s must be pageio or else phys\n", myb_flags;
+ err = 1;
+ }
+
+ # Check for any other flags (tmp now holds only the unrecognized bits).
+ if (tmp != 0) {
+ printf "flags %s has some unexpected flags %x set\n", myb_flags, tmp;
+ err = 1;
+ }
+ }
+
+ # FIXME: can we add a check for myb_bcount?
+
+ if (myb_bufsize != myb_bcount) { err = 1; print "bcount and bufsize do not match", myb_bcount, myb_bufsize }
+
+ if (myb_addr != "0") { err = 1; print "b_addr is not 0:", myb_addr }
+ if (myb_resid != "0") { err = 1; print "b_resid is not 0:", myb_resid }
+ if (myb_error != "0") { err = 1; print "b_error is not 0:", myb_error }
+
+ # FIXME: can we add a check for myb_lblkno?
+
+ if (myb_blkno != myb_lblkno) { err = 1; print "lblkno and blkno do not match", myb_lblkno, myb_blkno }
+
+ # FIXME: can we add a check for myb_iodone?
+ # FIXME: can we add a check for myb_edev?
+
+ if ( myb_major != rshift(myb_edev, 20)) { err = 1; print "b_major inconsistent with edev", myb_major, myb_edev }
+ if ( myb_minor != and(myb_edev, 0xfffff)) { err = 1; print "b_minor inconsistent with edev", myb_minor, myb_edev }
+
+ if (mydev_major != myb_major) { err = 1; print "b_major and dev_major do not match", myb_major, mydev_major }
+ if (mydev_minor != myb_minor) { err = 1; print "b_minor and dev_minor do not match", myb_minor, mydev_minor }
+
+ if (mydev_instance != 0) { err = 1; print "dev_instance is not 0", mydev_instance }
+
+ # FIXME: can we add a check for mydev_name?
+ # FIXME: can we add a check for mydev_statname?
+ # FIXME: can we add a check for mydev_pathname?
+}
+END {
+ if (nrecs == 0) { err = 1; print "no records found" }
+ exit(err);
+}
+' $infile
+if [ $? -ne 0 ]; then
+ retval=1
+ cat $infile
+ exit $retval
+fi
+
+#
+# Check that all iodone PCs are 0 or else correspond to end*io functions.
+#
+
+if [ $UID -ne 0 ]; then
+ echo skipping iodone check since must be root to read PCs in kallmodsyms
+ retval=1
+else
+ for pc in `gawk 'NF == 23 { print $14 }' $infile | grep -wv 0 | sort | uniq`; do
+ gawk '$1 == "'$pc'" && /end.*io/ { found = 1; exit }
+ END { exit(found) }' /proc/kallmodsyms
+ if [ $? -eq 0 ]; then
+ echo $pc, " is not an end-io function"
+ grep $pc /proc/kallmodsyms
+ retval=1
+ fi
+ done
+fi
+
+#
+# For each statname, check that the reported major/minor numbers agree with "ls -l".
+#
+
+while read mymajor myminor mystatname; do
+ read mymajor0 myminor0 <<< "$(ls -l /dev | gawk '$NF == "'$mystatname'" { print $(NF-5), $(NF-4) }' | tr ',' ' ')"
+
+ if [ "x$mymajor0" == "x" ]; then
+ mymajor0="0"
+ fi
+ if [ "x$myminor0" == "x" ]; then
+ myminor0="0"
+ fi
+
+ if [ "$mymajor" != "$mymajor0" ] || [ "$myminor" != "$myminor0" ]; then
+ echo ERROR: for $mystatname expect device major minor $mymajor $myminor but got $mymajor0 $myminor0
+ retval=1
+ fi
+done <<< "$(gawk 'NF == 23 { print $16, $17, $22 }' $infile | sort | uniq)"
+
+#
+# For each major number, check name.
+#
+
+while read mymajor myname; do
+ $dtrace $dt_flags -qn '
+ BEGIN {
+ trace(stringof(`major_names['$mymajor' % 255]->name));
+ exit(0);
+ }
+ ERROR { trace("nfs"); exit(0); }' -o chkmajnam.txt >& /dev/null
+ myname0=`cat chkmajnam.txt`
+ rm -f chkmajnam.txt
+ echo check major $mymajor is $myname and $myname0
+ if [ $myname != $myname0 ]; then
+ echo ERROR: for $mymajor expect name $myname0 but got $myname
+ retval=1
+ fi
+done <<< "$(gawk 'NF == 23 { print $16, $21 }' $infile | sort | uniq)"
+
+#
+# For name: Expect pathname:
+#
+# == "nfs" "<nfs>"
+# != "nfs" "<unknown>" # FIXME: "<unknown>"? Really?
+#
+
+gawk '
+BEGIN { err = 0 }
+NF == 23 {
+ if ($21 == "nfs") expect = "<nfs>";
+ else expect = "<unknown>";
+ if ($23 != expect) {
+ print "ERROR: for name " $21 " expect " expect " but got " $23;
+ err = 1;
+ }
+}
+END { exit(err) }
+' $infile
+if [ $? -ne 0 ]; then
+ retval=1
+fi
+
+#
+# Check that for each name, there is a distinct major number.
+# This does not guarantee that the mapping is correct, but it
+# is a partial correctness check and we already checked the
+# statname mapping to edev numbers against "ls -l /dev".
+#
+
+gawk 'NF == 23 { print $21, $16 }' $infile | sort | uniq > map-name-to-major.txt
+nmaps=`cat map-name-to-major.txt | wc -l`
+nnames=`awk '{print $1}' map-name-to-major.txt | sort | uniq | wc -l`
+nmajor=`awk '{print $2}' map-name-to-major.txt | sort | uniq | wc -l`
+if [ $nnames -ne $nmaps -o $nmajor -ne $nmaps ]; then
+ echo "ERROR: name-to-major-number is not a one-to-one mapping"
+ cat map-name-to-major.txt
+ retval=1
+fi
+
+#
+# If the name is "nfs", the edev should be 0. FIXME: is this correct?
+#
+
+gawk '
+BEGIN { err = 0 }
+$21 == "nfs" && $15 != 0 { print "ERROR: name is nfs but edev is nonzero"; err = 1 }
+END { exit(err) }' $infile
+if [ $? -ne 0 ]; then
+ retval=1
+fi
+
+#
+# Exit.
+#
+
+exit $retval
diff --git a/test/unittest/io/dump_io_probe_args.d b/test/unittest/io/dump_io_probe_args.d
new file mode 100644
index 00000000..afc1f1f0
--- /dev/null
+++ b/test/unittest/io/dump_io_probe_args.d
@@ -0,0 +1,47 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ */
+/* @@skip: not used directly by the test harness; called by other scripts */
+
+/*
+ * For all io::: probes, dump "all" probe arguments (and their interesting members).
+ * It would be nice just to say "io:::", but our use of args[] forces us to
+ * enumerate the probes.
+ */
+io:::wait-start,
+io:::wait-done,
+io:::start,
+io:::done
+{
+ printf("%s: %s: %s: %11s %d %3x %9d %9d %p %d %d %5d %5d %p %d %d %d %d %d %d %s %s %s\n",
+ probeprov, probemod, probefunc, probename,
+ arg2,
+
+ args[0]->b_flags,
+
+ args[0]->b_bcount,
+ args[0]->b_bufsize,
+
+ args[0]->b_addr,
+ args[0]->b_resid,
+ args[0]->b_error,
+
+ args[0]->b_lblkno,
+ args[0]->b_blkno,
+
+ args[0]->b_iodone,
+
+ args[0]->b_edev,
+ getmajor(args[0]->b_edev),
+ getminor(args[0]->b_edev),
+
+ args[1]->dev_major,
+ args[1]->dev_minor,
+ args[1]->dev_instance,
+ args[1]->dev_name,
+ args[1]->dev_statname,
+ args[1]->dev_pathname);
+}
diff --git a/test/unittest/io/tst.fbt_probes.r b/test/unittest/io/tst.fbt_probes.r
new file mode 100644
index 00000000..8ac323fe
--- /dev/null
+++ b/test/unittest/io/tst.fbt_probes.r
@@ -0,0 +1,8 @@
+bio_endio
+submit_bio_wait
+submit_bio_checks
+xfs_buf_iowait
+nfs_readpage_done
+nfs_writeback_done
+nfs_initiate_read
+nfs_initiate_write
diff --git a/test/unittest/io/tst.fbt_probes.sh b/test/unittest/io/tst.fbt_probes.sh
new file mode 100755
index 00000000..eef2eec7
--- /dev/null
+++ b/test/unittest/io/tst.fbt_probes.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+
+# @@nosort
+
+#
+# Check that the fbt probes on which the io provider relies are present.
+# The list of probes comes originally from the definition of probes[] in
+# dt_prov_io.c. Here, we read the list from our own .r file.
+#
+
+for myfunc in `cat ${0%.sh}.r`; do
+ awk '$1 == "'$myfunc'" { print $1 }' /sys/kernel/debug/tracing/available_filter_functions
+done
+
+exit 0
diff --git a/test/unittest/io/tst.fbt_probes.x b/test/unittest/io/tst.fbt_probes.x
new file mode 100755
index 00000000..eab252a0
--- /dev/null
+++ b/test/unittest/io/tst.fbt_probes.x
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+read MAJOR MINOR <<< `uname -r | grep -Eo '^[0-9]+\.[0-9]+' | tr '.' ' '`
+
+if [ $MAJOR -gt 5 ]; then
+ exit 0
+fi
+if [ $MAJOR -eq 5 -a $MINOR -ge 15 ]; then
+ exit 0
+fi
+
+# The io:::start probe depends in part on fbt::submit_bio_checks:entry,
+# but neither submit_bio_checks nor its caller __submit_bio appears in
+# /sys/kernel/debug/tracing/available_filter_functions.
+# For now, io:::start is not fully supported on UEKr6.
+
+echo "io:::start not fully supported before 5.15"
+exit 1
diff --git a/test/unittest/io/tst.local.sh b/test/unittest/io/tst.local.sh
index b6061fd6..7449a915 100755
--- a/test/unittest/io/tst.local.sh
+++ b/test/unittest/io/tst.local.sh
@@ -10,7 +10,6 @@
# Test the io:::start probe for write and read operations by creating
# a file and reading it back after clearing the caches.
#
-# @@xfail: dtv2
dtrace=$1
nblocks=1024
diff --git a/test/unittest/io/tst.local.x b/test/unittest/io/tst.local.x
new file mode 120000
index 00000000..7504b502
--- /dev/null
+++ b/test/unittest/io/tst.local.x
@@ -0,0 +1 @@
+tst.fbt_probes.x
\ No newline at end of file
diff --git a/test/unittest/io/tst.local2.sh b/test/unittest/io/tst.local2.sh
new file mode 100755
index 00000000..fa2a4bb0
--- /dev/null
+++ b/test/unittest/io/tst.local2.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+
+#
+# Test the io:::start probe for write and read operations by creating
+# a file and reading it back after clearing the caches.
+#
+
+rundt="$1 $dt_flags -qs $PWD/test/unittest/io/dump_io_probe_args.d -c"
+check_args=$PWD/test/unittest/io/check_io_probe_args.sh
+retval=0
+
+DIRNAME="$tmpdir/io-local2.$$.$RANDOM"
+mkdir -p $DIRNAME
+cd $DIRNAME
+
+filesize=$((512*1024))
+
+fsoptions="loop,defaults,atime,diratime,nosuid,nodev"
+iodir=`mktemp -u`
+tempfile=`mktemp -u -p $iodir`
+
+trap "rm -f $tempfile; umount $iodir; rmdir $iodir; rm -f $iodir.img" QUIT
+
+dd if=/dev/zero of=$iodir.img bs=1024 count=$((16*1024)) status=none
+mkfs.xfs $iodir.img > /dev/null
+ mkdir $iodir
+ mount -t xfs -o $fsoptions $iodir.img $iodir
+ devnam=`losetup -j $iodir.img | awk 'BEGIN { FS = ":" } ; {print $1}'`
+ statname=`basename $devnam`
+
+ dd if=/dev/urandom of=$tempfile count=$filesize bs=1 status=none
+ $rundt "umount $iodir" -o log.write
+ mount -t xfs -o $fsoptions $iodir.img $iodir
+ $rundt "sum $tempfile" -o log.read
+ rm -f $tempfile
+ umount $iodir
+ rmdir $iodir
+rm -f $iodir.img
+
+# check the DTrace output
+
+$check_args log.write
+if [ $? -ne 0 ]; then
+ retval=1
+fi
+$check_args log.read
+if [ $? -ne 0 ]; then
+ retval=1
+fi
+
+cat > awk.txt << EOF
+# initialize
+BEGIN { err = 0; bytes = 0; nrec = 0 }
+
+# skip over uninteresting records
+NF == 0 { next }
+\$14 != myiodone { next }
+\$22 != "$statname" { next }
+
+# check
+\$4 != "start" &&
+\$4 != "done" { print "probe name should be start or done"; err = 1 }
+\$6 != myflags { print "flags are wrong"; err = 1 }
+\$4 == "start" { bytes += \$7; nrec++ }
+\$21 != "loop" { print "name is wrong"; err = 1 }
+END {
+ if (bytes != $filesize) {
+ print "total bytes should match filesize", bytes, $filesize;
+ err = 1;
+ }
+ if (nrecflag == 1 && nrec != 1) {
+ print "expected one record";
+ err = 1;
+ }
+ exit(err);
+}
+EOF
+
+myaddr=`awk '$4 == "xfs_end_bio" {print $1}' /proc/kallmodsyms`
+awk -v myflags=520 -v nrecflag=1 -v myiodone=$myaddr -f awk.txt log.write
+if [ $? -ne 0 ]; then
+ echo post-processing error log.write
+ cat log.write
+ retval=1
+fi
+
+myaddr=`awk '$4 == "iomap_read_end_io" {print $1}' /proc/kallmodsyms`
+awk -v myflags=460 -v nrecflag=2 -v myiodone=$myaddr -f awk.txt log.read
+if [ $? -ne 0 ]; then
+ echo post-processing error log.read
+ cat log.read
+ retval=1
+fi
+
+exit $retval
diff --git a/test/unittest/io/tst.local2.x b/test/unittest/io/tst.local2.x
new file mode 120000
index 00000000..7504b502
--- /dev/null
+++ b/test/unittest/io/tst.local2.x
@@ -0,0 +1 @@
+tst.fbt_probes.x
\ No newline at end of file
diff --git a/test/unittest/io/tst.lv-done.r b/test/unittest/io/tst.lv-done.r
new file mode 100644
index 00000000..35f539da
--- /dev/null
+++ b/test/unittest/io/tst.lv-done.r
@@ -0,0 +1,17 @@
+PROBE io vmlinux done
+
+ Probe Description Attributes
+ Identifier Names: Private
+ Data Semantics: Private
+ Dependency Class: Unknown
+
+ Argument Attributes
+ Identifier Names: Evolving
+ Data Semantics: Evolving
+ Dependency Class: ISA
+
+ Argument Types
+ args[0]: bufinfo_t *
+ args[1]: devinfo_t *
+ args[2]: fileinfo_t *
+
diff --git a/test/unittest/io/tst.lv-done.r.p b/test/unittest/io/tst.lv-done.r.p
new file mode 100755
index 00000000..c538e345
--- /dev/null
+++ b/test/unittest/io/tst.lv-done.r.p
@@ -0,0 +1,5 @@
+#!/usr/bin/awk -f
+NR == 1 { next; }
+NR == 2 { print "PROBE", $2, $3, $NF; next; }
+/^ *[0-9]+/ { exit; }
+{ print; }
diff --git a/test/unittest/io/tst.lv-done.sh b/test/unittest/io/tst.lv-done.sh
new file mode 100755
index 00000000..33948324
--- /dev/null
+++ b/test/unittest/io/tst.lv-done.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+
+dtrace=$1
+
+$dtrace $dt_flags -lvn io:::done
+exit $?
diff --git a/test/unittest/io/tst.lv-start.r b/test/unittest/io/tst.lv-start.r
new file mode 100644
index 00000000..d2ee9666
--- /dev/null
+++ b/test/unittest/io/tst.lv-start.r
@@ -0,0 +1,17 @@
+PROBE io vmlinux start
+
+ Probe Description Attributes
+ Identifier Names: Private
+ Data Semantics: Private
+ Dependency Class: Unknown
+
+ Argument Attributes
+ Identifier Names: Evolving
+ Data Semantics: Evolving
+ Dependency Class: ISA
+
+ Argument Types
+ args[0]: bufinfo_t *
+ args[1]: devinfo_t *
+ args[2]: fileinfo_t *
+
diff --git a/test/unittest/io/tst.lv-start.r.p b/test/unittest/io/tst.lv-start.r.p
new file mode 120000
index 00000000..4a56a9d3
--- /dev/null
+++ b/test/unittest/io/tst.lv-start.r.p
@@ -0,0 +1 @@
+tst.lv-done.r.p
\ No newline at end of file
diff --git a/test/unittest/io/tst.lv-start.sh b/test/unittest/io/tst.lv-start.sh
new file mode 100755
index 00000000..4b8f1248
--- /dev/null
+++ b/test/unittest/io/tst.lv-start.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+
+dtrace=$1
+
+$dtrace $dt_flags -lvn io:::start
+exit $?
diff --git a/test/unittest/io/tst.lv-wait-done.r b/test/unittest/io/tst.lv-wait-done.r
new file mode 100644
index 00000000..77f05e9f
--- /dev/null
+++ b/test/unittest/io/tst.lv-wait-done.r
@@ -0,0 +1,17 @@
+PROBE io vmlinux wait-done
+
+ Probe Description Attributes
+ Identifier Names: Private
+ Data Semantics: Private
+ Dependency Class: Unknown
+
+ Argument Attributes
+ Identifier Names: Evolving
+ Data Semantics: Evolving
+ Dependency Class: ISA
+
+ Argument Types
+ args[0]: bufinfo_t *
+ args[1]: devinfo_t *
+ args[2]: fileinfo_t *
+
diff --git a/test/unittest/io/tst.lv-wait-done.r.p b/test/unittest/io/tst.lv-wait-done.r.p
new file mode 120000
index 00000000..4a56a9d3
--- /dev/null
+++ b/test/unittest/io/tst.lv-wait-done.r.p
@@ -0,0 +1 @@
+tst.lv-done.r.p
\ No newline at end of file
diff --git a/test/unittest/io/tst.lv-wait-done.sh b/test/unittest/io/tst.lv-wait-done.sh
new file mode 100755
index 00000000..2187fa1f
--- /dev/null
+++ b/test/unittest/io/tst.lv-wait-done.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+
+dtrace=$1
+
+$dtrace $dt_flags -lvn io:::wait-done
+exit $?
diff --git a/test/unittest/io/tst.lv-wait-start.r b/test/unittest/io/tst.lv-wait-start.r
new file mode 100644
index 00000000..56f1b607
--- /dev/null
+++ b/test/unittest/io/tst.lv-wait-start.r
@@ -0,0 +1,17 @@
+PROBE io vmlinux wait-start
+
+ Probe Description Attributes
+ Identifier Names: Private
+ Data Semantics: Private
+ Dependency Class: Unknown
+
+ Argument Attributes
+ Identifier Names: Evolving
+ Data Semantics: Evolving
+ Dependency Class: ISA
+
+ Argument Types
+ args[0]: bufinfo_t *
+ args[1]: devinfo_t *
+ args[2]: fileinfo_t *
+
diff --git a/test/unittest/io/tst.lv-wait-start.r.p b/test/unittest/io/tst.lv-wait-start.r.p
new file mode 120000
index 00000000..4a56a9d3
--- /dev/null
+++ b/test/unittest/io/tst.lv-wait-start.r.p
@@ -0,0 +1 @@
+tst.lv-done.r.p
\ No newline at end of file
diff --git a/test/unittest/io/tst.lv-wait-start.sh b/test/unittest/io/tst.lv-wait-start.sh
new file mode 100755
index 00000000..b6b8e84b
--- /dev/null
+++ b/test/unittest/io/tst.lv-wait-start.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+
+dtrace=$1
+
+$dtrace $dt_flags -lvn io:::wait-start
+exit $?
diff --git a/test/unittest/io/tst.nfs.sh b/test/unittest/io/tst.nfs.sh
index f9222ff6..4e368244 100755
--- a/test/unittest/io/tst.nfs.sh
+++ b/test/unittest/io/tst.nfs.sh
@@ -9,7 +9,6 @@
# Test the io:::start probe for write and read operations by creating
# a file and reading it back after clearing the caches.
#
-# @@xfail: dtv2
dtrace=$1
filesize=$((1024*1024))
@@ -23,7 +22,8 @@ statname="nfs"
trap "rm -f $tempfile; umount $clientpath; rmdir $clientpath; exportfs -u 127.0.0.1:$serverpath; rmdir $serverpath" QUIT EXIT
# setup NFS server
-service nfs start > /dev/null 2>&1
+#service nfs start > /dev/null 2>&1
+systemctl enable --now nfs-server > /dev/null 2>&1
mkdir $serverpath
exportfs -i -v -o "rw,sync,no_root_squash,insecure,fsid=8434437287" 127.0.0.1:$serverpath > /dev/null
diff --git a/test/unittest/io/tst.nfs2.sh b/test/unittest/io/tst.nfs2.sh
new file mode 100755
index 00000000..273ecef4
--- /dev/null
+++ b/test/unittest/io/tst.nfs2.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+
+#
+# Test the io:::start probe for write and read operations by creating
+# a file and reading it back after clearing the caches.
+#
+
+rundt="$1 $dt_flags -qs $PWD/test/unittest/io/dump_io_probe_args.d -c"
+check_args=$PWD/test/unittest/io/check_io_probe_args.sh
+retval=0
+
+DIRNAME="$tmpdir/io-nfs2.$$.$RANDOM"
+mkdir -p $DIRNAME
+cd $DIRNAME
+
+filesize=$((1024*1024))
+
+exdir=`mktemp -u`
+iodir=`mktemp -u`
+tempfile=`mktemp -u -p $iodir`
+
+trap "rm -f $tempfile; umount $iodir; rmdir $iodir; exportfs -u 127.0.0.1:$exdir; rmdir $exdir" QUIT
+
+systemctl enable --now nfs-server > /dev/null 2>&1
+
+mkdir $exdir
+ # what is the fsid?
+ exportfs -i -v -o "rw,sync,no_root_squash,insecure,fsid=8434437287" 127.0.0.1:$exdir > /dev/null
+ mkdir $iodir
+ mount -t nfs -o nfsvers=3 127.0.0.1:$exdir $iodir
+ $rundt "dd if=/dev/urandom of=$tempfile count=$filesize bs=1 status=none" -o log.write
+ myinode=`stat $tempfile | awk '/ Inode: / {print $4}'`
+ umount $iodir
+ # remount so that data is not cached
+ mount -t nfs -o nfsvers=3 127.0.0.1:$exdir $iodir
+ $rundt "sum $tempfile" -o log.read
+ rm -f $tempfile
+ umount $iodir
+ rmdir $iodir
+ exportfs -u 127.0.0.1:$exdir
+rmdir $exdir
+
+# check the DTrace output
+
+$check_args log.write
+if [ $? -ne 0 ]; then
+ retval=1
+fi
+$check_args log.read
+if [ $? -ne 0 ]; then
+ retval=1
+fi
+
+cat > awk.txt << EOF
+# initialize
+BEGIN { err = 0; bytes = 0; nrec = 0 }
+
+# skip over uninteresting records
+NF == 0 { next }
+\$6 != myflags { next }
+\$22 != "nfs" { next }
+
+# check
+\$4 != "start" &&
+\$4 != "done" { print "probe name should be start or done"; err = 1 }
+\$4 == "start" { bytes += \$7; nrec++ }
+\$12 != "$myinode" { print "blknode should be inode"; err = 1 }
+\$14 != 0 { print "iodone should be 0"; err = 1 }
+\$21 != "nfs" { print "name should be nfs"; err = 1 }
+END {
+ if (bytes != $filesize) {
+ print "total bytes should match filesize", bytes, $filesize;
+ err = 1;
+ }
+ if (nrecflag == 1 && nrec != 1) {
+ print "expected one record";
+ err = 1;
+ }
+ exit(err);
+}
+EOF
+
+awk -v myflags=520 -v nrecflag=1 -f awk.txt log.write
+if [ $? -ne 0 ]; then
+ echo post-processing error log.write
+ cat log.write
+ retval=1
+fi
+
+awk -v myflags=460 -v nrecflag=2 -f awk.txt log.read
+if [ $? -ne 0 ]; then
+ echo post-processing error log.read
+ cat log.read
+ retval=1
+fi
+
+exit $retval
diff --git a/test/unittest/io/tst.nfs2.x b/test/unittest/io/tst.nfs2.x
new file mode 120000
index 00000000..7504b502
--- /dev/null
+++ b/test/unittest/io/tst.nfs2.x
@@ -0,0 +1 @@
+tst.fbt_probes.x
\ No newline at end of file
diff --git a/test/unittest/io/tst.wait.sh b/test/unittest/io/tst.wait.sh
index 7ef0abae..d41f9ce7 100755
--- a/test/unittest/io/tst.wait.sh
+++ b/test/unittest/io/tst.wait.sh
@@ -8,7 +8,6 @@
#
# Test the io:::wait-start and io:::wait-done probes.
#
-# @@xfail: dtv2
dtrace=$1
nblocks=1024
diff --git a/test/unittest/io/tst.wait.x b/test/unittest/io/tst.wait.x
new file mode 120000
index 00000000..7504b502
--- /dev/null
+++ b/test/unittest/io/tst.wait.x
@@ -0,0 +1 @@
+tst.fbt_probes.x
\ No newline at end of file
--
2.18.4
More information about the DTrace-devel
mailing list