[DTrace-devel] [PATCH v2 4/7] Implement the io provider

Kris Van Hees kris.van.hees at oracle.com
Wed Sep 27 21:37:19 UTC 2023


I am still reviewing this, but wanted to give some first impressions and comments.

Having worked on several SDT providers, I know the amount of work that goes
into digging through the kernel internals to find the right points to probe
and the conditions under which they work.  And then there is the challenge
of finding the correct way to process the available data into the probe args.

This is a great piece of work.  Thank you for doing this.

On Thu, Aug 24, 2023 at 07:22:39PM -0400, eugene.loh at oracle.com wrote:
> 
> Two problems remain.
> 
> First, the io:::start probe is not fully implemented on UEK6.
> One of its instrumentation points is in submit_bio_checks(),
> but neither it nor its caller __submit_bio() is in
> /sys/kernel/debug/tracing/available_filter_functions on UEK6.
> Going higher, this call stack is inside a loop, meaning that a
> higher-level fbt:::entry probe and the intended submit_bio_checks
> entry will no longer be one-for-one.  For the time being, the
> implementation does not fully support io:::start on UEK6.
> 
> There also appear to be some test failures on OL9 ARM.
> 
> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
> ---
>  libdtrace/Build                         |   2 +
>  libdtrace/dt_bpf_maps.h                 |   7 +
>  libdtrace/dt_open.c                     |   1 +
>  libdtrace/dt_prov_io.c                  | 696 ++++++++++++++++++++++++
>  libdtrace/dt_provider.h                 |   1 +
>  test/demo/io/applicat.d                 |   1 -
>  test/demo/io/iocpu.d                    |   1 -
>  test/demo/io/iothrough.d                |   1 -
>  test/demo/io/whoio.d                    |   1 -
>  test/unittest/io/check_io_probe_args.sh | 273 ++++++++++
>  test/unittest/io/dump_io_probe_args.d   |  47 ++
>  test/unittest/io/tst.fbt_probes.r       |   8 +
>  test/unittest/io/tst.fbt_probes.sh      |  20 +
>  test/unittest/io/tst.fbt_probes.x       |  18 +
>  test/unittest/io/tst.local.sh           |   1 -
>  test/unittest/io/tst.local.x            |   1 +
>  test/unittest/io/tst.local2.sh          | 100 ++++
>  test/unittest/io/tst.local2.x           |   1 +
>  test/unittest/io/tst.lv-done.r          |  17 +
>  test/unittest/io/tst.lv-done.r.p        |   5 +
>  test/unittest/io/tst.lv-done.sh         |  11 +
>  test/unittest/io/tst.lv-start.r         |  17 +
>  test/unittest/io/tst.lv-start.r.p       |   1 +
>  test/unittest/io/tst.lv-start.sh        |  11 +
>  test/unittest/io/tst.lv-wait-done.r     |  17 +
>  test/unittest/io/tst.lv-wait-done.r.p   |   1 +
>  test/unittest/io/tst.lv-wait-done.sh    |  11 +
>  test/unittest/io/tst.lv-wait-start.r    |  17 +
>  test/unittest/io/tst.lv-wait-start.r.p  |   1 +
>  test/unittest/io/tst.lv-wait-start.sh   |  11 +
>  test/unittest/io/tst.nfs.sh             |   4 +-
>  test/unittest/io/tst.nfs2.sh            | 102 ++++
>  test/unittest/io/tst.nfs2.x             |   1 +
>  test/unittest/io/tst.wait.sh            |   1 -
>  test/unittest/io/tst.wait.x             |   1 +
>  35 files changed, 1401 insertions(+), 8 deletions(-)
>  create mode 100644 libdtrace/dt_prov_io.c
>  create mode 100755 test/unittest/io/check_io_probe_args.sh
>  create mode 100644 test/unittest/io/dump_io_probe_args.d
>  create mode 100644 test/unittest/io/tst.fbt_probes.r
>  create mode 100755 test/unittest/io/tst.fbt_probes.sh
>  create mode 100755 test/unittest/io/tst.fbt_probes.x
>  create mode 120000 test/unittest/io/tst.local.x
>  create mode 100755 test/unittest/io/tst.local2.sh
>  create mode 120000 test/unittest/io/tst.local2.x
>  create mode 100644 test/unittest/io/tst.lv-done.r
>  create mode 100755 test/unittest/io/tst.lv-done.r.p
>  create mode 100755 test/unittest/io/tst.lv-done.sh
>  create mode 100644 test/unittest/io/tst.lv-start.r
>  create mode 120000 test/unittest/io/tst.lv-start.r.p
>  create mode 100755 test/unittest/io/tst.lv-start.sh
>  create mode 100644 test/unittest/io/tst.lv-wait-done.r
>  create mode 120000 test/unittest/io/tst.lv-wait-done.r.p
>  create mode 100755 test/unittest/io/tst.lv-wait-done.sh
>  create mode 100644 test/unittest/io/tst.lv-wait-start.r
>  create mode 120000 test/unittest/io/tst.lv-wait-start.r.p
>  create mode 100755 test/unittest/io/tst.lv-wait-start.sh
>  create mode 100755 test/unittest/io/tst.nfs2.sh
>  create mode 120000 test/unittest/io/tst.nfs2.x
>  create mode 120000 test/unittest/io/tst.wait.x
> 
> diff --git a/libdtrace/Build b/libdtrace/Build
> index 7dc2d5d6..cc75d3c9 100644
> --- a/libdtrace/Build
> +++ b/libdtrace/Build
> @@ -49,6 +49,7 @@ libdtrace-build_SOURCES = dt_aggregate.c \
>  			  dt_prov_cpc.c \
>  			  dt_prov_dtrace.c \
>  			  dt_prov_fbt.c \
> +			  dt_prov_io.c \
>  			  dt_prov_ip.c \
>  			  dt_prov_lockstat.c \
>  			  dt_prov_proc.c \
> @@ -97,6 +98,7 @@ dt_proc.c_CFLAGS := -Wno-pedantic
>  dt_prov_cpc.c_CFLAGS := -Wno-pedantic
>  dt_prov_dtrace.c_CFLAGS := -Wno-pedantic
>  dt_prov_fbt.c_CFLAGS := -Wno-pedantic
> +dt_prov_io.c_CFLAGS := -Wno-pedantic
>  dt_prov_ip.c_CFLAGS := -Wno-pedantic
>  dt_prov_lockstat.c_CFLAGS := -Wno-pedantic
>  dt_prov_proc.c_CFLAGS := -Wno-pedantic
> diff --git a/libdtrace/dt_bpf_maps.h b/libdtrace/dt_bpf_maps.h
> index 0dd36b16..80b497c3 100644
> --- a/libdtrace/dt_bpf_maps.h
> +++ b/libdtrace/dt_bpf_maps.h
> @@ -31,6 +31,8 @@ struct dt_bpf_specs {
>  					 * drain this buffer */
>  };
>  
> +#define IO_BIO_SIZ 256
> +#define IO_BIO_STK 4
>  typedef struct dt_bpf_cpuinfo	dt_bpf_cpuinfo_t;
>  struct dt_bpf_cpuinfo {
>  	cpuinfo_t	ci;
> @@ -40,6 +42,11 @@ struct dt_bpf_cpuinfo {
>  	uint64_t	lockstat_bfrom;	/* lockstat: block time start */
>  	uint64_t	lockstat_btime;	/* lockstat: block time */
>  	uint64_t	lockstat_stime;	/* lockstat: spin time */
> +	uint64_t	io_bio_ptr_wait;		/* io: bio pointer (submit_bio_wait) */
> +	uint64_t	io_bio_ptr_checks;		/* io: bio pointer (submit_bio_checks) */
> +	uint64_t	io_bio_ptr_endio[IO_BIO_STK];	/* io: bio pointer (bio_endio), to be followed immediately by io_bio_stk_n */
> +	uint64_t	io_bio_stk_n;			/* io: bio pointer stack index */
> +	char		io_bio_fake[IO_BIO_SIZ];	/* io: bio fake struct */
>  };
>  
>  #ifdef  __cplusplus
> diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
> index 6d0a29f8..32059b33 100644
> --- a/libdtrace/dt_open.c
> +++ b/libdtrace/dt_open.c
> @@ -68,6 +68,7 @@ static const dt_provimpl_t *dt_providers[] = {
>  	&dt_dtrace,		/* list dt_dtrace first */
>  	&dt_cpc,
>  	&dt_fbt,
> +	&dt_io,
>  	&dt_ip,
>  	&dt_lockstat,
>  	&dt_proc,
> diff --git a/libdtrace/dt_prov_io.c b/libdtrace/dt_prov_io.c
> new file mode 100644
> index 00000000..1aa8846c
> --- /dev/null
> +++ b/libdtrace/dt_prov_io.c
> @@ -0,0 +1,696 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + *
> + * The 'io' SDT provider for DTrace-specific probes.
> + *
> + * These io::: probes mimic the instrumentation in legacy DTrace.
> + * Specifically, all probes have three probe args:
> + *     bufinfo_t *
> + *     devinfo_t *
> + *     fileinfo_t *
> + * but the trampoline really only needs to supply a bio pointer,
> + * from which translators will make the first two args.  The fileinfo_t *
> + * is 0 on Linux.

This is not entirely correct.  The translators need both arg0 and arg1 in
order to populate the args[] translated arguments.  We cannot populate the
fileinfo_t struct but we do need to provide arg1 as 0 so that the translator
knows that there is no fileinfo_t data.  Because args[2] will still be
available to dereference, and that will require arg2 (and it being NULL will
trigger the correct machinery in the rest of the code).

> + *
> + * The bio pointer is passed into some functions and is easily captured
> + * if we are using an fbt:::entry probe on such a function.  See
> + * DTRACE_IO() sites in the legacy implementation.
> + *
> + * For fbt:::entry probes on nfs_ and xfs_ functions, however, get only
> + * a hdr arg.  For them, we have a "fake struct bio", which the trampoline
> + * populates from the function's hdr arg.  See DTRACE_IO_NFS() and
> + * DTRACE_IO_XFS() sites in the legacy implementation.

I would take out references to the legacy implementation unless absolutely
necessary.  This is a complete reimplementation of the io provider and thus
we primarily implement what is documented.  The legacy implementation is not
the main reference on how to do this.

> + *
> + * In some cases, we have to use fbt:::return probes, for which we no
> + * longer have the function's arguments.  So, these cases rely on the
> + * corresponding entry probe to cache the bio pointer (or populate the
> + * fake bio), which the return probe can then retrieve.

Not needed - this is common practice in DTrace anyway.

> + *
> + * Unfortunately, more than one function may be active at any time (on a
> + * CPU).  So the return function needs to know which bio pointer or fake
> + * bio to use.  These rules are used:
> + *
> + *   - For nfs_ and xfs_ functions, just use the fake bio.
> + *
> + *   - For most other functions, use the bio pointer for that
> + *     function.
> + *
> + *   - For bio_endio, which might re-enter itself, keep a
> + *     stack of bio pointers.
> + */
> +#include <assert.h>
> +#include <errno.h>
> +
> +#include "dt_dctx.h"
> +#include "dt_cg.h"
> +#include "dt_provider_sdt.h"
> +#include "dt_probe.h"
> +
> +/* Defined in include/linux/blk_types.h */
> +#define REQ_OP_READ	0
> +#define REQ_OP_WRITE	1
> +/* Defined in fs/xfs/xfs_buf.h */
> +#define XBF_WRITE	(1 << 1) /* buffer intended for writing to device */
> +
> +static const char		prvname[] = "io";
> +static const char		modname[] = "vmlinux";  // FIXME:  Really?  Or blank?

I am going to look at this on the wider scale of SDT probes so let's leave this
for now.

> +/*
> + * If the set of functions in the fbt probes changes,
> + * update the list in test/unittest/io/tst.fbt_probes.r.
> + */
> +static probe_dep_t	probes[] = {
> +	{ "wait-start",
> +	  DTRACE_PROBESPEC_NAME,	"fbt::submit_bio_wait:entry" },
> +	{ "wait-start",
> +	  DTRACE_PROBESPEC_NAME,	"fbt::xfs_buf_iowait:entry" },
> +	{ "wait-done",
> +	  DTRACE_PROBESPEC_FUNC,	"fbt::submit_bio_wait" },
> +	{ "wait-done",
> +	  DTRACE_PROBESPEC_FUNC,	"fbt::xfs_buf_iowait" },
> +	{ "done",
> +	  DTRACE_PROBESPEC_FUNC,	"fbt::bio_endio" },

I believe it would be better to have a probe on rawtp:::block_bio_complete and
rawtp:::block_rq_complete.  If anything, it avoids messing with entry and
return probes.

> +	{ "done",
> +	  DTRACE_PROBESPEC_NAME,	"fbt::nfs_readpage_done:entry" },
> +	{ "done",
> +	  DTRACE_PROBESPEC_NAME,	"fbt::nfs_writeback_done:entry" },
> +	{ "start",
> +	  DTRACE_PROBESPEC_FUNC,	"fbt::submit_bio_checks" },

I believe it would be better to have a probe on rawtp:::block_bio_queue.  If
anything, it avoids messing with entry and return probes.

> +	{ "start",
> +	  DTRACE_PROBESPEC_NAME,	"fbt::nfs_initiate_read:entry" },
> +	{ "start",
> +	  DTRACE_PROBESPEC_NAME,	"fbt::nfs_initiate_write:entry" },   /* or return? */
> +	{ NULL, }
> +};
> +
> +/*
> + * All four probes have three probe args.  The first two will be extracted
> + * by a translator from the (struct bio *) we supply.  The (struct file *)
> + * we supply will be 0 in all cases.
> + */
> +static probe_arg_t probe_args[] = {
> +	{ "start", 0, { 0, 0, "struct bio *", "bufinfo_t *" } },
> +	{ "start", 1, { 0, 0, "struct bio *", "devinfo_t *" } },
> +	{ "start", 2, { 1, 0, "struct file *", "fileinfo_t *", } },
> +	{ "done", 0, { 0, 0, "struct bio *", "bufinfo_t *" } },
> +	{ "done", 1, { 0, 0, "struct bio *", "devinfo_t *" } },
> +	{ "done", 2, { 1, 0, "struct file *", "fileinfo_t *", } },
> +	{ "wait-start", 0, { 0, 0, "struct bio *", "bufinfo_t *" } },
> +	{ "wait-start", 1, { 0, 0, "struct bio *", "devinfo_t *" } },
> +	{ "wait-start", 2, { 1, 0, "struct file *", "fileinfo_t *", } },
> +	{ "wait-done", 0, { 0, 0, "struct bio *", "bufinfo_t *" } },
> +	{ "wait-done", 1, { 0, 0, "struct bio *", "devinfo_t *" } },
> +	{ "wait-done", 2, { 1, 0, "struct file *", "fileinfo_t *", } },
> +	{ NULL, }
> +};
> +
> +static const dtrace_pattr_t	pattr = {
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
> +};
> +
> +/*
> + * Provide all the "io" SDT probes.
> + */
> +static int populate(dtrace_hdl_t *dtp)
> +{
> +	return dt_sdt_populate(dtp, prvname, modname, &dt_io, &pattr,
> +			       probe_args, probes);
> +}
> +
> +/*
> + * Get a reference to the cpuinfo structure for the current CPU.
> + *
> + * Clobbers %r0 through %r5
> + * Stores pointer to cpuinfo struct in %r6
> + */
> +static void get_cpuinfo(dtrace_hdl_t *dtp, dt_irlist_t *dlp, uint_t exitlbl)
> +{
> +	dt_ident_t	*idp = dt_dlib_get_map(dtp, "cpuinfo");
> +
> +	assert(idp != NULL);
> +	dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
> +	emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> +	emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
> +	emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
> +	emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> +	emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
> +	emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
> +}

If we still need this in the final patch, we should put it elsewhere, e.g. in
dt_cg.c, and use it here and in dt_prov_lockstat.  But I hope we can defer to
using TLS variables instead.

> +
> +static int BPF_width(size_t sz)
> +{
> +	switch(sz) {
> +	case 1: return BPF_B;
> +	case 2: return BPF_H;
> +	case 4: return BPF_W;
> +	case 8: return BPF_DW;
> +	default: assert(0);
> +	}
> +}

It feels odd that we would need this here.  Surely, something in dt_cg.c can
be used instead?

> +
> +/*
> + * Generate BPF instructions to dereference the pointer in %r3.
> + *
> + * We often have to dereference a pointer.  However, the pointer might
> + * not look safe to the BPF verifier.  So we use bpf_probe_read() to
> + * copy to a safe location (use slot 0) and then load from there.
> + *
> + * Since we will use bpf_probe_read(), this code generation will assume
> + * that the source pointer is already in %r3.  Nonetheless, we will allow
> + * a scalar offset to be added to it.

I think you can safely say that in all cases where this is used the pointer
will not be safe in the eyes of the verifier because it will be an arbitrary
pointer.  Also, this should use bpf_probe_read_kernel().

> + *
> + * Arguments are:
> + *
> + *     ptr_off:  scalar offset to add to %r3 before dereferencing
> + *     read_width:  width of the scalar being read;
> + *                  it must be 1, 2, 4, or 8
> + *     out_reg:  register where the read scalar will be placed;
> + *               BPF_REG_0 <= out_reg <= BPF_REG_5
> + *
> + * Registers %r0-%r5 will be clobbered, with the loaded value
> + * appearing in out_reg.
> + */
> +static void deref_reg3(dt_irlist_t *dlp, uint_t exitlbl,
> +		       int ptr_off, int read_width, int out_reg)

Can we think of a better name for this?  How about deref_ptr and pass in the
register (even if it is always going to be %r3)?

> +{
> +	assert(out_reg >= BPF_REG_0 && out_reg <= BPF_REG_5);
> +
> +	/* Use slot 0 as temporary storage. */
> +	emit(dlp, BPF_MOV_REG(BPF_REG_1, BPF_REG_FP));
> +	emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, DT_TRAMP_SP_SLOT(0)));
> +
> +	/* Specify the width of the scalar. */
> +	emit(dlp, BPF_MOV_IMM(BPF_REG_2, read_width));
> +
> +	/* The source address is already in %r3, but add offset, if any. */
> +	if (ptr_off)
> +		emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, ptr_off));
> +
> +	/* Perform the copy and check for success. */
> +	emit(dlp, BPF_CALL_HELPER(BPF_FUNC_probe_read));
> +	emit(dlp, BPF_BRANCH_IMM(BPF_JSLT, BPF_REG_0, 0, exitlbl));
> +
> +	/* Load the result into the specified register. */
> +	emit(dlp, BPF_LOAD(BPF_width(read_width), out_reg, BPF_REG_FP, DT_TRAMP_SP_SLOT(0)));
> +}
> +
> +/*
> + * Zero out the entire fake struct bio area.
> + * We assume %r6 already points to the area.
> + */
> +static void io_zero_bio(dtrace_hdl_t *dtp, dt_irlist_t *dlp)
> +{
> +	ctf_file_t *cfp = dtp->dt_shared_ctf;
> +	ctf_id_t type;
> +	size_t sz;
> +
> +	if (!cfp)
> +		longjmp(yypcb->pcb_jmpbuf, EDT_NOCTF);
> +
> +	type = ctf_lookup_by_name(cfp, "struct bio");
> +	if (type == CTF_ERR)
> +		longjmp(yypcb->pcb_jmpbuf, EDT_NOCTF);
> +
> +	sz = ctf_type_size(cfp, type);
> +	if (sz > IO_BIO_SIZ)
> +		longjmp(yypcb->pcb_jmpbuf, EDT_NOCTF);    /* FIXME change EDT_NOCTF */
> +
> +	emit(dlp,  BPF_MOV_REG(BPF_REG_1, BPF_REG_6));
> +	emit(dlp,  BPF_MOV_IMM(BPF_REG_2, sz));
> +	emit(dlp,  BPF_MOV_REG(BPF_REG_3, BPF_REG_9)); /* in trampoline, dctx is in %r9 */
> +	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_3, DCTX_STRTAB));
> +	emite(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -1), dt_dlib_get_var(dtp, "ZERO_OFF"));
> +	emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_probe_read));
> +}
> +
> +/*
> + * For NFS events, we have to construct a fake struct bio,
> + * which we have to populate.
> + */
> +static void io_nfs_args(dtrace_hdl_t *dtp, dt_irlist_t *dlp, uint_t exitlbl, const char *ufunc)
> +{
> +	int	off;
> +	size_t	siz;
> +
> +	/*
> +	 * Determine the various sizes and offsets we want.
> +	 *
> +	 *     // Access these fields relative to &bio.
> +	 *     struct bio bio = {
> +	 *         .bi_opf = ...,
> +	 *         .bi_iter.bi_size = ...,      // struct bvec_iter bi_iter
> +	 *         .bi_iter.bi_sector = ...,
> +	 *     };
> +	 *
> +	 *     // Access these fields relative to hdr.
> +	 *     struct nfs_pgio_header *hdr;
> +	 *     ... = hdr->args.count;           // struct nfs_pgio_args args
> +	 *     ... = hdr->res.count;            // struct nfs_pgio_res  res
> +	 */
> +
> +	/* Put pointer to the fake struct bio area in %r6. */
> +	get_cpuinfo(dtp, dlp, exitlbl);
> +	emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, io_bio_fake)));
> +
> +	/* Zero out the entire fake struct bio area, pointed to by %r6. */
> +	io_zero_bio(dtp, dlp);
> +
> +	/* Fill in bi_opf */
> +	off = dt_cg_ctf_offsetof(dtp, "struct bio", "bi_opf", &siz);
> +	if (strstr(ufunc, "read"))
> +		emit(dlp, BPF_STORE_IMM(BPF_width(siz), BPF_REG_6, off, REQ_OP_READ));
> +	else
> +		emit(dlp, BPF_STORE_IMM(BPF_width(siz), BPF_REG_6, off, REQ_OP_WRITE));
> +
> +	/*
> +	 * bio.bi_iter.bi_size = hdr->foo.count;
> +	 *
> +	 * hdr is:
> +	 *   - arg0 for nfs_initiate_[read|write]()
> +	 *   - arg1 for nfs_[readpage|writeback]_done()
> +	 */
> +	if (strncmp(ufunc, "nfs_initiate_", 13) == 0) {
> +		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
> +		off = dt_cg_ctf_offsetof(dtp, "struct nfs_pgio_header", "args", NULL)
> +		    + dt_cg_ctf_offsetof(dtp, "struct nfs_pgio_args", "count", &siz);
> +	} else {
> +		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(1)));
> +		off = dt_cg_ctf_offsetof(dtp, "struct nfs_pgio_header", "res", NULL)
> +		    + dt_cg_ctf_offsetof(dtp, "struct nfs_pgio_res", "count", &siz);
> +	}
> +	deref_reg3(dlp, exitlbl, off, siz, BPF_REG_0);
> +	off = dt_cg_ctf_offsetof(dtp, "struct bio", "bi_iter", NULL)
> +	    + dt_cg_ctf_offsetof(dtp, "struct bvec_iter", "bi_size", &siz);
> +	emit(dlp, BPF_STORE(BPF_width(siz), BPF_REG_6, off, BPF_REG_0));
> +
> +	/*
> +	 * bio.bi_iter.bi_sector = hdr->inode;
> +	 */
> +	/* get hdr */
> +	if (strncmp(ufunc, "nfs_initiate_", 13) == 0)
> +		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
> +	else
> +		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(1)));
> +
> +	off = dt_cg_ctf_offsetof(dtp, "struct nfs_pgio_header", "inode", &siz);
> +	deref_reg3(dlp, exitlbl, off, siz, BPF_REG_3);
> +
> +	off = dt_cg_ctf_offsetof(dtp, "struct nfs_inode", "fileid", &siz)
> +	    - dt_cg_ctf_offsetof(dtp, "struct nfs_inode", "vfs_inode", NULL);
> +
> +	deref_reg3(dlp, exitlbl, off, siz, BPF_REG_0);
> +
> +	off = dt_cg_ctf_offsetof(dtp, "struct bio", "bi_iter", NULL)
> +	    + dt_cg_ctf_offsetof(dtp, "struct bvec_iter", "bi_sector", &siz);
> +	emit(dlp, BPF_STORE(BPF_width(siz), BPF_REG_6, off, BPF_REG_0));
> +
> +	/* Pass a pointer to the space */
> +	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_6));
> +}
> +
> +/*
> + * For XFS events, we have to construct a fake struct bio,
> + * which we have to populate.
> + *
> + * We end up with a pointer to the fake struct in %r6.
> + */
> +static void io_xfs_args(dtrace_hdl_t *dtp, dt_irlist_t *dlp, uint_t exitlbl, const char *ufunc)
> +{
> +	int	off;
> +	size_t	siz;
> +
> +	/*
> +	 * Determine the various sizes and offsets we want.
> +	 *
> +	 *     // Access these fields relative to &bio.
> +	 *     struct bio bio = {
> +	 *         .bi_opf = ...,
> +	 *         .bi_iter.bi_size = ...,      // struct bvec_iter bi_iter
> +	 *         .bi_iter.bi_sector = ...,
> +	 *         .bi_bdev = ...,
> +	 *     };
> +	 *
> +	 *     // Access these fields relative to bp.
> +	 *     struct xfs_buf *bp;
> +	 *     ... = (bp)->b_flags;
> +	 *     ... = xfs_buf_daddr(bp);
> +	 *     ... = (bp)->b_length;
> +	 *     ... = (bp)->b_target->bt_bdev;   // struct xfs_buftarg *b_target;
> +	 */
> +
> +	/* Put pointer to the fake struct bio area in %r6. */
> +	get_cpuinfo(dtp, dlp, exitlbl);
> +	emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, io_bio_fake)));
> +
> +	/* Zero out the entire fake struct bio area, pointed to by %r6. */
> +	io_zero_bio(dtp, dlp);
> +
> +	/* bio.bi_opf = (bp->b_flags & XBF_WRITE) ? REQ_OP_WRITE : REQ_OP_READ; */
> +	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
> +	off = dt_cg_ctf_offsetof(dtp, "struct xfs_buf", "b_flags", &siz);
> +	deref_reg3(dlp, exitlbl, off, siz, BPF_REG_0);
> +	emit(dlp, BPF_ALU64_IMM(BPF_AND, BPF_REG_0, XBF_WRITE));
> +	{
> +		uint_t Lzero = dt_irlist_label(dlp);
> +		uint_t Ldone = dt_irlist_label(dlp);
> +
> +		off = dt_cg_ctf_offsetof(dtp, "struct bio", "bi_opf", &siz);
> +
> +		emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, Lzero));
> +		emit(dlp,  BPF_STORE_IMM(BPF_width(siz), BPF_REG_6, off, REQ_OP_WRITE));
> +		emit(dlp,  BPF_JUMP(Ldone));
> +		emitl(dlp, Lzero,
> +			   BPF_NOP());
> +		emit(dlp,  BPF_STORE_IMM(BPF_width(siz), BPF_REG_6, off, REQ_OP_READ));
> +		emitl(dlp, Ldone,
> +			   BPF_NOP());
> +	}
> +
> +	/*
> +	 * bio.bi_iter.bi_size = bp->b_length;
> +	 */
> +	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
> +	off = dt_cg_ctf_offsetof(dtp, "struct xfs_buf", "b_length", &siz);
> +	deref_reg3(dlp, exitlbl, off, siz, BPF_REG_0);
> +	off = dt_cg_ctf_offsetof(dtp, "struct bio", "bi_iter", NULL)
> +	    + dt_cg_ctf_offsetof(dtp, "struct bvec_iter", "bi_size", &siz);
> +	emit(dlp, BPF_STORE(BPF_width(siz), BPF_REG_6, off, BPF_REG_0));
> +
> +	/*
> +	 * bio.bi_iter.bi_sector = xfs_buf_daddr(bp);
> +	 *
> +	 * In fs/xfs/xfs_buf.h, we have
> +	 *
> +	 *     xfs_daddr_t xfs_buf_daddr(struct xfs_buf *bp)
> +	 *     {
> +	 *         return bp->b_maps[0].bm_bn;
> +	 *     }
> +	 *
> +	 * So that gives
> +	 *     bio.bi_iter.bi_sector = bp->b_maps->bm_bn;
> +	 *
> +	 * include/linux/blk_types.h
> +	 *     struct bio {
> +	 *         [...]
> +	 *         struct bvec_iter        bi_iter;
> +	 *         [...]
> +	 *     }
> +	 * include/linux/bvec.h
> +	 *     struct bvec_iter {
> +	 *         sector_t                bi_sector;
> +	 *         [...]
> +	 *     };
> +	 * fs/xfs/xfs_buf.h
> +	 *     struct xfs_buf_map {
> +	 *         xfs_daddr_t             bm_bn;
> +	 *         [...]
> +	 *     };
> +	 *     struct xfs_buf {
> +	 *         [...]
> +	 *         struct xfs_buf_map      *b_maps;
> +	 *         [...]
> +	 *     }
> +	 */
> +	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
> +	off = dt_cg_ctf_offsetof(dtp, "struct xfs_buf", "b_maps", &siz);
> +	deref_reg3(dlp, exitlbl, off, siz, BPF_REG_3);
> +	off = dt_cg_ctf_offsetof(dtp, "struct xfs_buf_map", "bm_bn", &siz);
> +	deref_reg3(dlp, exitlbl, off, siz, BPF_REG_0);
> +	off = dt_cg_ctf_offsetof(dtp, "struct bio", "bi_iter", NULL)
> +	    + dt_cg_ctf_offsetof(dtp, "struct bvec_iter", "bi_sector", &siz);
> +	emit(dlp, BPF_STORE(BPF_width(siz), BPF_REG_6, off, BPF_REG_0));
> +
> +	/*
> +	 * bio.bi_bdev = (bp)->b_target->bt_bdev
> +	 */
> +	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
> +	off = dt_cg_ctf_offsetof(dtp, "struct xfs_buf", "b_target", &siz);
> +	assert(siz == sizeof(void *));
> +	deref_reg3(dlp, exitlbl, off, 8, BPF_REG_3);
> +	off = dt_cg_ctf_offsetof(dtp, "struct xfs_buftarg", "bt_bdev", &siz);
> +	deref_reg3(dlp, exitlbl, off, siz, BPF_REG_0);
> +	off = dt_cg_ctf_offsetof(dtp, "struct bio", "bi_bdev", &siz);
> +	emit(dlp, BPF_STORE(BPF_width(siz), BPF_REG_6, off, BPF_REG_0));
> +}
> +
> +/*
> + * The io provider uses a special, private TLS variable.  Here, we look
> + * up its id, or insert such a variable if it does not already exist.
> + *
> + * This code mimics insertion in either:
> + *   - dt_node_decl()
> + *   - dt_xcook_ident(...)
> + */
> +static uint_t get_id_TLS(dtrace_hdl_t *dtp)
> +{
> +	dt_idhash_t *dhp = dtp->dt_tls;
> +	const char name[] = "private TLS variable for io provider";
> +	dt_ident_t *idp = dt_idhash_lookup(dhp, name);
> +
> +	if (idp) {
> +		/* If it already exists, use its di_id. */
> +		return idp->di_id;
> +	} else {
> +		/* Otherwise, insert it.  Its flags and attributes hardly matter. */
> +		uint_t id = 0;
> +
> +		if (dt_idhash_nextid(dhp, &id) == -1)
> +			xyerror(D_ID_OFLOW, "cannot create %s: limit on number of %s variables exceeded\n", name, dt_idhash_name(dhp));
> +
> +		idp = dt_idhash_insert(dhp, name, 0, DT_IDFLG_TLS, id, _dtrace_defattr, 0, NULL, NULL, 0);
> +		if (idp == NULL)
> +			longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
> +
> +		dt_ident_set_storage(idp, 8, 8);
> +
> +		return id;
> +	}
> +}
> +
> +/*
> + * Generate a BPF trampoline for a SDT probe.
> + *
> + * The trampoline function is called when a SDT probe triggers, and it must
> + * satisfy the following prototype:
> + *
> + *	int dt_io(void *data)
> + *
> + * The trampoline will populate a dt_dctx_t struct and then call the function
> + * that implements the compiled D clause.  It returns the value that it gets
> + * back from that function.
> + */
> +static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> +{
> +	dtrace_hdl_t	*dtp = pcb->pcb_hdl;
> +	dt_irlist_t	*dlp = &pcb->pcb_ir;
> +	dt_probe_t	*prp = pcb->pcb_probe;
> +	dt_probe_t	*uprp = pcb->pcb_parent_probe;
> +	size_t		bio_ptr_off = 0;
> +
> +	/* Figure out the offset to the bio pointer we want to use. */
> +
> +	if (strcmp(uprp->desc->fun, "submit_bio_wait") == 0)
> +		bio_ptr_off = offsetof(dt_bpf_cpuinfo_t, io_bio_ptr_wait);
> +	else if (strcmp(uprp->desc->fun, "submit_bio_checks") == 0)
> +		bio_ptr_off = offsetof(dt_bpf_cpuinfo_t, io_bio_ptr_checks);
> +	else if (strcmp(uprp->desc->fun, "bio_endio") == 0)
> +		bio_ptr_off = offsetof(dt_bpf_cpuinfo_t, io_bio_ptr_endio);
> +
> +	/* Handle different probe cases. */
> +
> +	if (strncmp(uprp->desc->fun, "nfs_", 4) == 0) {
> +		io_nfs_args(dtp, dlp, exitlbl, uprp->desc->fun);
> +	} else if (strncmp(uprp->desc->fun, "xfs_", 4) == 0 && strcmp(uprp->desc->prb, "entry") == 0) {
> +		io_xfs_args(dtp, dlp, exitlbl, uprp->desc->fun);
> +
> +		if (strcmp(prp->desc->prb, "wait-start") != 0)
> +			return 1;
> +
> +		/* Pass a pointer to the space */
> +		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_6));
> +	} else if (strncmp(uprp->desc->fun, "xfs_", 4) == 0) {
> +		/* wait done */
> +
> +		/* Put pointer to the fake struct bio area in %r6 */
> +		get_cpuinfo(dtp, dlp, exitlbl);
> +		emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, io_bio_fake)));
> +
> +		/* Pass a pointer to the space */
> +		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_6));
> +	} else if (strcmp(prp->desc->prb, "wait-start") == 0) {
> +#if 0
> +		/* arg0 = arg0 is a no-op, this code is not needed */
> +		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(0)));
> +		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
> +#endif
> +	} else if (strcmp(uprp->desc->fun, "bio_endio") == 0) {
> +		/* Determine stack bounds. */
> +		size_t stk_n = offsetof(dt_bpf_cpuinfo_t, io_bio_stk_n);
> +		int stk_min = 0;
> +		int stk_max = stk_n - bio_ptr_off;
> +
> +		get_cpuinfo(dtp, dlp, exitlbl);
> +
> +		/* %r1 is the current stack size. */
> +		emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, stk_n));
> +
> +		/* Check stack size. */
> +		if (strcmp(uprp->desc->prb, "entry") == 0)
> +			stk_max -= sizeof(uint64_t);
> +		else
> +			stk_min += sizeof(uint64_t);
> +		emit(dlp, BPF_BRANCH_IMM(BPF_JLT, BPF_REG_1, stk_min, exitlbl));
> +		emit(dlp, BPF_BRANCH_IMM(BPF_JGT, BPF_REG_1, stk_max, exitlbl));
> +
> +		if (strcmp(uprp->desc->prb, "entry") == 0) {
> +			/* %r0 = bio (first arg) */
> +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(0)));
> +
> +			/* Push %r0 onto the stack. */
> +			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_6));
> +			emit(dlp, BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1));
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_2, bio_ptr_off, BPF_REG_0));
> +
> +			/* Update and save the stack size. */
> +			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, sizeof(uint64_t)));
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, stk_n, BPF_REG_1));
> +
> +			return 1;
> +		} else {
> +			/* Update and save the stack size. */
> +			emit(dlp, BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, sizeof(uint64_t)));
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, stk_n, BPF_REG_1));
> +
> +			/* Pop the stack into %r0. */
> +			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_6));
> +			emit(dlp, BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1));
> +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_2, bio_ptr_off));
> +
> +			/* Save %r0 as the first arg. */
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
> +		}
> +	} else if (strcmp(prp->desc->prb, "wait-done") == 0 && strcmp(uprp->desc->fun, "submit_bio_wait") == 0) {
> +		/*
> +		 * For wait-done, we need to instrument submit_bio_wait(struct bio *).
> +		 * Upon entry, we store the bio pointer into a special TLS location.
> +		 * Upon return, we retrieve the pointer (and store a 0 back to the
> +		 * TLS variable).  We use a TLS variable to distinguish among several
> +		 * submit_bio_wait() calls that may be pending concurrently on a CPU.
> +		 */
> +		uint_t varid = get_id_TLS(dtp) - DIF_VAR_OTHER_UBASE;
> +		dt_ident_t *fnp = dt_dlib_get_func(dtp, "dt_get_tvar");
> +		dt_ident_t *zero_off = dt_dlib_get_var(dtp, "ZERO_OFF");
> +
> +		assert(fnp);
> +		assert(zero_off);
> +
> +		/* If this is the return probe, retrieve the bio pointer from TLS. */
> +		if (strcmp(uprp->desc->prb, "return") == 0) {
> +			uint_t Lnull = dt_irlist_label(dlp);
> +
> +			/* Call dt_get_tvar() for our private io-provider TLS variable. */
> +			emit(dlp,  BPF_MOV_IMM(BPF_REG_1, varid));
> +			emit(dlp,  BPF_MOV_IMM(BPF_REG_2, 0));
> +			emit(dlp,  BPF_MOV_IMM(BPF_REG_3, 0));
> +			emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_9, DCTX_STRTAB));
> +			emite(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -1), zero_off);
> +			emite(dlp, BPF_CALL_FUNC(fnp->di_id), fnp);
> +
> +			/* If we got a nonzero address, load from it. */
> +			emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, Lnull));
> +			emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_0, 0));
> +			emitl(dlp, Lnull,
> +				   BPF_NOP());
> +
> +			/* Store the retrieved value (bio pointer) as arg0. */
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
> +		}
> +
> +		/*
> +		 * Store (update) the TLS copy of the bio pointer:
> +		 *   - return probe: store 0 (clear TLS, freeing storage)
> +		 *   - entry probe: store arg0 (for the return probe to use later)
> +		 */
> +		emit(dlp,  BPF_MOV_IMM(BPF_REG_1, varid));
> +		emit(dlp,  BPF_MOV_IMM(BPF_REG_2, 1));
> +		if (strcmp(uprp->desc->prb, "return") == 0)
> +			emit(dlp,  BPF_MOV_IMM(BPF_REG_3, 0));
> +		else
> +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
> +		emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_9, DCTX_STRTAB));
> +		emite(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -1), zero_off);
> +		emite(dlp, BPF_CALL_FUNC(fnp->di_id), fnp);
> +
> +		/*
> +		 * At this point, the entry probe only has the TLS variable
> +		 * address.  It has yet actually to store arg0 there,
> +		 * provided the address is nonzero.
> +		 */
> +		if (strcmp(uprp->desc->prb, "entry") == 0) {
> +			uint_t Lnull = dt_irlist_label(dlp);
> +
> +			emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, Lnull));
> +			emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
> +			emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_0, 0, BPF_REG_1));
> +			emitl(dlp, Lnull,
> +				   BPF_NOP());
> +
> +			return 1;
> +		}
> +	} else {

At this point in the compound conditional I have lost track of what cases were
already covered and therefore I have no clue what probes this applies to.  I
can of course figure it out by looking back but a comment would be nice :)

> +		get_cpuinfo(dtp, dlp, exitlbl);
> +
> +		if (strcmp(uprp->desc->prb, "entry") == 0) {
> +			/*
> +			 * Store the bio pointer in arg0 into the per-CPU cpuinfo
> +			 * structure referenced by %r6.
> +			 */
> +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, bio_ptr_off, BPF_REG_1));
> +
> +			return 1;
> +		} else {
> +			/*
> +			 * Load the bio pointer from the per-CPU cpuinfo structure
> +			 * referenced by %r6 into arg0.
> +			 */
> +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, bio_ptr_off));
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
> +
> +			/*
> +			 * Reset the source location to 0.
> +			 */
> +			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_6, bio_ptr_off, 0));
> +		}
> +	}
> +
> +	/*
> +	 * Note:DTrace does not currently support the use of fileinfo_t with io probes.

Space after :

> +	 * In Oracle Linux, no information is readily accessible at the level
> +	 * where the io probes fire about the file where an I/O request originated.
> +	 */
> +	/*
> +	 * FIXME: Given the "mapping"s in probe_args[] (above), I would have thought
> +	 * that to set arg2 here, we should write to arg1 (which is mapped to arg2).
> +	 * But apparently the correct thing to do is to write to arg2.  Weird.
> +	 */
> +	emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(2), 0));

args[2] is a translated argument and its source is arg1, so you need to set
arg1 to 0, not arg2.

> +
> +	return 0;
> +}
> +
> +dt_provimpl_t	dt_io = {
> +	.name		= prvname,
> +	.prog_type	= BPF_PROG_TYPE_UNSPEC,
> +	.populate	= &populate,
> +	.enable		= &dt_sdt_enable,
> +	.trampoline	= &trampoline,
> +	.probe_info	= &dt_sdt_probe_info,
> +};
> diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
> index 31ad028d..a50a2cf9 100644
> --- a/libdtrace/dt_provider.h
> +++ b/libdtrace/dt_provider.h
> @@ -70,6 +70,7 @@ typedef struct dt_provimpl {
>  extern dt_provimpl_t dt_dtrace;
>  extern dt_provimpl_t dt_cpc;
>  extern dt_provimpl_t dt_fbt;
> +extern dt_provimpl_t dt_io;
>  extern dt_provimpl_t dt_ip;
>  extern dt_provimpl_t dt_lockstat;
>  extern dt_provimpl_t dt_proc;
> diff --git a/test/demo/io/applicat.d b/test/demo/io/applicat.d
> index 64cdb6af..14170145 100644
> --- a/test/demo/io/applicat.d
> +++ b/test/demo/io/applicat.d
> @@ -4,7 +4,6 @@
>   * Licensed under the Universal Permissive License v 1.0 as shown at
>   * http://oss.oracle.com/licenses/upl.
>   */
> -/* @@xfail: dtv2 */
>  
>  io:::start
>  /execname == "soffice.bin" && args[2]->fi_name == "applicat.rdb"/
> diff --git a/test/demo/io/iocpu.d b/test/demo/io/iocpu.d
> index 7d29637d..c7b847e7 100644
> --- a/test/demo/io/iocpu.d
> +++ b/test/demo/io/iocpu.d
> @@ -4,7 +4,6 @@
>   * Licensed under the Universal Permissive License v 1.0 as shown at
>   * http://oss.oracle.com/licenses/upl.
>   */
> -/* @@xfail: dtv2 */
>  
>  #pragma D option quiet
>  
> diff --git a/test/demo/io/iothrough.d b/test/demo/io/iothrough.d
> index 0290b12e..57fda7f0 100644
> --- a/test/demo/io/iothrough.d
> +++ b/test/demo/io/iothrough.d
> @@ -4,7 +4,6 @@
>   * Licensed under the Universal Permissive License v 1.0 as shown at
>   * http://oss.oracle.com/licenses/upl.
>   */
> -/* @@xfail: dtv2 */
>  
>  #pragma D option quiet
>  
> diff --git a/test/demo/io/whoio.d b/test/demo/io/whoio.d
> index 17f7db54..d5fc444f 100644
> --- a/test/demo/io/whoio.d
> +++ b/test/demo/io/whoio.d
> @@ -4,7 +4,6 @@
>   * Licensed under the Universal Permissive License v 1.0 as shown at
>   * http://oss.oracle.com/licenses/upl.
>   */
> -/* @@xfail: dtv2 */
>  
>  #pragma D option quiet
>  
> diff --git a/test/unittest/io/check_io_probe_args.sh b/test/unittest/io/check_io_probe_args.sh
> new file mode 100755
> index 00000000..d8e6d264
> --- /dev/null
> +++ b/test/unittest/io/check_io_probe_args.sh
> @@ -0,0 +1,273 @@
> +#!/bin/bash
> +#
> +# Oracle Linux DTrace.
> +# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> +# Licensed under the Universal Permissive License v 1.0 as shown at
> +# http://oss.oracle.com/licenses/upl.
> +
> +#
> +# @@skip: not used directly by the test hardness; called by other scripts
> +#
> +
> +infile=$1
> +retval=0
> +
> +echo check_io_probe_args $infile
> +
> +#
> +# Start with some basic checks on the io probe args.
> +#
> +
> +gawk '
> +BEGIN {
> +    err = 0;    # set to 1 if we encounter any errors
> +    nrecs = 0;
> +}
> +
> +NF == 0 { next }      # skip empty lines
> +
> +NF != 23 { err = 1; print "garbled input: " $0; next }
> +
> +{
> +    nrecs++;
> +
> +    myprobeprov = $1
> +    myprobemod = $2
> +    myprobefunc = $3
> +    myprobename = $4
> +    myarg2 = $5
> +    myb_flags = $6
> +    myb_bcount = $7
> +    myb_bufsize = $8
> +    myb_addr = $9
> +    myb_resid = $10
> +    myb_error = $11
> +    myb_lblkno = $12
> +    myb_blkno = $13
> +    myb_iodone = $14
> +    myb_edev = $15
> +    myb_major = $16
> +    myb_minor = $17
> +    mydev_major = $18
> +    mydev_minor = $19
> +    mydev_instance = $20
> +    mydev_name = $21
> +    mydev_statname = $22
> +    mydev_pathname = $23
> +
> +    # Check probe description.
> +
> +    if (myprobeprov != "io:") { err = 1; print "provider is not io, got", myprobeprov }
> +    if (myprobemod != "vmlinux:") { err = 1; print "module is not vmlinux, got", myprobemod }
> +    if (myprobefunc != ":") { err = 1; print "function is not blank, got", myprobefunc }
> +    if (myprobename != "wait-start" &&
> +        myprobename != "wait-done" &&
> +        myprobename != "start" &&
> +        myprobename != "done") { err = 1; print "name is unrecognized", myprobename }
> +
> +    # Check that arg2 is 0.
> +    if (myarg2 != 0) { err = 1; print "arg2 should be 0, got", myarg2 }
> +
> +    # Check for a legal set of flags.
> +    {
> +        B_PAGEIO = 0x000010;
> +        B_PHYS   = 0x000020;
> +        B_READ   = 0x000040;
> +        B_WRITE  = 0x000100;
> +        B_ASYNC  = 0x000400;
> +        tmp = strtonum("0x"myb_flags);
> +
> +        # B_ASYNC may be set.
> +        if (and(tmp, B_ASYNC) != 0) tmp -= B_ASYNC;
> +
> +        # B_WRITE or else B_READ must be set.
> +        nflags = 0;
> +        if (and(tmp, B_WRITE) != 0) {
> +            tmp -= B_WRITE;
> +            nflags++;
> +        }
> +        if (and(tmp, B_READ) != 0) {
> +            tmp -= B_READ;
> +            nflags++;
> +        }
> +        if (nflags != 1) {
> +            printf "flags %x must be read or else write\n", myb_flags;
> +            err = 1;
> +        }
> +
> +        # B_PAGEIO or else B_PHYS must be set.
> +        nflags = 0;
> +        if (and(tmp, B_PAGEIO) != 0) {
> +            tmp -= B_PAGEIO;
> +            nflags++;
> +        }
> +        if (and(tmp, B_PHYS) != 0) {
> +            tmp -= B_PHYS;
> +            nflags++;
> +        }
> +        if (nflags != 1) {
> +            printf "flags %x must be pageio or else phys\n", myb_flags;
> +            err = 1;
> +        }
> +
> +        # Check for any other flags.
> +        if (tmp != 0) {
> +            printf "flags %x has some expected flags %x set\n", myb_flags, tmp;
> +            err = 1;
> +        }
> +    }
> +
> +    # FIXME: can we add a check for myb_bcount?
> +
> +    if (myb_bufsize != myb_bcount) { err = 1; print "bcount and bufsize do not match", myb_bcount, myb_bufsize }
> +
> +    if (myb_addr != "0") { err = 1; print "b_addr is not 0:", b_addr }
> +    if (myb_resid != "0") { err = 1; print "b_resid is not 0:", b_resid }
> +    if (myb_error != "0") { err = 1; print "b_error is not 0:", b_error }
> +
> +    # FIXME: can we add a check for myb_lblkno?
> +
> +    if (myb_blkno != myb_lblkno) { err = 1; print "lblkno and blkno do not match", myb_lblkno, myb_blkno }
> +
> +    # FIXME: can we add a check for myb_iodone?
> +    # FIXME: can we add a check for myb_edev?
> +
> +    if ( myb_major != rshift(myb_edev, 20)) { err = 1; print "b_major inconsistent with edev", myb_major, myb_edev }
> +    if ( myb_minor != and(myb_edev, 0xfffff)) { err = 1; print "b_minor inconsistent with edev", myb_minor, myb_edev }
> +
> +    if (mydev_major != myb_major) { err = 1; print "b_major and dev_major do not match", myb_major, mydev_major }
> +    if (mydev_minor != myb_minor) { err = 1; print "b_minor and dev_minor do not match", myb_minor, mydev_minor }
> +
> +    if (mydev_instance != 0) { err = 1; print "dev_instance is not 0", mydev_instance }
> +
> +    # FIXME: can we add a check for mydev_name?
> +    # FIXME: can we add a check for mydev_statname?
> +    # FIXME: can we add a check for mydev_pathname?
> +}
> +END {
> +    if (nrecs == 0) { err = 1; print "no records found" }
> +    exit(err);
> +}
> +' $infile
> +if [ $? -ne 0 ]; then
> +    retval=1
> +    cat $infile
> +    exit $retval
> +fi
> +
> +#
> +# Check that all iodone PCs are 0 or else correspond to end*io functions.
> +#
> +
> +if [ $UID -ne 0 ]; then
> +    echo skipping iodone check since must be root to read PCs in kallmodsyms
> +    retval=1
> +else
> +    for pc in `gawk 'NF == 23 { print $14 }' $infile | grep -wv 0 | sort | uniq`; do
> +        gawk '$1 == "'$pc'" && /end.*io/ { found = 1; exit }
> +            END { exit(found) }' /proc/kallmodsyms
> +        if [ $? -eq 0 ]; then
> +            echo $pc, " is not an end-io function"
> +            grep $pc /proc/kallmodsyms
> +            retval=1
> +        fi
> +    done
> +fi
> +
> +#
> +# For each statname, check that the reported major/minor numbers agree with "ls -l".
> +#
> +
> +while read mymajor myminor mystatname; do
> +    read mymajor0 myminor0 <<< $(ls -l /dev | gawk '$NF == "'$mystatname'" { print $(NF-5), $(NF-4) }' | tr ',' ' ')
> +
> +    if [ "x$mymajor0" == "x" ]; then
> +        mymajor0="0"
> +    fi
> +    if [ "x$myminor0" == "x" ]; then
> +        myminor0="0"
> +    fi
> +
> +    if [ $mymajor != $mymajor0 -o $myminor != $myminor0 ]; then
> +        echo ERROR: for $mystatname expect device major minor $mymajor $myminor but got $mymajor0 $myminor0
> +        retval=1
> +    fi
> +done <<< $(gawk 'NF == 23 { print $16, $17, $22 }' $infile | sort | uniq)
> +
> +#
> +# For each major number, check name.
> +#
> +
> +while read mymajor myname; do
> +    $dtrace $dt_flags -qn '
> +        BEGIN {
> +            trace(stringof(`major_names['$mymajor' % 255]->name));
> +            exit(0);
> +        }
> +        ERROR { trace("nfs"); exit(0); }' -o chkmajnam.txt >& /dev/null
> +    myname0=`cat chkmajnam.txt`
> +    rm -f chkmajnam.txt
> +    echo check major $mymajor is $myname and $myname0
> +    if [ $myname != $myname0 ]; then
> +        echo ERROR: for $mymajor expect name $myname0 but got $myname
> +        retval=1
> +    fi
> +done <<< $(gawk 'NF == 23 { print $16, $21 }' $infile | sort | uniq)
> +
> +#
> +# For name:      Expect pathname:
> +#
> +# == "nfs"       "<nfs>"
> +# != "nfs"       "<unknown>"     # FIXME: "<unknown>"?  Really?
> +#
> +
> +gawk '
> +BEGIN { err = 0 }
> +NF == 23 {
> +    if ($21 == "nfs") expect = "<nfs>";
> +    else              expect = "<unknown>";
> +    if ($23 != expect) {
> +        print "ERROR: for name " $21 " expect " expect " but got " $23;
> +        err = 1;
> +    }
> +}
> +END { exit(err) }
> +' $infile
> +if [ $? -ne 0 ]; then
> +    retval=1
> +fi
> +
> +#
> +# Check that for each name, there is a distinct major number.
> +# This does not guarantee that the mapping is correct, but it
> +# is a partial correctness check and we already checked the
> +# statname mapping to edev numbers against "ls -l /dev".
> +#
> +
> +gawk 'NF == 23 { print $21, $16 }' $infile | sort | uniq > map-name-to-major.txt
> +nmaps=`cat map-name-to-major.txt | wc -l`
> +nnames=`awk '{print $1}' map-name-to-major.txt | sort | uniq | wc -l`
> +nmajor=`awk '{print $2}' map-name-to-major.txt | sort | uniq | wc -l`
> +if [ $nnames -ne $nmaps -o $nmajor -ne $nmaps ]; then
> +    echo "ERROR: name-to-major-number is not a one-to-one mapping"
> +    cat map-name-to-major.txt
> +    retval=1
> +fi
> +
> +#
> +# If the name is "nfs", the edev should be 0.  FIXME: is this correct?
> +#
> +
> +gawk '
> +BEGIN { err = 0 }
> +$21 == "nfs" && $15 != 0 { print "ERROR: name is nfs but edev is nonzero"; err = 1 }
> +END { exit(err) }' $infile
> +if [ $? -ne 0 ]; then
> +    retval=1
> +fi
> +
> +#
> +# Exit.
> +#
> +
> +exit $retval
> diff --git a/test/unittest/io/dump_io_probe_args.d b/test/unittest/io/dump_io_probe_args.d
> new file mode 100644
> index 00000000..afc1f1f0
> --- /dev/null
> +++ b/test/unittest/io/dump_io_probe_args.d
> @@ -0,0 +1,47 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +/* @@skip: not used directly by the test hardness; called by other scripts */
> +
> +/*
> + * For all io::: probes, dump "all" probe arguments (and their interesting members).
> + * It would be nice just to say "io:::", but our use of args[] forces us to
> + * enumerate the probes.
> + */
> +io:::wait-start,
> +io:::wait-done,
> +io:::start,
> +io:::done
> +{
> +	printf("%s: %s: %s: %11s %d %3x  %9d %9d   %p %d %d  %5d %5d    %p   %d %d %d   %d %d %d %s %s %s\n",
> +	    probeprov, probemod, probefunc, probename,
> +	    arg2,

This is wrong because arg0..arg9 are the *non-translated* arguments and you
actually expect this to be 0 because the fileinfo_t * (args[2]) is supposed to
be NULL.  But you cannot just use args[2] because it is a dynamic type (as a
translated argument).  So, you need to print the source argument here that is
used to populate args[2] and that would be arg1.  Alternatively, you can use
args[2]->fi_name here and test that it is "<none>".

> +
> +	    args[0]->b_flags,
> +
> +	    args[0]->b_bcount,
> +	    args[0]->b_bufsize,
> +
> +	    args[0]->b_addr,
> +	    args[0]->b_resid,
> +	    args[0]->b_error,
> +
> +	    args[0]->b_lblkno,
> +	    args[0]->b_blkno,
> +
> +	    args[0]->b_iodone,
> +
> +	    args[0]->b_edev,
> +	    getmajor(args[0]->b_edev),
> +	    getminor(args[0]->b_edev),
> +
> +	    args[1]->dev_major,
> +	    args[1]->dev_minor,
> +	    args[1]->dev_instance,
> +	    args[1]->dev_name,
> +	    args[1]->dev_statname,
> +	    args[1]->dev_pathname);
> +}
> diff --git a/test/unittest/io/tst.fbt_probes.r b/test/unittest/io/tst.fbt_probes.r
> new file mode 100644
> index 00000000..8ac323fe
> --- /dev/null
> +++ b/test/unittest/io/tst.fbt_probes.r
> @@ -0,0 +1,8 @@
> +bio_endio
> +submit_bio_wait
> +submit_bio_checks
> +xfs_buf_iowait
> +nfs_readpage_done
> +nfs_writeback_done
> +nfs_initiate_read
> +nfs_initiate_write
> diff --git a/test/unittest/io/tst.fbt_probes.sh b/test/unittest/io/tst.fbt_probes.sh
> new file mode 100755
> index 00000000..eef2eec7
> --- /dev/null
> +++ b/test/unittest/io/tst.fbt_probes.sh
> @@ -0,0 +1,20 @@
> +#!/bin/bash
> +#
> +# Oracle Linux DTrace.
> +# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> +# Licensed under the Universal Permissive License v 1.0 as shown at
> +# http://oss.oracle.com/licenses/upl.
> +
> +# @@nosort
> +
> +#
> +# Check that the fbt probes on which the io provider relies are present.
> +# The list of probes comes originally from the definition of probes[] in
> +# dt_prov_io.c.  Here, we read the list from our own .r file.
> +#
> +
> +for myfunc in `cat ${0/.sh/.r}`; do
> +    awk '$1 == "'$myfunc'" { print $1 }' /sys/kernel/debug/tracing/available_filter_functions 

Trailing space.

> +done
> +
> +exit 0
> diff --git a/test/unittest/io/tst.fbt_probes.x b/test/unittest/io/tst.fbt_probes.x
> new file mode 100755
> index 00000000..eab252a0
> --- /dev/null
> +++ b/test/unittest/io/tst.fbt_probes.x
> @@ -0,0 +1,18 @@
> +#!/bin/bash
> +
> +read MAJOR MINOR <<< `uname -r | grep -Eo '^[0-9]+\.[0-9]+' | tr '.' ' '`
> +
> +if [ $MAJOR -gt 5 ]; then
> +        exit 0
> +fi      
> +if [ $MAJOR -eq 5 -a $MINOR -ge 15 ]; then
> +        exit 0
> +fi
> +
> +# The io:::start probe depends in part on fbt::submit_bio_checks:entry,
> +# but neither submit_bio_checks nor its caller __submit_bio appears in
> +# /sys/kernel/debug/tracing/available_filter_functions.
> +# For now, io:::start is not fully supported on UEKr6.
> +
> +echo "io:::start not fully supported before 5.15"
> +exit 1
> diff --git a/test/unittest/io/tst.local.sh b/test/unittest/io/tst.local.sh
> index b6061fd6..7449a915 100755
> --- a/test/unittest/io/tst.local.sh
> +++ b/test/unittest/io/tst.local.sh
> @@ -10,7 +10,6 @@
>  # Test the io:::start probe for write and read operations by creating
>  # a file and reading it back after clearing the caches.
>  #
> -# @@xfail: dtv2
>  
>  dtrace=$1
>  nblocks=1024
> diff --git a/test/unittest/io/tst.local.x b/test/unittest/io/tst.local.x
> new file mode 120000
> index 00000000..7504b502
> --- /dev/null
> +++ b/test/unittest/io/tst.local.x
> @@ -0,0 +1 @@
> +tst.fbt_probes.x
> \ No newline at end of file
> diff --git a/test/unittest/io/tst.local2.sh b/test/unittest/io/tst.local2.sh
> new file mode 100755
> index 00000000..fa2a4bb0
> --- /dev/null
> +++ b/test/unittest/io/tst.local2.sh
> @@ -0,0 +1,100 @@
> +#!/bin/bash
> +#
> +# Oracle Linux DTrace.
> +# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> +# Licensed under the Universal Permissive License v 1.0 as shown at
> +# http://oss.oracle.com/licenses/upl.
> +
> +#
> +# Test the io:::start probe for write and read operations by creating
> +# a file and reading it back after clearing the caches.
> +#
> +
> +rundt="$1 $dt_flags -qs $PWD/test/unittest/io/dump_io_probe_args.d -c"
> +check_args=$PWD/test/unittest/io/check_io_probe_args.sh
> +retval=0
> +
> +DIRNAME="$tmpdir/io-local2.$$.$RANDOM"
> +mkdir -p $DIRNAME
> +cd $DIRNAME
> +
> +filesize=$((512*1024))
> +
> +fsoptions="loop,defaults,atime,diratime,nosuid,nodev"
> +iodir=`mktemp -u`
> +tempfile=`mktemp -u -p $iodir`
> +
> +trap "rm -f $tempfile; umount $iodir; rmdir $iodir; rm -f $iodir.img" QUIT
> +
> +dd if=/dev/zero of=$iodir.img bs=1024 count=$((16*1024)) status=none
> +mkfs.xfs $iodir.img > /dev/null
> +    mkdir $iodir
> +        mount -t xfs -o $fsoptions $iodir.img $iodir
> +            devnam=`losetup -j $iodir.img | awk 'BEGIN { FS = ":" } ; {print $1}'`
> +            statname=`basename $devnam`
> +
> +            dd if=/dev/urandom of=$tempfile count=$filesize bs=1 status=none
> +        $rundt "umount $iodir"                                             -o log.write
> +        mount -t xfs -o $fsoptions $iodir.img $iodir
> +            $rundt "sum $tempfile"                                         -o log.read
> +            rm -f $tempfile
> +        umount $iodir
> +    rmdir $iodir
> +rm -f $iodir.img
> +
> +# check the DTrace output
> +
> +$check_args log.write
> +if [ $? -ne 0 ]; then
> +    retval=1
> +fi
> +$check_args log.read
> +if [ $? -ne 0 ]; then
> +    retval=1
> +fi
> +
> +cat > awk.txt << EOF
> +# initialize
> +BEGIN { err = 0; bytes = 0; nrec = 0 }
> +
> +# skip over uninteresting records
> +NF == 0 { next }
> +\$14 != myiodone { next }
> +\$22 != "$statname" { next }
> +
> +# check
> +\$4 != "start" &&
> +\$4 != "done" { print "probe name should be start or done"; err = 1 }
> +\$6 != myflags { print "flags are wrong"; err = 1 }
> +\$4 == "start" { bytes += \$7; nrec++ }
> +\$21 != "loop" { print "name is wrong"; err = 1 }
> +END {
> +      if (bytes != $filesize) {
> +          print "total bytes should match filesize", bytes, $filesize;
> +          err = 1;
> +      }
> +      if (nrecflag == 1 && nrec != 1) {
> +          print "expected one record";
> +          err = 1;
> +      }
> +      exit(err);
> +}
> +EOF
> +
> +myaddr=`awk '$4 == "xfs_end_bio"       {print $1}' /proc/kallmodsyms`
> +awk -v myflags=520 -v nrecflag=1 -v myiodone=$myaddr -f awk.txt log.write
> +if [ $? -ne 0 ]; then
> +    echo post-processing error log.write
> +    cat log.write
> +    retval=1
> +fi
> +
> +myaddr=`awk '$4 == "iomap_read_end_io" {print $1}' /proc/kallmodsyms`
> +awk -v myflags=460 -v nrecflag=2 -v myiodone=$myaddr -f awk.txt log.read
> +if [ $? -ne 0 ]; then
> +    echo post-processing error log.read
> +    cat log.read
> +    retval=1
> +fi
> +
> +exit $retval
> diff --git a/test/unittest/io/tst.local2.x b/test/unittest/io/tst.local2.x
> new file mode 120000
> index 00000000..7504b502
> --- /dev/null
> +++ b/test/unittest/io/tst.local2.x
> @@ -0,0 +1 @@
> +tst.fbt_probes.x
> \ No newline at end of file
> diff --git a/test/unittest/io/tst.lv-done.r b/test/unittest/io/tst.lv-done.r
> new file mode 100644
> index 00000000..35f539da
> --- /dev/null
> +++ b/test/unittest/io/tst.lv-done.r
> @@ -0,0 +1,17 @@
> +PROBE io vmlinux done
> +
> +	Probe Description Attributes
> +		Identifier Names: Private
> +		Data Semantics:   Private
> +		Dependency Class: Unknown
> +
> +	Argument Attributes
> +		Identifier Names: Evolving
> +		Data Semantics:   Evolving
> +		Dependency Class: ISA
> +
> +	Argument Types
> +		args[0]: bufinfo_t *
> +		args[1]: devinfo_t *
> +		args[2]: fileinfo_t *
> +
> diff --git a/test/unittest/io/tst.lv-done.r.p b/test/unittest/io/tst.lv-done.r.p
> new file mode 100755
> index 00000000..c538e345
> --- /dev/null
> +++ b/test/unittest/io/tst.lv-done.r.p
> @@ -0,0 +1,5 @@
> +#!/usr/bin/awk -f
> +NR == 1 { next; }
> +NR == 2 { print "PROBE", $2, $3, $NF; next; }
> +/^ *[0-9]+/ { exit; }
> +{ print; }
> diff --git a/test/unittest/io/tst.lv-done.sh b/test/unittest/io/tst.lv-done.sh
> new file mode 100755
> index 00000000..33948324
> --- /dev/null
> +++ b/test/unittest/io/tst.lv-done.sh
> @@ -0,0 +1,11 @@
> +#!/bin/bash
> +#
> +# Oracle Linux DTrace.
> +# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> +# Licensed under the Universal Permissive License v 1.0 as shown at
> +# http://oss.oracle.com/licenses/upl.
> +
> +dtrace=$1
> +
> +$dtrace $dt_flags -lvn io:::done
> +exit $?
> diff --git a/test/unittest/io/tst.lv-start.r b/test/unittest/io/tst.lv-start.r
> new file mode 100644
> index 00000000..d2ee9666
> --- /dev/null
> +++ b/test/unittest/io/tst.lv-start.r
> @@ -0,0 +1,17 @@
> +PROBE io vmlinux start
> +
> +	Probe Description Attributes
> +		Identifier Names: Private
> +		Data Semantics:   Private
> +		Dependency Class: Unknown
> +
> +	Argument Attributes
> +		Identifier Names: Evolving
> +		Data Semantics:   Evolving
> +		Dependency Class: ISA
> +
> +	Argument Types
> +		args[0]: bufinfo_t *
> +		args[1]: devinfo_t *
> +		args[2]: fileinfo_t *
> +
> diff --git a/test/unittest/io/tst.lv-start.r.p b/test/unittest/io/tst.lv-start.r.p
> new file mode 120000
> index 00000000..4a56a9d3
> --- /dev/null
> +++ b/test/unittest/io/tst.lv-start.r.p
> @@ -0,0 +1 @@
> +tst.lv-done.r.p
> \ No newline at end of file
> diff --git a/test/unittest/io/tst.lv-start.sh b/test/unittest/io/tst.lv-start.sh
> new file mode 100755
> index 00000000..4b8f1248
> --- /dev/null
> +++ b/test/unittest/io/tst.lv-start.sh
> @@ -0,0 +1,11 @@
> +#!/bin/bash
> +#
> +# Oracle Linux DTrace.
> +# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> +# Licensed under the Universal Permissive License v 1.0 as shown at
> +# http://oss.oracle.com/licenses/upl.
> +
> +dtrace=$1
> +
> +$dtrace $dt_flags -lvn io:::start
> +exit $?
> diff --git a/test/unittest/io/tst.lv-wait-done.r b/test/unittest/io/tst.lv-wait-done.r
> new file mode 100644
> index 00000000..77f05e9f
> --- /dev/null
> +++ b/test/unittest/io/tst.lv-wait-done.r
> @@ -0,0 +1,17 @@
> +PROBE io vmlinux wait-done
> +
> +	Probe Description Attributes
> +		Identifier Names: Private
> +		Data Semantics:   Private
> +		Dependency Class: Unknown
> +
> +	Argument Attributes
> +		Identifier Names: Evolving
> +		Data Semantics:   Evolving
> +		Dependency Class: ISA
> +
> +	Argument Types
> +		args[0]: bufinfo_t *
> +		args[1]: devinfo_t *
> +		args[2]: fileinfo_t *
> +
> diff --git a/test/unittest/io/tst.lv-wait-done.r.p b/test/unittest/io/tst.lv-wait-done.r.p
> new file mode 120000
> index 00000000..4a56a9d3
> --- /dev/null
> +++ b/test/unittest/io/tst.lv-wait-done.r.p
> @@ -0,0 +1 @@
> +tst.lv-done.r.p
> \ No newline at end of file
> diff --git a/test/unittest/io/tst.lv-wait-done.sh b/test/unittest/io/tst.lv-wait-done.sh
> new file mode 100755
> index 00000000..2187fa1f
> --- /dev/null
> +++ b/test/unittest/io/tst.lv-wait-done.sh
> @@ -0,0 +1,11 @@
> +#!/bin/bash
> +#
> +# Oracle Linux DTrace.
> +# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> +# Licensed under the Universal Permissive License v 1.0 as shown at
> +# http://oss.oracle.com/licenses/upl.
> +
> +dtrace=$1
> +
> +$dtrace $dt_flags -lvn io:::wait-done
> +exit $?
> diff --git a/test/unittest/io/tst.lv-wait-start.r b/test/unittest/io/tst.lv-wait-start.r
> new file mode 100644
> index 00000000..56f1b607
> --- /dev/null
> +++ b/test/unittest/io/tst.lv-wait-start.r
> @@ -0,0 +1,17 @@
> +PROBE io vmlinux wait-start
> +
> +	Probe Description Attributes
> +		Identifier Names: Private
> +		Data Semantics:   Private
> +		Dependency Class: Unknown
> +
> +	Argument Attributes
> +		Identifier Names: Evolving
> +		Data Semantics:   Evolving
> +		Dependency Class: ISA
> +
> +	Argument Types
> +		args[0]: bufinfo_t *
> +		args[1]: devinfo_t *
> +		args[2]: fileinfo_t *
> +
> diff --git a/test/unittest/io/tst.lv-wait-start.r.p b/test/unittest/io/tst.lv-wait-start.r.p
> new file mode 120000
> index 00000000..4a56a9d3
> --- /dev/null
> +++ b/test/unittest/io/tst.lv-wait-start.r.p
> @@ -0,0 +1 @@
> +tst.lv-done.r.p
> \ No newline at end of file
> diff --git a/test/unittest/io/tst.lv-wait-start.sh b/test/unittest/io/tst.lv-wait-start.sh
> new file mode 100755
> index 00000000..b6b8e84b
> --- /dev/null
> +++ b/test/unittest/io/tst.lv-wait-start.sh
> @@ -0,0 +1,11 @@
> +#!/bin/bash
> +#
> +# Oracle Linux DTrace.
> +# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> +# Licensed under the Universal Permissive License v 1.0 as shown at
> +# http://oss.oracle.com/licenses/upl.
> +
> +dtrace=$1
> +
> +$dtrace $dt_flags -lvn io:::wait-start
> +exit $?
> diff --git a/test/unittest/io/tst.nfs.sh b/test/unittest/io/tst.nfs.sh
> index f9222ff6..4e368244 100755
> --- a/test/unittest/io/tst.nfs.sh
> +++ b/test/unittest/io/tst.nfs.sh
> @@ -9,7 +9,6 @@
>  # Test the io:::start probe for write and read operations by creating
>  # a file and reading it back after clearing the caches.
>  #
> -# @@xfail: dtv2
>  
>  dtrace=$1
>  filesize=$((1024*1024))
> @@ -23,7 +22,8 @@ statname="nfs"
>  trap "rm -f $tempfile; umount $clientpath; rmdir $clientpath; exportfs -u 127.0.0.1:$serverpath; rmdir $serverpath" QUIT EXIT
>  
>  # setup NFS server
> -service nfs start > /dev/null 2>&1
> +#service nfs start > /dev/null 2>&1
> +systemctl enable --now nfs-server > /dev/null 2>&1
>  mkdir $serverpath
>  exportfs -i -v -o "rw,sync,no_root_squash,insecure,fsid=8434437287" 127.0.0.1:$serverpath > /dev/null
>  
> diff --git a/test/unittest/io/tst.nfs2.sh b/test/unittest/io/tst.nfs2.sh
> new file mode 100755
> index 00000000..273ecef4
> --- /dev/null
> +++ b/test/unittest/io/tst.nfs2.sh
> @@ -0,0 +1,102 @@
> +#!/bin/bash
> +#
> +# Oracle Linux DTrace.
> +# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> +# Licensed under the Universal Permissive License v 1.0 as shown at
> +# http://oss.oracle.com/licenses/upl.
> +
> +#
> +# Test the io:::start probe for write and read operations by creating
> +# a file and reading it back after clearing the caches.
> +#
> +
> +rundt="$1 $dt_flags -qs $PWD/test/unittest/io/dump_io_probe_args.d -c"
> +check_args=$PWD/test/unittest/io/check_io_probe_args.sh
> +retval=0
> +
> +DIRNAME="$tmpdir/io-nfs2.$$.$RANDOM"
> +mkdir -p $DIRNAME
> +cd $DIRNAME
> +
> +filesize=$((1024*1024))
> +
> +exdir=`mktemp -u`
> +iodir=`mktemp -u`
> +tempfile=`mktemp -u -p $iodir`
> +
> +trap "rm -f $tempfile; umount $iodir; rmdir $iodir; exportfs -u 127.0.0.1:$exdir; rmdir $exdir" QUIT
> +
> +systemctl enable --now nfs-server > /dev/null 2>&1
> +
> +mkdir $exdir
> +  # what is the fsid?
> +  exportfs -i -v -o "rw,sync,no_root_squash,insecure,fsid=8434437287" 127.0.0.1:$exdir > /dev/null
> +    mkdir $iodir
> +        mount -t nfs -o nfsvers=3 127.0.0.1:$exdir $iodir
> +            $rundt "dd if=/dev/urandom of=$tempfile count=$filesize bs=1 status=none" -o log.write
> +            myinode=`stat $tempfile  | awk '/	Inode: / {print $4}'`
> +        umount $iodir
> +        # remount so that data is not cached
> +        mount -t nfs -o nfsvers=3 127.0.0.1:$exdir $iodir
> +            $rundt "sum $tempfile"                                                    -o log.read
> +            rm -f $tempfile
> +        umount $iodir
> +    rmdir $iodir
> +  exportfs -u 127.0.0.1:$exdir
> +rmdir $exdir
> +
> +# check the DTrace output
> +
> +$check_args log.write
> +if [ $? -ne 0 ]; then
> +    retval=1
> +fi
> +$check_args log.read
> +if [ $? -ne 0 ]; then
> +    retval=1
> +fi
> +
> +cat > awk.txt << EOF
> +# initialize
> +BEGIN { err = 0; bytes = 0; nrec = 0 }
> +
> +# skip over uninteresting records
> +NF == 0 { next }
> +\$6 != myflags { next }
> +\$22 != "nfs" { next }
> +
> +# check
> +\$4 != "start" &&
> +\$4 != "done" { print "probe name should be start or done"; err = 1 }
> +\$4 == "start" { bytes += \$7; nrec++ }
> +\$12 != "$myinode" { print "blknode should be inode"; err = 1 }
> +\$14 != 0 { print "iodone should be 0"; err = 1 }
> +\$21 != "nfs" { print "name should be nfs"; err = 1 }
> +END {
> +      if (bytes != $filesize) {
> +          print "total bytes should match filesize", bytes, $filesize;
> +          err = 1;
> +      }
> +      if (nrecflag == 1 && nrec != 1) {
> +          print "expected one record";
> +          err = 1;
> +      }
> +      exit(err);
> +}
> +EOF
> +
> +awk -v myflags=520 -v nrecflag=1 -f awk.txt log.write
> +if [ $? -ne 0 ]; then
> +    echo post-processing error log.write
> +    cat log.write
> +    retval=1
> +fi
> +
> +awk -v myflags=460 -v nrecflag=2 -f awk.txt log.read
> +if [ $? -ne 0 ]; then
> +    echo post-processing error log.read
> +    cat log.read
> +    retval=1
> +fi
> +
> +exit $retval
> diff --git a/test/unittest/io/tst.nfs2.x b/test/unittest/io/tst.nfs2.x
> new file mode 120000
> index 00000000..7504b502
> --- /dev/null
> +++ b/test/unittest/io/tst.nfs2.x
> @@ -0,0 +1 @@
> +tst.fbt_probes.x
> \ No newline at end of file
> diff --git a/test/unittest/io/tst.wait.sh b/test/unittest/io/tst.wait.sh
> index 7ef0abae..d41f9ce7 100755
> --- a/test/unittest/io/tst.wait.sh
> +++ b/test/unittest/io/tst.wait.sh
> @@ -8,7 +8,6 @@
>  #
>  # Test the io:::wait-start and io:::wait-done probes.
>  #
> -# @@xfail: dtv2
>  
>  dtrace=$1
>  nblocks=1024
> diff --git a/test/unittest/io/tst.wait.x b/test/unittest/io/tst.wait.x
> new file mode 120000
> index 00000000..7504b502
> --- /dev/null
> +++ b/test/unittest/io/tst.wait.x
> @@ -0,0 +1 @@
> +tst.fbt_probes.x
> \ No newline at end of file
> -- 
> 2.18.4
> 
> 



More information about the DTrace-devel mailing list