[DTrace-devel] [PATCH] io: adjust io provider for NFS tracepoint variants
Elena Zannoni
elena.zannoni at oracle.com
Wed Jan 8 01:47:53 UTC 2025
Reviewed-by: Elena Zannoni <elena.zannoni at oracle.com>
elena
On 1/7/25 15:44, Kris Van Hees wrote:
> Kernels prior to 5.6.0 pass 3 arguments (derived from the NFS hdr)
> to the nfs_initiate_read raw tracepoint, whereas kernels as of 5.6.0
> pass just the NFS hdr.
>
> Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> ---
> libdtrace/dt_prov_io.c | 127 +++++++++++++++++++++++++++++++++++------
> 1 file changed, 108 insertions(+), 19 deletions(-)
>
> diff --git a/libdtrace/dt_prov_io.c b/libdtrace/dt_prov_io.c
> index 385dc792..45cb875a 100644
> --- a/libdtrace/dt_prov_io.c
> +++ b/libdtrace/dt_prov_io.c
> @@ -63,17 +63,9 @@ static probe_dep_t probes[] = {
> { "start",
> DTRACE_PROBESPEC_NAME, "rawtp:block::block_bio_queue" },
> { "start",
> - DTRACE_PROBESPEC_NAME, "rawtp:nfs::nfs_initiate_read",
> - DT_VERSION_NUMBER(5, 6, 0), },
> + DTRACE_PROBESPEC_NAME, "rawtp:nfs::nfs_initiate_read" },
> { "start",
> - DTRACE_PROBESPEC_NAME, "fbt:nfs:nfs_initiate_read:entry",
> - 0, DT_VERSION_NUMBER(5, 5, 19) },
> - { "start",
> - DTRACE_PROBESPEC_NAME, "rawtp:nfs::nfs_initiate_write",
> - DT_VERSION_NUMBER(5, 6, 0), },
> - { "start",
> - DTRACE_PROBESPEC_NAME, "fbt:nfs:nfs_initiate_write:entry",
> - 0, DT_VERSION_NUMBER(5, 5, 19) },
> + DTRACE_PROBESPEC_NAME, "rawtp:nfs::nfs_initiate_write" },
> { NULL, }
> };
>
> @@ -155,12 +147,109 @@ static void deref_r3(dt_irlist_t *dlp, uint_t exitlbl, int addend, int width,
> emit(dlp, BPF_LOAD(width, reg, BPF_REG_FP, DT_TRAMP_SP_SLOT(0)));
> }
>
> +/*
> + * For NFS events, we have to construct a fake struct bio, which we have to
> + * populate from the inode (arg0) and hdr->good_bytes (arg2) arguments the
> + * underlying probe provides.
> + */
> +static void io_nfs_args_v1(dt_pcb_t *pcb, dt_irlist_t *dlp, uint_t exitlbl,
> + const char *prb, const char *uprb)
> +{
> + int off;
> + size_t siz;
> +
> + /*
> + * Determine the various sizes and offsets we want.
> + *
> + * // Access these fields relative to &bio.
> + * struct bio bio = {
> + * .bi_opf = ...,
> + * .bi_iter.bi_size = ..., // struct bvec_iter bi_iter
> + * .bi_iter.bi_sector = ...,
> + * .bi_bdev = 0, // -or- .bi_disk = 0
> + * };
> + *
> + * // Access these fields relative to hdr.
> + * struct nfs_pgio_header *hdr;
> + * ... = hdr->res.count; // struct nfs_pgio_res res
> + */
> +
> + /*
> + * Declare the -io-bio variable and store its address in %r6.
> + */
> + dt_cg_tramp_decl_var(pcb, &v_bio);
> + dt_cg_tramp_get_var(pcb, "this->-io-bio", 1, BPF_REG_6);
> +
> + /* Fill in bi_opf */
> + off = dt_cg_ctf_offsetof("struct bio", "bi_opf", &siz, 0);
> + siz = bpf_ldst_size(siz, 1);
> + if (strstr(uprb, "read"))
> + emit(dlp, BPF_STORE_IMM(siz, BPF_REG_6, off, REQ_OP_READ));
> + else
> + emit(dlp, BPF_STORE_IMM(siz, BPF_REG_6, off, REQ_OP_WRITE));
> +
> + /*
> + * bio.bi_iter.bi_size = hdr->foo.count;
> + *
> + * For the 'start' probe, count is arg2
> + * For the 'done' probe, count is hdr->res.count (hdr in arg1)
> + */
> + if (strcmp(prb, "start") == 0) {
> + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(2)));
> + } else {
> + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(1)));
> + off = dt_cg_ctf_offsetof("struct nfs_pgio_header", "res", NULL, 0)
> + + dt_cg_ctf_offsetof("struct nfs_pgio_res", "count", &siz, 0);
> + deref_r3(dlp, exitlbl, off, siz, BPF_REG_0);
> + }
> +
> + off = dt_cg_ctf_offsetof("struct bio", "bi_iter", NULL, 0)
> + + dt_cg_ctf_offsetof("struct bvec_iter", "bi_size", &siz, 0);
> + siz = bpf_ldst_size(siz, 1);
> + emit(dlp, BPF_STORE(siz, BPF_REG_6, off, BPF_REG_0));
> +
> + /*
> + * bio.bi_iter.bi_sector = inode;
> + */
> + if (strcmp(prb, "start") == 0) {
> + /* inode is arg0 */
> + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(0)));
> + } else {
> + /* use hdr->inode, hdr is arg1 */
> + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_7, DMST_ARG(1)));
> +
> + off = dt_cg_ctf_offsetof("struct nfs_pgio_header", "inode", &siz, 0);
> + deref_r3(dlp, exitlbl, off, siz, BPF_REG_3);
> + }
> +
> + off = dt_cg_ctf_offsetof("struct nfs_inode", "fileid", &siz, 0)
> + - dt_cg_ctf_offsetof("struct nfs_inode", "vfs_inode", NULL, 0);
> + deref_r3(dlp, exitlbl, off, siz, BPF_REG_0);
> +
> + off = dt_cg_ctf_offsetof("struct bio", "bi_iter", NULL, 0)
> + + dt_cg_ctf_offsetof("struct bvec_iter", "bi_sector", &siz, 0);
> + siz = bpf_ldst_size(siz, 1);
> + emit(dlp, BPF_STORE(siz, BPF_REG_6, off, BPF_REG_0));
> +
> + /*
> + * bio.bi_bdev = 0;
> + */
> + off = dt_cg_ctf_offsetof("struct bio", "bi_bdev", &siz, 1);
> + if (off == -1)
> + off = dt_cg_ctf_offsetof("struct bio", "bi_disk", &siz, 0);
> + siz = bpf_ldst_size(siz, 1);
> + emit(dlp, BPF_STORE_IMM(siz, BPF_REG_6, off, 0));
> +
> + /* Store a pointer to the fake bio in arg0. */
> + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_6));
> +}
> +
> /*
> * For NFS events, we have to construct a fake struct bio, which we have to
> * populate from the nfs_pgio_header argument the underlying probe provides.
> */
> -static void io_nfs_args(dt_pcb_t *pcb, dt_irlist_t *dlp, uint_t exitlbl,
> - const char *prb, const char *uprb)
> +static void io_nfs_args_v2(dt_pcb_t *pcb, dt_irlist_t *dlp, uint_t exitlbl,
> + const char *prb, const char *uprb)
> {
> int off;
> size_t siz;
> @@ -411,6 +500,7 @@ static void io_xfs_args(dt_pcb_t *pcb, dt_irlist_t *dlp, uint_t exitlbl)
> */
> static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> {
> + dtrace_hdl_t *dtp = pcb->pcb_hdl;
> dt_irlist_t *dlp = &pcb->pcb_ir;
> dt_probe_t *prp = pcb->pcb_probe;
> dt_probe_t *uprp = pcb->pcb_parent_probe;
> @@ -420,13 +510,12 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> * we need to synthesize one.
> */
> if (strcmp(uprp->desc->mod, "nfs") == 0) {
> - /*
> - * If the underlying probe is an FBT probe, we pass function
> - * name. Otherwise, pass probe name.
> - */
> - io_nfs_args(pcb, dlp, exitlbl, prp->desc->prb,
> - strcmp(uprp->desc->prb, "entry") == 0
> - ? uprp->desc->fun : uprp->desc->prb);
> + if (dtp->dt_kernver < DT_VERSION_NUMBER(5, 6, 0))
> + io_nfs_args_v1(pcb, dlp, exitlbl, prp->desc->prb,
> + uprp->desc->prb);
> + else
> + io_nfs_args_v2(pcb, dlp, exitlbl, prp->desc->prb,
> + uprp->desc->prb);
> goto done;
> } else if (strcmp(uprp->desc->mod, "xfs") == 0) {
> io_xfs_args(pcb, dlp, exitlbl);
More information about the DTrace-devel mailing list