[DTrace-devel] [PATCH v2] Implement the ip provider
Eugene Loh
eugene.loh at oracle.com
Mon Sep 18 18:08:34 UTC 2023
What is the status of this patch? I reviewed a v1 on 8/16 and I cannot
tell if those comments were incorporated. I understand that perhaps
some of the feedback might have been rejected, but I cannot tell if some
of the feedback was simply overlooked.
Specifically, what about drop-in and drop-out probes?
And again:
There seems to be no correspondence
to the legacy DTRACE_IP() instrumentation points.
It would be nice to have a note about implementation.
How were the instrumentation points (fbt probes) chosen?
Is this related to __IP[6]_UPD_PO_STATS(..., IPSTATS_MIB_IN, ...)
and IP[6]_UPD_PO_STATS(..., IPSTATS_MIB_OUT, ...)?
And:
If the ip_addr_t problem is being fixed,
maybe reference Orabug 34855291?
In get_member(), I still think it makes no sense to set %r0 = reg right
before %r0 gets clobbered. And, should the return code of probe_read be
checked?
I still wonder about testing. On what platforms has this patch been
tested? Should there be "ls -v" tests as there are for (some) other
providers? Should some @@xfails be removed? I'm also curious about
test.x (check_provider): if there are no ip probes, does that mean
something is broken... or that we shouldn't test it?
Incidentally, in get_member(), is there really a use for
"assert(size...)" given that this is already checked in dt_cg_ldsize()?
On 9/15/23 23:04, Kris Van Hees via DTrace-devel wrote:
> Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> ---
> libdtrace/Build | 4 +-
> libdtrace/dt_open.c | 1 +
> libdtrace/dt_prov_ip.c | 189 ++++++++++++++++++++++++++++
> libdtrace/dt_provider.h | 1 +
> libdtrace/ip.d | 6 +-
> test/demo/ip/ipproto.d | 3 +-
> test/unittest/funcs/tst.inet_ntoa.d | 4 +-
> 7 files changed, 200 insertions(+), 8 deletions(-)
> create mode 100644 libdtrace/dt_prov_ip.c
>
> diff --git a/libdtrace/Build b/libdtrace/Build
> index d1b00933..7dc2d5d6 100644
> --- a/libdtrace/Build
> +++ b/libdtrace/Build
> @@ -49,6 +49,7 @@ libdtrace-build_SOURCES = dt_aggregate.c \
> dt_prov_cpc.c \
> dt_prov_dtrace.c \
> dt_prov_fbt.c \
> + dt_prov_ip.c \
> dt_prov_lockstat.c \
> dt_prov_proc.c \
> dt_prov_profile.c \
> @@ -96,8 +97,9 @@ dt_proc.c_CFLAGS := -Wno-pedantic
> dt_prov_cpc.c_CFLAGS := -Wno-pedantic
> dt_prov_dtrace.c_CFLAGS := -Wno-pedantic
> dt_prov_fbt.c_CFLAGS := -Wno-pedantic
> -dt_prov_proc.c_CFLAGS := -Wno-pedantic
> +dt_prov_ip.c_CFLAGS := -Wno-pedantic
> dt_prov_lockstat.c_CFLAGS := -Wno-pedantic
> +dt_prov_proc.c_CFLAGS := -Wno-pedantic
> dt_prov_profile.c_CFLAGS := -Wno-pedantic
> dt_prov_rawtp.c_CFLAGS := -Wno-pedantic
> dt_prov_sched.c_CFLAGS := -Wno-pedantic
> diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
> index 1eca6079..2db8ec38 100644
> --- a/libdtrace/dt_open.c
> +++ b/libdtrace/dt_open.c
> @@ -68,6 +68,7 @@ static const dt_provimpl_t *dt_providers[] = {
> &dt_dtrace, /* list dt_dtrace first */
> &dt_cpc,
> &dt_fbt,
> + &dt_ip,
> &dt_lockstat,
> &dt_proc,
> &dt_profile,
> diff --git a/libdtrace/dt_prov_ip.c b/libdtrace/dt_prov_ip.c
> new file mode 100644
> index 00000000..8377e066
> --- /dev/null
> +++ b/libdtrace/dt_prov_ip.c
> @@ -0,0 +1,189 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + *
> + * The 'ip' SDT provider for DTrace-specific probes.
> + */
> +#include <assert.h>
> +#include <errno.h>
> +
> +#include "dt_dctx.h"
> +#include "dt_cg.h"
> +#include "dt_provider_sdt.h"
> +#include "dt_probe.h"
> +
> +static const char prvname[] = "ip";
> +static const char modname[] = "vmlinux";
> +
> +static probe_dep_t probes[] = {
> + { "receive",
> + DTRACE_PROBESPEC_NAME, "fbt::ip_local_deliver:entry" },
> + { "receive",
> + DTRACE_PROBESPEC_NAME, "fbt::ip6_input:entry" },
> + { "send",
> + DTRACE_PROBESPEC_NAME, "fbt::ip_finish_output:entry" },
> + { "send",
> + DTRACE_PROBESPEC_NAME, "fbt::ip6_finish_output:entry" },
> + { NULL, }
> +};
> +
> +static probe_arg_t probe_args[] = {
> + { "receive", 0, { 0, 0, "struct sk_buff *", "pktinfo_t *" } },
> + { "receive", 1, { 1, 0, "struct sock *", "csinfo_t *" } },
> + { "receive", 2, { 2, 0, "void_ip_t *", "ipinfo_t *" } },
> + { "receive", 3, { 3, 0, "struct net_device *", "ifinfo_t *" } },
> + { "receive", 4, { 4, 0, "struct iphdr *", "ipv4info_t *" } },
> + { "receive", 5, { 5, 0, "struct ipv6hdr *", "ipv6info_t *"} },
> + { "send", 0, { 0, 0, "struct sk_buff *", "pktinfo_t *" } },
> + { "send", 1, { 1, 0, "struct sock *", "csinfo_t *" } },
> + { "send", 2, { 2, 0, "void_ip_t *", "ipinfo_t *" } },
> + { "send", 3, { 3, 0, "struct net_device *", "ifinfo_t *" } },
> + { "send", 4, { 4, 0, "struct iphdr *", "ipv4info_t *" } },
> + { "send", 5, { 5, 0, "struct ipv6hdr *", "ipv6info_t *"} },
> + { NULL, }
> +};
> +
> +static const dtrace_pattr_t pattr = {
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
> +};
> +
> +/*
> + * Provide all the "ip" SDT probes.
> + */
> +static int populate(dtrace_hdl_t *dtp)
> +{
> + return dt_sdt_populate(dtp, prvname, modname, &dt_ip, &pattr,
> + probe_args, probes);
> +}
> +
> +/*
> + * Retrieve the value of a member in a given struct.
> + *
> + * Entry:
> + * reg = TYPE *ptr
> + *
> + * Return:
> + * %r0 = ptr->member
> + * Clobbers:
> + * %r1 .. %r5
> + */
> +static int get_member(dt_pcb_t *pcb, const char *name, int reg,
> + const char *member) {
> + dtrace_hdl_t *dtp = pcb->pcb_hdl;
> + dt_irlist_t *dlp = &pcb->pcb_ir;
> + dtrace_typeinfo_t tt;
> + ctf_membinfo_t ctm;
> + size_t size;
> + uint_t ldop;
> +
> + if (dtrace_lookup_by_type(dtp, DTRACE_OBJ_KMODS, name, &tt) == -1 ||
> + ctf_member_info(tt.dtt_ctfp, tt.dtt_type, member, &ctm) == CTF_ERR)
> + return -1;
> +
> + ldop = dt_cg_ldsize(NULL, tt.dtt_ctfp, ctm.ctm_type, &size);
> +
> + assert(size > 0 && size <= 8 && (size & (size - 1)) == 0);
> +
> + if (reg != BPF_REG_0)
> + emit(dlp, BPF_MOV_REG(BPF_REG_0, reg));
> +
> + emit(dlp, BPF_MOV_REG(BPF_REG_1, BPF_REG_FP));
> + emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, DT_TRAMP_SP_BASE));
> + emit(dlp, BPF_MOV_IMM(BPF_REG_2, size));
> + emit(dlp, BPF_MOV_REG(BPF_REG_3, reg));
> + emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, ctm.ctm_offset / NBBY));
> + emit(dlp, BPF_CALL_HELPER(dtp->dt_bpfhelper[BPF_FUNC_probe_read_kernel]));
> + emit(dlp, BPF_LOAD(ldop, BPF_REG_0, BPF_REG_FP, DT_TRAMP_SP_BASE));
> +
> + return 0;
> +}
> +
> +/*
> + * Generate a BPF trampoline for a SDT probe.
> + *
> + * The trampoline function is called when a SDT probe triggers, and it must
> + * satisfy the following prototype:
> + *
> + * int dt_ip(void *data)
> + *
> + * The trampoline will populate a dt_dctx_t struct and then call the function
> + * that implements the compiled D clause. It returns the value that it gets
> + * back from that function.
> + */
> +static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> +{
> + dt_irlist_t *dlp = &pcb->pcb_ir;
> + dt_probe_t *prp = pcb->pcb_probe;
> + dt_probe_t *uprp = pcb->pcb_parent_probe;
> + uint_t skbreg;
> +
> + /*
> + * Determine the register that holds a pointer to the skb passed from
> + * the underlying probe.
> + */
> + if (strcmp(prp->desc->prb, "receive") == 0)
> + skbreg = 0;
> + else
> + skbreg = 2;
> +
> + /*
> + * We construct the ip:::(receive,send) probe arguments as
> + * follows:
> + * args[0] = skb
> + * args[1] = skb->sk
> + * args[2] = ip_hdr(skb)
> + * args[3] = skb->dev
> + * args[4] = [IPv4] ip_hdr(skb) -or- [IPv6] NULL
> + * args[5] = [IPv4] NULL -or- [IPv6] ipv6_hdr(skb)
> + */
> + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(skbreg)));
> + emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_6, 0, exitlbl));
> +
> + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_6));
> +
> + get_member(pcb, "struct sk_buff", BPF_REG_6, "sk");
> + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_0));
> +
> + /*
> + * ip_hdr(skb) =
> + * skb_network_header(skb) = (include/linux/ip.h)
> + * skb->head + skb->network_header (include/linux/skbuff.h)
> + */
> + get_member(pcb, "struct sk_buff", BPF_REG_6, "head");
> + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(2), BPF_REG_0));
> + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(4), BPF_REG_0));
> + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(5), BPF_REG_0));
> + get_member(pcb, "struct sk_buff", BPF_REG_6, "network_header");
> + emit(dlp, BPF_XADD_REG(BPF_DW, BPF_REG_7, DMST_ARG(2), BPF_REG_0));
> + emit(dlp, BPF_XADD_REG(BPF_DW, BPF_REG_7, DMST_ARG(4), BPF_REG_0));
> + emit(dlp, BPF_XADD_REG(BPF_DW, BPF_REG_7, DMST_ARG(5), BPF_REG_0));
> +
> + /*
> + * We can use the name of the underlying probe to determine whether we
> + * are dealing with IPv4 (ip_*) or IPv6 (ip6_*).
> + */
> + if (uprp->desc->fun[2] == '6')
> + emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(4), 0));
> + else
> + emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(5), 0));
> +
> + get_member(pcb, "struct sk_buff", BPF_REG_6, "dev");
> + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(3), BPF_REG_0));
> +
> + return 0;
> +}
> +
> +dt_provimpl_t dt_ip = {
> + .name = prvname,
> + .prog_type = BPF_PROG_TYPE_UNSPEC,
> + .populate = &populate,
> + .enable = &dt_sdt_enable,
> + .trampoline = &trampoline,
> + .probe_info = &dt_sdt_probe_info,
> +};
> diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
> index 8face769..31ad028d 100644
> --- a/libdtrace/dt_provider.h
> +++ b/libdtrace/dt_provider.h
> @@ -70,6 +70,7 @@ typedef struct dt_provimpl {
> extern dt_provimpl_t dt_dtrace;
> extern dt_provimpl_t dt_cpc;
> extern dt_provimpl_t dt_fbt;
> +extern dt_provimpl_t dt_ip;
> extern dt_provimpl_t dt_lockstat;
> extern dt_provimpl_t dt_proc;
> extern dt_provimpl_t dt_profile;
> diff --git a/libdtrace/ip.d b/libdtrace/ip.d
> index f66316c3..f8b77f12 100644
> --- a/libdtrace/ip.d
> +++ b/libdtrace/ip.d
> @@ -1,6 +1,6 @@
> /*
> * Oracle Linux DTrace.
> - * Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
> + * Copyright (c) 2007, 2023, Oracle and/or its affiliates. All rights reserved.
> * Licensed under the Universal Permissive License v 1.0 as shown at
> * http://oss.oracle.com/licenses/upl.
> */
> @@ -51,6 +51,8 @@ inline int TCP_MIN_HEADER_LENGTH = 20;
> * to the net namespace (nd_net in struct net_device).
> */
> typedef uint64_t netstackid_t;
> +typedef __be32 ipaddr_t;
> +typedef struct in6_addr in6_addr_t;
>
> /*
> * pktinfo is where packet ID info can be made available for deeper
> @@ -159,7 +161,7 @@ translator csinfo_t < struct sock *s > {
> #pragma D binding "1.5" translator
> translator ipinfo_t < struct iphdr *I > {
> ip_ver = 4;
> - ip_plength = I != NULL ? (ntohs(I->tot_len) - (*(uint8_t *)I & 0xf) << 2) : 0;
> + ip_plength = I != NULL ? (ntohs(I->tot_len) - I->ihl << 2) : 0;
> ip_saddr = I != NULL ? inet_ntoa(&I->saddr) : "<unknown>";
> ip_daddr = I != NULL ? inet_ntoa(&I->daddr) : "<unknown>";
> };
> diff --git a/test/demo/ip/ipproto.d b/test/demo/ip/ipproto.d
> index 0538abe3..da499f74 100644
> --- a/test/demo/ip/ipproto.d
> +++ b/test/demo/ip/ipproto.d
> @@ -1,10 +1,9 @@
> /*
> * Oracle Linux DTrace.
> - * Copyright (c) 2008, 2020, Oracle and/or its affiliates. All rights reserved.
> + * Copyright (c) 2008, 2023, Oracle and/or its affiliates. All rights reserved.
> * Licensed under the Universal Permissive License v 1.0 as shown at
> * http://oss.oracle.com/licenses/upl.
> */
> -/* @@xfail: dtv2 */
>
> #pragma D option quiet
>
> diff --git a/test/unittest/funcs/tst.inet_ntoa.d b/test/unittest/funcs/tst.inet_ntoa.d
> index 42ea7107..3b7b7556 100644
> --- a/test/unittest/funcs/tst.inet_ntoa.d
> +++ b/test/unittest/funcs/tst.inet_ntoa.d
> @@ -1,14 +1,12 @@
> /*
> * Oracle Linux DTrace.
> - * Copyright (c) 2007, 2022, Oracle and/or its affiliates. All rights reserved.
> + * Copyright (c) 2007, 2023, Oracle and/or its affiliates. All rights reserved.
> * Licensed under the Universal Permissive License v 1.0 as shown at
> * http://oss.oracle.com/licenses/upl.
> */
>
> #pragma D option quiet
>
> -typedef vmlinux`__be32 ipaddr_t; /* FIXME: how should this really be handled? */
> -
> ipaddr_t *ip4a;
> ipaddr_t *ip4b;
> ipaddr_t *ip4c;
More information about the DTrace-devel
mailing list