[DTrace-devel] [PATCH v2] Implement the ip provider

Eugene Loh eugene.loh at oracle.com
Mon Sep 18 18:08:34 UTC 2023


What is the status of this patch?  I reviewed a v1 on 8/16 and I cannot 
tell if those comments were incorporated.  I understand that perhaps 
some of the feedback might have been rejected, but I cannot tell if some 
of the feedback was simply overlooked.

Specifically, what about drop-in and drop-out probes?

And again:
     There seems to be no correspondence
     to the legacy DTRACE_IP() instrumentation points.
     It would be nice to have a note about implementation.
     How were the instrumentation points (fbt probes) chosen?
     Is this related to __IP[6]_UPD_PO_STATS(..., IPSTATS_MIB_IN, ...)
     and IP[6]_UPD_PO_STATS(..., IPSTATS_MIB_OUT, ...)?

And:
     If the ip_addr_t problem is being fixed,
     maybe reference Orabug 34855291?

In get_member(), I still think it makes no sense to set %r0 = reg right 
before %r0 gets clobbered.  And, should the return code of probe_read be 
checked?

I still wonder about testing.  On what platforms has this patch been 
tested?  Should there be "ls -v" tests as there are for (some) other 
providers?  Should some @@xfails be removed?  I'm also curious about 
test.x (check_provider):  if there are no ip probes, does that mean 
something is broken... or that we shouldn't test it?

Incidentally, in get_member(), is there really a use for 
"assert(size...)" given that this is already checked in dt_cg_ldsize()?

On 9/15/23 23:04, Kris Van Hees via DTrace-devel wrote:
> Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> ---
>   libdtrace/Build                     |   4 +-
>   libdtrace/dt_open.c                 |   1 +
>   libdtrace/dt_prov_ip.c              | 189 ++++++++++++++++++++++++++++
>   libdtrace/dt_provider.h             |   1 +
>   libdtrace/ip.d                      |   6 +-
>   test/demo/ip/ipproto.d              |   3 +-
>   test/unittest/funcs/tst.inet_ntoa.d |   4 +-
>   7 files changed, 200 insertions(+), 8 deletions(-)
>   create mode 100644 libdtrace/dt_prov_ip.c
>
> diff --git a/libdtrace/Build b/libdtrace/Build
> index d1b00933..7dc2d5d6 100644
> --- a/libdtrace/Build
> +++ b/libdtrace/Build
> @@ -49,6 +49,7 @@ libdtrace-build_SOURCES = dt_aggregate.c \
>   			  dt_prov_cpc.c \
>   			  dt_prov_dtrace.c \
>   			  dt_prov_fbt.c \
> +			  dt_prov_ip.c \
>   			  dt_prov_lockstat.c \
>   			  dt_prov_proc.c \
>   			  dt_prov_profile.c \
> @@ -96,8 +97,9 @@ dt_proc.c_CFLAGS := -Wno-pedantic
>   dt_prov_cpc.c_CFLAGS := -Wno-pedantic
>   dt_prov_dtrace.c_CFLAGS := -Wno-pedantic
>   dt_prov_fbt.c_CFLAGS := -Wno-pedantic
> -dt_prov_proc.c_CFLAGS := -Wno-pedantic
> +dt_prov_ip.c_CFLAGS := -Wno-pedantic
>   dt_prov_lockstat.c_CFLAGS := -Wno-pedantic
> +dt_prov_proc.c_CFLAGS := -Wno-pedantic
>   dt_prov_profile.c_CFLAGS := -Wno-pedantic
>   dt_prov_rawtp.c_CFLAGS := -Wno-pedantic
>   dt_prov_sched.c_CFLAGS := -Wno-pedantic
> diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
> index 1eca6079..2db8ec38 100644
> --- a/libdtrace/dt_open.c
> +++ b/libdtrace/dt_open.c
> @@ -68,6 +68,7 @@ static const dt_provimpl_t *dt_providers[] = {
>   	&dt_dtrace,		/* list dt_dtrace first */
>   	&dt_cpc,
>   	&dt_fbt,
> +	&dt_ip,
>   	&dt_lockstat,
>   	&dt_proc,
>   	&dt_profile,
> diff --git a/libdtrace/dt_prov_ip.c b/libdtrace/dt_prov_ip.c
> new file mode 100644
> index 00000000..8377e066
> --- /dev/null
> +++ b/libdtrace/dt_prov_ip.c
> @@ -0,0 +1,189 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + *
> + * The 'ip' SDT provider for DTrace-specific probes.
> + */
> +#include <assert.h>
> +#include <errno.h>
> +
> +#include "dt_dctx.h"
> +#include "dt_cg.h"
> +#include "dt_provider_sdt.h"
> +#include "dt_probe.h"
> +
> +static const char		prvname[] = "ip";
> +static const char		modname[] = "vmlinux";
> +
> +static probe_dep_t	probes[] = {
> +	{ "receive",
> +	  DTRACE_PROBESPEC_NAME,	"fbt::ip_local_deliver:entry" },
> +	{ "receive",
> +	  DTRACE_PROBESPEC_NAME,	"fbt::ip6_input:entry" },
> +	{ "send",
> +	  DTRACE_PROBESPEC_NAME,	"fbt::ip_finish_output:entry" },
> +	{ "send",
> +	  DTRACE_PROBESPEC_NAME,	"fbt::ip6_finish_output:entry" },
> +	{ NULL, }
> +};
> +
> +static probe_arg_t probe_args[] = {
> +	{ "receive", 0, { 0, 0, "struct sk_buff *", "pktinfo_t *" } },
> +	{ "receive", 1, { 1, 0, "struct sock *", "csinfo_t *" } },
> +	{ "receive", 2, { 2, 0, "void_ip_t *", "ipinfo_t *" } },
> +	{ "receive", 3, { 3, 0, "struct net_device *", "ifinfo_t *" } },
> +	{ "receive", 4, { 4, 0, "struct iphdr *", "ipv4info_t *" } },
> +	{ "receive", 5, { 5, 0, "struct ipv6hdr *", "ipv6info_t *"} },
> +	{ "send", 0, { 0, 0, "struct sk_buff *", "pktinfo_t *" } },
> +	{ "send", 1, { 1, 0, "struct sock *", "csinfo_t *" } },
> +	{ "send", 2, { 2, 0, "void_ip_t *", "ipinfo_t *" } },
> +	{ "send", 3, { 3, 0, "struct net_device *", "ifinfo_t *" } },
> +	{ "send", 4, { 4, 0, "struct iphdr *", "ipv4info_t *" } },
> +	{ "send", 5, { 5, 0, "struct ipv6hdr *", "ipv6info_t *"} },
> +	{ NULL, }
> +};
> +
> +static const dtrace_pattr_t	pattr = {
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
> +};
> +
> +/*
> + * Provide all the "ip" SDT probes.
> + */
> +static int populate(dtrace_hdl_t *dtp)
> +{
> +	return dt_sdt_populate(dtp, prvname, modname, &dt_ip, &pattr,
> +			       probe_args, probes);
> +}
> +
> +/*
> + * Retrieve the value of a member in a given struct.
> + *
> + * Entry:
> + *	reg = TYPE *ptr
> + *
> + * Return:
> + *	%r0 = ptr->member
> + * Clobbers:
> + *	%r1 .. %r5
> + */
> +static int get_member(dt_pcb_t *pcb, const char *name, int reg,
> +		      const char *member) {
> +	dtrace_hdl_t		*dtp = pcb->pcb_hdl;
> +	dt_irlist_t		*dlp = &pcb->pcb_ir;
> +	dtrace_typeinfo_t	tt;
> +	ctf_membinfo_t		ctm;
> +	size_t			size;
> +	uint_t			ldop;
> +
> +	if (dtrace_lookup_by_type(dtp, DTRACE_OBJ_KMODS, name, &tt) == -1 ||
> +	    ctf_member_info(tt.dtt_ctfp, tt.dtt_type, member, &ctm) == CTF_ERR)
> +		return -1;
> +
> +	ldop = dt_cg_ldsize(NULL, tt.dtt_ctfp, ctm.ctm_type, &size);
> +
> +	assert(size > 0 && size <= 8 && (size & (size - 1)) == 0);
> +
> +	if (reg != BPF_REG_0)
> +		emit(dlp, BPF_MOV_REG(BPF_REG_0, reg));
> +
> +	emit(dlp, BPF_MOV_REG(BPF_REG_1, BPF_REG_FP));
> +	emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, DT_TRAMP_SP_BASE));
> +	emit(dlp, BPF_MOV_IMM(BPF_REG_2, size));
> +	emit(dlp, BPF_MOV_REG(BPF_REG_3, reg));
> +	emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, ctm.ctm_offset / NBBY));
> +	emit(dlp, BPF_CALL_HELPER(dtp->dt_bpfhelper[BPF_FUNC_probe_read_kernel]));
> +	emit(dlp, BPF_LOAD(ldop, BPF_REG_0, BPF_REG_FP, DT_TRAMP_SP_BASE));
> +
> +	return 0;
> +}
> +
> +/*
> + * Generate a BPF trampoline for a SDT probe.
> + *
> + * The trampoline function is called when a SDT probe triggers, and it must
> + * satisfy the following prototype:
> + *
> + *	int dt_ip(void *data)
> + *
> + * The trampoline will populate a dt_dctx_t struct and then call the function
> + * that implements the compiled D clause.  It returns the value that it gets
> + * back from that function.
> + */
> +static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> +{
> +	dt_irlist_t	*dlp = &pcb->pcb_ir;
> +	dt_probe_t	*prp = pcb->pcb_probe;
> +	dt_probe_t	*uprp = pcb->pcb_parent_probe;
> +	uint_t		skbreg;
> +
> +	/*
> +	 * Determine the register that holds a pointer to the skb passed from
> +	 * the underlying probe.
> +	 */
> +	if (strcmp(prp->desc->prb, "receive") == 0)
> +		skbreg = 0;
> +	else
> +		skbreg = 2;
> +
> +	/*
> +	 * We construct the ip:::(receive,send) probe arguments as
> +	 * follows:
> +	 *	args[0] = skb
> +	 *	args[1] = skb->sk
> +	 *	args[2] = ip_hdr(skb)
> +	 *	args[3] = skb->dev
> +	 *	args[4] = [IPv4] ip_hdr(skb)	-or- [IPv6] NULL
> +	 *	args[5] = [IPv4] NULL		-or- [IPv6] ipv6_hdr(skb)
> +	 */
> +	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(skbreg)));
> +	emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_6, 0, exitlbl));
> +
> +	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_6));
> +
> +	get_member(pcb, "struct sk_buff", BPF_REG_6, "sk");
> +	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_0));
> +
> +	/*
> +	 * ip_hdr(skb) =
> +	 *	skb_network_header(skb)	=	(include/linux/ip.h)
> +	 *	skb->head + skb->network_header	(include/linux/skbuff.h)
> +	 */
> +	get_member(pcb, "struct sk_buff", BPF_REG_6, "head");
> +	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(2), BPF_REG_0));
> +	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(4), BPF_REG_0));
> +	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(5), BPF_REG_0));
> +	get_member(pcb, "struct sk_buff", BPF_REG_6, "network_header");
> +	emit(dlp, BPF_XADD_REG(BPF_DW, BPF_REG_7, DMST_ARG(2), BPF_REG_0));
> +	emit(dlp, BPF_XADD_REG(BPF_DW, BPF_REG_7, DMST_ARG(4), BPF_REG_0));
> +	emit(dlp, BPF_XADD_REG(BPF_DW, BPF_REG_7, DMST_ARG(5), BPF_REG_0));
> +
> +	/*
> +	 * We can use the name of the underlying probe to determine whether we
> +	 * are dealing with IPv4 (ip_*) or IPv6 (ip6_*).
> +	 */
> +	if (uprp->desc->fun[2] == '6')
> +		emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(4), 0));
> +	else
> +		emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(5), 0));
> +
> +	get_member(pcb, "struct sk_buff", BPF_REG_6, "dev");
> +	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(3), BPF_REG_0));
> +
> +	return 0;
> +}
> +
> +dt_provimpl_t	dt_ip = {
> +	.name		= prvname,
> +	.prog_type	= BPF_PROG_TYPE_UNSPEC,
> +	.populate	= &populate,
> +	.enable		= &dt_sdt_enable,
> +	.trampoline	= &trampoline,
> +	.probe_info	= &dt_sdt_probe_info,
> +};
> diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
> index 8face769..31ad028d 100644
> --- a/libdtrace/dt_provider.h
> +++ b/libdtrace/dt_provider.h
> @@ -70,6 +70,7 @@ typedef struct dt_provimpl {
>   extern dt_provimpl_t dt_dtrace;
>   extern dt_provimpl_t dt_cpc;
>   extern dt_provimpl_t dt_fbt;
> +extern dt_provimpl_t dt_ip;
>   extern dt_provimpl_t dt_lockstat;
>   extern dt_provimpl_t dt_proc;
>   extern dt_provimpl_t dt_profile;
> diff --git a/libdtrace/ip.d b/libdtrace/ip.d
> index f66316c3..f8b77f12 100644
> --- a/libdtrace/ip.d
> +++ b/libdtrace/ip.d
> @@ -1,6 +1,6 @@
>   /*
>    * Oracle Linux DTrace.
> - * Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
> + * Copyright (c) 2007, 2023, Oracle and/or its affiliates. All rights reserved.
>    * Licensed under the Universal Permissive License v 1.0 as shown at
>    * http://oss.oracle.com/licenses/upl.
>    */
> @@ -51,6 +51,8 @@ inline int TCP_MIN_HEADER_LENGTH =	20;
>    * to the net namespace (nd_net in struct net_device).
>    */
>   typedef uint64_t	netstackid_t;
> +typedef __be32		ipaddr_t;
> +typedef struct in6_addr	in6_addr_t;
>   
>   /*
>    * pktinfo is where packet ID info can be made available for deeper
> @@ -159,7 +161,7 @@ translator csinfo_t < struct sock *s > {
>   #pragma D binding "1.5" translator
>   translator ipinfo_t < struct iphdr *I > {
>   	ip_ver = 4;
> -        ip_plength = I != NULL ? (ntohs(I->tot_len) - (*(uint8_t *)I & 0xf) << 2) : 0;
> +        ip_plength = I != NULL ? (ntohs(I->tot_len) - I->ihl << 2) : 0;
>   	ip_saddr = I != NULL ? inet_ntoa(&I->saddr) : "<unknown>";
>   	ip_daddr = I != NULL ? inet_ntoa(&I->daddr) : "<unknown>";
>   };
> diff --git a/test/demo/ip/ipproto.d b/test/demo/ip/ipproto.d
> index 0538abe3..da499f74 100644
> --- a/test/demo/ip/ipproto.d
> +++ b/test/demo/ip/ipproto.d
> @@ -1,10 +1,9 @@
>   /*
>    * Oracle Linux DTrace.
> - * Copyright (c) 2008, 2020, Oracle and/or its affiliates. All rights reserved.
> + * Copyright (c) 2008, 2023, Oracle and/or its affiliates. All rights reserved.
>    * Licensed under the Universal Permissive License v 1.0 as shown at
>    * http://oss.oracle.com/licenses/upl.
>    */
> -/* @@xfail: dtv2 */
>   
>   #pragma D option quiet
>   
> diff --git a/test/unittest/funcs/tst.inet_ntoa.d b/test/unittest/funcs/tst.inet_ntoa.d
> index 42ea7107..3b7b7556 100644
> --- a/test/unittest/funcs/tst.inet_ntoa.d
> +++ b/test/unittest/funcs/tst.inet_ntoa.d
> @@ -1,14 +1,12 @@
>   /*
>    * Oracle Linux DTrace.
> - * Copyright (c) 2007, 2022, Oracle and/or its affiliates. All rights reserved.
> + * Copyright (c) 2007, 2023, Oracle and/or its affiliates. All rights reserved.
>    * Licensed under the Universal Permissive License v 1.0 as shown at
>    * http://oss.oracle.com/licenses/upl.
>    */
>   
>   #pragma D option quiet
>   
> -typedef vmlinux`__be32 ipaddr_t;                  /* FIXME: how should this really be handled? */
> -
>   ipaddr_t *ip4a;
>   ipaddr_t *ip4b;
>   ipaddr_t *ip4c;



More information about the DTrace-devel mailing list