[DTrace-devel] [PATCH v2] Implement the ip provider

Kris Van Hees kris.van.hees at oracle.com
Sat Sep 16 03:04:31 UTC 2023


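Add the ip provider with the ip:::receive and ip:::send probes.  They are
implemented as SDT probes that depend on the fbt entry probes for
ip_local_deliver() and ip6_input() (receive), and ip_finish_output() and
ip6_finish_output() (send).

Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>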
---
 libdtrace/Build                     |   4 +-
 libdtrace/dt_open.c                 |   1 +
 libdtrace/dt_prov_ip.c              | 189 ++++++++++++++++++++++++++++
 libdtrace/dt_provider.h             |   1 +
 libdtrace/ip.d                      |   6 +-
 test/demo/ip/ipproto.d              |   3 +-
 test/unittest/funcs/tst.inet_ntoa.d |   4 +-
 7 files changed, 200 insertions(+), 8 deletions(-)
 create mode 100644 libdtrace/dt_prov_ip.c

diff --git a/libdtrace/Build b/libdtrace/Build
index d1b00933..7dc2d5d6 100644
--- a/libdtrace/Build
+++ b/libdtrace/Build
@@ -49,6 +49,7 @@ libdtrace-build_SOURCES = dt_aggregate.c \
 			  dt_prov_cpc.c \
 			  dt_prov_dtrace.c \
 			  dt_prov_fbt.c \
+			  dt_prov_ip.c \
 			  dt_prov_lockstat.c \
 			  dt_prov_proc.c \
 			  dt_prov_profile.c \
@@ -96,8 +97,9 @@ dt_proc.c_CFLAGS := -Wno-pedantic
 dt_prov_cpc.c_CFLAGS := -Wno-pedantic
 dt_prov_dtrace.c_CFLAGS := -Wno-pedantic
 dt_prov_fbt.c_CFLAGS := -Wno-pedantic
-dt_prov_proc.c_CFLAGS := -Wno-pedantic
+dt_prov_ip.c_CFLAGS := -Wno-pedantic
 dt_prov_lockstat.c_CFLAGS := -Wno-pedantic
+dt_prov_proc.c_CFLAGS := -Wno-pedantic
 dt_prov_profile.c_CFLAGS := -Wno-pedantic
 dt_prov_rawtp.c_CFLAGS := -Wno-pedantic
 dt_prov_sched.c_CFLAGS := -Wno-pedantic
diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
index 1eca6079..2db8ec38 100644
--- a/libdtrace/dt_open.c
+++ b/libdtrace/dt_open.c
@@ -68,6 +68,7 @@ static const dt_provimpl_t *dt_providers[] = {
 	&dt_dtrace,		/* list dt_dtrace first */
 	&dt_cpc,
 	&dt_fbt,
+	&dt_ip,
 	&dt_lockstat,
 	&dt_proc,
 	&dt_profile,
diff --git a/libdtrace/dt_prov_ip.c b/libdtrace/dt_prov_ip.c
new file mode 100644
index 00000000..8377e066
--- /dev/null
+++ b/libdtrace/dt_prov_ip.c
@@ -0,0 +1,189 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ *
+ * The 'ip' SDT provider for DTrace-specific probes.
+ */
+#include <assert.h>
+#include <errno.h>
+
+#include "dt_dctx.h"
+#include "dt_cg.h"
+#include "dt_provider_sdt.h"
+#include "dt_probe.h"
+
+static const char		prvname[] = "ip";	/* provider name */
+static const char		modname[] = "vmlinux";	/* module name for the probes */
+
+static probe_dep_t	probes[] = {	/* ip probe -> underlying fbt probe */
+	{ "receive",
+	  DTRACE_PROBESPEC_NAME,	"fbt::ip_local_deliver:entry" },
+	{ "receive",
+	  DTRACE_PROBESPEC_NAME,	"fbt::ip6_input:entry" },
+	{ "send",
+	  DTRACE_PROBESPEC_NAME,	"fbt::ip_finish_output:entry" },
+	{ "send",
+	  DTRACE_PROBESPEC_NAME,	"fbt::ip6_finish_output:entry" },
+	{ NULL, }			/* end of table */
+};
+
+static probe_arg_t probe_args[] = {	/* argument types of the ip probes */
+	{ "receive", 0, { 0, 0, "struct sk_buff *", "pktinfo_t *" } },
+	{ "receive", 1, { 1, 0, "struct sock *", "csinfo_t *" } },
+	{ "receive", 2, { 2, 0, "void_ip_t *", "ipinfo_t *" } },
+	{ "receive", 3, { 3, 0, "struct net_device *", "ifinfo_t *" } },
+	{ "receive", 4, { 4, 0, "struct iphdr *", "ipv4info_t *" } },
+	{ "receive", 5, { 5, 0, "struct ipv6hdr *", "ipv6info_t *"} },
+	{ "send", 0, { 0, 0, "struct sk_buff *", "pktinfo_t *" } },
+	{ "send", 1, { 1, 0, "struct sock *", "csinfo_t *" } },
+	{ "send", 2, { 2, 0, "void_ip_t *", "ipinfo_t *" } },
+	{ "send", 3, { 3, 0, "struct net_device *", "ifinfo_t *" } },
+	{ "send", 4, { 4, 0, "struct iphdr *", "ipv4info_t *" } },
+	{ "send", 5, { 5, 0, "struct ipv6hdr *", "ipv6info_t *"} },
+	{ NULL, }			/* end of table */
+};
+
+static const dtrace_pattr_t	pattr = {	/* stability attributes */
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+};
+
+/*
+ * Provide all "ip" SDT probes: pass the tables above to dt_sdt_populate().
+ */
+static int populate(dtrace_hdl_t *dtp)
+{
+	return dt_sdt_populate(dtp, prvname, modname, &dt_ip, &pattr,
+			       probe_args, probes);
+}
+
+/*
+ * Emit code to read 'member' of the 'name' type through the pointer in 'reg'.
+ * Returns 0, or -1 (emitting nothing) if the type or member lookup fails.
+ *
+ * Entry:
+ *	reg = TYPE *ptr
+ * Return:
+ *	%r0 = ptr->member
+ * Clobbers:
+ *	%r1 .. %r5
+ */
+static int get_member(dt_pcb_t *pcb, const char *name, int reg,
+		      const char *member)
+{
+	dtrace_hdl_t		*dtp = pcb->pcb_hdl;
+	dt_irlist_t		*dlp = &pcb->pcb_ir;
+	dtrace_typeinfo_t	tt;
+	ctf_membinfo_t		ctm;
+	size_t			size;
+	uint_t			ldop;
+
+	if (dtrace_lookup_by_type(dtp, DTRACE_OBJ_KMODS, name, &tt) == -1 ||
+	    ctf_member_info(tt.dtt_ctfp, tt.dtt_type, member, &ctm) == CTF_ERR)
+		return -1;
+
+	ldop = dt_cg_ldsize(NULL, tt.dtt_ctfp, ctm.ctm_type, &size);
+	assert(size > 0 && size <= 8 && (size & (size - 1)) == 0);
+
+	/* Take the address from reg first: reg may itself be %r1 or %r2. */
+	emit(dlp, BPF_MOV_REG(BPF_REG_3, reg));
+	emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, ctm.ctm_offset / NBBY));
+
+	/* Read 'size' bytes onto the stack and load them into %r0. */
+	emit(dlp, BPF_MOV_REG(BPF_REG_1, BPF_REG_FP));
+	emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, DT_TRAMP_SP_BASE));
+	emit(dlp, BPF_MOV_IMM(BPF_REG_2, size));
+	emit(dlp, BPF_CALL_HELPER(dtp->dt_bpfhelper[BPF_FUNC_probe_read_kernel]));
+	emit(dlp, BPF_LOAD(ldop, BPF_REG_0, BPF_REG_FP, DT_TRAMP_SP_BASE));
+
+	return 0;
+}
+
+/*
+ * Generate a BPF trampoline for an ip SDT probe.
+ *
+ * The trampoline function is called when an SDT probe triggers, and it must
+ * satisfy the following prototype:
+ *
+ *	int dt_ip(void *data)
+ *
+ * The code emitted here takes the skb from the underlying probe's arguments,
+ * exits through exitlbl if it is NULL, and otherwise fills in args[0] .. [5]
+ * for the ip probe.  Return values of get_member() are not checked.
+ */
+static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
+{
+	dt_irlist_t	*dlp = &pcb->pcb_ir;
+	dt_probe_t	*prp = pcb->pcb_probe;
+	dt_probe_t	*uprp = pcb->pcb_parent_probe;
+	uint_t		skbreg;
+
+	/*
+	 * Determine which argument of the underlying probe holds the skb
+	 * pointer: argument 0 for "receive", argument 2 for all other probes.
+	 */
+	if (strcmp(prp->desc->prb, "receive") == 0)
+		skbreg = 0;
+	else
+		skbreg = 2;
+
+	/*
+	 * With %r6 = skb (bailing out to exitlbl if NULL), construct the
+	 * ip:::(receive,send) probe arguments as follows:
+	 *	args[0] = skb
+	 *	args[1] = skb->sk
+	 *	args[2] = ip_hdr(skb)
+	 *	args[3] = skb->dev
+	 *	args[4] = [IPv4] ip_hdr(skb)	-or- [IPv6] NULL
+	 *	args[5] = [IPv4] NULL		-or- [IPv6] ipv6_hdr(skb)
+	 */
+	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(skbreg)));
+	emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_6, 0, exitlbl));
+
+	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_6));
+
+	get_member(pcb, "struct sk_buff", BPF_REG_6, "sk");
+	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_0));
+
+	/*
+	 * ip_hdr(skb) = skb_network_header(skb)	(include/linux/ip.h)
+	 *	= skb->head + skb->network_header	(include/linux/skbuff.h)
+	 * Store head in args 2, 4 and 5, then add network_header in place.
+	 */
+	get_member(pcb, "struct sk_buff", BPF_REG_6, "head");
+	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(2), BPF_REG_0));
+	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(4), BPF_REG_0));
+	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(5), BPF_REG_0));
+	get_member(pcb, "struct sk_buff", BPF_REG_6, "network_header");
+	emit(dlp, BPF_XADD_REG(BPF_DW, BPF_REG_7, DMST_ARG(2), BPF_REG_0));
+	emit(dlp, BPF_XADD_REG(BPF_DW, BPF_REG_7, DMST_ARG(4), BPF_REG_0));
+	emit(dlp, BPF_XADD_REG(BPF_DW, BPF_REG_7, DMST_ARG(5), BPF_REG_0));
+
+	/*
+	 * The function name of the underlying probe tells IPv4 (ip_*) from
+	 * IPv6 (ip6_*); zero the argument that belongs to the other protocol.
+	 */
+	if (uprp->desc->fun[2] == '6')
+		emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(4), 0));
+	else
+		emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(5), 0));
+
+	get_member(pcb, "struct sk_buff", BPF_REG_6, "dev");
+	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(3), BPF_REG_0));
+
+	return 0;
+}
+
+dt_provimpl_t	dt_ip = {		/* implementation of the "ip" provider */
+	.name		= prvname,
+	.prog_type	= BPF_PROG_TYPE_UNSPEC,
+	.populate	= &populate,
+	.enable		= &dt_sdt_enable,
+	.trampoline	= &trampoline,
+	.probe_info	= &dt_sdt_probe_info,
+};
diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
index 8face769..31ad028d 100644
--- a/libdtrace/dt_provider.h
+++ b/libdtrace/dt_provider.h
@@ -70,6 +70,7 @@ typedef struct dt_provimpl {
 extern dt_provimpl_t dt_dtrace;
 extern dt_provimpl_t dt_cpc;
 extern dt_provimpl_t dt_fbt;
+extern dt_provimpl_t dt_ip;
 extern dt_provimpl_t dt_lockstat;
 extern dt_provimpl_t dt_proc;
 extern dt_provimpl_t dt_profile;
diff --git a/libdtrace/ip.d b/libdtrace/ip.d
index f66316c3..f8b77f12 100644
--- a/libdtrace/ip.d
+++ b/libdtrace/ip.d
@@ -1,6 +1,6 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2023, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -51,6 +51,8 @@ inline int TCP_MIN_HEADER_LENGTH =	20;
  * to the net namespace (nd_net in struct net_device).
  */
 typedef uint64_t	netstackid_t;
+typedef __be32		ipaddr_t;
+typedef struct in6_addr	in6_addr_t;
 
 /*
  * pktinfo is where packet ID info can be made available for deeper
@@ -159,7 +161,7 @@ translator csinfo_t < struct sock *s > {
 #pragma D binding "1.5" translator
 translator ipinfo_t < struct iphdr *I > {
 	ip_ver = 4;
-        ip_plength = I != NULL ? (ntohs(I->tot_len) - (*(uint8_t *)I & 0xf) << 2) : 0;
+        ip_plength = I != NULL ? (ntohs(I->tot_len) - (I->ihl << 2)) : 0; /* ihl is in 32-bit words */
 	ip_saddr = I != NULL ? inet_ntoa(&I->saddr) : "<unknown>";
 	ip_daddr = I != NULL ? inet_ntoa(&I->daddr) : "<unknown>";
 };
diff --git a/test/demo/ip/ipproto.d b/test/demo/ip/ipproto.d
index 0538abe3..da499f74 100644
--- a/test/demo/ip/ipproto.d
+++ b/test/demo/ip/ipproto.d
@@ -1,10 +1,9 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2008, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2023, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
-/* @@xfail: dtv2 */
 
 #pragma D option quiet
 
diff --git a/test/unittest/funcs/tst.inet_ntoa.d b/test/unittest/funcs/tst.inet_ntoa.d
index 42ea7107..3b7b7556 100644
--- a/test/unittest/funcs/tst.inet_ntoa.d
+++ b/test/unittest/funcs/tst.inet_ntoa.d
@@ -1,14 +1,12 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2007, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2023, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
 
 #pragma D option quiet
 
-typedef vmlinux`__be32 ipaddr_t;                  /* FIXME: how should this really be handled? */
-
 ipaddr_t *ip4a;
 ipaddr_t *ip4b;
 ipaddr_t *ip4c;
-- 
2.40.1




More information about the DTrace-devel mailing list