From kris.van.hees at oracle.com Tue Jul 1 15:12:34 2025 From: kris.van.hees at oracle.com (Kris Van Hees) Date: Tue, 01 Jul 2025 11:12:34 -0400 Subject: [DTrace-devel] [PATCH v2 2/3] usdt parser: handle encoded hyphens Message-ID: Signed-off-by: Kris Van Hees --- libcommon/usdt_parser_notes.c | 17 ++++++ test/unittest/usdt/tst.encoded_hyphens.r | 0 test/unittest/usdt/tst.encoded_hyphens.sh | 74 +++++++++++++++++++++++ 3 files changed, 91 insertions(+) create mode 100644 test/unittest/usdt/tst.encoded_hyphens.r create mode 100755 test/unittest/usdt/tst.encoded_hyphens.sh diff --git a/libcommon/usdt_parser_notes.c b/libcommon/usdt_parser_notes.c index fb57f119..d3d744fb 100644 --- a/libcommon/usdt_parser_notes.c +++ b/libcommon/usdt_parser_notes.c @@ -471,6 +471,23 @@ parse_usdt_note(int out, dof_helper_t *dhp, usdt_data_t *data, } prbt.off = off; + /* + * If the probe name has encoded hyphens, perform in-place changing + * from "__" into "-". + */ + if (strstr(prbt.prb, "__") != NULL) { + char *q; + const char *s = prbt.prb, *e = p; + + for (q = (char *)s; s < e; s++, q++) { + if (s[0] == '_' && s[1] == '_') { + *q = '-'; + s++; + } else if (s > q) + *q = *s; + } + } + if ((prp = dt_htab_lookup(prbmap, &prbt)) == NULL) { if ((prp = malloc(sizeof(dt_probe_t))) == NULL) { usdt_error(out, ENOMEM, "Failed to allocate probe"); diff --git a/test/unittest/usdt/tst.encoded_hyphens.r b/test/unittest/usdt/tst.encoded_hyphens.r new file mode 100644 index 00000000..e69de29b diff --git a/test/unittest/usdt/tst.encoded_hyphens.sh b/test/unittest/usdt/tst.encoded_hyphens.sh new file mode 100755 index 00000000..90608e25 --- /dev/null +++ b/test/unittest/usdt/tst.encoded_hyphens.sh @@ -0,0 +1,74 @@ +#!/bin/bash +# +# Oracle Linux DTrace. +# Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at +# http://oss.oracle.com/licenses/upl. 
+# +if [ $# != 1 ]; then + echo expected one argument: '<'dtrace-path'>' + exit 2 +fi + +dtrace=$1 +CFLAGS="$test_cppflags" +LDFLAGS="$test_ldflags" + +DIRNAME="$tmpdir/usdt-entryreturn.$$.$RANDOM" +mkdir -p $DIRNAME +cd $DIRNAME + +cat > test.c < + +int +main(int argc, char **argv) +{ + DTRACE_PROBE(test_prov, entry); + DTRACE_PROBE(test_prov, __entry); + DTRACE_PROBE(test_prov, foo__entry); + DTRACE_PROBE(test_prov, carpentry); + DTRACE_PROBE(test_prov, miniatureturn); + DTRACE_PROBE(test_prov, foo__return); + DTRACE_PROBE(test_prov, __return); + /* + * Unfortunately, a "return" probe is not currently possible due to + * the conflict with a reserved word. + */ + DTRACE_PROBE(test_prov, done); +} +EOF + +cat > prov.d <<EOF +provider test_prov { + probe entry(); + probe __entry(); + probe foo__entry(); + probe carpentry(); + probe miniatureturn(); + probe foo__return(); + probe __return(); + probe done(); +}; +EOF + +${CC} ${CFLAGS} -c test.c +if [ $? -ne 0 ]; then + echo "failed to compile test.c" >& 2 + exit 1 +fi +$dtrace $dt_flags -G -s prov.d test.o +if [ $? -ne 0 ]; then + echo "failed to create USDT data" >& 2 + exit 1 +fi +${CC} ${LDFLAGS} -o test test.o prov.o +if [ $? -ne 0 ]; then + echo "failed to link final executable" >& 2 + exit 1 +fi + +./test +status=$? + +exit $status -- 2.43.5 From kris.van.hees at oracle.com Tue Jul 1 16:42:02 2025 From: kris.van.hees at oracle.com (Kris Van Hees) Date: Tue, 1 Jul 2025 12:42:02 -0400 Subject: [DTrace-devel] [PATCH v4 5/5] selftests/usdt: add test covering different forms of stapsdt note args In-Reply-To: <20250623101310.1649756-6-alan.maguire@oracle.com> References: <20250623101310.1649756-1-alan.maguire@oracle.com> <20250623101310.1649756-6-alan.maguire@oracle.com> Message-ID: This test does not pass on Debian, or any other system that compiles with PIE enabled by default as far as I can see because it fails to resolve the function name (even though it is in the symbol table since that is not stripped). So there must be something wrong with the logic of the implementing patch to get the function name from the address information. 
On Mon, Jun 23, 2025 at 11:13:10AM +0100, Alan Maguire wrote: > Add a test exercising various arg types supported by stapsdt notes; > register values, register + offset and constants. The test generates > a binary with probes represented as follows on x86_64: > > Displaying notes found in: .note.stapsdt > Owner Data size Description > stapsdt 0x00000048 NT_STAPSDT (SystemTap probe descriptors) > Provider: test_prov > Name: args > Location: 0x0000000000400557, Base: 0x00000000004005f8, Semaphore: 0x0000000000000000 > Arguments: -4 at -4(%rbp) 8@%rax 8@%rdx -4@$18 > > Verify we get expected data for the probe arguments. > > Signed-off-by: Alan Maguire > --- > test/unittest/usdt/tst.stapsdt-notes-args.r | 2 + > test/unittest/usdt/tst.stapsdt-notes-args.sh | 50 ++++++++++++++++++++ > 2 files changed, 52 insertions(+) > create mode 100644 test/unittest/usdt/tst.stapsdt-notes-args.r > create mode 100755 test/unittest/usdt/tst.stapsdt-notes-args.sh > > diff --git a/test/unittest/usdt/tst.stapsdt-notes-args.r b/test/unittest/usdt/tst.stapsdt-notes-args.r > new file mode 100644 > index 00000000..42bca19f > --- /dev/null > +++ b/test/unittest/usdt/tst.stapsdt-notes-args.r > @@ -0,0 +1,2 @@ > +test:main:args:2:./test:val:18 > + > diff --git a/test/unittest/usdt/tst.stapsdt-notes-args.sh b/test/unittest/usdt/tst.stapsdt-notes-args.sh > new file mode 100755 > index 00000000..82097808 > --- /dev/null > +++ b/test/unittest/usdt/tst.stapsdt-notes-args.sh > @@ -0,0 +1,50 @@ > +#!/bin/bash > +# > +# Oracle Linux DTrace. > +# Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. > +# Licensed under the Universal Permissive License v 1.0 as shown at > +# http://oss.oracle.com/licenses/upl. > + > +# This test covers stapsdt probes fired by the STAP_PROBEn macros, > +# testing various argument forms (constant, register, deref etc). 
> + > +if [ $# != 1 ]; then > + echo expected one argument: '<'dtrace-path'>' > + exit 2 > +fi > + > +dtrace=$1 > +CC=/usr/bin/gcc > +CFLAGS="-I${PWD}/test/unittest/usdt" > + > +DIRNAME="$tmpdir/usdt-notes.$$.$RANDOM" > +mkdir -p $DIRNAME > +cd $DIRNAME > + > +cat > test.c < +#include > + > +int > +main(int argc, char **argv) > +{ > + STAP_PROBE4(test_prov, args, argc, argv[0], argv[1] + 4, 18); > +} > +EOF > + > +${CC} ${CFLAGS} -o test test.c > +if [ $? -ne 0 ]; then > + echo "failed to compile test.c" >& 2 > + exit 1 > +fi > + > +$dtrace -c './test arg1val' -qs /dev/stdin < +test_prov\$target:::args > +{ > + printf("%s:%s:%s:%li:%s:%s:%li\n", probemod, probefunc, probename, > + arg0, copyinstr(arg1), copyinstr(arg2), arg3); > +} > + > +EOF > +status=$? > + > +exit $status > -- > 2.43.5 > From eugene.loh at oracle.com Tue Jul 1 17:44:25 2025 From: eugene.loh at oracle.com (Eugene Loh) Date: Tue, 1 Jul 2025 13:44:25 -0400 Subject: [DTrace-devel] [PATCH v2 2/3] usdt parser: handle encoded hyphens In-Reply-To: References: Message-ID: Reviewed-by: Eugene Loh Little nits below to consider. On 7/1/25 11:12, Kris Van Hees wrote: > Signed-off-by: Kris Van Hees > --- > libcommon/usdt_parser_notes.c | 17 ++++++ > test/unittest/usdt/tst.encoded_hyphens.r | 0 > test/unittest/usdt/tst.encoded_hyphens.sh | 74 +++++++++++++++++++++++ > 3 files changed, 91 insertions(+) > create mode 100644 test/unittest/usdt/tst.encoded_hyphens.r > create mode 100755 test/unittest/usdt/tst.encoded_hyphens.sh > > diff --git a/libcommon/usdt_parser_notes.c b/libcommon/usdt_parser_notes.c > index fb57f119..d3d744fb 100644 > --- a/libcommon/usdt_parser_notes.c > +++ b/libcommon/usdt_parser_notes.c > @@ -471,6 +471,23 @@ parse_usdt_note(int out, dof_helper_t *dhp, usdt_data_t *data, > } > prbt.off = off; > > + /* > + * If the probe name has encoded hyphens, perform in-place changing > + * from "__" into "-". 
> + */ > + if (strstr(prbt.prb, "__") != NULL) { > + char *q; > + const char *s = prbt.prb, *e = p; > + > + for (q = (char *)s; s < e; s++, q++) { > + if (s[0] == '_' && s[1] == '_') { > + *q = '-'; > + s++; > + } else if (s > q) > + *q = *s; > + } > + } > + > if ((prp = dt_htab_lookup(prbmap, &prbt)) == NULL) { > if ((prp = malloc(sizeof(dt_probe_t))) == NULL) { > usdt_error(out, ENOMEM, "Failed to allocate probe"); > diff --git a/test/unittest/usdt/tst.encoded_hyphens.r b/test/unittest/usdt/tst.encoded_hyphens.r > new file mode 100644 > index 00000000..e69de29b Empty? I guess that works. I usually have something like "echo success" at the end of the script and "success" in the .r file as a sanity check that we got to the end of the script, but I admit I see nothing in this test I can think of where things could go wrong. Up to you. > diff --git a/test/unittest/usdt/tst.encoded_hyphens.sh b/test/unittest/usdt/tst.encoded_hyphens.sh > new file mode 100755 > index 00000000..90608e25 > --- /dev/null > +++ b/test/unittest/usdt/tst.encoded_hyphens.sh > @@ -0,0 +1,74 @@ > +#!/bin/bash > +# > +# Oracle Linux DTrace. > +# Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. > +# Licensed under the Universal Permissive License v 1.0 as shown at > +# http://oss.oracle.com/licenses/upl. 
> +# > +if [ $# != 1 ]; then > + echo expected one argument: '<'dtrace-path'>' > + exit 2 > +fi > + > +dtrace=$1 > +CFLAGS="$test_cppflags" > +LDFLAGS="$test_ldflags" > + > +DIRNAME="$tmpdir/usdt-entryreturn.$$.$RANDOM" s/entryreturn/encoded_hyphens/ > +mkdir -p $DIRNAME > +cd $DIRNAME > + > +cat > test.c < +#include > + > +int > +main(int argc, char **argv) > +{ > + DTRACE_PROBE(test_prov, entry); > + DTRACE_PROBE(test_prov, __entry); > + DTRACE_PROBE(test_prov, foo__entry); > + DTRACE_PROBE(test_prov, carpentry); > + DTRACE_PROBE(test_prov, miniatureturn); > + DTRACE_PROBE(test_prov, foo__return); > + DTRACE_PROBE(test_prov, __return); > + /* > + * Unfortunately, a "return" probe is not currently possible due to > + * the conflict with a reserved word. > + */ > + DTRACE_PROBE(test_prov, done); > +} > +EOF > + > +cat > prov.d < +provider test_prov { > + probe entry(); > + probe __entry(); > + probe foo__entry(); > + probe carpentry(); > + probe miniatureturn(); > + probe foo__return(); > + probe __return(); > + probe done(); > +}; > +EOF > + > +${CC} ${CFLAGS} -c test.c > +if [ $? -ne 0 ]; then > + echo "failed to compile test.c" >& 2 > + exit 1 > +fi > +$dtrace $dt_flags -G -s prov.d test.o > +if [ $? -ne 0 ]; then > + echo "failed to create USDT data" >& 2 > + exit 1 > +fi > +${CC} ${LDFLAGS} -o test test.o prov.o > +if [ $? -ne 0 ]; then > + echo "failed to link final executable" >& 2 > + exit 1 > +fi > + > +./test > +status=$? > + > +exit $status Yeah. On first read, I'm left wondering where the check is that "__" got turned into "-". I guess the check is kind of implicit. Another way to reassure readers could be to add one more line ("$dtrace -c ./test -lP 'test_prov$target' |& gawk '/test_prov/ { print $NF}'") to the script and the corresponding output to the .r file. 
From eugene.loh at oracle.com Tue Jul 1 18:23:12 2025 From: eugene.loh at oracle.com (Eugene Loh) Date: Tue, 1 Jul 2025 14:23:12 -0400 Subject: [DTrace-devel] [PATCH v2 1/4] dtrace: move get_member() to dt_cg.c In-Reply-To: <20250610135813.15746-2-alan.maguire@oracle.com> References: <20250610135813.15746-1-alan.maguire@oracle.com> <20250610135813.15746-2-alan.maguire@oracle.com> Message-ID: Reviewed-by: Eugene Loh btw... On 6/10/25 09:58, Alan Maguire wrote: > diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c > @@ -1901,6 +1901,45 @@ dt_cg_ctf_offsetof(const char *structname, const char *membername, > +/* > + * Retrieve the value of a member in a given struct. > + * > + * Entry: > + * reg = TYPE *ptr > + * > + * Return: > + * %r0 = ptr->member > + * Clobbers: > + * %r1 .. %r5 > + */ The dominant (but admittedly not consistent) style of this file is to indent with a tab. E.g., "^ *\treg = ...". Clearly, we have not established a consistent style. From eugene.loh at oracle.com Tue Jul 1 18:31:28 2025 From: eugene.loh at oracle.com (Eugene Loh) Date: Tue, 1 Jul 2025 14:31:28 -0400 Subject: [DTrace-devel] [PATCH v2 2/4] dt_impl: bump number of TSLOTS to 8 In-Reply-To: <20250610135813.15746-3-alan.maguire@oracle.com> References: <20250610135813.15746-1-alan.maguire@oracle.com> <20250610135813.15746-3-alan.maguire@oracle.com> Message-ID: <4a079f61-5806-68b8-9051-af918a822adc@oracle.com> Very simple patch, but one key problem. The changed line is preceded by a huge comment block that goes to excruciating pains to explain why the value should be 4. So, I'm fine with the change itself and I think the "Because of the complexity..." sentence in the commit message can be dropped, but the comment block in the file needs to be updated to explain (with corresponding pains?) why we need 8 (not 9, not 7). On 6/10/25 09:58, Alan Maguire wrote: > Because of the complexity of the TCP translators, more tslots are > needed. 
> > Signed-off-by: Alan Maguire > --- > libdtrace/dt_impl.h | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h > index 68fb8ec5..10424f9c 100644 > --- a/libdtrace/dt_impl.h > +++ b/libdtrace/dt_impl.h > @@ -218,7 +218,7 @@ typedef struct dt_kern_path { > * - cleanpath() holds a prepended '/' char, a string, an appended '/' char, > * and a terminating NUL char, or STRSZ + 3 chars altogether > */ > -#define DT_TSTRING_SLOTS 4 > +#define DT_TSTRING_SLOTS 8 > #define DT_TSTRING_SIZE(dtp) \ > MAX(P2ROUNDUP((dtp)->dt_options[DTRACEOPT_STRSIZE] + 3, 8), \ > 72) From eugene.loh at oracle.com Tue Jul 1 19:08:59 2025 From: eugene.loh at oracle.com (Eugene Loh) Date: Tue, 1 Jul 2025 15:08:59 -0400 Subject: [DTrace-devel] [PATCH v2 0/4] DTrace TCP provider In-Reply-To: <20250610135813.15746-1-alan.maguire@oracle.com> References: <20250610135813.15746-1-alan.maguire@oracle.com> Message-ID: Incidentally, looking at the subject lines for the patch series, I see:
    dtrace: move get_member() to dt_cg.c
    dt_impl: bump number of TSLOTS to 8
    dtrace: add tcp provider
    dtrace: sync dlibs with tcp.d, ip.d and net.d changes
That is, each line is prefixed with a component name. We do that a lot for "test:", but otherwise the practice seems to depend on... the patch author? Anyhow, I claim "dtrace:" is not very useful. For patch 1, all the prefix is saying is "there is movement in the dtrace code base." I think the subject would be more direct without the prefix. Same for patch 2. For patch 3, if one wanted to use a component prefix, I'd think one would use "tcp:". Historically, it looks like we've added new providers with subject lines like this:
    rawfbt: new provider
    Implement the io provider
    Implement the ip provider
    Implement the lockstat provider
    Implement the sched provider (first part)
    Implement the proc provider
    provider: Implement a rawtp provider
    Add a CPC provider
    PID provider implementation
    Add a profile provider
    Implement BEGIN and END probes for the dtrace provider
    Added IO provider support for 4.14 kernels
Everyone has their own style. Shrug. No big deal. Anyhow, personally, I think the prefixes don't add much, especially for "dtrace:". My suggestion is to drop the prefixes, but... your call. On 6/10/25 09:58, Alan Maguire wrote: > This series is a first draft of TCP provider support, where the > probes are implemented via underlying fbt and sdt probes. > > Due to the use of the sock/inet_sock_set_state tracepoint, intended > for ~5.15 kernels and later. Tried replacing this with > > fbt::tcp_set_state:entry > > but this misses a few state transitions, so stuck with using > the tracepoint. > > All tests under test/unittest/tcp pass unmodified on an upstream > (6.15) kernel and 5.15 UEK7U3 kernel. > > It implements all documented TCP provider probes: > > accept-established, accept-refused, connect-request, > connect-established, connect-refused, receive, send, > state-change > > Changes since RFC: > > - fixed issues with test failures on UEK7 due to missing > SYN_RCV state change > - moved get_member() to dt_cg.c (patch 1) > > Alan Maguire (4): > dtrace: move get_member() to dt_cg.c > dt_impl: bump number of TSLOTS to 8 > dtrace: add tcp provider > dtrace: sync dlibs with tcp.d, ip.d and net.d changes > > dlibs/aarch64/5.14/ip.d | 1 - > dlibs/aarch64/5.14/net.d | 6 +- > dlibs/aarch64/5.14/tcp.d | 52 ++--- > dlibs/aarch64/5.16/ip.d | 1 - > dlibs/aarch64/5.16/net.d | 6 +- > dlibs/aarch64/5.16/tcp.d | 52 ++--- > dlibs/aarch64/6.1/ip.d | 1 - > dlibs/aarch64/6.1/net.d | 6 +- > dlibs/aarch64/6.1/tcp.d | 52 ++--- > dlibs/aarch64/6.10/ip.d | 1 - > dlibs/aarch64/6.10/net.d | 6 +- > dlibs/aarch64/6.10/tcp.d | 52 ++--- > dlibs/x86_64/5.14/ip.d | 1 - > dlibs/x86_64/5.14/net.d | 6 +- > dlibs/x86_64/5.14/tcp.d | 52 ++--- > dlibs/x86_64/5.16/ip.d | 1 - > dlibs/x86_64/5.16/net.d | 6 +- > 
dlibs/x86_64/5.16/tcp.d | 52 ++--- > dlibs/x86_64/6.1/ip.d | 1 - > dlibs/x86_64/6.1/net.d | 6 +- > dlibs/x86_64/6.1/tcp.d | 52 ++--- > dlibs/x86_64/6.10/ip.d | 1 - > dlibs/x86_64/6.10/net.d | 6 +- > dlibs/x86_64/6.10/tcp.d | 52 ++--- > libdtrace/Build | 2 + > libdtrace/dt_cg.c | 39 ++++ > libdtrace/dt_cg.h | 2 + > libdtrace/dt_impl.h | 2 +- > libdtrace/dt_prov_ip.c | 45 +---- > libdtrace/dt_prov_tcp.c | 405 +++++++++++++++++++++++++++++++++++++++ > libdtrace/dt_provider.c | 1 + > libdtrace/dt_provider.h | 1 + > libdtrace/ip.d | 1 - > libdtrace/net.d | 6 +- > libdtrace/tcp.d | 52 ++--- > 35 files changed, 761 insertions(+), 267 deletions(-) > create mode 100644 libdtrace/dt_prov_tcp.c > From kris.van.hees at oracle.com Tue Jul 1 19:27:33 2025 From: kris.van.hees at oracle.com (Kris Van Hees) Date: Tue, 1 Jul 2025 15:27:33 -0400 Subject: [DTrace-devel] [PATCH v2 0/4] DTrace TCP provider In-Reply-To: References: <20250610135813.15746-1-alan.maguire@oracle.com> Message-ID: On Tue, Jul 01, 2025 at 03:08:59PM -0400, Eugene Loh wrote: > Incidentally, looking at the subject lines for the patch series, I see: > >     dtrace: move get_member() to dt_cg.c >     dt_impl: bump number of TSLOTS to 8 >     dtrace: add tcp provider >     dtrace: sync dlibs with tcp.d, ip.d and net.d changes I would suggest:
    cg: move get_member() to dt_cg.c
    cg: bump number of TSLOTS to 8
    tcp: new provider -or- Implement the tcp provider
    dlibs: sync dlibs with tcp.d, ip.d and net.d changes
> That is, each line is prefixed with a component name. We do that a lot for > "test:", but otherwise the practice seems to depend on... the patch author? > Anyhow, I claim "dtrace:" is not very useful. For patch 1, all the prefix > is saying is "there is movement in the dtrace code base." I think the > subject would be more direct without the prefix. Same for patch 2. For > patch 3, if one wanted to use a component prefix, I'd think one would use > "tcp:". 
> > Historically, it looks like we've added new providers with subject lines > like this: > >     rawfbt: new provider >     Implement the io provider >     Implement the ip provider >     Implement the lockstat provider >     Implement the sched provider (first part) >     Implement the proc provider >     provider: Implement a rawtp provider >     Add a CPC provider >     PID provider implementation >     Add a profile provider >     Implement BEGIN and END probes for the dtrace provider >     Added IO provider support for 4.14 kernels > > Everyone has their own style. Shrug. No big deal. > > Anyhow, personally, I think the prefixes don't add much, especially for > "dtrace:". My suggestion is to drop the prefixes, but... your call. > > On 6/10/25 09:58, Alan Maguire wrote: > > > This series is a first draft of TCP provider support, where the > > probes are implemented via underlying fbt and sdt probes. > > > > Due to the use of the sock/inet_sock_set_state tracepoint, intended > > for ~5.15 kernels and later. Tried replacing this with > > > > fbt::tcp_set_state:entry > > > > but this misses a few state transitions, so stuck with using > > the tracepoint. > > > > All tests under test/unittest/tcp pass unmodified on an upstream > > (6.15) kernel and 5.15 UEK7U3 kernel. 
> > > > It implements all documented TCP provider probes: > > > > accept-established, accept-refused, connnect-request, > > connect-established, connect-refused, receive, send, > > state-change > > > > Changes since RFC: > > > > - fixed issues with test failures on UEK7 due to missing > > SYN_RCV state change > > - moved get_member() to dt_cg.c (patch 1) > > > > Alan Maguire (4): > > dtrace: move get_member() to dt_cg.c > > dt_impl: bump number of TSLOTS to 8 > > dtrace: add tcp provider > > dtrace: sync dlibs with tcp.d, ip.d and net.d changes > > > > dlibs/aarch64/5.14/ip.d | 1 - > > dlibs/aarch64/5.14/net.d | 6 +- > > dlibs/aarch64/5.14/tcp.d | 52 ++--- > > dlibs/aarch64/5.16/ip.d | 1 - > > dlibs/aarch64/5.16/net.d | 6 +- > > dlibs/aarch64/5.16/tcp.d | 52 ++--- > > dlibs/aarch64/6.1/ip.d | 1 - > > dlibs/aarch64/6.1/net.d | 6 +- > > dlibs/aarch64/6.1/tcp.d | 52 ++--- > > dlibs/aarch64/6.10/ip.d | 1 - > > dlibs/aarch64/6.10/net.d | 6 +- > > dlibs/aarch64/6.10/tcp.d | 52 ++--- > > dlibs/x86_64/5.14/ip.d | 1 - > > dlibs/x86_64/5.14/net.d | 6 +- > > dlibs/x86_64/5.14/tcp.d | 52 ++--- > > dlibs/x86_64/5.16/ip.d | 1 - > > dlibs/x86_64/5.16/net.d | 6 +- > > dlibs/x86_64/5.16/tcp.d | 52 ++--- > > dlibs/x86_64/6.1/ip.d | 1 - > > dlibs/x86_64/6.1/net.d | 6 +- > > dlibs/x86_64/6.1/tcp.d | 52 ++--- > > dlibs/x86_64/6.10/ip.d | 1 - > > dlibs/x86_64/6.10/net.d | 6 +- > > dlibs/x86_64/6.10/tcp.d | 52 ++--- > > libdtrace/Build | 2 + > > libdtrace/dt_cg.c | 39 ++++ > > libdtrace/dt_cg.h | 2 + > > libdtrace/dt_impl.h | 2 +- > > libdtrace/dt_prov_ip.c | 45 +---- > > libdtrace/dt_prov_tcp.c | 405 +++++++++++++++++++++++++++++++++++++++ > > libdtrace/dt_provider.c | 1 + > > libdtrace/dt_provider.h | 1 + > > libdtrace/ip.d | 1 - > > libdtrace/net.d | 6 +- > > libdtrace/tcp.d | 52 ++--- > > 35 files changed, 761 insertions(+), 267 deletions(-) > > create mode 100644 libdtrace/dt_prov_tcp.c > > From kris.van.hees at oracle.com Tue Jul 1 19:42:09 2025 From: kris.van.hees at oracle.com 
(Kris Van Hees) Date: Tue, 1 Jul 2025 15:42:09 -0400 Subject: [DTrace-devel] [PATCH v4 2/5] support stapsdt ELF-note-defined static probes In-Reply-To: <20250623101310.1649756-3-alan.maguire@oracle.com> References: <20250623101310.1649756-1-alan.maguire@oracle.com> <20250623101310.1649756-3-alan.maguire@oracle.com> Message-ID: I am looking a bit deeper into this patch. See my other email concerning the args test that is not passing because it fails to get the function name. I believe the problem is that the code here does not handle PIE-compiled code (which is the default for e.g. Debian, but not OL). Also, I am trying to see whether we can integrate the parsing of the note format in usdt_parser_notes.c so that we can centralize all ELF notes parsing related to USDT in a single location. Maybe it would be better, maybe not - I'm evaluating. Another thing... you are performing addr-to-map lookups for every address (i.e. for every note) even though you are processing notes for a single mapping in the loop, so the map should be the same for all the addresses, right? I don't think that the ELF notes for mapping A can refer to probes (by address) that belong in mapping B - so that can be optimized I think. Do you think the semaphore can be implemented as well, since that is somewhat similar to is-enabled probes I think? On Mon, Jun 23, 2025 at 11:13:07AM +0100, Alan Maguire wrote: > As well as using dtrace -G to generate USDT probes, programs and > libraries may have added static probes via stapsdt ELF notes. > > Read ELF notes from binaries from /proc/ maps associated with processes > and parse them to retrieve uprobe address and argument-related info > to create the associated uprobe. > > Probe arguments can be either constants, register values or dereferences > or dereferences from register values (plus offset), identical to the > updated USDT ELF note handling. 
> > A new provider - stapsdt - implements this support, as stapsdt probes do > not dynamically register themselves with DTrace. This makes them less > powerful than DTrace-based USDT probes, but they do exist in programs and > libraries so should be supported. > > As well as supporting ELF-note stapsdt defined probes in programs and > libraries, this patch supports dynamically-created probes that > are created via libstapsdt [1]. libstapsdt allows dynamic languages > like python to declare and fire probes by dynamically creating > a memfd-based shared library containing ELF notes for the probes. > With these changes we can also trace these probes. This is very > useful since libstapsdt has python, NodeJS, go and luaJIT bindings. > > [1] https://github.com/linux-usdt/libstapsdt > > Signed-off-by: Alan Maguire > --- > include/dtrace/pid.h | 1 + > libdtrace/dt_pid.c | 288 +++++++++++++++++++++++++++++++++++++ > libdtrace/dt_prov_uprobe.c | 43 +++++- > 3 files changed, 328 insertions(+), 4 deletions(-) > > diff --git a/include/dtrace/pid.h b/include/dtrace/pid.h > index 8d4b6432..99093bc9 100644 > --- a/include/dtrace/pid.h > +++ b/include/dtrace/pid.h > @@ -24,6 +24,7 @@ typedef enum pid_probetype { > DTPPT_OFFSETS, > DTPPT_ABSOFFSETS, > DTPPT_USDT, > + DTPPT_STAPSDT, > DTPPT_IS_ENABLED > } pid_probetype_t; > > diff --git a/libdtrace/dt_pid.c b/libdtrace/dt_pid.c > index d12b7919..6581b087 100644 > --- a/libdtrace/dt_pid.c > +++ b/libdtrace/dt_pid.c > @@ -38,6 +38,9 @@ > #include > #include > > +#define SEC_STAPSDT_NOTE ".note.stapsdt" > +#define NAME_STAPSDT_NOTE "stapsdt" > + > /* > * Information on a PID probe. 
> */ > @@ -1262,6 +1265,288 @@ dt_pid_create_pid_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp, dt_pcb_t *p > return err; > } > > +static int > +dt_stapsdt_parse(dtrace_hdl_t *dtp, dt_proc_t *dpr, dtrace_probedesc_t *pdp, > + dt_pcb_t *pcb, const dt_provider_t *pvp, char *path, > + unsigned long base_addr) > +{ > + Elf *elf = NULL; > + Elf_Scn *scn = NULL; > + GElf_Shdr shdr; > + GElf_Nhdr nhdr; > + size_t shstrndx, noff, doff, off, n; > + Elf_Data *data; > + GElf_Ehdr ehdr; > + int i, err = 0; > + int fd = -1; > + char *mod; > + > + fd = open(path, O_RDONLY); > + if (fd < 0) { > + dt_pid_error(dtp, pcb, dpr, D_PROC_USDT, > + "Cannot open %s: %s\n", > + path, strerror(errno)); > + return -1; > + } > + mod = strrchr(path, '/'); > + if (mod) > + mod++; > + else > + mod = path; > + > + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); // ELF_C_READ ? > + assert(elf_kind(elf) == ELF_K_ELF); > + elf_getshdrstrndx(elf, &shstrndx); > + > + if (gelf_getehdr(elf, &ehdr)) { > + switch (ehdr.e_type) { > + case ET_EXEC: > + /* binary does not require base addr adjustment */ > + base_addr = 0; > + break; > + case ET_DYN: > + break; > + default: > + dt_dprintf("unexpected ELF hdr type 0x%x for '%s'\n", > + ehdr.e_type, path); > + err = -1; > + goto out; > + } > + } > + > + while ((scn = elf_nextscn(elf, scn)) != NULL) { > + char *secname; > + > + assert(gelf_getshdr(scn, &shdr) != NULL); > + > + secname = elf_strptr(elf, shstrndx, shdr.sh_name); > + if (strcmp(secname, SEC_STAPSDT_NOTE) == 0 && > + shdr.sh_type == SHT_NOTE) > + break; > + } > + /* No ELF notes, just bail. 
*/ > + if (scn == NULL) > + goto out; > + data = elf_getdata(scn, 0); > + for (off = 0; > + (off = gelf_getnote(data, off, &nhdr, &noff, &doff)) > 0;) { > + pid_probespec_t psp = {0}; > + char *prv, *prb; > + const char *fun; > + char *dbuf = (char *)data->d_buf; > + long *addrs = data->d_buf + doff; /* 3 addrs are loc/base/semaphore */ > + GElf_Sym sym; > + const prmap_t *pmp; > + > + if (strncmp(dbuf + noff, NAME_STAPSDT_NOTE, nhdr.n_namesz) != 0) > + continue; > + prv = dbuf + doff + (3*sizeof(long)); > + /* ensure prv/prb is null-terminated */ > + if (strlen(prv) >= nhdr.n_descsz) > + continue; > + prb = prv + strlen(prv) + 1; > + if (strlen(prb) >= nhdr.n_descsz) > + continue; > + if (strncmp(pdp->prv, prv, strlen(prv)) != 0) > + continue; > + /* skip unmatched, non-wildcarded probes */ > + if (strcmp(pdp->prb, "*") != 0 && > + (strlen(pdp->prb) > 0 && strcmp(pdp->prb, prb) != 0)) > + continue; > + if (prb + strlen(prb) + 1 < dbuf + doff + nhdr.n_descsz) > + psp.pps_sargv = prb + strlen(prb) + 1; > + > + psp.pps_type = DTPPT_STAPSDT; > + psp.pps_prv = prv; > + psp.pps_mod = mod; > + psp.pps_prb = prb; > + if (elf_getphdrnum(elf, &n)) > + continue; > + for (i = 0; i < n; i++) { > + GElf_Phdr phdr; > + > + if (!gelf_getphdr(elf, i, &phdr)) > + break; > + > + if (addrs[0] < phdr.p_vaddr || > + addrs[0] > phdr.p_vaddr + phdr.p_memsz) > + continue; > + if (base_addr) > + psp.pps_off = addrs[0]; > + else > + psp.pps_off = addrs[0] - phdr.p_vaddr + phdr.p_offset; > + break; > + } > + if (!psp.pps_off) > + continue; > + psp.pps_nameoff = 0; > + > + pmp = Paddr_to_map(dpr->dpr_proc, base_addr + addrs[0]); > + if (!pmp) { > + dt_dprintf("%i: cannot determine 0x%lx's mapping\n", > + Pgetpid(dpr->dpr_proc), psp.pps_off); > + continue; > + } > + psp.pps_fn = Pmap_mapfile_name(dpr->dpr_proc, pmp); > + if (psp.pps_fn == NULL) { > + dt_pid_error(dtp, pcb, dpr, D_PROC_USDT, > + "Cannot get name of mapping containing probe %s for pid %d\n", > + psp.pps_prb, dpr->dpr_pid); > + 
err = -1; > + break; > + } > + if (dt_Plookup_by_addr(dtp, dpr->dpr_pid, base_addr + addrs[0], > + &fun, &sym) == 0) > + psp.pps_fun = (char *)fun; > + else > + psp.pps_fun = ""; > + psp.pps_dev = pmp->pr_dev; > + psp.pps_inum = pmp->pr_inum; > + psp.pps_pid = dpr->dpr_pid; > + psp.pps_nameoff = 0; > + > + if (pvp->impl->provide_probe(dtp, &psp) < 0) { > + dt_pid_error(dtp, pcb, dpr, D_PROC_USDT, > + "failed to instantiate probe %s for pid %d: %s", > + psp.pps_prb, psp.pps_pid, > + dtrace_errmsg(dtp, dtrace_errno(dtp))); > + err = -1; > + } > + free(psp.pps_fn); > + if (err == -1) > + break; > + } > + > +out: > + elf_end(elf); > + close(fd); > + return err; > +} > + > +static void > +dt_pid_create_stapsdt_probes_proc(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp, > + dt_pcb_t *pcb, const dt_provider_t *pvp, > + dt_proc_t *dpr, const char *proc_map) > +{ > + char line[1024]; > + FILE *fp = NULL; > + pid_t pid; > + > + assert(dpr != NULL); > + > + pid = dpr->dpr_pid; > + fp = fopen(proc_map, "r"); > + if (!fp) > + return; > + > + while (fgets(line, sizeof(line) - 1, fp) != NULL) { > + long addr_start, addr_end, file_offset; > + long dev_major, dev_minor; > + unsigned long inode; > + char name[PATH_MAX + 1]; > + char path[PATH_MAX + 1]; > + char perm[5]; > + int ret; > + > + ret = sscanf(line, > + "%lx-%lx %4s %lx %lx:%lx %lu %[^\n]", > + &addr_start, &addr_end, perm, &file_offset, > + &dev_major, &dev_minor, &inode, name); > + if (ret != 8 || !strchr(perm, 'x') || strchr(name, '[') != NULL) > + continue; > + > + /* libstapsdt uses an memfd-based library to dynamically create > + * stapsdt notes for dynamic languages like python; we need > + * the associated /proc//fds/ fd to read these notes. 
> + */ > + if (strncmp(name, "/memfd:", strlen("/memfd:")) == 0) { > + DIR *d; > + struct dirent *dirent; > + char *deleted; > + > + deleted = strstr(name, " (deleted)"); > + if (deleted) > + *deleted = '\0'; > + snprintf(path, sizeof(path), "/proc/%d/fd", pid); > + d = opendir(path); > + if (d == NULL) > + continue; > + while ((dirent = readdir(d)) != NULL) { > + struct stat s; > + > + snprintf(path, sizeof(path), "/proc/%d/fd/%s", > + pid, dirent->d_name); > + if (stat(path, &s) != 0 || s.st_ino != inode) > + continue; > + if (dt_stapsdt_parse(dtp, dpr, pdp, pcb, pvp, > + path, addr_start) != 0) > + break; > + } > + } else { > + if (dt_stapsdt_parse(dtp, dpr, pdp, pcb, pvp, name, > + addr_start) != 0) > + break; > + } > + } > + fclose(fp); > +} > + > +static int > +dt_pid_create_stapsdt_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp, dt_pcb_t *pcb) > +{ > + int i, nmatches = 0, err = 0; > + const dt_provider_t *pvp; > + char *globpat = NULL; > + const char *pidstr; > + glob_t globbuf; > + bool wildcard; > + pid_t pid; > + > + assert(pcb != NULL); > + > + pidstr = &pdp->prv[strlen(pdp->prv)]; > + > + while (isdigit(*(pidstr - 1)) || *(pidstr - 1) == '*') > + pidstr--; > + if (strlen(pidstr) == 0) > + return 0; > + wildcard = strchr(pidstr, '*'); > + asprintf(&globpat, "/proc/%s/maps", pidstr); > + nmatches = glob(globpat, 0, NULL, &globbuf) ? 
0 : globbuf.gl_pathc; > + pvp = dt_provider_lookup(dtp, "stapsdt"); > + assert(pvp != NULL); > + > + for (i = 0; i < nmatches; i++) { > + dt_proc_t *dpr = NULL; > + > + pidstr = globbuf.gl_pathv[i] + strlen("/proc/"); > + pid = atoll(pidstr); > + if (pid <= 0) > + continue; > + if (dt_proc_grab_lock(dtp, pid, DTRACE_PROC_WAITING | > + DTRACE_PROC_SHORTLIVED) < 0) { > + if (wildcard) > + continue; > + dt_pid_error(dtp, pcb, NULL, D_PROC_GRAB, > + "failed to grab process %d", > + (int)pid); > + err = 1; > + break; > + } > + dpr = dt_proc_lookup(dtp, pid); > + if (dpr) { > + dt_pid_create_stapsdt_probes_proc(pdp, dtp, pcb, > + pvp, dpr, > + globbuf.gl_pathv[i]); > + dt_proc_release_unlock(dtp, pid); > + } > + } > + free(globpat); > + globfree(&globbuf); > + > + return err; > +} > + > int > dt_pid_create_usdt_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp, dt_pcb_t *pcb) > { > @@ -1319,6 +1604,9 @@ dt_pid_create_usdt_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp, dt_pcb_t * > free(globpat); > globfree(&globbuf); > > + if (err == 0) > + err = dt_pid_create_stapsdt_probes(pdp, dtp, pcb); > + > /* If no errors, report success. */ > if (err == 0) > return 0; > diff --git a/libdtrace/dt_prov_uprobe.c b/libdtrace/dt_prov_uprobe.c > index 2cbd8910..b91cf810 100644 > --- a/libdtrace/dt_prov_uprobe.c > +++ b/libdtrace/dt_prov_uprobe.c > @@ -313,12 +313,15 @@ static const dtrace_pattr_t pattr = { > > dt_provimpl_t dt_pid; > dt_provimpl_t dt_usdt; > +dt_provimpl_t dt_stapsdt; > > static int populate(dtrace_hdl_t *dtp) > { > if (dt_provider_create(dtp, dt_uprobe.name, &dt_uprobe, &pattr, > NULL) == NULL || > dt_provider_create(dtp, dt_pid.name, &dt_pid, &pattr, > + NULL) == NULL || > + dt_provider_create(dtp, dt_stapsdt.name, &dt_stapsdt, &pattr, > NULL) == NULL) > return -1; /* errno already set */ > > @@ -477,8 +480,8 @@ clean_usdt_probes(dtrace_hdl_t *dtp) > > prp_next = dt_list_next(prp); > > - /* Make sure it is an overlying USDT probe. 
*/ > - if (prp->prov->impl != &dt_usdt) > + /* Make sure it is an overlying USDT, stapsdt probe. */ > + if (prp->prov->impl != &dt_usdt && prp->prov->impl != &dt_stapsdt) > continue; > > /* FIXME passing in NULL pcb and dpr wreaks havoc on error reporting? */ > @@ -637,6 +640,7 @@ static int add_probe_uprobe(dtrace_hdl_t *dtp, dt_probe_t *prp) > return 0; > } > > +/* shared between usdt, stapsdt probes */ > static int add_probe_usdt(dtrace_hdl_t *dtp, dt_probe_t *prp) > { > char probnam[DTRACE_FULLNAMELEN], *p; > @@ -890,6 +894,7 @@ static dt_probe_t *create_underlying(dtrace_hdl_t *dtp, > case DTPPT_OFFSETS: > case DTPPT_ABSOFFSETS: > case DTPPT_USDT: > + case DTPPT_STAPSDT: > snprintf(prb, sizeof(prb), "%lx", psp->pps_off); > break; > default: > @@ -904,7 +909,7 @@ static dt_probe_t *create_underlying(dtrace_hdl_t *dtp, > pd.prb = prb; > > dt_dprintf("Providing underlying probe %s:%s:%s:%s @ %lx\n", psp->pps_prv, > - psp->pps_mod, psp->pps_fn, psp->pps_prb, psp->pps_off); > + psp->pps_mod, psp->pps_fun, psp->pps_prb, psp->pps_off); > uprp = dt_probe_lookup(dtp, &pd); > if (uprp == NULL) { > dt_provider_t *pvp; > @@ -1108,11 +1113,24 @@ static int provide_usdt_probe(dtrace_hdl_t *dtp, const pid_probespec_t *psp) > return provide_probe(dtp, psp, psp->pps_prb, &dt_usdt, PP_IS_FUNCALL); > } > > +static int provide_stapsdt_probe(dtrace_hdl_t *dtp, const pid_probespec_t *psp) > +{ > + if (psp->pps_type != DTPPT_STAPSDT && > + psp->pps_type != DTPPT_IS_ENABLED) { > + dt_dprintf("pid: unknown stapsdt probe type %i\n", psp->pps_type); > + return -1; > + } > + > + return provide_probe(dtp, psp, psp->pps_prb, &dt_stapsdt, PP_IS_FUNCALL); > +} > + > + > static void enable(dtrace_hdl_t *dtp, dt_probe_t *prp, int is_usdt) > { > const list_probe_t *pup; > > - assert(prp->prov->impl == &dt_pid || prp->prov->impl == &dt_usdt); > + assert(prp->prov->impl == &dt_pid || prp->prov->impl == &dt_usdt || > + prp->prov->impl == &dt_stapsdt); > > /* > * We need to enable the underlying 
probes (if not enabled yet). > @@ -1144,6 +1162,11 @@ static void enable_usdt(dtrace_hdl_t *dtp, dt_probe_t *prp) > enable(dtp, prp, 1); > } > > +static void enable_stapsdt(dtrace_hdl_t *dtp, dt_probe_t *prp) > +{ > + enable(dtp, prp, 1); > +} > + > /* > * Generate code that populates, counts the probe arguments. > */ > @@ -1875,3 +1898,15 @@ dt_provimpl_t dt_usdt = { > .discover = &discover, > .add_probe = &add_probe_usdt, > }; > + > +/* > + * Used for stapsdt probes. > + */ > +dt_provimpl_t dt_stapsdt = { > + .name = "stapsdt", > + .prog_type = BPF_PROG_TYPE_UNSPEC, > + .provide_probe = &provide_stapsdt_probe, > + .enable = &enable_stapsdt, > + .probe_destroy = &probe_destroy, > + .add_probe = &add_probe_usdt, > +}; > -- > 2.43.5 > From eugene.loh at oracle.com Tue Jul 1 23:16:07 2025 From: eugene.loh at oracle.com (Eugene Loh) Date: Tue, 1 Jul 2025 19:16:07 -0400 Subject: [DTrace-devel] [PATCH v2 3/4] dtrace: add tcp provider In-Reply-To: <20250610135813.15746-4-alan.maguire@oracle.com> References: <20250610135813.15746-1-alan.maguire@oracle.com> <20250610135813.15746-4-alan.maguire@oracle.com> Message-ID: <8ed88a3b-6e37-a696-d6bb-38745c144995@oracle.com> I'll try reading this some more, but there is much I do not understand. For now: I think tcp.d needs an updated Copyright year. Also, there seems to be a missing ">"; check " Based upon various fbt probe points support TCP send, receive, > state-change, accept-established, accept-refused, connect-request, > connect-established and connect-refused probes. > > A few tweaks were needed to tcp.d to support the probes fully. 
> > Signed-off-by: Alan Maguire > --- > libdtrace/Build | 2 + > libdtrace/dt_prov_tcp.c | 405 ++++++++++++++++++++++++++++++++++++++++ > libdtrace/dt_provider.c | 1 + > libdtrace/dt_provider.h | 1 + > libdtrace/ip.d | 1 - > libdtrace/net.d | 6 +- > libdtrace/tcp.d | 52 +++--- > 7 files changed, 443 insertions(+), 25 deletions(-) > create mode 100644 libdtrace/dt_prov_tcp.c > > diff --git a/libdtrace/Build b/libdtrace/Build > index 7e6e8a38..a5439354 100644 > --- a/libdtrace/Build > +++ b/libdtrace/Build > @@ -59,6 +59,7 @@ libdtrace-build_SOURCES = dt_aggregate.c \ > dt_prov_sched.c \ > dt_prov_sdt.c \ > dt_prov_syscall.c \ > + dt_prov_tcp.c \ > dt_prov_uprobe.c \ > dt_provider.c \ > dt_provider_sdt.c \ > @@ -117,6 +118,7 @@ dt_prov_rawtp.c_CFLAGS := -Wno-pedantic > dt_prov_sched.c_CFLAGS := -Wno-pedantic > dt_prov_sdt.c_CFLAGS := -Wno-pedantic > dt_prov_syscall.c_CFLAGS := -Wno-pedantic > +dt_prov_tcp.c_CFLAGS := -Wno-pedantic > dt_prov_uprobe.c_CFLAGS := -Wno-pedantic > dt_debug.c_CFLAGS := -Wno-prio-ctor-dtor > > diff --git a/libdtrace/dt_prov_tcp.c b/libdtrace/dt_prov_tcp.c > new file mode 100644 > index 00000000..75e1e3a9 > --- /dev/null > +++ b/libdtrace/dt_prov_tcp.c > @@ -0,0 +1,405 @@ > +/* > + * Oracle Linux DTrace. > + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. > + * Licensed under the Universal Permissive License v 1.0 as shown at > + * http://oss.oracle.com/licenses/upl. > + * > + * The 'tcp' SDT provider for DTrace-specific probes. > + */ > +#include > +#include > +#include > + > +#include "dt_dctx.h" > +#include "dt_cg.h" > +#include "dt_provider_sdt.h" > +#include "dt_probe.h" > + > +static const char prvname[] = "tcp"; > +static const char modname[] = "vmlinux"; > + > +enum { > + NET_PROBE_OUTBOUND = 0, > + NET_PROBE_INBOUND, > + NET_PROBE_STATE > +}; > + > +static probe_dep_t probes[] = { > + /* does not fire on UEK7 unless rawfbt; no idea why... 
*/ > + { "accept-established", > + DTRACE_PROBESPEC_NAME, "rawfbt::tcp_init_transfer:entry" }, > + { "accept-refused", > + DTRACE_PROBESPEC_NAME, "fbt::tcp_v4_send_reset:entry" }, > + { "accept-refused", > + DTRACE_PROBESPEC_NAME, "fbt::tcp_v6_send_reset:entry" }, > + { "connect-established", > + DTRACE_PROBESPEC_NAME, "fbt::tcp_finish_connect:entry" }, > + { "connect-refused", > + DTRACE_PROBESPEC_NAME, "fbt::tcp_reset:entry" }, > + { "connect-request", > + DTRACE_PROBESPEC_NAME, "fbt::ip_queue_xmit:entry" }, > + /* ip6_xmit has > 6 args so cannot fentry on aarch64; use rawfbt */ > + { "connect-request", > + DTRACE_PROBESPEC_NAME, "rawfbt::ip6_xmit:entry" }, > + { "receive", > + DTRACE_PROBESPEC_NAME, "fbt::tcp_rcv_established:entry" }, > + { "receive", > + DTRACE_PROBESPEC_NAME, "fbt::tcp_rcv_state_process:entry" }, > + { "receive", > + DTRACE_PROBESPEC_NAME, "fbt::tcp_v4_send_reset:entry" }, > + { "send", > + DTRACE_PROBESPEC_NAME, "fbt::ip_queue_xmit:entry" }, > + /* ip_send_unicast_reply has 10 args so cannot fentry; use rawfbt */ > + { "send", > + DTRACE_PROBESPEC_NAME, "rawfbt::ip_send_unicast_reply:entry" }, > + { "send", > + DTRACE_PROBESPEC_NAME, "fbt::ip_build_and_send_pkt" }, > + /* ip6_xmit has > 6 args so cannot fentry on aarch64; use rawfbt */ > + { "send", > + DTRACE_PROBESPEC_NAME, "rawfbt::ip6_xmit:entry" }, > + { "state-change", > + DTRACE_PROBESPEC_NAME, "sdt:::inet_sock_set_state" }, > + { "state-change", > + DTRACE_PROBESPEC_NAME, "fbt::tcp_time_wait:entry" }, > + { "state-change", > + DTRACE_PROBESPEC_NAME, "fbt::inet_csk_clone_lock:entry" }, > + { NULL, } > +}; > + > +static probe_arg_t probe_args[] = { > + { "accept-established", 0, { 0, 0, "struct sk_buff *", "pktinfo_t *" } }, > + { "accept-established", 1, { 1, 0, "struct sock *", "csinfo_t *" } }, > + { "accept-established", 2, { 2, 0, "void_ip_t *", "ipinfo_t *" } }, > + { "accept-established", 3, { 3, 0, "struct tcp_sock *", "tcpsinfo_t *" } }, > + { "accept-established", 4, { 4, 0, 
"struct tcphdr *", "tcpinfo_t *" } }, > + { "accept-established", 5, { 5, 0, "unsigned char", "int"} }, > + { "accept-established", 6, { 6, 0, "unsigned char", "tcplsinfo_t *" } }, > + { "accept-established", 7, { 7, 0, "int", "int" } }, > + > + { "accept-refused", 0, { 0, 0, "struct sk_buff *", "pktinfo_t *" } }, > + { "accept-refused", 1, { 1, 0, "struct sock *", "csinfo_t *" } }, > + { "accept-refused", 2, { 2, 0, "void_ip_t *", "ipinfo_t *" } }, > + { "accept-refused", 3, { 3, 0, "struct tcp_sock *", "tcpsinfo_t *" } }, > + { "accept-refused", 4, { 4, 0, "struct tcphdr *", "tcpinfo_t *" } }, > + { "accept-refused", 5, { 5, 0, "unsigned char", "int"} }, > + { "accept-refused", 6, { 6, 0, "unsigned char", "tcplsinfo_t *" } }, > + { "accept-refused", 7, { 7, 0, "int", "int" } }, > + > + { "connect-established", 0, { 0, 0, "struct sk_buff *", "pktinfo_t *" } }, > + { "connect-established", 1, { 1, 0, "struct sock *", "csinfo_t *" } }, > + { "connect-established", 2, { 2, 0, "void_ip_t *", "ipinfo_t *" } }, > + { "connect-established", 3, { 3, 0, "struct tcp_sock *", "tcpsinfo_t *" } }, > + { "connect-established", 4, { 4, 0, "struct tcphdr *", "tcpinfo_t *" } }, > + { "connect-established", 5, { 5, 0, "unsigned char", "int"} }, > + { "connect-established", 6, { 6, 0, "unsigned char", "tcplsinfo_t *" } }, > + { "connect-established", 7, { 7, 0, "int", "int" } }, > + > + { "connect-refused", 0, { 0, 0, "struct sk_buff *", "pktinfo_t *" } }, > + { "connect-refused", 1, { 1, 0, "struct sock *", "csinfo_t *" } }, > + { "connect-refused", 2, { 2, 0, "void_ip_t *", "ipinfo_t *" } }, > + { "connect-refused", 3, { 3, 0, "struct tcp_sock *", "tcpsinfo_t *" } }, > + { "connect-refused", 4, { 4, 0, "struct tcphdr *", "tcpinfo_t *" } }, > + { "connect-refused", 5, { 5, 0, "unsigned char", "int"} }, > + { "connect-refused", 6, { 6, 0, "unsigned char", "tcplsinfo_t *" } }, > + { "connect-refused", 7, { 7, 0, "int", "int" } }, > + > + { "connect-request", 0, { 0, 0, "struct 
sk_buff *", "pktinfo_t *" } }, > + { "connect-request", 1, { 1, 0, "struct sock *", "csinfo_t *" } }, > + { "connect-request", 2, { 2, 0, "__dtrace_tcp_void_ip_t *", "ipinfo_t *" } }, > + { "connect-request", 3, { 3, 0, "struct tcp_sock *", "tcpsinfo_t *" } }, > + { "connect-request", 4, { 4, 0, "struct tcphdr *", "tcpinfo_t *" } }, > + { "connect-request", 5, { 5, 0, "unsigned char", "int"} }, > + { "connect-request", 6, { 6, 0, "unsigned char", "tcplsinfo_t *" } }, > + { "connect-request", 7, { 7, 0, "int", "int" } }, > + > + { "receive", 0, { 0, 0, "struct sk_buff *", "pktinfo_t *" } }, > + { "receive", 1, { 1, 0, "struct sock *", "csinfo_t *" } }, > + { "receive", 2, { 2, 0, "void_ip_t *", "ipinfo_t *" } }, > + { "receive", 3, { 3, 0, "struct tcp_sock *", "tcpsinfo_t *" } }, > + { "receive", 4, { 4, 0, "struct tcphdr *", "tcpinfo_t *" } }, > + { "receive", 5, { 5, 0, "unsigned char", "int"} }, > + { "receive", 6, { 6, 0, "unsigned char", "tcplsinfo_t *" } }, > + { "receive", 7, { 7, 0, "int", "int" } }, > + > + { "send", 0, { 0, 0, "struct sk_buff *", "pktinfo_t *" } }, > + { "send", 1, { 1, 0, "struct sock *", "csinfo_t *" } }, > + { "send", 2, { 2, 0, "__dtrace_tcp_void_ip_t *", "ipinfo_t *" } }, > + { "send", 3, { 3, 0, "struct tcp_sock *", "tcpsinfo_t *" } }, > + { "send", 4, { 4, 0, "struct tcphdr *", "tcpinfo_t *" } }, > + { "send", 5, { 5, 0, "unsigned char", "int"} }, > + { "send", 6, { 6, 0, "unsigned char", "tcplsinfo_t *" } }, > + { "send", 7, { 7, 0, "int", "int" } }, > + > + { "state-change", 0, { 0, 0, "void *", "void *", } }, > + { "state-change", 1, { 1, 0, "struct sock *", "csinfo_t *" } }, > + { "state-change", 2, { 2, 0, "void *", "void *" } }, > + { "state-change", 3, { 3, 0, "struct tcp_sock *", "tcpsinfo_t *" } }, > + { "state-change", 4, { 4, 0, "void *", "void *" } }, > + { "state-change", 5, { 5, 0, "void *", "void *" } }, > + { "state-change", 6, { 6, 0, "struct sock *", "tcplsinfo_t *" } }, > + { "state-change", 7, { 7, 0, "int", 
"int" } }, > + > + { NULL, } > +}; > + > +static const dtrace_pattr_t pattr = { > +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, > +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, > +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, > +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, > +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, > +}; > + > +/* > + * Provide all the "tcp" SDT probes. > + */ > +static int populate(dtrace_hdl_t *dtp) > +{ > + return dt_sdt_populate(dtp, prvname, modname, &dt_tcp, &pattr, > + probe_args, probes); > +} > + > +/* > + * Generate a BPF trampoline for a SDT probe. > + * > + * The trampoline function is called when a SDT probe triggers, and it must > + * satisfy the following prototype: > + * > + * int dt_tcp(void *data) > + * > + * The trampoline will populate a dt_dctx_t struct and then call the function > + * that implements the compiled D clause. It returns the value that it gets > + * back from that function. 
> + */ > +static int trampoline(dt_pcb_t *pcb, uint_t exitlbl) > +{ > + dt_irlist_t *dlp = &pcb->pcb_ir; > + dt_probe_t *prp = pcb->pcb_probe; > + dt_probe_t *uprp = pcb->pcb_parent_probe; > + int direction, have_iphdr; > + int skarg = 0, skbarg = 1, tcparg = 0; > + int skarg_maybe_null; > + int skstate = 0; > + > + /* > + * We construct the tcp::: probe arguments as > + * follows: > + * args[0] = skb > + * args[1] = sk > + * args[2] = ip_hdr(skb) [if available] > + * args[3] = sk [struct tcp_sock *] > + * args[4] = tcp_hdr(skb) > + * args[5] = sk->sk_state > + * args[6] = sk->sk_state > + * args[7] = NET_PROBE_INBOUND (0x1) | NET_PROBE_OUTBOUND (0x0) > + */ > + > + if (strcmp(prp->desc->prb, "state-change") == 0) { > + int newstatearg; > + int skip_state = 0; > + int check_proto = IPPROTO_TCP; > + > + /* For pre-6.14 kernels, inet_sock_state_change() to > + * TCP_SYN_RCV is broken in that the cloned socket has > + * not yet copied info of interest like addresses, ports. > + * This is fixed in 6.14 via > + * > + * commit a3a128f611a965fddf8a02dd45716f96e0738e00 > + * Author: Eric Dumazet > + * Date: Wed Feb 12 13:13:28 2025 +0000 > + * > + * inet: consolidate inet_csk_clone_lock() > + * > + * To work around this we trace inet_csk_clone_lock and > + * use the reqsk (arg1) as the means to populate the > + * struct tcpinfo. We need then to explicitly set the > + * state to TCP_SYN_RCV and also skip the case where > + * inet_sock_set_state() specifies TCP_SYN_RCV otherwise > + * we will get a probe double-firing. 
> + */ > + if (strcmp(uprp->desc->fun, "inet_csk_clone_lock") == 0) { > + skarg = 1; > + newstatearg = 2; > + check_proto = 0; > + emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(2), > + BPF_TCP_SYN_RECV)); > + } else if (strcmp(uprp->desc->fun, "tcp_time_wait") == 0) { > + skarg = 0; > + newstatearg = 1; > + } else { > + skarg = 0; > + newstatearg = 2; > + skip_state = BPF_TCP_SYN_RECV; > + } > + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(skarg))); > + emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_6, 0, exitlbl)); > + /* check it is a TCP socket */ > + if (check_proto) { > + dt_cg_get_member(pcb, "struct sock", BPF_REG_6, > + "sk_protocol"); > + emit(dlp, BPF_BRANCH_IMM(BPF_JNE, BPF_REG_0, > + IPPROTO_TCP, exitlbl)); > + } > + /* save sk */ > + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(skarg))); > + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(3), BPF_REG_6)); > + > + /* save new state */ > + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(newstatearg))); > + if (skip_state) { > + emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_6, skip_state, > + exitlbl)); > + } > + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(6), BPF_REG_6)); > + > + /* save sk */ > + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(3))); > + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_6)); > + > + /* save empty args */ > + emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(0), 0)); > + emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(2), 0)); > + emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(4), 0)); > + emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(5), 0)); > + > + /* NET_PROBE_STATE */ > + emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(7), > + NET_PROBE_STATE)); > + return 0; > + } > + > + if (strcmp(prp->desc->prb, "accept-established") == 0) { > + direction = NET_PROBE_OUTBOUND; > + have_iphdr = 1; > + /* skb in arg2 not arg1 */ > + skbarg = 2; > + skarg_maybe_null = 0; > + /* ensure arg1 is 
BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB */ > + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(1))); > + emit(dlp, BPF_BRANCH_IMM(BPF_JNE, BPF_REG_6, > + BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, > + exitlbl)); > + } else if (strcmp(prp->desc->prb, "receive") == 0 || > + strcmp(prp->desc->prb, "accept-refused") == 0) { > + direction = NET_PROBE_INBOUND; > + have_iphdr = 1; > + if (strcmp(uprp->desc->fun, "tcp_v4_send_reset") == 0 || > + strcmp(uprp->desc->fun, "tcp_v6_send_reset") == 0) > + skarg_maybe_null = 1; > + else > + skarg_maybe_null = 0; > + } else if (strcmp(prp->desc->prb, "connect-established") == 0) { > + direction = NET_PROBE_INBOUND; > + have_iphdr = 1; > + skarg_maybe_null = 0; > + } else if (strcmp(prp->desc->prb, "connect-refused") == 0) { > + direction = NET_PROBE_INBOUND; > + have_iphdr = 1; > + skarg_maybe_null = 0; > + skstate = BPF_TCP_SYN_SENT; > + } else { > + direction = NET_PROBE_OUTBOUND; > + if (strcmp(uprp->desc->fun, "ip_send_unicast_reply") == 0) { > + /* NULL sk in arg1 not arg2 (we dont want ctl_sk) */ > + skarg = 1; > + /* skb in arg2 not arg1 */ > + skbarg = 2; > + have_iphdr = 1; > + /* tcp hdr in ip_reply_arg * */ > + tcparg = 6; > + skarg_maybe_null = 1; > + } else if (strcmp(uprp->desc->fun, "ip_build_and_send_pkt") == 0) { > + skarg = 1; > + skbarg = 0; > + have_iphdr = 0; > + skarg_maybe_null = 1; > + } else if (strcmp(prp->desc->prb, "connect-request") == 0) { > + skstate = BPF_TCP_SYN_SENT; > + have_iphdr = 0; > + skarg_maybe_null = 0; > + } else { > + have_iphdr = 0; > + skarg_maybe_null = 0; > + } > + } > + > + /* first save sk to args[3]; this avoids overwriting it when we > + * populate args[0,1] below. 
> + */ > + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(skarg))); > + /* only allow NULL sk for ip_send_unicast_reply() */ > + if (!skarg_maybe_null) > + emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_6, 0, exitlbl)); > + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(3), BPF_REG_6)); > + > + /* then save skb to args[0] */ > + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(skbarg))); > + emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_6, 0, exitlbl)); > + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_6)); > + > + /* next save sk to args[1] now that we have skb in args[0] */ > + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(3))); > + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_6)); > + > + /* > + * ip_hdr(skb) = > + * skb_network_header(skb) = (include/linux/ip.h) > + * skb->head + skb->network_header (include/linux/skbuff.h) > + */ > + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(0))); > + dt_cg_get_member(pcb, "struct sk_buff", BPF_REG_6, "head"); > + if (have_iphdr) > + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(2), BPF_REG_0)); > + else > + emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(2), 0)); > + > + if (have_iphdr) { > + dt_cg_get_member(pcb, "struct sk_buff", BPF_REG_6, > + "network_header"); > + emit(dlp, BPF_XADD_REG(BPF_DW, BPF_REG_7, DMST_ARG(2), BPF_REG_0)); > + } > + /* > + * tcp_hdr(skb) = > + * skb_transport_header(skb) = (include/linux/ip.h) > + * skb->head + skb->transport_header (include/linux/skbuff.h) > + */ > + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(tcparg))); > + if (tcparg) { > + /* struct ip_reply_arg * has a kvec containing the tcp header */ > + dt_cg_get_member(pcb, "struct kvec", BPF_REG_6, "iov_base"); > + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(4), BPF_REG_0)); > + } else { > + dt_cg_get_member(pcb, "struct sk_buff", BPF_REG_6, "head"); > + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(4), BPF_REG_0)); > + dt_cg_get_member(pcb, 
"struct sk_buff", BPF_REG_6, > + "transport_header"); > + emit(dlp, BPF_XADD_REG(BPF_DW, BPF_REG_7, DMST_ARG(4), BPF_REG_0)); > + } > + > + if (!skarg_maybe_null) { > + /* save sk state */ > + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(3))); > + dt_cg_get_member(pcb, "struct sock_common", BPF_REG_6, > + "skc_state"); > + /* ensure sk state - if specified - is what we expect */ > + if (skstate) > + emit(dlp, BPF_BRANCH_IMM(BPF_JNE, BPF_REG_0, skstate, > + exitlbl)); > + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(5), BPF_REG_0)); > + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(6), BPF_REG_0)); > + } > + emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(7), direction)); > + > + return 0; > +} > + > +dt_provimpl_t dt_tcp = { > + .name = prvname, > + .prog_type = BPF_PROG_TYPE_UNSPEC, > + .populate = &populate, > + .enable = &dt_sdt_enable, > + .load_prog = &dt_bpf_prog_load, > + .trampoline = &trampoline, > + .probe_info = &dt_sdt_probe_info, > + .destroy = &dt_sdt_destroy, > +}; > diff --git a/libdtrace/dt_provider.c b/libdtrace/dt_provider.c > index 0c621197..798e67ee 100644 > --- a/libdtrace/dt_provider.c > +++ b/libdtrace/dt_provider.c > @@ -41,6 +41,7 @@ const dt_provimpl_t *dt_providers[] = { > &dt_sched, > &dt_sdt, > &dt_syscall, > + &dt_tcp, > &dt_uprobe, > &dt_usdt, > NULL > diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h > index 59a8d62e..4db89b45 100644 > --- a/libdtrace/dt_provider.h > +++ b/libdtrace/dt_provider.h > @@ -87,6 +87,7 @@ extern dt_provimpl_t dt_rawtp; > extern dt_provimpl_t dt_sched; > extern dt_provimpl_t dt_sdt; > extern dt_provimpl_t dt_syscall; > +extern dt_provimpl_t dt_tcp; > extern dt_provimpl_t dt_uprobe; > extern dt_provimpl_t dt_usdt; > > diff --git a/libdtrace/ip.d b/libdtrace/ip.d > index f8b77f12..d59bb436 100644 > --- a/libdtrace/ip.d > +++ b/libdtrace/ip.d > @@ -51,7 +51,6 @@ inline int TCP_MIN_HEADER_LENGTH = 20; > * to the net namespace (nd_net in struct net_device). 
> */ > typedef uint64_t netstackid_t; > -typedef __be32 ipaddr_t; > typedef struct in6_addr in6_addr_t; > > /* > diff --git a/libdtrace/net.d b/libdtrace/net.d > index 6ac34287..45b5cba3 100644 > --- a/libdtrace/net.d > +++ b/libdtrace/net.d > @@ -25,9 +25,13 @@ typedef struct conninfo { > string ci_protocol; /* protocol (ipv4, ipv6, etc) */ > } conninfo_t; > > +typedef __be32 ipaddr_t; > + > /* > * We use these values to determine if a probe point is associated > - * with sending (outbound) or receiving (inbound). > + * with sending (outbound) or receiving (inbound) or a state-related > + * probe (i.e. neither in our outbound). > */ > inline int NET_PROBE_OUTBOUND = 0x00; > inline int NET_PROBE_INBOUND = 0x01; > +inline int NET_PROBE_STATE = 0x02; > diff --git a/libdtrace/tcp.d b/libdtrace/tcp.d > index 54e310cb..d4beea87 100644 > --- a/libdtrace/tcp.d > +++ b/libdtrace/tcp.d > @@ -8,7 +8,6 @@ > #pragma D depends_on module vmlinux > #pragma D depends_on library net.d > #pragma D depends_on provider ip > -#pragma D depends_on provider tcp > > inline int TH_FIN = 0x01; > inline int TH_SYN = 0x02; > @@ -60,7 +59,7 @@ typedef struct tcpinfo { > uint32_t tcp_seq; /* sequence number */ > uint32_t tcp_ack; /* acknowledgment number */ > uint8_t tcp_offset; /* data offset, in bytes */ > - uint8_t tcp_flags; /* flags */ > + uint16_t tcp_flags; /* flags */ > uint16_t tcp_window; /* window size */ > uint16_t tcp_checksum; /* checksum */ > uint16_t tcp_urgent; /* urgent data pointer */ > @@ -111,13 +110,16 @@ translator tcpinfo_t < struct tcphdr *T > { > tcp_seq = T ? ntohl(T->seq) : 0; > tcp_ack = T ? ntohl(T->ack_seq) : 0; > tcp_offset = T ? (*(uint8_t *)(T + 12) & 0xf0) >> 2 : 0; > - tcp_flags = T ? *(uint8_t *)(T + 13) : 0; > + tcp_flags = T ? *((uint8_t *)T + 13) : 0; > tcp_window = T ? ntohs(T->window) : 0; > tcp_checksum = T ? ntohs(T->check) : 0; > tcp_urgent = T ? 
ntohs(T->urg_ptr) : 0; > tcp_hdr = (uintptr_t)T; > }; > > +inline int tcp_fullsock[struct tcp_sock *sk] = > + (((struct sock_common *)sk)->skc_state != TCP_STATE_SYN_RECEIVED && > + ((struct sock_common *)sk)->skc_state != TCP_STATE_TIME_WAIT); > /* > * In the main we simply translate from the "struct [tcp_]sock *" to > * a tcpsinfo_t *. However there are a few exceptions: > @@ -158,47 +160,45 @@ translator tcpsinfo_t < struct tcp_sock *T > { > ((uint32_t *)&((struct sock *)T)->__sk_common.skc_v6_daddr)[2] && > ((uint32_t *)&((struct sock *)T)->__sk_common.skc_v6_rcv_saddr)[3]) > : 0; > - tcps_lport = (T && ((struct inet_sock *)T)->inet_sport != 0) ? > + tcps_lport = T && ((struct inet_sock *)T)->inet_sport != 0 && > + tcp_fullsock[T] ? > ntohs(((struct inet_sock *)T)->inet_sport) : > (T && ((struct inet_sock *)T)->inet_sport == 0) ? > - ntohs(((struct sock *)T)->__sk_common.skc_num) : > + ((struct sock *)T)->__sk_common.skc_num : > arg4 != NULL ? > ntohs(arg7 == NET_PROBE_INBOUND ? > - ((struct tcphdr *)arg4)->dest : ((struct tcphdr *)arg4)->source) : > + ((struct tcphdr *)arg4)->dest : > + ((struct tcphdr *)arg4)->source) : > 0; > tcps_rport = T && ((struct sock *)T)->__sk_common.skc_dport != 0 ? > ntohs(((struct sock *)T)->__sk_common.skc_dport) : > arg4 != NULL ? > ntohs(arg7 == NET_PROBE_INBOUND ? > - ((struct tcphdr *)arg4)->source : ((struct tcphdr *)arg4)->dest) : > + ((struct tcphdr *)arg4)->source : > + ((struct tcphdr *)arg4)->dest) : > 0; > tcps_laddr = > T && ((struct sock *)T)->__sk_common.skc_family == AF_INET ? > inet_ntoa(&((struct sock *)T)->__sk_common.skc_rcv_saddr) : > T && ((struct sock *)T)->__sk_common.skc_family == AF_INET6 ? > inet_ntoa6(&((struct sock *)T)->__sk_common.skc_v6_rcv_saddr) : > - arg2 != NULL && (*(uint8_t *)arg2) >> 4 == 4 ? > - inet_ntoa(arg7 == NET_PROBE_INBOUND ? > - &((struct iphdr *)arg2)->daddr : &((struct iphdr *)arg2)->saddr) : > - arg2 != NULL && *((uint8_t *)arg2) >> 4 == 6 ? 
> - inet_ntoa6(arg7 == NET_PROBE_INBOUND ? > - &((struct ipv6hdr *)arg2)->daddr : > - &((struct ipv6hdr *)arg2)->saddr) : > + arg2 != NULL && (*(uint8_t *)arg2 >> 4) == 4 ? > + inet_ntoa(&((struct iphdr *)arg2)->daddr) : > + arg2 != NULL && (*(uint8_t *)arg2 >> 4) == 6 ? > + inet_ntoa6(&((struct ipv6hdr *)arg2)->daddr) : > ""; > tcps_raddr = > T && ((struct sock *)T)->__sk_common.skc_family == AF_INET ? > inet_ntoa(&((struct sock *)T)->__sk_common.skc_daddr) : > T && ((struct sock *)T)->__sk_common.skc_family == AF_INET6 ? > inet_ntoa6(&((struct sock *)T)->__sk_common.skc_v6_daddr) : > - arg2 != NULL && (*(uint8_t *)arg2) >> 4 == 4 ? > - inet_ntoa(arg7 == NET_PROBE_INBOUND ? > - &((struct iphdr *)arg2)->saddr : &((struct iphdr *)arg2)->daddr) : > - arg2 != NULL && *((uint8_t *)arg2) >> 4 == 6 ? > - inet_ntoa6(arg7 == NET_PROBE_INBOUND ? > - &((struct ipv6hdr *)arg2)->saddr : > - &((struct ipv6hdr *)arg2)->daddr) : > - ""; > - tcps_state = arg6; > + arg2 != NULL && (*(uint8_t *)arg2 >> 4) == 4 ? > + inet_ntoa(&((struct iphdr *)arg2)->saddr) : > + arg2 != NULL && (*(uint8_t *)arg2 >> 4) == 6 ? > + inet_ntoa6(&((struct ipv6hdr *)arg2)->saddr) : > + " + tcps_state = arg7 == NET_PROBE_STATE ? arg6 : > + T ? ((struct sock *)T)->__sk_common.skc_state : 0; > tcps_iss = T ? > T->snd_una - (uint32_t)T->bytes_acked : 0; > tcps_suna = T ? T->snd_una : 0; > @@ -229,3 +229,9 @@ translator tcpsinfo_t < struct tcp_sock *T > { > translator tcplsinfo_t < int I > { > tcps_state = I; > }; > + > +/* For tracepoint, the last state is in the sock state, next passed as arg6 */ > +#pragma D binding "1.6.3" translator > +translator tcplsinfo_t < struct sock *S > { > + tcps_state = S ? 
S->__sk_common.skc_state : 0; > +}; From alan.maguire at oracle.com Wed Jul 2 14:52:09 2025 From: alan.maguire at oracle.com (Alan Maguire) Date: Wed, 2 Jul 2025 15:52:09 +0100 Subject: [DTrace-devel] [PATCH v2 2/4] dt_impl: bump number of TSLOTS to 8 In-Reply-To: <4a079f61-5806-68b8-9051-af918a822adc@oracle.com> References: <20250610135813.15746-1-alan.maguire@oracle.com> <20250610135813.15746-3-alan.maguire@oracle.com> <4a079f61-5806-68b8-9051-af918a822adc@oracle.com> Message-ID: On 01/07/2025 19:31, Eugene Loh wrote: > Very simple patch, but one key problem. The changed line is preceded by > a huge comment block that goes to excruciating pains to explain why the > value should be 4. So, I'm fine with the change itself and I think the > "Because of the complexity..." sentence in the commit message can be > dropped, but the comment block in the file needs to be updated to > explain (with corresponding pains?) why we need 8 (not 9, not 7). > Ah good catch, forgot to do this. I actually think 6 is enough. The need for additional slots seems to be a result of the various complex inet_ntoa*() calls in the tcp.d translators; some have ternary operators with multiple inet_ntoa*()s. The origin appears to be dt_cg_ternary_op() and that has the comment: /* * Strings complicate things a bit because dn_left and dn_right might * actually be temporary strings (tstring) *and* in different slots. * We need to allocate a new tstring to hold the result, and copy the * value into the new tstring (and free any tstrings in dn_left and * dn_right). */ So if we have a left and right ternary and both allocate 3 tstring slots for inet_ntoa*()s that means 3*2 = 6 tstring slots should actually be enough. Additional testing suggests that is so; does the above make sense or are there additional things to consider here? I'll update the next version with a comment describing the above anyhow. Thanks for the review! 
Alan > On 6/10/25 09:58, Alan Maguire wrote: >> Because of the complexity of the TCP translators, more tslots are >> needed. >> >> Signed-off-by: Alan Maguire >> --- >>   libdtrace/dt_impl.h | 2 +- >>   1 file changed, 1 insertion(+), 1 deletion(-) >> >> diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h >> index 68fb8ec5..10424f9c 100644 >> --- a/libdtrace/dt_impl.h >> +++ b/libdtrace/dt_impl.h >> @@ -218,7 +218,7 @@ typedef struct dt_kern_path { >>    * - cleanpath() holds a prepended '/' char, a string, an appended >> '/' char, >>    *    and a terminating NUL char, or STRSZ + 3 chars altogether >>    */ >> -#define DT_TSTRING_SLOTS    4 >> +#define DT_TSTRING_SLOTS    8 >>   #define DT_TSTRING_SIZE(dtp)    \ >>           MAX(P2ROUNDUP((dtp)->dt_options[DTRACEOPT_STRSIZE] + 3, 8), \ >>               72) From alan.maguire at oracle.com Wed Jul 2 14:52:36 2025 From: alan.maguire at oracle.com (Alan Maguire) Date: Wed, 2 Jul 2025 15:52:36 +0100 Subject: [DTrace-devel] [PATCH v2 0/4] DTrace TCP provider In-Reply-To: References: <20250610135813.15746-1-alan.maguire@oracle.com> Message-ID: <8e0fdb92-985b-4a1f-b41f-08c16c2bafc1@oracle.com> On 01/07/2025 20:27, Kris Van Hees wrote: > On Tue, Jul 01, 2025 at 03:08:59PM -0400, Eugene Loh wrote: >> Incidentally, looking at the subject lines for the patch series, I see: >> >>         dtrace: move get_member() to dt_cg.c >>         dt_impl: bump number of TSLOTS to 8 >>         dtrace: add tcp provider >>         dtrace: sync dlibs with tcp.d, ip.d and net.d changes > > I would suggest: > > cg: move get_member() to dt_cg.c > cg: bump number of TSLOTS to 8 > tcp: new provider -or- Implement the tcp provider > dlibs: sync dlibs with tcp.d, ip.d and net.d changes > perfect, thanks! >> That is, each line is prefixed with a component name. We do that a lot for >> "test:", but otherwise the practice seems to depend on... the patch author? >> Anyhow, I claim "dtrace:" is not very useful. 
For patch 1, all the prefix >> is saying is "there is movement in the dtrace code base." I think the >> subject would be more direct without the prefix. Same for patch 2. For >> patch 3, if one wanted to use a component prefix, I'd think one would use >> "tcp:". >> >> Historically, it looks like we've added new providers with subject lines >> like this: >> >>     rawfbt: new provider >>     Implement the io provider >>     Implement the ip provider >>     Implement the lockstat provider >>     Implement the sched provider (first part) >>     Implement the proc provider >>     provider: Implement a rawtp provider >>     Add a CPC provider >>     PID provider implementation >>     Add a profile provider >>     Implement BEGIN and END probes for the dtrace provider >>     Added IO provider support for 4.14 kernels >> >> Everyone has their own style. Shrug. No big deal. >> >> Anyhow, personally, I think the prefixes don't add much, especially for >> "dtrace:". My suggestion is to drop the prefixes, but... your call. >> >> On 6/10/25 09:58, Alan Maguire wrote: >> >>> This series is a first draft of TCP provider support, where the >>> probes are implemented via underlying fbt and sdt probes. >>> >>> Due to the use of the sock/inet_sock_set_state tracepoint, intended >>> for ~5.15 kernels and later. Tried replacing this with >>> >>> fbt::tcp_set_state:entry >>> >>> but this misses a few state transitions, so stuck with using >>> the tracepoint. >>> >>> All tests under test/unittest/tcp pass unmodified on an upstream >>> (6.15) kernel and 5.15 UEK7U3 kernel. 
>>> >>> It implements all documented TCP provider probes: >>> >>> accept-established, accept-refused, connnect-request, >>> connect-established, connect-refused, receive, send, >>> state-change >>> >>> Changes since RFC: >>> >>> - fixed issues with test failures on UEK7 due to missing >>> SYN_RCV state change >>> - moved get_member() to dt_cg.c (patch 1) >>> >>> Alan Maguire (4): >>> dtrace: move get_member() to dt_cg.c >>> dt_impl: bump number of TSLOTS to 8 >>> dtrace: add tcp provider >>> dtrace: sync dlibs with tcp.d, ip.d and net.d changes >>> >>> dlibs/aarch64/5.14/ip.d | 1 - >>> dlibs/aarch64/5.14/net.d | 6 +- >>> dlibs/aarch64/5.14/tcp.d | 52 ++--- >>> dlibs/aarch64/5.16/ip.d | 1 - >>> dlibs/aarch64/5.16/net.d | 6 +- >>> dlibs/aarch64/5.16/tcp.d | 52 ++--- >>> dlibs/aarch64/6.1/ip.d | 1 - >>> dlibs/aarch64/6.1/net.d | 6 +- >>> dlibs/aarch64/6.1/tcp.d | 52 ++--- >>> dlibs/aarch64/6.10/ip.d | 1 - >>> dlibs/aarch64/6.10/net.d | 6 +- >>> dlibs/aarch64/6.10/tcp.d | 52 ++--- >>> dlibs/x86_64/5.14/ip.d | 1 - >>> dlibs/x86_64/5.14/net.d | 6 +- >>> dlibs/x86_64/5.14/tcp.d | 52 ++--- >>> dlibs/x86_64/5.16/ip.d | 1 - >>> dlibs/x86_64/5.16/net.d | 6 +- >>> dlibs/x86_64/5.16/tcp.d | 52 ++--- >>> dlibs/x86_64/6.1/ip.d | 1 - >>> dlibs/x86_64/6.1/net.d | 6 +- >>> dlibs/x86_64/6.1/tcp.d | 52 ++--- >>> dlibs/x86_64/6.10/ip.d | 1 - >>> dlibs/x86_64/6.10/net.d | 6 +- >>> dlibs/x86_64/6.10/tcp.d | 52 ++--- >>> libdtrace/Build | 2 + >>> libdtrace/dt_cg.c | 39 ++++ >>> libdtrace/dt_cg.h | 2 + >>> libdtrace/dt_impl.h | 2 +- >>> libdtrace/dt_prov_ip.c | 45 +---- >>> libdtrace/dt_prov_tcp.c | 405 +++++++++++++++++++++++++++++++++++++++ >>> libdtrace/dt_provider.c | 1 + >>> libdtrace/dt_provider.h | 1 + >>> libdtrace/ip.d | 1 - >>> libdtrace/net.d | 6 +- >>> libdtrace/tcp.d | 52 ++--- >>> 35 files changed, 761 insertions(+), 267 deletions(-) >>> create mode 100644 libdtrace/dt_prov_tcp.c >>> From alan.maguire at oracle.com Wed Jul 2 15:06:09 2025 From: alan.maguire at oracle.com 
(Alan Maguire) Date: Wed, 2 Jul 2025 16:06:09 +0100 Subject: [DTrace-devel] [PATCH v2 3/4] dtrace: add tcp provider In-Reply-To: <8ed88a3b-6e37-a696-d6bb-38745c144995@oracle.com> References: <20250610135813.15746-1-alan.maguire@oracle.com> <20250610135813.15746-4-alan.maguire@oracle.com> <8ed88a3b-6e37-a696-d6bb-38745c144995@oracle.com> Message-ID: On 02/07/2025 00:16, Eugene Loh wrote: > I'll try reading this some more, but there is much I do not understand. > For now: > > I think tcp.d needs an updated Copyright year. Also, there seems to be > a missing ">"; check "<unknown". > will fix, thanks! > With this patch, should we also git rm test/unittest/tcp/test.x? > true, will do. > On most VMs, >     test/unittest/tcp/tst.ipv4remotetcp.sh >     test/unittest/tcp/tst.ipv4remotetcpstate.sh > xfail due to missing remote. Are we okay with "shrugging our shoulders" > like that? > Yeah, I don't think the remote test is robust enough. Specifically in OCI it seems to always fail. I'd suggest we replace it with creating a network namespace with IP addresses configured on top of veths to simulate the remote case; the codepaths will be the same. I've done this in other test suites and it works well. > Meanwhile, my one non-OCI VM ran those tests. The first test passes. > The second one consistently reports >     -tcp:::state-change to time-wait - yes >     +tcp:::state-change to time-wait - no I hit some of these failures during development; adding the fbt::tcp_time_wait:entry probe helped. Is that inlined or something perhaps (grep tcp_time_wait /proc/kallsyms)? > and occasionally reports stuff like >     dtrace: error in dt_clause_2 for probe ID 4976 (tcp:vmlinux::send): > invalid address (0x1fc0c0000000000) at BPF pc 287 >     dtrace: error in dt_clause_2 for probe ID 4976 (tcp:vmlinux::send): > invalid address (0x225b80000000000) at BPF pc 287 > ah, ok there must be a null deref somewhere. Haven't seen this before; what kernel version/arch is this? 
> The non-remote tests fail on OL8 UEK6 (x86 and arm). > ??? dtrace: failed to compile script /dev/stdin: > ??? ".../build/dlibs/5.2/tcp.d", line 177: failed to resolve type of > inet_ntoa arg#1 (ipaddr_t *): > ??? Unknown type name > This is a weird failure; I see it on some systems but not on others. In tcp.d we have #pragma D depends_on library net.d which contains the typedef for ipaddr_t ; it seems that's not enough to pull in the typedef reliably. I suspect there is a timing element involved here in when the net.d library is included. Perhaps there is a better way to define ipaddr_t ; would using a builtin typedef in _dtrace_typedefs_32/64 work better perhaps? > The probe names are > ? ? tcp:ip:*:*?? ? ?? Solaris > ? ? tcp:vmlinux:*:* ? DTv1 > ? ? tcp:vmlinux::* ?? with this patch (that is, no more function) > I guess precedents have already been set for other SDT providers;? so, > okay.? Just noting for my own sake. > > Meanwhile, the typed args[] have changed in number and type from Solaris> to DTv1 to this patch.? Does that merit discussion? > Hmm, that's not intentional (aside from the additional INBOUND/OUTBOUND etc which we use to help inform translation). Do you see other changes aside from them? Thanks! Alan > On 6/10/25 09:58, Alan Maguire wrote: >> Based upon various fbt probe points support TCP send, receive, >> state-change, accept-established, accept-refused, connect-request, >> connect-established and connect-refused probes. >> >> A few tweaks were needed to tcp.d to support the probes fully. >> >> Signed-off-by: Alan Maguire >> --- >> ? libdtrace/Build???????? |?? 2 + >> ? libdtrace/dt_prov_tcp.c | 405 ++++++++++++++++++++++++++++++++++++++++ >> ? libdtrace/dt_provider.c |?? 1 + >> ? libdtrace/dt_provider.h |?? 1 + >> ? libdtrace/ip.d????????? |?? 1 - >> ? libdtrace/net.d???????? |?? 6 +- >> ? libdtrace/tcp.d???????? |? 52 +++--- >> ? 7 files changed, 443 insertions(+), 25 deletions(-) >> ? 
create mode 100644 libdtrace/dt_prov_tcp.c >> >> diff --git a/libdtrace/Build b/libdtrace/Build >> index 7e6e8a38..a5439354 100644 >> --- a/libdtrace/Build >> +++ b/libdtrace/Build >> @@ -59,6 +59,7 @@ libdtrace-build_SOURCES = dt_aggregate.c \ >> ??????????????? dt_prov_sched.c \ >> ??????????????? dt_prov_sdt.c \ >> ??????????????? dt_prov_syscall.c \ >> +????????????? dt_prov_tcp.c \ >> ??????????????? dt_prov_uprobe.c \ >> ??????????????? dt_provider.c \ >> ??????????????? dt_provider_sdt.c \ >> @@ -117,6 +118,7 @@ dt_prov_rawtp.c_CFLAGS := -Wno-pedantic >> ? dt_prov_sched.c_CFLAGS := -Wno-pedantic >> ? dt_prov_sdt.c_CFLAGS := -Wno-pedantic >> ? dt_prov_syscall.c_CFLAGS := -Wno-pedantic >> +dt_prov_tcp.c_CFLAGS := -Wno-pedantic >> ? dt_prov_uprobe.c_CFLAGS := -Wno-pedantic >> ? dt_debug.c_CFLAGS := -Wno-prio-ctor-dtor >> ? diff --git a/libdtrace/dt_prov_tcp.c b/libdtrace/dt_prov_tcp.c >> new file mode 100644 >> index 00000000..75e1e3a9 >> --- /dev/null >> +++ b/libdtrace/dt_prov_tcp.c >> @@ -0,0 +1,405 @@ >> +/* >> + * Oracle Linux DTrace. >> + * Copyright (c) 2025, Oracle and/or its affiliates. All rights >> reserved. >> + * Licensed under the Universal Permissive License v 1.0 as shown at >> + * http://oss.oracle.com/licenses/upl. >> + * >> + * The 'tcp' SDT provider for DTrace-specific probes. >> + */ >> +#include >> +#include >> +#include >> + >> +#include "dt_dctx.h" >> +#include "dt_cg.h" >> +#include "dt_provider_sdt.h" >> +#include "dt_probe.h" >> + >> +static const char??????? prvname[] = "tcp"; >> +static const char??????? modname[] = "vmlinux"; >> + >> +enum { >> +??? NET_PROBE_OUTBOUND = 0, >> +??? NET_PROBE_INBOUND, >> +??? NET_PROBE_STATE >> +}; >> + >> +static probe_dep_t??? probes[] = { >> +??? /* does not fire on UEK7 unless rawfbt; no idea why... */ >> +??? { "accept-established", >> +????? DTRACE_PROBESPEC_NAME,??? "rawfbt::tcp_init_transfer:entry" }, >> +??? { "accept-refused", >> +????? DTRACE_PROBESPEC_NAME,??? 
"fbt::tcp_v4_send_reset:entry" }, >> +??? { "accept-refused", >> +????? DTRACE_PROBESPEC_NAME,??? "fbt::tcp_v6_send_reset:entry" }, >> +??? { "connect-established", >> +????? DTRACE_PROBESPEC_NAME,??? "fbt::tcp_finish_connect:entry" }, >> +??? { "connect-refused", >> +????? DTRACE_PROBESPEC_NAME,??? "fbt::tcp_reset:entry" }, >> +??? { "connect-request", >> +????? DTRACE_PROBESPEC_NAME,??? "fbt::ip_queue_xmit:entry" }, >> +??? /* ip6_xmit has > 6 args so cannot fentry on aarch64; use rawfbt */ >> +??? { "connect-request", >> +????? DTRACE_PROBESPEC_NAME,??? "rawfbt::ip6_xmit:entry" }, >> +??? { "receive", >> +????? DTRACE_PROBESPEC_NAME,??? "fbt::tcp_rcv_established:entry" }, >> +??? { "receive", >> +????? DTRACE_PROBESPEC_NAME,??? "fbt::tcp_rcv_state_process:entry" }, >> +??? { "receive", >> +????? DTRACE_PROBESPEC_NAME,??? "fbt::tcp_v4_send_reset:entry" }, >> +??? { "send", >> +????? DTRACE_PROBESPEC_NAME,??? "fbt::ip_queue_xmit:entry" }, >> +??? /* ip_send_unicast_reply has 10 args so cannot fentry; use rawfbt */ >> +??? { "send", >> +????? DTRACE_PROBESPEC_NAME,??? "rawfbt::ip_send_unicast_reply:entry" }, >> +??? { "send", >> +????? DTRACE_PROBESPEC_NAME,??? "fbt::ip_build_and_send_pkt" }, >> +??? /* ip6_xmit has > 6 args so cannot fentry on aarch64; use rawfbt */ >> +??? { "send", >> +????? DTRACE_PROBESPEC_NAME,??? "rawfbt::ip6_xmit:entry" }, >> +??? { "state-change", >> +????? DTRACE_PROBESPEC_NAME,??? "sdt:::inet_sock_set_state" }, >> +??? { "state-change", >> +????? DTRACE_PROBESPEC_NAME,??? "fbt::tcp_time_wait:entry" }, >> +??? { "state-change", >> +????? DTRACE_PROBESPEC_NAME,??? "fbt::inet_csk_clone_lock:entry" }, >> +??? { NULL, } >> +}; >> + >> +static probe_arg_t probe_args[] = { >> +??? { "accept-established", 0, { 0, 0, "struct sk_buff *", "pktinfo_t >> *" } }, >> +??? { "accept-established", 1, { 1, 0, "struct sock *", "csinfo_t >> *" } }, >> +??? { "accept-established", 2, { 2, 0, "void_ip_t *", "ipinfo_t *" } }, >> +??? 
{ "accept-established", 3, { 3, 0, "struct tcp_sock *", >> "tcpsinfo_t *" } }, >> +??? { "accept-established", 4, { 4, 0, "struct tcphdr *", "tcpinfo_t >> *" } }, >> +??? { "accept-established", 5, { 5, 0, "unsigned char", "int"} }, >> +??? { "accept-established", 6, { 6, 0, "unsigned char", "tcplsinfo_t >> *" } }, >> +??? { "accept-established", 7, { 7, 0, "int", "int" } }, >> + >> +??? { "accept-refused", 0, { 0, 0, "struct sk_buff *", "pktinfo_t >> *" } }, >> +??? { "accept-refused", 1, { 1, 0, "struct sock *", "csinfo_t *" } }, >> +??? { "accept-refused", 2, { 2, 0, "void_ip_t *", "ipinfo_t *" } }, >> +??? { "accept-refused", 3, { 3, 0, "struct tcp_sock *", "tcpsinfo_t >> *" } }, >> +??? { "accept-refused", 4, { 4, 0, "struct tcphdr *", "tcpinfo_t *" } }, >> +??? { "accept-refused", 5, { 5, 0, "unsigned char", "int"} }, >> +??? { "accept-refused", 6, { 6, 0, "unsigned char", "tcplsinfo_t *" } }, >> +??? { "accept-refused", 7, { 7, 0, "int", "int" } }, >> + >> +??? { "connect-established", 0, { 0, 0, "struct sk_buff *", >> "pktinfo_t *" } }, >> +??? { "connect-established", 1, { 1, 0, "struct sock *", "csinfo_t >> *" } }, >> +??? { "connect-established", 2, { 2, 0, "void_ip_t *", "ipinfo_t *" } }, >> +??? { "connect-established", 3, { 3, 0, "struct tcp_sock *", >> "tcpsinfo_t *" } }, >> +??? { "connect-established", 4, { 4, 0, "struct tcphdr *", "tcpinfo_t >> *" } }, >> +??? { "connect-established", 5, { 5, 0, "unsigned char", "int"} }, >> +??? { "connect-established", 6, { 6, 0, "unsigned char", "tcplsinfo_t >> *" } }, >> +??? { "connect-established", 7, { 7, 0, "int", "int" } }, >> + >> +??? { "connect-refused", 0, { 0, 0, "struct sk_buff *", "pktinfo_t >> *" } }, >> +??? { "connect-refused", 1, { 1, 0, "struct sock *", "csinfo_t *" } }, >> +??? { "connect-refused", 2, { 2, 0, "void_ip_t *", "ipinfo_t *" } }, >> +??? { "connect-refused", 3, { 3, 0, "struct tcp_sock *", "tcpsinfo_t >> *" } }, >> +??? 
{ "connect-refused", 4, { 4, 0, "struct tcphdr *", "tcpinfo_t >> *" } }, >> +??? { "connect-refused", 5, { 5, 0, "unsigned char", "int"} }, >> +??? { "connect-refused", 6, { 6, 0, "unsigned char", "tcplsinfo_t >> *" } }, >> +??? { "connect-refused", 7, { 7, 0, "int", "int" } }, >> + >> +??? { "connect-request", 0, { 0, 0, "struct sk_buff *", "pktinfo_t >> *" } }, >> +??? { "connect-request", 1, { 1, 0, "struct sock *", "csinfo_t *" } }, >> +??? { "connect-request", 2, { 2, 0, "__dtrace_tcp_void_ip_t *", >> "ipinfo_t *" } }, >> +??? { "connect-request", 3, { 3, 0, "struct tcp_sock *", "tcpsinfo_t >> *" } }, >> +??? { "connect-request", 4, { 4, 0, "struct tcphdr *", "tcpinfo_t >> *" } }, >> +??? { "connect-request", 5, { 5, 0, "unsigned char", "int"} }, >> +??? { "connect-request", 6, { 6, 0, "unsigned char", "tcplsinfo_t >> *" } }, >> +??? { "connect-request", 7, { 7, 0, "int", "int" } }, >> + >> +??? { "receive", 0, { 0, 0, "struct sk_buff *", "pktinfo_t *" } }, >> +??? { "receive", 1, { 1, 0, "struct sock *", "csinfo_t *" } }, >> +??? { "receive", 2, { 2, 0, "void_ip_t *", "ipinfo_t *" } }, >> +??? { "receive", 3, { 3, 0, "struct tcp_sock *", "tcpsinfo_t *" } }, >> +??? { "receive", 4, { 4, 0, "struct tcphdr *", "tcpinfo_t *" } }, >> +??? { "receive", 5, { 5, 0, "unsigned char", "int"} }, >> +??? { "receive", 6, { 6, 0, "unsigned char", "tcplsinfo_t *" } }, >> +??? { "receive", 7, { 7, 0, "int", "int" } }, >> + >> +??? { "send", 0, { 0, 0, "struct sk_buff *", "pktinfo_t *" } }, >> +??? { "send", 1, { 1, 0, "struct sock *", "csinfo_t *" } }, >> +??? { "send", 2, { 2, 0, "__dtrace_tcp_void_ip_t *", "ipinfo_t *" } }, >> +??? { "send", 3, { 3, 0, "struct tcp_sock *", "tcpsinfo_t *" } }, >> +??? { "send", 4, { 4, 0, "struct tcphdr *", "tcpinfo_t *" } }, >> +??? { "send", 5, { 5, 0, "unsigned char", "int"} }, >> +??? { "send", 6, { 6, 0, "unsigned char", "tcplsinfo_t *" } }, >> +??? { "send", 7, { 7, 0, "int", "int" } }, >> + >> +??? 
{ "state-change", 0, { 0, 0, "void *", "void *", } }, >> +??? { "state-change", 1, { 1, 0, "struct sock *", "csinfo_t *" } }, >> +??? { "state-change", 2, { 2, 0, "void *", "void *" } }, >> +??? { "state-change", 3, { 3, 0, "struct tcp_sock *", "tcpsinfo_t >> *" } }, >> +??? { "state-change", 4, { 4, 0, "void *", "void *" } }, >> +??? { "state-change", 5, { 5, 0, "void *", "void *" } }, >> +??? { "state-change", 6, { 6, 0, "struct sock *", "tcplsinfo_t *" } }, >> +??? { "state-change", 7, { 7, 0, "int", "int" } }, >> + >> +??? { NULL, } >> +}; >> + >> +static const dtrace_pattr_t??? pattr = { >> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, >> DTRACE_CLASS_ISA }, >> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, >> DTRACE_CLASS_UNKNOWN }, >> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, >> DTRACE_CLASS_UNKNOWN }, >> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, >> DTRACE_CLASS_ISA }, >> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, >> DTRACE_CLASS_ISA }, >> +}; >> + >> +/* >> + * Provide all the "tcp" SDT probes. >> + */ >> +static int populate(dtrace_hdl_t *dtp) >> +{ >> +??? return dt_sdt_populate(dtp, prvname, modname, &dt_tcp, &pattr, >> +?????????????????? probe_args, probes); >> +} >> + >> +/* >> + * Generate a BPF trampoline for a SDT probe. >> + * >> + * The trampoline function is called when a SDT probe triggers, and >> it must >> + * satisfy the following prototype: >> + * >> + *??? int dt_tcp(void *data) >> + * >> + * The trampoline will populate a dt_dctx_t struct and then call the >> function >> + * that implements the compiled D clause.? It returns the value that >> it gets >> + * back from that function. >> + */ >> +static int trampoline(dt_pcb_t *pcb, uint_t exitlbl) >> +{ >> +??? dt_irlist_t??? *dlp = &pcb->pcb_ir; >> +??? dt_probe_t??? *prp = pcb->pcb_probe; >> +??? dt_probe_t??? *uprp = pcb->pcb_parent_probe; >> +??? int??????? direction, have_iphdr; >> +??? int??????? 
skarg = 0, skbarg = 1, tcparg = 0; >> +??? int??????? skarg_maybe_null; >> +??? int??????? skstate = 0; >> + >> +??? /* >> +???? * We construct the tcp::: probe arguments as >> +???? * follows: >> +???? *????? args[0] = skb >> +???? *????? args[1] = sk >> +???? *????? args[2] = ip_hdr(skb) [if available] >> +???? *????? args[3] = sk [struct tcp_sock *] >> +???? *????? args[4] = tcp_hdr(skb) >> +???? *????? args[5] = sk->sk_state >> +???? *????? args[6] = sk->sk_state >> +???? *????? args[7] = NET_PROBE_INBOUND (0x1) | NET_PROBE_OUTBOUND (0x0) >> +???? */ >> + >> +??? if (strcmp(prp->desc->prb, "state-change") == 0) { >> +??????? int newstatearg; >> +??????? int skip_state = 0; >> +??????? int check_proto = IPPROTO_TCP; >> + >> +??????? /* For pre-6.14 kernels, inet_sock_state_change() to >> +???????? * TCP_SYN_RCV is broken in that the cloned socket has >> +???????? * not yet copied info of interest like addresses, ports. >> +???????? * This is fixed in 6.14 via >> +???????? * >> +???????? * commit a3a128f611a965fddf8a02dd45716f96e0738e00 >> +???????? * Author: Eric Dumazet >> +???????? * Date:?? Wed Feb 12 13:13:28 2025 +0000 >> +???????? * >> +???????? * inet: consolidate inet_csk_clone_lock() >> +???????? * >> +???????? * To work around this we trace inet_csk_clone_lock and >> +???????? * use the reqsk (arg1) as the means to populate the >> +???????? * struct tcpinfo.? We need then to explicitly set the >> +???????? * state to TCP_SYN_RCV and also skip the case where >> +???????? * inet_sock_set_state() specifies TCP_SYN_RCV otherwise >> +???????? * we will get a probe double-firing. >> +???????? */ >> +??????? if (strcmp(uprp->desc->fun, "inet_csk_clone_lock") == 0) { >> +??????????? skarg = 1; >> +??????????? newstatearg = 2; >> +??????????? check_proto = 0; >> +??????????? emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(2), >> +??????????????????????? BPF_TCP_SYN_RECV)); >> +??????? 
} else if (strcmp(uprp->desc->fun, "tcp_time_wait") == 0) { >> +??????????? skarg = 0; >> +??????????? newstatearg = 1; >> +??????? } else { >> +??????????? skarg = 0; >> +??????????? newstatearg = 2; >> +??????????? skip_state = BPF_TCP_SYN_RECV; >> +??????? } >> +??????? emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, >> DMST_ARG(skarg))); >> +??????? emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_6, 0, exitlbl)); >> +??????? /* check it is a TCP socket */ >> +??????? if (check_proto) { >> +??????????? dt_cg_get_member(pcb, "struct sock", BPF_REG_6, >> +???????????????????? "sk_protocol"); >> +??????????? emit(dlp, BPF_BRANCH_IMM(BPF_JNE, BPF_REG_0, >> +???????????????????????? IPPROTO_TCP, exitlbl)); >> +??????? } >> +??????? /* save sk */ >> +??????? emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, >> DMST_ARG(skarg))); >> +??????? emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(3), BPF_REG_6)); >> + >> +??????? /* save new state */ >> +??????? emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, >> DMST_ARG(newstatearg))); >> +??????? if (skip_state) { >> +??????????? emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_6, skip_state, >> +???????????????????????? exitlbl)); >> +??????? } >> +??????? emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(6), BPF_REG_6)); >> + >> +??????? /* save sk */ >> +??????? emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(3))); >> +??????? emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_6)); >> + >> +??????? /* save empty args */ >> +??????? emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(0), 0)); >> +??????? emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(2), 0)); >> +??????? emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(4), 0)); >> +??????? emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(5), 0)); >> + >> +??????? /* NET_PROBE_STATE */ >> +??????? emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(7), >> +??????????????????? NET_PROBE_STATE)); >> +??????? return 0; >> +??? } >> + >> +??? 
if (strcmp(prp->desc->prb, "accept-established") == 0) { >> +??????? direction = NET_PROBE_OUTBOUND; >> +??????? have_iphdr = 1; >> +??????? /* skb in arg2 not arg1 */ >> +??????? skbarg = 2; >> +??????? skarg_maybe_null = 0; >> +??????? /* ensure arg1 is BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB */ >> +??????? emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(1))); >> +??????? emit(dlp, BPF_BRANCH_IMM(BPF_JNE, BPF_REG_6, >> +???????????????????? BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, >> +???????????????????? exitlbl)); >> +??? } else if (strcmp(prp->desc->prb, "receive") == 0 || >> +?????????? strcmp(prp->desc->prb, "accept-refused") == 0) { >> +??????? direction = NET_PROBE_INBOUND; >> +??????? have_iphdr = 1; >> +??????? if (strcmp(uprp->desc->fun, "tcp_v4_send_reset") == 0 || >> +??????????? strcmp(uprp->desc->fun, "tcp_v6_send_reset") == 0) >> +??????????? skarg_maybe_null = 1; >> +??????? else >> +??????????? skarg_maybe_null = 0; >> +??? } else if (strcmp(prp->desc->prb, "connect-established") == 0) { >> +??????? direction = NET_PROBE_INBOUND; >> +??????? have_iphdr = 1; >> +??????? skarg_maybe_null = 0; >> +??? } else if (strcmp(prp->desc->prb, "connect-refused") == 0) { >> +??????? direction = NET_PROBE_INBOUND; >> +??????? have_iphdr = 1; >> +??????? skarg_maybe_null = 0; >> +??????? skstate = BPF_TCP_SYN_SENT; >> +??? } else { >> +??????? direction = NET_PROBE_OUTBOUND; >> +??????? if (strcmp(uprp->desc->fun, "ip_send_unicast_reply") == 0) { >> +??????????? /* NULL sk in arg1 not arg2 (we dont want ctl_sk) */ >> +??????????? skarg = 1; >> +??????????? /* skb in arg2 not arg1 */ >> +??????????? skbarg = 2; >> +??????????? have_iphdr = 1; >> +??????????? /* tcp hdr in ip_reply_arg * */ >> +??????????? tcparg = 6; >> +??????????? skarg_maybe_null = 1; >> +??????? } else if (strcmp(uprp->desc->fun, "ip_build_and_send_pkt") >> == 0) { >> +??????????? skarg = 1; >> +??????????? skbarg = 0; >> +??????????? have_iphdr = 0; >> +??????????? 
skarg_maybe_null = 1; >> +??????? } else if (strcmp(prp->desc->prb, "connect-request") == 0) { >> +??????????? skstate = BPF_TCP_SYN_SENT; >> +??????????? have_iphdr = 0; >> +??????????? skarg_maybe_null = 0; >> +??????? } else { >> +??????????? have_iphdr = 0; >> +??????????? skarg_maybe_null = 0; >> +??????? } >> +??? } >> + >> +??? /* first save sk to args[3]; this avoids overwriting it when we >> +???? * populate args[0,1] below. >> +???? */ >> +??? emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(skarg))); >> +??? /* only allow NULL sk for ip_send_unicast_reply() */ >> +??? if (!skarg_maybe_null) >> +??????? emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_6, 0, exitlbl)); >> +??? emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(3), BPF_REG_6)); >> + >> +??? /* then save skb to args[0] */ >> +??? emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(skbarg))); >> +??? emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_6, 0, exitlbl)); >> +??? emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_6)); >> + >> +??? /* next save sk to args[1] now that we have skb in args[0] */ >> +??? emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(3))); >> +??? emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_6)); >> + >> +??? /* >> +???? * ip_hdr(skb) = >> +???? *??? skb_network_header(skb)??? =??? (include/linux/ip.h) >> +???? *??? skb->head + skb->network_header??? (include/linux/skbuff.h) >> +???? */ >> +??? emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(0))); >> +??? dt_cg_get_member(pcb, "struct sk_buff", BPF_REG_6, "head"); >> +??? if (have_iphdr) >> +??????? emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(2), BPF_REG_0)); >> +??? else >> +??????? emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(2), 0)); >> + >> +??? if (have_iphdr) { >> +??????? dt_cg_get_member(pcb, "struct sk_buff", BPF_REG_6, >> +???????????????? "network_header"); >> +??????? emit(dlp, BPF_XADD_REG(BPF_DW, BPF_REG_7, DMST_ARG(2), >> BPF_REG_0)); >> +??? } >> +??? 
/* >> +???? * tcp_hdr(skb) = >> +???? *??? skb_transport_header(skb) =??????? (include/linux/ip.h) >> +???? *??? skb->head + skb->transport_header??? (include/linux/skbuff.h) >> +???? */ >> +??? emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(tcparg))); >> +??? if (tcparg) { >> +??????? /* struct ip_reply_arg * has a kvec containing the tcp header */ >> +??????? dt_cg_get_member(pcb, "struct kvec", BPF_REG_6, "iov_base"); >> +??????? emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(4), BPF_REG_0)); >> +??? } else { >> +??????? dt_cg_get_member(pcb, "struct sk_buff", BPF_REG_6, "head"); >> +??????? emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(4), BPF_REG_0)); >> +??????? dt_cg_get_member(pcb, "struct sk_buff", BPF_REG_6, >> +???????????????? "transport_header"); >> +??????? emit(dlp, BPF_XADD_REG(BPF_DW, BPF_REG_7, DMST_ARG(4), >> BPF_REG_0)); >> +??? } >> + >> +??? if (!skarg_maybe_null) { >> +??????? /* save sk state */ >> +??????? emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(3))); >> +??????? dt_cg_get_member(pcb, "struct sock_common", BPF_REG_6, >> +???????????????? "skc_state"); >> +??????? /* ensure sk state - if specified - is what we expect */ >> +??????? if (skstate) >> +??????????? emit(dlp, BPF_BRANCH_IMM(BPF_JNE, BPF_REG_0, skstate, >> +???????????????????????? exitlbl)); >> +??????? emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(5), BPF_REG_0)); >> +??????? emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(6), BPF_REG_0)); >> +??? } >> +??? emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(7), direction)); >> + >> +??? return 0; >> +} >> + >> +dt_provimpl_t??? dt_tcp = { >> +??? .name??????? = prvname, >> +??? .prog_type??? = BPF_PROG_TYPE_UNSPEC, >> +??? .populate??? = &populate, >> +??? .enable??????? = &dt_sdt_enable, >> +??? .load_prog??? = &dt_bpf_prog_load, >> +??? .trampoline??? = &trampoline, >> +??? .probe_info??? = &dt_sdt_probe_info, >> +??? .destroy??? 
= &dt_sdt_destroy, >> +}; >> diff --git a/libdtrace/dt_provider.c b/libdtrace/dt_provider.c >> index 0c621197..798e67ee 100644 >> --- a/libdtrace/dt_provider.c >> +++ b/libdtrace/dt_provider.c >> @@ -41,6 +41,7 @@ const dt_provimpl_t *dt_providers[] = { >> ????? &dt_sched, >> ????? &dt_sdt, >> ????? &dt_syscall, >> +??? &dt_tcp, >> ????? &dt_uprobe, >> ????? &dt_usdt, >> ????? NULL >> diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h >> index 59a8d62e..4db89b45 100644 >> --- a/libdtrace/dt_provider.h >> +++ b/libdtrace/dt_provider.h >> @@ -87,6 +87,7 @@ extern dt_provimpl_t dt_rawtp; >> ? extern dt_provimpl_t dt_sched; >> ? extern dt_provimpl_t dt_sdt; >> ? extern dt_provimpl_t dt_syscall; >> +extern dt_provimpl_t dt_tcp; >> ? extern dt_provimpl_t dt_uprobe; >> ? extern dt_provimpl_t dt_usdt; >> ? diff --git a/libdtrace/ip.d b/libdtrace/ip.d >> index f8b77f12..d59bb436 100644 >> --- a/libdtrace/ip.d >> +++ b/libdtrace/ip.d >> @@ -51,7 +51,6 @@ inline int TCP_MIN_HEADER_LENGTH =??? 20; >> ?? * to the net namespace (nd_net in struct net_device). >> ?? */ >> ? typedef uint64_t??? netstackid_t; >> -typedef __be32??????? ipaddr_t; >> ? typedef struct in6_addr??? in6_addr_t; >> ? ? /* >> diff --git a/libdtrace/net.d b/libdtrace/net.d >> index 6ac34287..45b5cba3 100644 >> --- a/libdtrace/net.d >> +++ b/libdtrace/net.d >> @@ -25,9 +25,13 @@ typedef struct conninfo { >> ?????? string ci_protocol;??? /* protocol (ipv4, ipv6, etc) */ >> ? } conninfo_t; >> ? +typedef __be32 ipaddr_t; >> + >> ? /* >> ?? * We use these values to determine if a probe point is associated >> - * with sending (outbound) or receiving (inbound). >> + * with sending (outbound) or receiving (inbound) or a state-related >> + * probe (i.e. neither in our outbound). >> ?? */ >> ? inline int NET_PROBE_OUTBOUND =??????? 0x00; >> ? inline int NET_PROBE_INBOUND =??????? 0x01; >> +inline int NET_PROBE_STATE =??????? 
0x02; >> diff --git a/libdtrace/tcp.d b/libdtrace/tcp.d >> index 54e310cb..d4beea87 100644 >> --- a/libdtrace/tcp.d >> +++ b/libdtrace/tcp.d >> @@ -8,7 +8,6 @@ >> ? #pragma D depends_on module vmlinux >> ? #pragma D depends_on library net.d >> ? #pragma D depends_on provider ip >> -#pragma D depends_on provider tcp >> ? ? inline int TH_FIN =??? 0x01; >> ? inline int TH_SYN =??? 0x02; >> @@ -60,7 +59,7 @@ typedef struct tcpinfo { >> ????? uint32_t tcp_seq;??????? /* sequence number */ >> ????? uint32_t tcp_ack;??????? /* acknowledgment number */ >> ????? uint8_t tcp_offset;??????? /* data offset, in bytes */ >> -??? uint8_t tcp_flags;??????? /* flags */ >> +??? uint16_t tcp_flags;??????? /* flags */ >> ????? uint16_t tcp_window;??????? /* window size */ >> ????? uint16_t tcp_checksum;??????? /* checksum */ >> ????? uint16_t tcp_urgent;??????? /* urgent data pointer */ >> @@ -111,13 +110,16 @@ translator tcpinfo_t < struct tcphdr *T > { >> ????? tcp_seq = T ? ntohl(T->seq) : 0; >> ????? tcp_ack = T ? ntohl(T->ack_seq) : 0; >> ????? tcp_offset = T ? (*(uint8_t *)(T + 12) & 0xf0) >> 2 : 0; >> -??? tcp_flags = T ? *(uint8_t *)(T + 13) : 0; >> +??? tcp_flags = T ? *((uint8_t *)T + 13) : 0; >> ????? tcp_window = T ? ntohs(T->window) : 0; >> ????? tcp_checksum = T ? ntohs(T->check) : 0; >> ????? tcp_urgent = T ? ntohs(T->urg_ptr) : 0; >> ????? tcp_hdr = (uintptr_t)T; >> ? }; >> ? +inline int tcp_fullsock[struct tcp_sock *sk] = >> +??? (((struct sock_common *)sk)->skc_state != TCP_STATE_SYN_RECEIVED && >> +???? ((struct sock_common *)sk)->skc_state != TCP_STATE_TIME_WAIT); >> ? /* >> ?? * In the main we simply translate from the "struct [tcp_]sock *" to >> ?? * a tcpsinfo_t *.? However there are a few exceptions: >> @@ -158,47 +160,45 @@ translator tcpsinfo_t < struct tcp_sock *T > { >> ????????????? ((uint32_t *)&((struct sock *)T)- >> >__sk_common.skc_v6_daddr)[2] && >> ????????? ((uint32_t *)&((struct sock *)T)- >> >__sk_common.skc_v6_rcv_saddr)[3]) >> ????????? 
: 0; >> -??? tcps_lport = (T && ((struct inet_sock *)T)->inet_sport != 0) ? >> +??? tcps_lport = T && ((struct inet_sock *)T)->inet_sport != 0 && >> +??????? tcp_fullsock[T] ? >> ????????? ntohs(((struct inet_sock *)T)->inet_sport) : >> ????????? (T && ((struct inet_sock *)T)->inet_sport == 0) ? >> -??????? ntohs(((struct sock *)T)->__sk_common.skc_num) : >> +??????? ((struct sock *)T)->__sk_common.skc_num : >> ????????? arg4 != NULL ? >> ????????? ntohs(arg7 == NET_PROBE_INBOUND ? >> -??????? ((struct tcphdr *)arg4)->dest : ((struct tcphdr *)arg4)- >> >source) : >> +????????? ((struct tcphdr *)arg4)->dest : >> +????????? ((struct tcphdr *)arg4)->source) : >> ????????? 0; >> ????? tcps_rport = T && ((struct sock *)T)->__sk_common.skc_dport != 0 ? >> ????????? ntohs(((struct sock *)T)->__sk_common.skc_dport) : >> ????????? arg4 != NULL ? >> ????????? ntohs(arg7 == NET_PROBE_INBOUND ? >> -??????????? ((struct tcphdr *)arg4)->source : ((struct tcphdr >> *)arg4)->dest) : >> +????????? ((struct tcphdr *)arg4)->source : >> +????????? ((struct tcphdr *)arg4)->dest) : >> ????????? 0; >> ????? tcps_laddr = >> ????????? T && ((struct sock *)T)->__sk_common.skc_family == AF_INET ? >> ????????? inet_ntoa(&((struct sock *)T)->__sk_common.skc_rcv_saddr) : >> ????????? T && ((struct sock *)T)->__sk_common.skc_family == AF_INET6 ? >> ????????? inet_ntoa6(&((struct sock *)T)->__sk_common.skc_v6_rcv_saddr) : >> -??????? arg2 != NULL && (*(uint8_t *)arg2) >> 4 == 4 ? >> -??????? inet_ntoa(arg7 == NET_PROBE_INBOUND ? >> -??????? &((struct iphdr *)arg2)->daddr : &((struct iphdr *)arg2)- >> >saddr) : >> -??????? arg2 != NULL && *((uint8_t *)arg2) >> 4 == 6 ? >> -??????? inet_ntoa6(arg7 == NET_PROBE_INBOUND ? >> -??????? &((struct ipv6hdr *)arg2)->daddr : >> -??????? &((struct ipv6hdr *)arg2)->saddr) : >> +??????? arg2 != NULL && (*(uint8_t *)arg2 >> 4) == 4 ? >> +??????? inet_ntoa(&((struct iphdr *)arg2)->daddr) : >> +??????? arg2 != NULL && (*(uint8_t *)arg2 >> 4) == 6 ? >> +??????? 
inet_ntoa6(&((struct ipv6hdr *)arg2)->daddr) : >> ????????? ""; >> ????? tcps_raddr = >> ????????? T && ((struct sock *)T)->__sk_common.skc_family == AF_INET ? >> ????????? inet_ntoa(&((struct sock *)T)->__sk_common.skc_daddr) : >> ????????? T && ((struct sock *)T)->__sk_common.skc_family == AF_INET6 ? >> ????????? inet_ntoa6(&((struct sock *)T)->__sk_common.skc_v6_daddr) : >> -??????? arg2 != NULL && (*(uint8_t *)arg2) >> 4 == 4 ? >> -??????? inet_ntoa(arg7 == NET_PROBE_INBOUND ? >> -??????? &((struct iphdr *)arg2)->saddr : &((struct iphdr *)arg2)- >> >daddr) : >> -??????? arg2 != NULL && *((uint8_t *)arg2) >> 4 == 6 ? >> -??????? inet_ntoa6(arg7 == NET_PROBE_INBOUND ? >> -??????? &((struct ipv6hdr *)arg2)->saddr : >> -??????? &((struct ipv6hdr *)arg2)->daddr) : >> -??????? ""; >> -??? tcps_state = arg6; >> +??????? arg2 != NULL && (*(uint8_t *)arg2 >> 4) == 4 ? >> +??????? inet_ntoa(&((struct iphdr *)arg2)->saddr) : >> +??????? arg2 != NULL && (*(uint8_t *)arg2 >> 4) == 6 ? >> +??????? inet_ntoa6(&((struct ipv6hdr *)arg2)->saddr) : >> +??????? "> +??? tcps_state = arg7 == NET_PROBE_STATE ? arg6 : >> +??????? T ? ((struct sock *)T)->__sk_common.skc_state : 0; >> ????? tcps_iss = T ? >> ????????? T->snd_una - (uint32_t)T->bytes_acked : 0; >> ????? tcps_suna = T ? T->snd_una : 0; >> @@ -229,3 +229,9 @@ translator tcpsinfo_t < struct tcp_sock *T > { >> ? translator tcplsinfo_t < int I > { >> ????? tcps_state = I; >> ? }; >> + >> +/* For tracepoint, the last state is in the sock state, next passed >> as arg6 */ >> +#pragma D binding "1.6.3" translator >> +translator tcplsinfo_t < struct sock *S > { >> +??? tcps_state = S ? 
S->__sk_common.skc_state : 0; >> +}; From eugene.loh at oracle.com Wed Jul 2 20:22:27 2025 From: eugene.loh at oracle.com (Eugene Loh) Date: Wed, 2 Jul 2025 16:22:27 -0400 Subject: [DTrace-devel] [PATCH v2 2/4] dt_impl: bump number of TSLOTS to 8 In-Reply-To: References: <20250610135813.15746-1-alan.maguire@oracle.com> <20250610135813.15746-3-alan.maguire@oracle.com> <4a079f61-5806-68b8-9051-af918a822adc@oracle.com> Message-ID: <2d8d7b61-29ef-04ae-bffd-0dcb0709a106@oracle.com> On 7/2/25 10:52, Alan Maguire wrote: > On 01/07/2025 19:31, Eugene Loh wrote: >> Very simple patch, but one key problem.  The changed line is preceded by >> a huge comment block that goes to excruciating pains to explain why the >> value should be 4.  So, I'm fine with the change itself and I think the >> "Because of the complexity..." sentence in the commit message can be >> dropped, but the comment block in the file needs to be updated to >> explain (with corresponding pains?) why we need 8 (not 9, not 7). >> > Ah good catch, forgot to do this. I actually think 6 is enough. > The additional seems to be a result of the various complex inet_ntoa*() > calls in the tcp.d translators; some have ternary operators with > multiple inet_ntoa*()s. The origin appears to be dt_cg_ternary_op() and > that has the comment: > > /* > * Strings complicate things a bit because dn_left and dn_right > might > * actually be temporary strings (tstring) *and* in different slots. > * We need to allocate a new tstring to hold the result, and > copy the > * value into the new tstring (and free any tstrings in dn_left and > * dn_right). > */ > > So if we have a left and right ternary and both allocate 3 tstring slots > for inet_ntoa*()s that means 3*2 = 6 tstring slots should actually be > enough. Additional testing suggests that is so; does the above make > sense or are there additional things to consider here? Ha!  I do not know;  I prefer NOT to have a headache.  
But maybe there should also be some tstring stress test using inet_ntoa*() and ternary ops. > I'll update the > next version with a comment describing the above anyhow. Thanks for the > review! > Alan > > >> On 6/10/25 09:58, Alan Maguire wrote: >>> Because of the complexity of the TCP translators, more tslots are >>> needed. >>> >>> Signed-off-by: Alan Maguire >>> --- >>>   libdtrace/dt_impl.h | 2 +- >>>   1 file changed, 1 insertion(+), 1 deletion(-) >>> >>> diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h >>> index 68fb8ec5..10424f9c 100644 >>> --- a/libdtrace/dt_impl.h >>> +++ b/libdtrace/dt_impl.h >>> @@ -218,7 +218,7 @@ typedef struct dt_kern_path { >>>    * - cleanpath() holds a prepended '/' char, a string, an appended >>> '/' char, >>>    *   and a terminating NUL char, or STRSZ + 3 chars altogether >>>    */ >>> -#define DT_TSTRING_SLOTS    4 >>> +#define DT_TSTRING_SLOTS    8 >>>   #define DT_TSTRING_SIZE(dtp)    \ >>>           MAX(P2ROUNDUP((dtp)->dt_options[DTRACEOPT_STRSIZE] + 3, 8), \ >>>               72) From eugene.loh at oracle.com Thu Jul 3 00:02:00 2025 From: eugene.loh at oracle.com (Eugene Loh) Date: Wed, 2 Jul 2025 20:02:00 -0400 Subject: [DTrace-devel] [PATCH v2 3/4] dtrace: add tcp provider In-Reply-To: References: <20250610135813.15746-1-alan.maguire@oracle.com> <20250610135813.15746-4-alan.maguire@oracle.com> <8ed88a3b-6e37-a696-d6bb-38745c144995@oracle.com> Message-ID: <148eabc7-ee00-33fe-b013-0f48fda0163a@oracle.com> On 7/2/25 11:06, Alan Maguire wrote: > On 02/07/2025 00:16, Eugene Loh wrote: >> On most VMs, >>     test/unittest/tcp/tst.ipv4remotetcp.sh >>     test/unittest/tcp/tst.ipv4remotetcpstate.sh >> xfail due to missing remote.  Are we okay with "shrugging our shoulders" >> like that? > Yeah, I don't think the remote test is robust enough. Specifically in > OCI it seems to always fail. 
I'd suggest we replace it with creating a > network namespace with IP addresses configured on top of veths to > simulate the remote case, the codepaths will be the same. I've done this > in other test suites and it works well. Sounds great (if "we" is "you", haha). >> Meanwhile, my one non-OCI VM ran those tests.  The first test passes. >> The second one consistently reports >>     -tcp:::state-change to time-wait - yes >>     +tcp:::state-change to time-wait - no > I hit some of these failures during development; adding the > fbt::tcp_time_wait:entry probe helped. Is that inlined or something > perhaps (grep tcp_time_wait /proc/kallsyms)? On the VM in question: # grep -w tcp_time_wait /proc/kallsyms ffffffff92ad25b0 T tcp_time_wait # dtrace -lP fbt |& grep tcp_time_wait 49373        fbt           vmlinux                     tcp_time_wait return 49372        fbt           vmlinux                     tcp_time_wait entry # dtrace -lP rawfbt |& grep tcp_time_wait 51079     rawfbt           vmlinux                     tcp_time_wait return 51078     rawfbt           vmlinux                     tcp_time_wait entry >> and occasionally reports stuff like >>     dtrace: error in dt_clause_2 for probe ID 4976 (tcp:vmlinux::send): >> invalid address (0x1fc0c0000000000) at BPF pc 287 >>     dtrace: error in dt_clause_2 for probe ID 4976 (tcp:vmlinux::send): >> invalid address (0x225b80000000000) at BPF pc 287 >> > ah, ok there must be a null deref somewhere. Haven't seen this before; > what kernel version/arch is this? 5.15.0-300.161.13.el9uek.x86_64 FWIW, I can comment out all probes in tcp other than:         { "send", DTRACE_PROBESPEC_NAME, "rawfbt::ip_send_unicast_reply:entry" }, Then I run dtrace -c "$testdir/client.ip.pl tcp $dest $tcpport" -qn 'tcp:::send /args[2]->ip_saddr == "'$source'"/ { tcpsend++; }' The disassembly shows that I look up args[2] using dt_bvar_args() (including checking for a fault).  Then we try to dereference args[2]->ip_saddr.  
We first check the pointer is non NULL.  Then we call dt_cg_load_scalar() to bpf_probe_read() from the desired location.  This call is problematic. >> The non-remote tests fail on OL8 UEK6 (x86 and arm). >>     dtrace: failed to compile script /dev/stdin: >>     ".../build/dlibs/5.2/tcp.d", line 177: failed to resolve type of >> inet_ntoa arg#1 (ipaddr_t *): >>     Unknown type name >> > This is a weird failure; I see it on some systems but not on others. > In tcp.d we have > > #pragma D depends_on library net.d > > which contains the typedef for ipaddr_t ; it seems that's not enough to > pull in the typedef reliably. I suspect there is a timing element > involved here in when the net.d library is included. Perhaps there is a > better way to define ipaddr_t ; would using a builtin typedef in > _dtrace_typedefs_32/64 work better perhaps? Don't know. >> The probe names are >>     tcp:ip:*:*        Solaris >>     tcp:vmlinux:*:*   DTv1 >>     tcp:vmlinux::*    with this patch (that is, no more function) >> I guess precedents have already been set for other SDT providers;  so, >> okay.  Just noting for my own sake. >> Meanwhile, the typed args[] have changed in number and type from Solaris >> to DTv1 to this patch.  Does that merit discussion? > Hmm, that's not intentional (aside from the additional INBOUND/OUTBOUND > etc which we use to help inform translation). Worth mentioning somewhere? > Do you see other changes aside from them? Thanks! This is what I have for typed args[] for tcp probes. The typed probe arguments for probes         accept-[refused|established]         connect-[refused|established|request]         receive are the same as for send. The typed probe arguments for state-change may be different. So, the typed probe arguments are (wide screen, fixed-width font): args[0]:      args[1]:      args[2]:      args[3]: args[4]:      args[5]:      args[6]:      args[7]:             send Solaris         pktinfo_t *   csinfo_t * ipinfo_t *    tcpsinfo_t *  
tcpinfo_t *             send DTv1            (unknown)     (unknown) (unknown)     (unknown)     (unknown)     (unknown) int           int             send DTv2            pktinfo_t *   csinfo_t * ipinfo_t *    tcpsinfo_t *  tcpinfo_t *   int tcplsinfo_t * int             state-change Solaris void          csinfo_t * void          tcpsinfo_t *  void          tcplsinfo_t *             state-change DTv1    (unknown)     (unknown) (unknown)     (unknown)     (unknown)     (unknown) int           int             state-change DTv2    void      *   csinfo_t * void     *    tcpsinfo_t *  void      *   void * tcplsinfo_t * int Here, "DTv1" refers to legacy DTrace on Linux.  I guess we can ignore that.  By "DTv2" I mean your patch.  For state-change, Solaris calls some things "void" (not "void *") and tcplsinfo_t* moves from args[5] to args[6]. From alan.maguire at oracle.com Thu Jul 3 11:33:45 2025 From: alan.maguire at oracle.com (Alan Maguire) Date: Thu, 3 Jul 2025 12:33:45 +0100 Subject: [DTrace-devel] [PATCH] test/utils: add more reliable "get remote address" approach Message-ID: <20250703113345.1273604-1-alan.maguire@oracle.com> The current approach of looking for remote addresses is brittle and fails in many environments; it checks the default route gateway and looks for open ports in the TCP case. We can however achieve the same goal reliably by creating a network namespace on the system and configuring either IPv4 or IPv6 addresses on the namespaced and local veth interfaces that support communication between namespaces. If a tcp port is required, start sshd to listen on that port. Teardown is managed in runtest.sh as signal handling for timeouts within the test scripts is not working; a trap function does not trigger for TERM. Move the get_remote.sh script to test/utils also as it seems a more natural location. 
One issue - this cannot be run on a local system with a VPN running as the VPN connection is pretty aggressive in disconnecting/reconnecting when spotting a link-up event associated with the global netns side of the veth. However in my experience the remote IP tests do not work reliably in that environment anyway. Signed-off-by: Alan Maguire --- runtest.sh | 2 + test/unittest/ip/get.ipv4remote.pl | 87 --------------------- test/unittest/ip/get.ipv6remote.pl | 70 ----------------- test/unittest/ip/tst.ipv4remoteicmp.sh | 10 +-- test/unittest/ip/tst.ipv4remotetcp.sh | 25 ++---- test/unittest/ip/tst.ipv4remoteudp.sh | 8 +- test/unittest/ip/tst.ipv6remoteicmp.sh | 16 ++-- test/unittest/tcp/tst.ipv4remotetcp.sh | 24 ++---- test/unittest/tcp/tst.ipv4remotetcpstate.sh | 29 +++---- test/unittest/udp/tst.ipv4remoteudp.sh | 4 +- test/utils/get_remote.sh | 71 +++++++++++++++++ 11 files changed, 116 insertions(+), 230 deletions(-) delete mode 100755 test/unittest/ip/get.ipv4remote.pl delete mode 100755 test/unittest/ip/get.ipv6remote.pl create mode 100755 test/utils/get_remote.sh diff --git a/runtest.sh b/runtest.sh index 156e7dec..9f06a499 100755 --- a/runtest.sh +++ b/runtest.sh @@ -1473,6 +1473,8 @@ for dt in $dtrace; do log "\n" + test/utils/get_remote.sh cleanup + if [[ -n $regression ]]; then # If regtesting, we run a second time, with intermediate results # displayed, and output redirected to a per-test, per-dtrace diff --git a/test/unittest/ip/get.ipv4remote.pl b/test/unittest/ip/get.ipv4remote.pl deleted file mode 100755 index 3cc47d01..00000000 --- a/test/unittest/ip/get.ipv4remote.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/perl -w -# -# Oracle Linux DTrace. -# Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved. -# Licensed under the Universal Permissive License v 1.0 as shown at -# http://oss.oracle.com/licenses/upl. -# - -# -# get.ipv4remote.pl [tcpport] -# -# Find an IPv4 reachable remote host using both ip(8) and ping(8). 
-# If a tcpport is specified, return a host that is also listening on this -# TCP port. Print the local address and the remote address, or an -# error message if no suitable remote host was found. Exit status is 0 if -# a host was found. (Note: the only host we check is the gateway. Nobody -# responds to broadcast pings these days, and portscanning the local net is -# unfriendly.) -# - -use strict; -use IO::Socket; - -my $TIMEOUT = 3; -my $tcpport = @ARGV == 1 ? $ARGV[0] : 0; - -# -# Determine gateway IP address -# - -my $local = ""; -my $remote = ""; -my $responsive = ""; -my $up; -open IP, '/sbin/ip -o -4 route show |' or die "Couldn't run ip route show: $!\n"; -while () { - next unless /^default /; - - if (/via (\S+)/) { - $remote = $1; - } -} -close IP; -die "Could not determine gateway router IP address" if $remote eq ""; - -open IP, "/sbin/ip -o route get to $remote |" or die "Couldn't run ip route get: $!\n"; -while () { - next unless /^$remote /; - if (/src (\S+)/) { - $local = $1; - } -} -close IP; -die "Could not determine local IP address" if $local eq ""; - -# -# See if the rmote host responds to an icmp echo. -# -open PING, "/bin/ping -n -s 56 -w $TIMEOUT $remote |" or - die "Couldn't run ping: $!\n"; -while () { - if (/bytes from (.*): /) { - my $addr = $1; - - if ($tcpport != 0) { - # - # Test TCP - # - my $socket = IO::Socket::INET->new( - Proto => "tcp", - PeerAddr => $addr, - PeerPort => $tcpport, - Timeout => $TIMEOUT, - ); - next unless $socket; - close $socket; - } - - $responsive = $addr; - last; - } -} -close PING; -die "Can't find a remote host for testing: No suitable response from " . - "$remote\n" if $responsive eq ""; - -print "$local $responsive\n"; diff --git a/test/unittest/ip/get.ipv6remote.pl b/test/unittest/ip/get.ipv6remote.pl deleted file mode 100755 index b2136c5b..00000000 --- a/test/unittest/ip/get.ipv6remote.pl +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/perl -w -# -# Oracle Linux DTrace. 
-# Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved. -# Licensed under the Universal Permissive License v 1.0 as shown at -# http://oss.oracle.com/licenses/upl. -# - -# -# get.ipv6remote.pl -# -# Find an IPv6 reachable remote host using both ip(8) and ping(8). -# Print the local address and the remote address, or print nothing if either -# no IPv6 interfaces or remote hosts were found. (Remote IPv6 testing is -# considered optional, and so not finding another IPv6 host is not an error -# state we need to log.) Exit status is 0 if a host was found. -# - -use strict; -use IO::Socket; - -my $TIMEOUT = 3; # connection timeout - -# possible paths for ping6 -$ENV{'PATH'} = "/bin:/usr/bin:/sbin:/usr/sbin:$ENV{'PATH'}"; - -# -# Determine local IP address -# -my $local = ""; -my $remote = ""; -my $responsive = ""; -my $up; -open IP, '/sbin/ip -o -6 route show |' or die "Couldn't run ip route show: $!\n"; -while () { - next unless /^default /; - - if (/via (\S+)/) { - $remote = $1; - } -} -close IP; -die "Could not determine gateway router IPv6 address" if $remote eq ""; - -open IP, "/sbin/ip -o route get to $remote |" or die "Couldn't run ip route get: $!\n"; -while () { - next unless /^$remote /; - if (/src (\S+)/) { - $local = $1; - } -} -close IP; -die "Could not determine local IPv6 address" if $local eq ""; - -# -# Find the first remote host that responds to an icmp echo, -# which isn't a local address. -# -open PING, "ping6 -n -s 56 -w $TIMEOUT $remote 2>/dev/null |" or - die "Couldn't run ping: $!\n"; -while () { - if (/bytes from (.*): /) { - $responsive = $1; - last; - } -} -close PING; -exit 2 if $responsive eq ""; - -print "$local $responsive\n"; diff --git a/test/unittest/ip/tst.ipv4remoteicmp.sh b/test/unittest/ip/tst.ipv4remoteicmp.sh index c165cbdc..854797a7 100755 --- a/test/unittest/ip/tst.ipv4remoteicmp.sh +++ b/test/unittest/ip/tst.ipv4remoteicmp.sh @@ -13,9 +13,7 @@ # # 1. 
A change to the ip stack breaking expected probe behavior, # which is the reason we are testing. -# 2. No physical network interface is plumbed and up. -# 3. The subnet gateway is not reachable. -# 4. An unrelated ICMP between these hosts was traced by accident. +# 2. An unrelated ICMP between these hosts was traced by accident. # if (( $# != 1 )); then @@ -25,18 +23,20 @@ fi dtrace=$1 testdir="$(dirname $_test)" -getaddr=$testdir/get.ipv4remote.pl +getaddr=$testdir/../../utils/get_remote.sh if [[ ! -x $getaddr ]]; then echo "could not find or execute sub program: $getaddr" >&2 exit 3 fi -set -- $($getaddr) + +set -- $($getaddr ipv4) source="$1" dest="$2" if [[ $? -ne 0 ]] || [[ -z $dest ]]; then exit 67 fi + $dtrace $dt_flags -c "$testdir/perlping.pl icmp $dest" -qs /dev/stdin <&2 exit 3 fi -for port in $tcpports ; do - res=`$getaddr $port 2>/dev/null` - if (( $? == 0 )); then - read s d <<< $res - tcpport=$port - source=$s - dest=$d - break - fi -done -if [ -z $tcpport ]; then +set -- $($getaddr ipv4 $tcpport) +source="$1" +dest="$2" + +if [[ $? -ne 0 ]] || [[ -z $dest ]]; then exit 67 fi diff --git a/test/unittest/ip/tst.ipv4remoteudp.sh b/test/unittest/ip/tst.ipv4remoteudp.sh index 3d25e1f5..f88ab35b 100755 --- a/test/unittest/ip/tst.ipv4remoteudp.sh +++ b/test/unittest/ip/tst.ipv4remoteudp.sh @@ -13,9 +13,7 @@ # # 1. A change to the ip stack breaking expected probe behavior, # which is the reason we are testing. -# 2. No physical network interface is plumbed and up. -# 3. The gateway is not reachable and listening on rpcbind. -# 4. An unlikely race causes the unlocked global send/receive +# 2. An unlikely race causes the unlocked global send/receive # variables to be corrupted. # # This test sends a UDP message using ping and checks that at least the @@ -31,13 +29,13 @@ fi dtrace=$1 testdir="$(dirname $_test)" -getaddr=$testdir/get.ipv4remote.pl +getaddr=$testdir/../../utils/get_remote.sh if [[ ! 
-x $getaddr ]]; then echo "could not find or execute sub program: $getaddr" >&2 exit 3 fi -set -- $($getaddr) +set -- $($getaddr ipv4) source="$1" dest="$2" if [[ $? -ne 0 ]] || [[ -z $dest ]]; then diff --git a/test/unittest/ip/tst.ipv6remoteicmp.sh b/test/unittest/ip/tst.ipv6remoteicmp.sh index 90fd48b4..0107a3ae 100755 --- a/test/unittest/ip/tst.ipv6remoteicmp.sh +++ b/test/unittest/ip/tst.ipv6remoteicmp.sh @@ -19,7 +19,7 @@ # # @@tags: unstable -# possible paths for ping6 +# possible paths for ping export PATH=/bin:/usr/bin:/sbin:/usr/sbin:$PATH if (( $# != 1 )); then @@ -29,24 +29,24 @@ fi dtrace=$1 testdir="$(dirname $_test)" -getaddr=$testdir/get.ipv6remote.pl +getaddr=$testdir/../../utils/get_remote.sh if [[ ! -x $getaddr ]]; then echo "could not find or execute sub program: $getaddr" >&2 exit 3 fi -set -- $($getaddr) + +set -- $($getaddr ipv6) source="$1" dest="$2" + if [[ $? -ne 0 ]] || [[ -z $dest ]]; then echo -n "Could not find a local IPv6 interface and a remote IPv6 " >&2 echo "host. Aborting test." >&2 exit 67 fi -nolinkdest="$(printf "%s" "$dest" | sed 's,%.*,,')" - -$dtrace $dt_flags -c "ping6 -c 6 $dest" -qs /dev/stdin <ip_saddr == "$source" && args[2]->ip_daddr == "$nolinkdest" && +/args[2]->ip_saddr == "$source" && args[2]->ip_daddr == "$dest" && args[5]->ipv6_nexthdr == IPPROTO_ICMPV6 && args[2]->ip_plength > 32/ { printf("1 ip:::send ("); @@ -64,7 +64,7 @@ ip:::send } ip:::receive -/args[2]->ip_saddr == "$nolinkdest" && args[2]->ip_daddr == "$source" && +/args[2]->ip_saddr == "$dest" && args[2]->ip_daddr == "$source" && args[5]->ipv6_nexthdr == IPPROTO_ICMPV6 && args[2]->ip_plength > 32/ { printf("2 ip:::receive ("); diff --git a/test/unittest/tcp/tst.ipv4remotetcp.sh b/test/unittest/tcp/tst.ipv4remotetcp.sh index 333760a1..d8673d4b 100755 --- a/test/unittest/tcp/tst.ipv4remotetcp.sh +++ b/test/unittest/tcp/tst.ipv4remotetcp.sh @@ -13,9 +13,7 @@ # # 1. 
A change to the tcp stack breaking expected probe behavior, # which is the reason we are testing. -# 2. No physical network interface is plumbed and up. -# 3. No other hosts on this subnet are reachable and listening on ssh. -# 4. An unlikely race causes the unlocked global send/receive +# 2. An unlikely race causes the unlocked global send/receive # variables to be corrupted. # # This test performs a TCP connection and checks that at least the @@ -32,9 +30,8 @@ fi dtrace=$1 testdir="$(dirname $_test)" -getaddr=$testdir/../ip/get.ipv4remote.pl -tcpports="22 80" -tcpport="" +getaddr=$testdir/../../utils/get_remote.sh +tcpport="22" dest="" if [[ ! -x $getaddr ]]; then @@ -42,18 +39,11 @@ if [[ ! -x $getaddr ]]; then exit 3 fi -for port in $tcpports ; do - res=`$getaddr $port 2>/dev/null` - if (( $? == 0 )); then - read s d <<< $res - tcpport=$port - source=$s - dest=$d - break - fi -done +set -- $($getaddr ipv4 $tcpport) +source="$1" +dest="$2" -if [[ -z $tcpport ]]; then +if [[ $? -ne 0 ]] || [[ -z $dest ]]; then exit 67 fi diff --git a/test/unittest/tcp/tst.ipv4remotetcpstate.sh b/test/unittest/tcp/tst.ipv4remotetcpstate.sh index 74fb4ce3..e9ff218d 100755 --- a/test/unittest/tcp/tst.ipv4remotetcpstate.sh +++ b/test/unittest/tcp/tst.ipv4remotetcpstate.sh @@ -17,8 +17,7 @@ # # 1. A change to the ip stack breaking expected probe behavior, # which is the reason we are testing. -# 2. The remote ssh service is not online. -# 3. An unlikely race causes the unlocked global send/receive +# 2. An unlikely race causes the unlocked global send/receive # variables to be corrupted. # # This test performs a TCP connection to the ssh service (port 22) and @@ -40,29 +39,21 @@ fi dtrace=$1 testdir="$(dirname $_test)" -getaddr=$testdir/../ip/get.ipv4remote.pl +getaddr=$testdir/../../utils/get_remote.sh client=$testdir/../ip/client.ip.pl -tcpports="22 80" -tcpport="" -dest="" +tcpport="22" if [[ ! 
-x $getaddr ]]; then echo "could not find or execute sub program: $getaddr" >&2 exit 3 fi -for port in $tcpports ; do - res=`$getaddr $port 2>/dev/null` - if (( $? == 0 )); then - read s d <<< $res - tcpport=$port - source=$s - dest=$d - break - fi -done - -if [ -z $tcpport ]; then - exit 67 + +set -- $($getaddr ipv4 $tcpport) +source="$1" +dest="$2" + +if [[ $? -ne 0 ]] || [[ -z $dest ]]; then + exit 67 fi diff --git a/test/unittest/udp/tst.ipv4remoteudp.sh b/test/unittest/udp/tst.ipv4remoteudp.sh index 1c5f2a9a..4fe70f5a 100755 --- a/test/unittest/udp/tst.ipv4remoteudp.sh +++ b/test/unittest/udp/tst.ipv4remoteudp.sh @@ -34,14 +34,14 @@ fi dtrace=$1 testdir="$(dirname $_test)" -getaddr=$testdir/../ip/get.ipv4remote.pl +getaddr=$testdir/../../utils/get_remote.sh port=31337 if [[ ! -x $getaddr ]]; then echo "could not find or execute sub program: $getaddr" >&2 exit 3 fi -read source dest <<<`$getaddr 2>/dev/null` +read source dest <<<`$getaddr ipv4 2>/dev/null` if (( $? != 0 )) || [[ -z $dest ]]; then exit 67 fi diff --git a/test/utils/get_remote.sh b/test/utils/get_remote.sh new file mode 100755 index 00000000..d8a4d450 --- /dev/null +++ b/test/utils/get_remote.sh @@ -0,0 +1,71 @@ +#!/bin/bash +# +# Oracle Linux DTrace. +# Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at +# http://oss.oracle.com/licenses/upl. +# + +# +# get_remote.sh ipv4|ipv6|cleanup [tcpport] +# +# Create (or cleanup) a network namespace with either IPv4 or IPv6 +# address associated. +# +# Print the local address and the remote address, or an +# error message if a failure occurred during setup. +# +# If tcpport is specified, start sshd on that port. +# +# Exit status is 0 if all succceeded. 
+# + +cmd=$1 +tcpport=$2 + +prefix=$(basename $tmpdir) +netns=${prefix}ns +veth1=${prefix}v1 +veth2=${prefix}v2 +mtu=1500 + +set -e + +case $cmd in +cleanup) pids=$(ip netns pids ${netns} 2>/dev/null) + if [[ -n "$pids" ]]; then + kill -TERM $pids + fi + ip netns del ${netns} 2>/dev/null + exit 0 + ;; + ipv4) veth1_addr=192.168.168.1 + veth2_addr=192.168.168.2 + prefixlen=24 + family= + ;; + ipv6) veth1_addr=fd::1 + veth2_addr=fd::2 + prefixlen=64 + family=-6 + ;; + *) echo "Unexpected cmd $cmd" >2 + exit 1 + ;; +esac + +ip netns add $netns +ip link add dev $veth1 mtu $mtu netns $netns type veth \ + peer name $veth2 mtu $mtu +ip netns exec $netns ip $family addr add ${veth1_addr}/$prefixlen dev $veth1 +ip netns exec $netns ip link set $veth1 up +ip addr add ${veth2_addr}/${prefixlen} dev $veth2 +ip link set $veth2 up + +if [[ -n "$tcpport" ]]; then + sshd=$(which sshd) + ip netns exec $netns $sshd -p $tcpport & +fi + +echo "$veth2_addr $veth1_addr" +exit 0 -- 2.43.5 From alan.maguire at oracle.com Thu Jul 3 15:03:05 2025 From: alan.maguire at oracle.com (Alan Maguire) Date: Thu, 3 Jul 2025 16:03:05 +0100 Subject: [DTrace-devel] [PATCH v2 3/4] dtrace: add tcp provider In-Reply-To: <148eabc7-ee00-33fe-b013-0f48fda0163a@oracle.com> References: <20250610135813.15746-1-alan.maguire@oracle.com> <20250610135813.15746-4-alan.maguire@oracle.com> <8ed88a3b-6e37-a696-d6bb-38745c144995@oracle.com> <148eabc7-ee00-33fe-b013-0f48fda0163a@oracle.com> Message-ID: On 03/07/2025 01:02, Eugene Loh wrote: > On 7/2/25 11:06, Alan Maguire wrote: > >> On 02/07/2025 00:16, Eugene Loh wrote: >>> On most VMs, >>> ???? test/unittest/tcp/tst.ipv4remotetcp.sh >>> ???? test/unittest/tcp/tst.ipv4remotetcpstate.sh >>> xfail due to missing remote.? Are we okay with "shrugging our shoulders" >>> like that? >> Yeah, I don't think the remote test is robust enough. Specifically in >> OCI it seems to always fail. 
I'd suggest we replace it with creating a >> network namespace with IP addresses configured on top of veths to >> simulate the remote case, the codepaths will be the same. I've done this >> in other test suites and it works well. > > Sounds great (if "we" is "you", haha). > I had a go; see https://lore.kernel.org/dtrace/20250703113345.1273604-1-alan.maguire at oracle.com/ >>> Meanwhile, my one non-OCI VM ran those tests.? The first test passes. >>> The second one consistently reports >>> ???? -tcp:::state-change to time-wait - yes >>> ???? +tcp:::state-change to time-wait - no >> I hit some of these failure during development; adding the >> fbt::tcp_time_wait:entry probe helped. Is that inlined or something >> perhaps (grep tcp_time_wait /proc/kallsyms)? > > On the VM in question: > > # grep -w tcp_time_wait /proc/kallsyms > ffffffff92ad25b0 T tcp_time_wait > # dtrace -lP fbt |& grep tcp_time_wait > 49373??????? fbt?????????? vmlinux???????????????????? tcp_time_wait return > 49372??????? fbt?????????? vmlinux???????????????????? tcp_time_wait entry > # dtrace -lP rawfbt |& grep tcp_time_wait > 51079???? rawfbt?????????? vmlinux???????????????????? tcp_time_wait return > 51078???? rawfbt?????????? vmlinux???????????????????? tcp_time_wait entry > I'm not sure if it's related, but in testing the IP provider with the net namespace stuff I saw some weird behaviour with the IP sdt probes that had multiple underlying probe definitions. If we had a program with ip:::send and ip:::receive, we were often left one probe short (i.e. no BPF prog created/attached) whatever the first probe point in the program was. So if I traced ip:::send then ip:::receive the ip6_finish_output send probe was missing and the test failed. Reversing the order seemed to transfer the problem to the receive probe. So maybe there's a general bug around synthetic probes that's biting us here? Not sure but I'll investigate further. >>> and occasionally reports stuff like >>> ???? 
dtrace: error in dt_clause_2 for probe ID 4976 (tcp:vmlinux::send): >>> invalid address (0x1fc0c0000000000) at BPF pc 287 >>> ???? dtrace: error in dt_clause_2 for probe ID 4976 (tcp:vmlinux::send): >>> invalid address (0x225b80000000000) at BPF pc 287 >>> >> ah, ok there must be a null deref somewhere. Haven't seen this before; >> what kernel version/arch is this? > > 5.15.0-300.161.13.el9uek.x86_64 > > FWIW, I can comment out all probes in tcp other than: > > ??????? { "send", DTRACE_PROBESPEC_NAME, > "rawfbt::ip_send_unicast_reply:entry" }, > > Then I run > > dtrace -c "$testdir/client.ip.pl tcp $dest $tcpport" -qn 'tcp:::send / > args[2]->ip_saddr == "'$source'"/ { tcpsend++; }' > > The disassembly shows that I look up args[2] using dt_bvar_args() > (including checking for a fault).? Then we try to dereference args[2]- >>ip_saddr.? We first check the pointer is non NULL.? Then we call > dt_cg_load_scalar() to bpf_probe_read() from the desired location.? This > call is problematic. > Great, thanks for narrowing this down! >>> The non-remote tests fail on OL8 UEK6 (x86 and arm). >>> ???? dtrace: failed to compile script /dev/stdin: >>> ???? ".../build/dlibs/5.2/tcp.d", line 177: failed to resolve type of >>> inet_ntoa arg#1 (ipaddr_t *): >>> ???? Unknown type name >>> >> This is a weird failure; I see it on some systems but not on others. >> In tcp.d we have >> >> #pragma D depends_on library net.d >> >> which contains the typedef for ipaddr_t ; it seems that's not enough to >> pull in the typedef reliably. I suspect there is a timing element >> involved here in when the net.d library is included. Perhaps there is a >> better way to define ipaddr_t ; would using a builtin typedef in >> _dtrace_typedefs_32/64 work better perhaps? > > Don't know. > I'll dig into this further. If anyone has hints here it would be great. >>> The probe names are >>> ?? ? tcp:ip:*:*?? ? ?? Solaris >>> ?? ? tcp:vmlinux:*:* ? DTv1 >>> ?? ? tcp:vmlinux::* ?? 
with this patch (that is, no more function) >>> I guess precedents have already been set for other SDT providers;? so, >>> okay.? Just noting for my own sake. >>> Meanwhile, the typed args[] have changed in number and type from >>> Solaris> to DTv1 to this patch.? Does that merit discussion? >> Hmm, that's not intentional (aside from the additional INBOUND/OUTBOUND >> etc which we use to help inform translation). > > Worth mentioning somewhere? > I guess though I hadn't really considered the fact that the argN values become args[] values unless we intervene. >> Do you see other changes aside from them? Thanks! > > This is what I have for typed args[] for tcp probes. > > The typed probe arguments for probes > ??????? accept-[refused|established] > ??????? connect-[refused|established|request] > ??????? receive > are the same as for send. > > The typed probe arguments for state-change may be different. > > So, the typed probe arguments are (wide screen, fixed-width font): > > args[0]:????? args[1]:????? args[2]:????? args[3]: args[4]:????? > args[5]:????? args[6]:????? args[7]: > > ??????????? send Solaris???????? pktinfo_t *?? csinfo_t * ipinfo_t *??? > tcpsinfo_t *? tcpinfo_t * > ??????????? send DTv1??????????? (unknown)???? (unknown) (unknown)???? > (unknown)???? (unknown)???? (unknown) int?????????? int > ??????????? send DTv2??????????? pktinfo_t *?? csinfo_t * ipinfo_t *??? > tcpsinfo_t *? tcpinfo_t *?? int tcplsinfo_t * int > > ??????????? state-change Solaris void????????? csinfo_t * void????????? > tcpsinfo_t *? void????????? tcplsinfo_t * > ??????????? state-change DTv1??? (unknown)???? (unknown) (unknown)???? > (unknown)???? (unknown)???? (unknown) int?????????? int > ??????????? state-change DTv2??? void????? *?? csinfo_t * void???? *??? > tcpsinfo_t *? void????? *?? void * tcplsinfo_t * int > > Here, "DTv1" refers to legacy DTrace on Linux.? I guess we can ignore > that.? By "DTv2" I mean your patch.? 
For state-change, Solaris calls > some things "void" (not "void *") and tcplsinfo_t* moves from args[5] to > args[6]. That latter one definitely needs fixing; I think in the other cases it's just that we need to fix up the provider description as the fields aren't set for Linux either. From alan.maguire at oracle.com Thu Jul 3 15:18:08 2025 From: alan.maguire at oracle.com (Alan Maguire) Date: Thu, 3 Jul 2025 16:18:08 +0100 Subject: [DTrace-devel] [PATCH v2 2/4] dt_impl: bump number of TSLOTS to 8 In-Reply-To: <2d8d7b61-29ef-04ae-bffd-0dcb0709a106@oracle.com> References: <20250610135813.15746-1-alan.maguire@oracle.com> <20250610135813.15746-3-alan.maguire@oracle.com> <4a079f61-5806-68b8-9051-af918a822adc@oracle.com> <2d8d7b61-29ef-04ae-bffd-0dcb0709a106@oracle.com> Message-ID: <7e2c55b0-9472-42cb-86d0-23b1e5e01ea4@oracle.com> On 02/07/2025 21:22, Eugene Loh wrote: > On 7/2/25 10:52, Alan Maguire wrote: > >> On 01/07/2025 19:31, Eugene Loh wrote: >>> Very simple patch, but one key problem.? The changed line is preceded by >>> a huge comment block that goes to excruciating pains to explain why the >>> value should be 4.? So, I'm fine with the change itself and I think the >>> "Because of the complexity..." sentence in the commit message can be >>> dropped, but the comment block in the file needs to be updated to >>> explain (with corresponding pains?) why we need 8 (not 9, not 7). >>> >> Ah good catch, forgot to do this. I actually think 6 is enough. >> The additional seems to be a result of the various complex inet_ntoa*() >> calls in the tcp.d translators; some have ternary operators with >> multiple inet_ntoa*()s. The origin appears to be dt_cg_ternary_op() and >> that has the comment: >> >> ???????? /* >> ????????? * Strings complicate things a bit because dn_left and dn_right >> might >> ????????? * actually be temporary strings (tstring) *and* in different >> slots. >> ????????? 
* We need to allocate a new tstring to hold the result, and >> copy the >> ????????? * value into the new tstring (and free any tstrings in >> dn_left and >> ????????? * dn_right). >> ????????? */ >> >> So if we have a left and right ternary and both allocate 3 tstring slots >> for inet_ntoa*()s that means 3*2 = 6 tstring slots should actually be >> enough. Additional testing suggests that is so; does the above make >> sense or are there additional things to consider here? > > Ha!? I do not know;? I prefer NOT to have a headache.? But maybe there > should also be some tstring stress test using inet_ntoa*() and ternary ops. > sure, I can add some tests to operators/tst.ternary.d >> I'll update the >> next version with a comment describing the above anyhow. Thanks for the >> review! >> Alan >> >> >>> On 6/10/25 09:58, Alan Maguire wrote: >>>> Because of the complexity of the TCP translators, more tslots are >>>> needed. >>>> >>>> Signed-off-by: Alan Maguire >>>> --- >>>> ?? libdtrace/dt_impl.h | 2 +- >>>> ?? 1 file changed, 1 insertion(+), 1 deletion(-) >>>> >>>> diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h >>>> index 68fb8ec5..10424f9c 100644 >>>> --- a/libdtrace/dt_impl.h >>>> +++ b/libdtrace/dt_impl.h >>>> @@ -218,7 +218,7 @@ typedef struct dt_kern_path { >>>> ??? * - cleanpath() holds a prepended '/' char, a string, an appended >>>> '/' char, >>>> ??? *?? and a terminating NUL char, or STRSZ + 3 chars altogether >>>> ??? */ >>>> -#define DT_TSTRING_SLOTS??? 4 >>>> +#define DT_TSTRING_SLOTS??? 8 >>>> ?? #define DT_TSTRING_SIZE(dtp)??? \ >>>> ?????????? MAX(P2ROUNDUP((dtp)->dt_options[DTRACEOPT_STRSIZE] + 3, >>>> 8), \ >>>> ?????????????? 
72) From kris.van.hees at oracle.com Thu Jul 3 15:29:42 2025 From: kris.van.hees at oracle.com (Kris Van Hees) Date: Thu, 3 Jul 2025 11:29:42 -0400 Subject: [DTrace-devel] [PATCH v2 3/4] dtrace: add tcp provider In-Reply-To: <148eabc7-ee00-33fe-b013-0f48fda0163a@oracle.com> References: <20250610135813.15746-1-alan.maguire@oracle.com> <20250610135813.15746-4-alan.maguire@oracle.com> <8ed88a3b-6e37-a696-d6bb-38745c144995@oracle.com> <148eabc7-ee00-33fe-b013-0f48fda0163a@oracle.com> Message-ID: Not sure if this would confuse things or help, but here is an early attempt I did on implementing a tcp provider: kvh/2.0-branch-dev-tcp >From my recollection it was missing stuff, since it was WIP compared to what Alan has worked on. On Wed, Jul 02, 2025 at 08:02:00PM -0400, Eugene Loh wrote: > On 7/2/25 11:06, Alan Maguire wrote: > > > On 02/07/2025 00:16, Eugene Loh wrote: > > > On most VMs, > > > ??? test/unittest/tcp/tst.ipv4remotetcp.sh > > > ??? test/unittest/tcp/tst.ipv4remotetcpstate.sh > > > xfail due to missing remote.? Are we okay with "shrugging our shoulders" > > > like that? > > Yeah, I don't think the remote test is robust enough. Specifically in > > OCI it seems to always fail. I'd suggest we replace it with creating a > > network namespace with IP addresses configured on top of veths to > > simulate the remote case, the codepaths will be the same. I've done this > > in other test suites and it works well. > > Sounds great (if "we" is "you", haha). > > > > Meanwhile, my one non-OCI VM ran those tests.? The first test passes. > > > The second one consistently reports > > > ??? -tcp:::state-change to time-wait - yes > > > ??? +tcp:::state-change to time-wait - no > > I hit some of these failure during development; adding the > > fbt::tcp_time_wait:entry probe helped. Is that inlined or something > > perhaps (grep tcp_time_wait /proc/kallsyms)? 
> > On the VM in question: > > # grep -w tcp_time_wait /proc/kallsyms > ffffffff92ad25b0 T tcp_time_wait > # dtrace -lP fbt |& grep tcp_time_wait > 49373??????? fbt?????????? vmlinux???????????????????? tcp_time_wait return > 49372??????? fbt?????????? vmlinux???????????????????? tcp_time_wait entry > # dtrace -lP rawfbt |& grep tcp_time_wait > 51079???? rawfbt?????????? vmlinux???????????????????? tcp_time_wait return > 51078???? rawfbt?????????? vmlinux???????????????????? tcp_time_wait entry > > > > and occasionally reports stuff like > > > ??? dtrace: error in dt_clause_2 for probe ID 4976 (tcp:vmlinux::send): > > > invalid address (0x1fc0c0000000000) at BPF pc 287 > > > ??? dtrace: error in dt_clause_2 for probe ID 4976 (tcp:vmlinux::send): > > > invalid address (0x225b80000000000) at BPF pc 287 > > > > > ah, ok there must be a null deref somewhere. Haven't seen this before; > > what kernel version/arch is this? > > 5.15.0-300.161.13.el9uek.x86_64 > > FWIW, I can comment out all probes in tcp other than: > > ??????? { "send", DTRACE_PROBESPEC_NAME, > "rawfbt::ip_send_unicast_reply:entry" }, > > Then I run > > dtrace -c "$testdir/client.ip.pl tcp $dest $tcpport" -qn 'tcp:::send > /args[2]->ip_saddr == "'$source'"/ { tcpsend++; }' > > The disassembly shows that I look up args[2] using dt_bvar_args() (including > checking for a fault).? Then we try to dereference args[2]->ip_saddr.? We > first check the pointer is non NULL.? Then we call dt_cg_load_scalar() to > bpf_probe_read() from the desired location.? This call is problematic. > > > > The non-remote tests fail on OL8 UEK6 (x86 and arm). > > > ??? dtrace: failed to compile script /dev/stdin: > > > ??? ".../build/dlibs/5.2/tcp.d", line 177: failed to resolve type of > > > inet_ntoa arg#1 (ipaddr_t *): > > > ??? Unknown type name > > > > > This is a weird failure; I see it on some systems but not on others. 
> > In tcp.d we have > > > > #pragma D depends_on library net.d > > > > which contains the typedef for ipaddr_t ; it seems that's not enough to > > pull in the typedef reliably. I suspect there is a timing element > > involved here in when the net.d library is included. Perhaps there is a > > better way to define ipaddr_t ; would using a builtin typedef in > > _dtrace_typedefs_32/64 work better perhaps? > > Don't know. > > > > The probe names are > > > ? ? tcp:ip:*:*?? ? ?? Solaris > > > ? ? tcp:vmlinux:*:* ? DTv1 > > > ? ? tcp:vmlinux::* ?? with this patch (that is, no more function) > > > I guess precedents have already been set for other SDT providers;? so, > > > okay.? Just noting for my own sake. > > > Meanwhile, the typed args[] have changed in number and type from Solaris> to DTv1 to this patch.? Does that merit discussion? > > Hmm, that's not intentional (aside from the additional INBOUND/OUTBOUND > > etc which we use to help inform translation). > > Worth mentioning somewhere? > > > Do you see other changes aside from them? Thanks! > > This is what I have for typed args[] for tcp probes. > > The typed probe arguments for probes > ??????? accept-[refused|established] > ??????? connect-[refused|established|request] > ??????? receive > are the same as for send. > > The typed probe arguments for state-change may be different. > > So, the typed probe arguments are (wide screen, fixed-width font): > > args[0]:????? args[1]:????? args[2]:????? args[3]: args[4]:????? > args[5]:????? args[6]:????? args[7]: > > ??????????? send Solaris???????? pktinfo_t *?? csinfo_t * ipinfo_t *??? > tcpsinfo_t *? tcpinfo_t * > ??????????? send DTv1??????????? (unknown)???? (unknown) (unknown)???? > (unknown)???? (unknown)???? (unknown) int?????????? int > ??????????? send DTv2??????????? pktinfo_t *?? csinfo_t * ipinfo_t *??? > tcpsinfo_t *? tcpinfo_t *?? int tcplsinfo_t * int > > ??????????? state-change Solaris void????????? csinfo_t * void????????? > tcpsinfo_t *? void????????? 
tcplsinfo_t * > ??????????? state-change DTv1??? (unknown)???? (unknown) (unknown)???? > (unknown)???? (unknown)???? (unknown) int?????????? int > ??????????? state-change DTv2??? void????? *?? csinfo_t * void???? *??? > tcpsinfo_t *? void????? *?? void * tcplsinfo_t * int > > Here, "DTv1" refers to legacy DTrace on Linux.? I guess we can ignore that.? > By "DTv2" I mean your patch.? For state-change, Solaris calls some things > "void" (not "void *") and tcplsinfo_t* moves from args[5] to args[6]. From kris.van.hees at oracle.com Thu Jul 3 15:38:57 2025 From: kris.van.hees at oracle.com (Kris Van Hees) Date: Thu, 3 Jul 2025 11:38:57 -0400 Subject: [DTrace-devel] [PATCH v2 3/4] dtrace: add tcp provider In-Reply-To: References: <20250610135813.15746-1-alan.maguire@oracle.com> <20250610135813.15746-4-alan.maguire@oracle.com> <8ed88a3b-6e37-a696-d6bb-38745c144995@oracle.com> <148eabc7-ee00-33fe-b013-0f48fda0163a@oracle.com> Message-ID: On Thu, Jul 03, 2025 at 11:29:42AM -0400, Kris Van Hees wrote: > Not sure if this would confuse things or help, but here is an early attempt I > did on implementing a tcp provider: > > kvh/2.0-branch-dev-tcp But... it is only on an internal repo since I never finished it :) But perhaps it can help with review. Probably not worth pushing to github because I do not think what I did is useful anymore in view of Alan's work. > >From my recollection it was missing stuff, since it was WIP compared to what > Alan has worked on. > > On Wed, Jul 02, 2025 at 08:02:00PM -0400, Eugene Loh wrote: > > On 7/2/25 11:06, Alan Maguire wrote: > > > > > On 02/07/2025 00:16, Eugene Loh wrote: > > > > On most VMs, > > > > ??? test/unittest/tcp/tst.ipv4remotetcp.sh > > > > ??? test/unittest/tcp/tst.ipv4remotetcpstate.sh > > > > xfail due to missing remote.? Are we okay with "shrugging our shoulders" > > > > like that? > > > Yeah, I don't think the remote test is robust enough. Specifically in > > > OCI it seems to always fail. 
I'd suggest we replace it with creating a > > > network namespace with IP addresses configured on top of veths to > > > simulate the remote case, the codepaths will be the same. I've done this > > > in other test suites and it works well. > > > > Sounds great (if "we" is "you", haha). > > > > > > Meanwhile, my one non-OCI VM ran those tests.? The first test passes. > > > > The second one consistently reports > > > > ??? -tcp:::state-change to time-wait - yes > > > > ??? +tcp:::state-change to time-wait - no > > > I hit some of these failure during development; adding the > > > fbt::tcp_time_wait:entry probe helped. Is that inlined or something > > > perhaps (grep tcp_time_wait /proc/kallsyms)? > > > > On the VM in question: > > > > # grep -w tcp_time_wait /proc/kallsyms > > ffffffff92ad25b0 T tcp_time_wait > > # dtrace -lP fbt |& grep tcp_time_wait > > 49373??????? fbt?????????? vmlinux???????????????????? tcp_time_wait return > > 49372??????? fbt?????????? vmlinux???????????????????? tcp_time_wait entry > > # dtrace -lP rawfbt |& grep tcp_time_wait > > 51079???? rawfbt?????????? vmlinux???????????????????? tcp_time_wait return > > 51078???? rawfbt?????????? vmlinux???????????????????? tcp_time_wait entry > > > > > > and occasionally reports stuff like > > > > ??? dtrace: error in dt_clause_2 for probe ID 4976 (tcp:vmlinux::send): > > > > invalid address (0x1fc0c0000000000) at BPF pc 287 > > > > ??? dtrace: error in dt_clause_2 for probe ID 4976 (tcp:vmlinux::send): > > > > invalid address (0x225b80000000000) at BPF pc 287 > > > > > > > ah, ok there must be a null deref somewhere. Haven't seen this before; > > > what kernel version/arch is this? > > > > 5.15.0-300.161.13.el9uek.x86_64 > > > > FWIW, I can comment out all probes in tcp other than: > > > > ??????? 
{ "send", DTRACE_PROBESPEC_NAME, > > "rawfbt::ip_send_unicast_reply:entry" }, > > > > Then I run > > > > dtrace -c "$testdir/client.ip.pl tcp $dest $tcpport" -qn 'tcp:::send > > /args[2]->ip_saddr == "'$source'"/ { tcpsend++; }' > > > > The disassembly shows that I look up args[2] using dt_bvar_args() (including > > checking for a fault).? Then we try to dereference args[2]->ip_saddr.? We > > first check the pointer is non NULL.? Then we call dt_cg_load_scalar() to > > bpf_probe_read() from the desired location.? This call is problematic. > > > > > > The non-remote tests fail on OL8 UEK6 (x86 and arm). > > > > ??? dtrace: failed to compile script /dev/stdin: > > > > ??? ".../build/dlibs/5.2/tcp.d", line 177: failed to resolve type of > > > > inet_ntoa arg#1 (ipaddr_t *): > > > > ??? Unknown type name > > > > > > > This is a weird failure; I see it on some systems but not on others. > > > In tcp.d we have > > > > > > #pragma D depends_on library net.d > > > > > > which contains the typedef for ipaddr_t ; it seems that's not enough to > > > pull in the typedef reliably. I suspect there is a timing element > > > involved here in when the net.d library is included. Perhaps there is a > > > better way to define ipaddr_t ; would using a builtin typedef in > > > _dtrace_typedefs_32/64 work better perhaps? > > > > Don't know. > > > > > > The probe names are > > > > ? ? tcp:ip:*:*?? ? ?? Solaris > > > > ? ? tcp:vmlinux:*:* ? DTv1 > > > > ? ? tcp:vmlinux::* ?? with this patch (that is, no more function) > > > > I guess precedents have already been set for other SDT providers;? so, > > > > okay.? Just noting for my own sake. > > > > Meanwhile, the typed args[] have changed in number and type from Solaris> to DTv1 to this patch.? Does that merit discussion? > > > Hmm, that's not intentional (aside from the additional INBOUND/OUTBOUND > > > etc which we use to help inform translation). > > > > Worth mentioning somewhere? > > > > > Do you see other changes aside from them? 
Thanks! > > > > This is what I have for typed args[] for tcp probes. > > > > The typed probe arguments for probes > > ??????? accept-[refused|established] > > ??????? connect-[refused|established|request] > > ??????? receive > > are the same as for send. > > > > The typed probe arguments for state-change may be different. > > > > So, the typed probe arguments are (wide screen, fixed-width font): > > > > args[0]:????? args[1]:????? args[2]:????? args[3]: args[4]:????? > > args[5]:????? args[6]:????? args[7]: > > > > ??????????? send Solaris???????? pktinfo_t *?? csinfo_t * ipinfo_t *??? > > tcpsinfo_t *? tcpinfo_t * > > ??????????? send DTv1??????????? (unknown)???? (unknown) (unknown)???? > > (unknown)???? (unknown)???? (unknown) int?????????? int > > ??????????? send DTv2??????????? pktinfo_t *?? csinfo_t * ipinfo_t *??? > > tcpsinfo_t *? tcpinfo_t *?? int tcplsinfo_t * int > > > > ??????????? state-change Solaris void????????? csinfo_t * void????????? > > tcpsinfo_t *? void????????? tcplsinfo_t * > > ??????????? state-change DTv1??? (unknown)???? (unknown) (unknown)???? > > (unknown)???? (unknown)???? (unknown) int?????????? int > > ??????????? state-change DTv2??? void????? *?? csinfo_t * void???? *??? > > tcpsinfo_t *? void????? *?? void * tcplsinfo_t * int > > > > Here, "DTv1" refers to legacy DTrace on Linux.? I guess we can ignore that.? > > By "DTv2" I mean your patch.? For state-change, Solaris calls some things > > "void" (not "void *") and tcplsinfo_t* moves from args[5] to args[6]. From alan.maguire at oracle.com Thu Jul 3 16:59:40 2025 From: alan.maguire at oracle.com (Alan Maguire) Date: Thu, 3 Jul 2025 17:59:40 +0100 Subject: [DTrace-devel] [PATCH] test/utils: add more reliable "get remote address" approach In-Reply-To: References: <20250703113345.1273604-1-alan.maguire@oracle.com> Message-ID: <08bae006-f438-4134-a811-609b3fda69d1@oracle.com> On 03/07/2025 17:43, Eugene Loh wrote: > Reviewed-by: Eugene Loh > Thanks for the review! 
> I confess I don't understand all the details, but it seems like a nice > improvement. Thanks. > Creating a network namespace essentially gives you an independent TCP/IP stack on the system; you then connect to it from the main (global network namespace) via a veth pair; one lives on the network namespace side in that TCP/IP stack and the other lives on the local system (global namespace) side; kind of like using a back-to-back cable between two ethernet cards on different systems, but virtually. The upshot is that the networking stack sends to the namespaced interface just like it does for "real" network traffic; that is the benefit it gives us. > I tested and it looks good (modulo the OL8 UEK6 issue mentioned in the > patch 3/4 feedback). > Sorry I couldn't find that issue; is this the 5.15 problem with the ip send probes? > I think you need to update Copyright years in the modified files. And... > Will do, thanks! > On 7/3/25 07:33, Alan Maguire via DTrace-devel wrote: > >> The current approach of looking for remote addresses >> is brittle and fails in many environments; it checks the >> default route gateway and looks for open ports in the TCP >> case. >> >> We can however achieve the same goal reliably by creating >> a network namespace on the system and configuring either >> IPv4 or IPv6 addresses on the namespaced and local veth >> interfaces that support communication between namespaces. >> If a tcp port is required start sshd to listen on that port. > > Maybe a comma after "required"? > yep, will fix. >> Teardown is managed in runtest.sh as signal handling for >> timeouts within the test scripts is not working; a trap >> function does not trigger for TERM. > > I'm having trouble parsing the text before the semicolon. I think I > understand it, but cannot seem to figure out the grammar.
I'll try and rephrase; basically I tried adding a trap cleanup TERM to the test script to catch a SIGTERM when the test timed out; unfortunately this didn't trigger when tests timed out so we were left with network namespaces hanging around. How about Teardown of network namespaces is managed in the toplevel runtest.sh to ensure that network namespaces are removed after test completion for all cases; success, failure and timeout. From eugene.loh at oracle.com Thu Jul 3 17:06:10 2025 From: eugene.loh at oracle.com (Eugene Loh) Date: Thu, 3 Jul 2025 13:06:10 -0400 Subject: [DTrace-devel] [PATCH] test/utils: add more reliable "get remote address" approach In-Reply-To: <08bae006-f438-4134-a811-609b3fda69d1@oracle.com> References: <20250703113345.1273604-1-alan.maguire@oracle.com> <08bae006-f438-4134-a811-609b3fda69d1@oracle.com> Message-ID: On 7/3/25 12:59, Alan Maguire wrote: > On 03/07/2025 17:43, Eugene Loh wrote: > >> I tested and it looks good (modulo the OL8 UEK6 issue mentioned in the >> patch 3/4 feedback). >> > Sorry I couldn't find that issue; is this the 5.15 problem with the ip > send probes? ??? dtrace: failed to compile script /dev/stdin: ??? ".../build/dlibs/5.2/tcp.d", line 177: failed to resolve type of inet_ntoa arg#1 (ipaddr_t *): ??? Unknown type name >> On 7/3/25 07:33, Alan Maguire via DTrace-devel wrote: >> >>> The current approach of looking for remote addresses >>> is brittle and fails in many environments; it checks the >>> default route gateway and looks for open ports in the TCP >>> case. >>> >>> We can however achieve the same goal reliably by creating >>> a network namespace on the system and configuring either >>> IPv4 or IPv6 addresses on the namespaced and local veth >>> interfaces that support communication between namespaces. >>> If a tcp port is required start sshd to listen on that port. >> Maybe a comma after "required"? >> > yep, will fix. 
> >>> Teardown is managed in runtest.sh as signal handling for >>> timeouts within the test scripts is not working; a trap >>> function does not trigger for TERM. >> I'm having trouble parsing the text before the semicolon. I think I >> understand it, but cannot seem to figure out the grammar. > I'll try and rephrase; basically I tried adding a > > trap cleanup TERM > > to the test script to catch a SIGTERM when the test timed out; > unfortunately this didn't trigger when tests timed out so we were left > with network namespaces hanging around. > > How about > > Teardown of network namespaces is managed in the toplevel runtest.sh to > ensure that network namespaces are removed after test completion for all > cases; success, failure and timeout. Great. Or how about a colon instead of semicolon? From eugene.loh at oracle.com Thu Jul 3 16:43:06 2025 From: eugene.loh at oracle.com (Eugene Loh) Date: Thu, 3 Jul 2025 12:43:06 -0400 Subject: [DTrace-devel] [PATCH] test/utils: add more reliable "get remote address" approach In-Reply-To: <20250703113345.1273604-1-alan.maguire@oracle.com> References: <20250703113345.1273604-1-alan.maguire@oracle.com> Message-ID: Reviewed-by: Eugene Loh I confess I don't understand all the details, but it seems like a nice improvement. Thanks. I tested and it looks good (modulo the OL8 UEK6 issue mentioned in the patch 3/4 feedback). I think you need to update Copyright years in the modified files. And... On 7/3/25 07:33, Alan Maguire via DTrace-devel wrote: > The current approach of looking for remote addresses > is brittle and fails in many environments; it checks the > default route gateway and looks for open ports in the TCP > case. > > We can however achieve the same goal reliably by creating > a network namespace on the system and configuring either > IPv4 or IPv6 addresses on the namespaced and local veth > interfaces that support communication between namespaces. > If a tcp port is required start sshd to listen on that port.
Maybe a comma after "required"? > Teardown is managed in runtest.sh as signal handling for > timeouts within the test scripts is not working; a trap > function does not trigger for TERM. I'm having trouble parsing the text before the semicolon. I think I understand it, but cannot seem to figure out the grammar. From alan.maguire at oracle.com Thu Jul 3 18:02:57 2025 From: alan.maguire at oracle.com (Alan Maguire) Date: Thu, 3 Jul 2025 19:02:57 +0100 Subject: [DTrace-devel] [PATCH] test/utils: add more reliable "get remote address" approach In-Reply-To: References: <20250703113345.1273604-1-alan.maguire@oracle.com> <08bae006-f438-4134-a811-609b3fda69d1@oracle.com> Message-ID: <58cb8219-337e-42ed-ae17-1c4e77894a9c@oracle.com> On 03/07/2025 18:06, Eugene Loh wrote: > On 7/3/25 12:59, Alan Maguire wrote: > >> On 03/07/2025 17:43, Eugene Loh wrote: >> >>> I tested and it looks good (modulo the OL8 UEK6 issue mentioned in the >>> patch 3/4 feedback). >>> >> Sorry I couldn't find that issue; is this the 5.15 problem with the ip >> send probes? > >     dtrace: failed to compile script /dev/stdin: >     ".../build/dlibs/5.2/tcp.d", line 177: failed to resolve type of > inet_ntoa arg#1 (ipaddr_t *): >     Unknown type name > Ah, sorry yep I have a fix for that one in the next round. Basically we need to add it to the core set of typedefs and add a type for a pointer to ipaddr_t; we can't rely on the #pragma to include net.d unfortunately. >>> On 7/3/25 07:33, Alan Maguire via DTrace-devel wrote: >>> >>>> The current approach of looking for remote addresses >>>> is brittle and fails in many environments; it checks the >>>> default route gateway and looks for open ports in the TCP >>>> case. >>>> >>>> We can however achieve the same goal reliably by creating >>>> a network namespace on the system and configuring either >>>> IPv4 or IPv6 addresses on the namespaced and local veth >>>> interfaces that support communication between namespaces.
>>>> If a tcp port is required start sshd to listen on that port. >>> Maybe a comma after "required"? >>> >> yep, will fix. >> >>>> Teardown is managed in runtest.sh as signal handling for >>>> timeouts within the test scripts is not working; a trap >>>> function does not trigger for TERM. >>> I'm having trouble parsing the text before the semicolon. I think I >>> understand it, but cannot seem to figure out the grammar. >> I'll try and rephrase; basically I tried adding a >> >> trap cleanup TERM >> >> to the test script to catch a SIGTERM when the test timed out; >> unfortunately this didn't trigger when tests timed out so we were left >> with network namespaces hanging around. >> >> How about >> >> Teardown of network namespaces is managed in the toplevel runtest.sh to >> ensure that network namespaces are removed after test completion for all >> cases; success, failure and timeout. > > Great. Or how about a colon instead of semicolon? Sure! From alan.maguire at oracle.com Thu Jul 3 16:29:21 2025 From: alan.maguire at oracle.com (Alan Maguire) Date: Thu, 3 Jul 2025 17:29:21 +0100 Subject: [DTrace-devel] [PATCH v2 3/4] dtrace: add tcp provider In-Reply-To: References: <20250610135813.15746-1-alan.maguire@oracle.com> <20250610135813.15746-4-alan.maguire@oracle.com> <8ed88a3b-6e37-a696-d6bb-38745c144995@oracle.com> <148eabc7-ee00-33fe-b013-0f48fda0163a@oracle.com> Message-ID: <4ea606e1-e9fc-439c-871b-2acf5c0e058d@oracle.com> On 03/07/2025 16:03, Alan Maguire via DTrace-devel wrote: > On 03/07/2025 01:02, Eugene Loh wrote: >> On 7/2/25 11:06, Alan Maguire wrote: >> >>> On 02/07/2025 00:16, Eugene Loh wrote: >>>> On most VMs, >>>>      test/unittest/tcp/tst.ipv4remotetcp.sh >>>>      test/unittest/tcp/tst.ipv4remotetcpstate.sh >>>> xfail due to missing remote. Are we okay with "shrugging our shoulders" >>>> like that? >>> Yeah, I don't think the remote test is robust enough. Specifically in >>> OCI it seems to always fail.
I'd suggest we replace it with creating a >>> network namespace with IP addresses configured on top of veths to >>> simulate the remote case, the codepaths will be the same. I've done this >>> in other test suites and it works well. >> >> Sounds great (if "we" is "you", haha). >> > > I had a go; see > > https://lore.kernel.org/dtrace/20250703113345.1273604-1-alan.maguire at oracle.com/ > > >>>> Meanwhile, my one non-OCI VM ran those tests.? The first test passes. >>>> The second one consistently reports >>>> ???? -tcp:::state-change to time-wait - yes >>>> ???? +tcp:::state-change to time-wait - no >>> I hit some of these failure during development; adding the >>> fbt::tcp_time_wait:entry probe helped. Is that inlined or something >>> perhaps (grep tcp_time_wait /proc/kallsyms)? >> >> On the VM in question: >> >> # grep -w tcp_time_wait /proc/kallsyms >> ffffffff92ad25b0 T tcp_time_wait >> # dtrace -lP fbt |& grep tcp_time_wait >> 49373??????? fbt?????????? vmlinux???????????????????? tcp_time_wait return >> 49372??????? fbt?????????? vmlinux???????????????????? tcp_time_wait entry >> # dtrace -lP rawfbt |& grep tcp_time_wait >> 51079???? rawfbt?????????? vmlinux???????????????????? tcp_time_wait return >> 51078???? rawfbt?????????? vmlinux???????????????????? tcp_time_wait entry >> > > I'm not sure if it's related, but in testing the IP provider with the > net namespace stuff I saw some weird behaviour with the IP sdt probes > that had multiple underlying probe definitions. If we had a program with > ip:::send and ip:::receive, we were often left one probe short (i.e. no > BPF prog created/attached) whatever the first probe point in the program > was. So if I traced ip:::send then ip:::receive the ip6_finish_output > send probe was missing and the test failed. Reversing the order seemed > to transfer the problem to the receive probe. So maybe there's a general > bug around synthetic probes that's biting us here? Not sure but I'll > investigate further. 
> >>>> and occasionally reports stuff like >>>> ???? dtrace: error in dt_clause_2 for probe ID 4976 (tcp:vmlinux::send): >>>> invalid address (0x1fc0c0000000000) at BPF pc 287 >>>> ???? dtrace: error in dt_clause_2 for probe ID 4976 (tcp:vmlinux::send): >>>> invalid address (0x225b80000000000) at BPF pc 287 >>>> >>> ah, ok there must be a null deref somewhere. Haven't seen this before; >>> what kernel version/arch is this? >> >> 5.15.0-300.161.13.el9uek.x86_64 >> >> FWIW, I can comment out all probes in tcp other than: >> >> ??????? { "send", DTRACE_PROBESPEC_NAME, >> "rawfbt::ip_send_unicast_reply:entry" }, >> >> Then I run >> >> dtrace -c "$testdir/client.ip.pl tcp $dest $tcpport" -qn 'tcp:::send / >> args[2]->ip_saddr == "'$source'"/ { tcpsend++; }' >> >> The disassembly shows that I look up args[2] using dt_bvar_args() >> (including checking for a fault).? Then we try to dereference args[2]- >>> ip_saddr.? We first check the pointer is non NULL.? Then we call >> dt_cg_load_scalar() to bpf_probe_read() from the desired location.? This >> call is problematic. >> > > Great, thanks for narrowing this down! > >>>> The non-remote tests fail on OL8 UEK6 (x86 and arm). >>>> ???? dtrace: failed to compile script /dev/stdin: >>>> ???? ".../build/dlibs/5.2/tcp.d", line 177: failed to resolve type of >>>> inet_ntoa arg#1 (ipaddr_t *): >>>> ???? Unknown type name >>>> >>> This is a weird failure; I see it on some systems but not on others. >>> In tcp.d we have >>> >>> #pragma D depends_on library net.d >>> >>> which contains the typedef for ipaddr_t ; it seems that's not enough to >>> pull in the typedef reliably. I suspect there is a timing element >>> involved here in when the net.d library is included. Perhaps there is a >>> better way to define ipaddr_t ; would using a builtin typedef in >>> _dtrace_typedefs_32/64 work better perhaps? >> >> Don't know. >> > > I'll dig into this further. If anyone has hints here it would be great. > Sorted this one at least. 
We need to add ipaddr_t to the internal set of typedefs _and_ also add a pointer to it to the CTF dict. With that change the ipaddr_t typedef in ip.d can be removed. I'll add a separate patch to the next rev of the series to carry this out prior to the tcp provider patch. Other providers in the future that use inet_ntoa() in translators (e.g. udp.d) will need this too. > >>>> The probe names are >>>> ?? ? tcp:ip:*:*?? ? ?? Solaris >>>> ?? ? tcp:vmlinux:*:* ? DTv1 >>>> ?? ? tcp:vmlinux::* ?? with this patch (that is, no more function) >>>> I guess precedents have already been set for other SDT providers;? so, >>>> okay.? Just noting for my own sake. >>>> Meanwhile, the typed args[] have changed in number and type from >>>> Solaris> to DTv1 to this patch.? Does that merit discussion? >>> Hmm, that's not intentional (aside from the additional INBOUND/OUTBOUND >>> etc which we use to help inform translation). >> >> Worth mentioning somewhere? >> > > I guess though I hadn't really considered the fact that the argN values > become args[] values unless we intervene. > >>> Do you see other changes aside from them? Thanks! >> >> This is what I have for typed args[] for tcp probes. >> >> The typed probe arguments for probes >> ??????? accept-[refused|established] >> ??????? connect-[refused|established|request] >> ??????? receive >> are the same as for send. >> >> The typed probe arguments for state-change may be different. >> >> So, the typed probe arguments are (wide screen, fixed-width font): >> >> args[0]:????? args[1]:????? args[2]:????? args[3]: args[4]:????? >> args[5]:????? args[6]:????? args[7]: >> >> ??????????? send Solaris???????? pktinfo_t *?? csinfo_t * ipinfo_t *??? >> tcpsinfo_t *? tcpinfo_t * >> ??????????? send DTv1??????????? (unknown)???? (unknown) (unknown)???? >> (unknown)???? (unknown)???? (unknown) int?????????? int >> ??????????? send DTv2??????????? pktinfo_t *?? csinfo_t * ipinfo_t *??? >> tcpsinfo_t *? tcpinfo_t *?? 
int tcplsinfo_t * int >> >> ??????????? state-change Solaris void????????? csinfo_t * void????????? >> tcpsinfo_t *? void????????? tcplsinfo_t * >> ??????????? state-change DTv1??? (unknown)???? (unknown) (unknown)???? >> (unknown)???? (unknown)???? (unknown) int?????????? int >> ??????????? state-change DTv2??? void????? *?? csinfo_t * void???? *??? >> tcpsinfo_t *? void????? *?? void * tcplsinfo_t * int >> >> Here, "DTv1" refers to legacy DTrace on Linux.? I guess we can ignore >> that.? By "DTv2" I mean your patch.? For state-change, Solaris calls >> some things "void" (not "void *") and tcplsinfo_t* moves from args[5] to >> args[6]. > > That latter one definitely needs fixing; I think in the other cases it's > just that we need to fix up the provider description as the fields > aren't set for Linux either. > > _______________________________________________ > DTrace-devel mailing list > DTrace-devel at oss.oracle.com > https://oss.oracle.com/mailman/listinfo/dtrace-devel From kris.van.hees at oracle.com Thu Jul 3 18:26:20 2025 From: kris.van.hees at oracle.com (Kris Van Hees) Date: Thu, 3 Jul 2025 14:26:20 -0400 Subject: [DTrace-devel] [PATCH] test/utils: add more reliable "get remote address" approach In-Reply-To: <58cb8219-337e-42ed-ae17-1c4e77894a9c@oracle.com> References: <20250703113345.1273604-1-alan.maguire@oracle.com> <08bae006-f438-4134-a811-609b3fda69d1@oracle.com> <58cb8219-337e-42ed-ae17-1c4e77894a9c@oracle.com> Message-ID: On Thu, Jul 03, 2025 at 07:02:57PM +0100, Alan Maguire wrote: > On 03/07/2025 18:06, Eugene Loh wrote: > > On 7/3/25 12:59, Alan Maguire wrote: > > > >> On 03/07/2025 17:43, Eugene Loh wrote: > >> > >>> I tested and it looks good (modulo the OL8 UEK6 issue mentioned in the > >>> patch 3/4 feedback). > >>> > >> Sorry I couldn't find that issue; is this the 5.15 problem with the ip > >> send probes? > > > > ??? dtrace: failed to compile script /dev/stdin: > > ??? 
".../build/dlibs/5.2/tcp.d", line 177: failed to resolve type of > > inet_ntoa arg#1 (ipaddr_t *): > > ??? Unknown type name > > > > Ah, sorry yep I have a fix for that one in the next round. Basically we > need to add it to the core set of typedefs and add a type for a pointer > to ipaddr_t; we can't rely on the #pragma to include net.d unfortunately. Why can't we rely on the pragma? That is how e.g. the ip provider manages this I believe? I'd really rather not add a type like this to the core set of typedefs we can avoid it, because it really isn't a core type. > >>> On 7/3/25 07:33, Alan Maguire via DTrace-devel wrote: > >>> > >>>> The current approach of looking for remote addresses > >>>> is brittle and fails in many environments; it checks the > >>>> default route gateway and looks for open ports in the TCP > >>>> case. > >>>> > >>>> We can however achieve the same goal reliably by creating > >>>> a network namespace on the system and configuring either > >>>> IPv4 or IPv6 addresses on the namespaced and local veth > >>>> interfaces that support communication between namespaces. > >>>> If a tcp port is required start sshd to listen on that port. > >>> Maybe a comma after "required"? > >>> > >> yep, will fix. > >> > >>>> Teardown is managed in runtest.sh as signal handling for > >>>> timeouts within the test scripts is not working; a trap > >>>> function does not trigger for TERM. > >>> I'm having trouble parsing the text before the semicolon.? I think I > >>> understand it, but cannot seem to figure out the grammar. > >> I'll try and rephrase; basically I tried adding a > >> > >> trap cleanup TERM > >> > >> to the test script to catch a SIGTERM when the test timed out; > >> unfortunately this didn't trigger when tests timed out so we were left > >> with network namespaces hanging around. 
> >> > >> How about > >> > >> Teardown of network namespaces is managed in the toplevel runtest.sh to > >> ensure that network namespaces are removed after test completion for all > >> cases; success, failure and timeout. > > > > Great.? Or how about a colon instead of semicolon? > > Sure! From alan.maguire at oracle.com Thu Jul 3 18:41:41 2025 From: alan.maguire at oracle.com (Alan Maguire) Date: Thu, 3 Jul 2025 19:41:41 +0100 Subject: [DTrace-devel] [PATCH] test/utils: add more reliable "get remote address" approach In-Reply-To: References: <20250703113345.1273604-1-alan.maguire@oracle.com> <08bae006-f438-4134-a811-609b3fda69d1@oracle.com> <58cb8219-337e-42ed-ae17-1c4e77894a9c@oracle.com> Message-ID: On 03/07/2025 19:26, Kris Van Hees wrote: > On Thu, Jul 03, 2025 at 07:02:57PM +0100, Alan Maguire wrote: >> On 03/07/2025 18:06, Eugene Loh wrote: >>> On 7/3/25 12:59, Alan Maguire wrote: >>> >>>> On 03/07/2025 17:43, Eugene Loh wrote: >>>> >>>>> I tested and it looks good (modulo the OL8 UEK6 issue mentioned in the >>>>> patch 3/4 feedback). >>>>> >>>> Sorry I couldn't find that issue; is this the 5.15 problem with the ip >>>> send probes? >>> >>> ??? dtrace: failed to compile script /dev/stdin: >>> ??? ".../build/dlibs/5.2/tcp.d", line 177: failed to resolve type of >>> inet_ntoa arg#1 (ipaddr_t *): >>> ??? Unknown type name >>> >> >> Ah, sorry yep I have a fix for that one in the next round. Basically we >> need to add it to the core set of typedefs and add a type for a pointer >> to ipaddr_t; we can't rely on the #pragma to include net.d unfortunately. > > Why can't we rely on the pragma? That is how e.g. the ip provider manages > this I believe? > Unfortunately the #pragma include doesn't do enough; it just defines a type for ipaddr_t , not a type for a _pointer_ to an ipaddr_t , which is what we need as a parameter to inet_ntoa(). I tried adding the ipaddr_t typedef to net.d and doing the pointer lookup/addition but that doesn't work either. 
Seems we need the core typedef + pointer addition or we hit this failure. > I'd really rather not add a type like this to the core set of typedefs we > can avoid it, because it really isn't a core type. > I can't see another way round this currently unfortunately. Alan From kris.van.hees at oracle.com Thu Jul 3 19:03:22 2025 From: kris.van.hees at oracle.com (Kris Van Hees) Date: Thu, 3 Jul 2025 15:03:22 -0400 Subject: [DTrace-devel] [PATCH] test/utils: add more reliable "get remote address" approach In-Reply-To: References: <20250703113345.1273604-1-alan.maguire@oracle.com> <08bae006-f438-4134-a811-609b3fda69d1@oracle.com> <58cb8219-337e-42ed-ae17-1c4e77894a9c@oracle.com> Message-ID: On Thu, Jul 03, 2025 at 07:41:41PM +0100, Alan Maguire wrote: > On 03/07/2025 19:26, Kris Van Hees wrote: > > On Thu, Jul 03, 2025 at 07:02:57PM +0100, Alan Maguire wrote: > >> On 03/07/2025 18:06, Eugene Loh wrote: > >>> On 7/3/25 12:59, Alan Maguire wrote: > >>> > >>>> On 03/07/2025 17:43, Eugene Loh wrote: > >>>> > >>>>> I tested and it looks good (modulo the OL8 UEK6 issue mentioned in the > >>>>> patch 3/4 feedback). > >>>>> > >>>> Sorry I couldn't find that issue; is this the 5.15 problem with the ip > >>>> send probes? > >>> > >>> ??? dtrace: failed to compile script /dev/stdin: > >>> ??? ".../build/dlibs/5.2/tcp.d", line 177: failed to resolve type of > >>> inet_ntoa arg#1 (ipaddr_t *): > >>> ??? Unknown type name > >>> > >> > >> Ah, sorry yep I have a fix for that one in the next round. Basically we > >> need to add it to the core set of typedefs and add a type for a pointer > >> to ipaddr_t; we can't rely on the #pragma to include net.d unfortunately. > > > > Why can't we rely on the pragma? That is how e.g. the ip provider manages > > this I believe? > > > > Unfortunately the #pragma include doesn't do enough; it just defines a > type for ipaddr_t , not a type for a _pointer_ to an ipaddr_t , which is > what we need as a parameter to inet_ntoa(). 
I tried adding the ipaddr_t > typedef to net.d and doing the pointer lookup/addition but that doesn't > work either. Seems we need the core typedef + pointer addition or we hit > this failure. Actually, if you move 'typedef __be32 ipaddr_t;' from ip.d to net.d, you should be set. That is what I did in my priliminary tcp provider impl. I do believe that works. Either way, we use inet_ntoa() in the ip.d translators and that works with that typedef in the file, so this really ought to work. > > I'd really rather not add a type like this to the core set of typedefs we > > can avoid it, because it really isn't a core type. > > > > I can't see another way round this currently unfortunately. > > Alan From eugene.loh at oracle.com Thu Jul 3 19:55:08 2025 From: eugene.loh at oracle.com (Eugene Loh) Date: Thu, 3 Jul 2025 15:55:08 -0400 Subject: [DTrace-devel] [PATCH v2 3/4] dtrace: add tcp provider In-Reply-To: <20250610135813.15746-4-alan.maguire@oracle.com> References: <20250610135813.15746-1-alan.maguire@oracle.com> <20250610135813.15746-4-alan.maguire@oracle.com> Message-ID: <51b72ab6-2a75-a48e-4b15-fe51be4f6ba5@oracle.com> In general, there are lots of code paths here.? Ideally, they would all get tested, but I know that's hard. Also... 
On 6/10/25 09:58, Alan Maguire wrote: > diff --git a/libdtrace/dt_prov_tcp.c b/libdtrace/dt_prov_tcp.c > +static int trampoline(dt_pcb_t *pcb, uint_t exitlbl) > +{ > + dt_irlist_t *dlp = &pcb->pcb_ir; > + dt_probe_t *prp = pcb->pcb_probe; > + dt_probe_t *uprp = pcb->pcb_parent_probe; > + int direction, have_iphdr; > + int skarg = 0, skbarg = 1, tcparg = 0; > + int skarg_maybe_null; > + int skstate = 0; > + > + /* > + * We construct the tcp::: probe arguments as > + * follows: > + * args[0] = skb > + * args[1] = sk > + * args[2] = ip_hdr(skb) [if available] > + * args[3] = sk [struct tcp_sock *] > + * args[4] = tcp_hdr(skb) > + * args[5] = sk->sk_state > + * args[6] = sk->sk_state > + * args[7] = NET_PROBE_INBOUND (0x1) | NET_PROBE_OUTBOUND (0x0) > + */ > + > + if (strcmp(prp->desc->prb, "state-change") == 0) { > + int newstatearg; > + int skip_state = 0; > + int check_proto = IPPROTO_TCP; > + > + /* For pre-6.14 kernels, inet_sock_state_change() to > + * TCP_SYN_RCV is broken in that the cloned socket has > + * not yet copied info of interest like addresses, ports. > + * This is fixed in 6.14 via > + * > + * commit a3a128f611a965fddf8a02dd45716f96e0738e00 > + * Author: Eric Dumazet > + * Date: Wed Feb 12 13:13:28 2025 +0000 > + * > + * inet: consolidate inet_csk_clone_lock() > + * > + * To work around this we trace inet_csk_clone_lock and > + * use the reqsk (arg1) as the means to populate the > + * struct tcpinfo. We need then to explicitly set the > + * state to TCP_SYN_RCV and also skip the case where > + * inet_sock_set_state() specifies TCP_SYN_RCV otherwise > + * we will get a probe double-firing. 
> + */ > + if (strcmp(uprp->desc->fun, "inet_csk_clone_lock") == 0) { > + skarg = 1; > + newstatearg = 2; > + check_proto = 0; > + emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(2), > + BPF_TCP_SYN_RECV)); > + } else if (strcmp(uprp->desc->fun, "tcp_time_wait") == 0) { > + skarg = 0; > + newstatearg = 1; > + } else { > + skarg = 0; > + newstatearg = 2; > + skip_state = BPF_TCP_SYN_RECV; > + } > + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(skarg))); > + emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_6, 0, exitlbl)); > + /* check it is a TCP socket */ > + if (check_proto) { > + dt_cg_get_member(pcb, "struct sock", BPF_REG_6, > + "sk_protocol"); > + emit(dlp, BPF_BRANCH_IMM(BPF_JNE, BPF_REG_0, > + IPPROTO_TCP, exitlbl)); > + } > + /* save sk */ > + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(skarg))); BTW, is it actually necessary to reload %r6 here? > + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(3), BPF_REG_6)); > + > + /* save new state */ > + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(newstatearg))); > + if (skip_state) { > + emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_6, skip_state, > + exitlbl)); > + } > + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(6), BPF_REG_6)); > + > + /* save sk */ > + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(3))); > + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_6)); > + > + /* save empty args */ > + emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(0), 0)); > + emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(2), 0)); > + emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(4), 0)); > + emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(5), 0)); > + > + /* NET_PROBE_STATE */ > + emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(7), > + NET_PROBE_STATE)); > + return 0; > + } > + > + if (strcmp(prp->desc->prb, "accept-established") == 0) { > + direction = NET_PROBE_OUTBOUND; > + have_iphdr = 1; > + /* skb in arg2 not arg1 */ > + skbarg = 2; > + 
skarg_maybe_null = 0; > + /* ensure arg1 is BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB */ > + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_7, DMST_ARG(1))); > + emit(dlp, BPF_BRANCH_IMM(BPF_JNE, BPF_REG_6, > + BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, > + exitlbl)); > + } else if (strcmp(prp->desc->prb, "receive") == 0 || > + strcmp(prp->desc->prb, "accept-refused") == 0) { > + direction = NET_PROBE_INBOUND; > + have_iphdr = 1; > + if (strcmp(uprp->desc->fun, "tcp_v4_send_reset") == 0 || > + strcmp(uprp->desc->fun, "tcp_v6_send_reset") == 0) > + skarg_maybe_null = 1; > + else > + skarg_maybe_null = 0; > + } else if (strcmp(prp->desc->prb, "connect-established") == 0) { > + direction = NET_PROBE_INBOUND; > + have_iphdr = 1; > + skarg_maybe_null = 0; > + } else if (strcmp(prp->desc->prb, "connect-refused") == 0) { > + direction = NET_PROBE_INBOUND; > + have_iphdr = 1; > + skarg_maybe_null = 0; > + skstate = BPF_TCP_SYN_SENT; > + } else { > + direction = NET_PROBE_OUTBOUND; > + if (strcmp(uprp->desc->fun, "ip_send_unicast_reply") == 0) { > + /* NULL sk in arg1 not arg2 (we dont want ctl_sk) */ > + skarg = 1; > + /* skb in arg2 not arg1 */ > + skbarg = 2; > + have_iphdr = 1; > + /* tcp hdr in ip_reply_arg * */ > + tcparg = 6; > + skarg_maybe_null = 1; > + } else if (strcmp(uprp->desc->fun, "ip_build_and_send_pkt") == 0) { > + skarg = 1; > + skbarg = 0; > + have_iphdr = 0; > + skarg_maybe_null = 1; > + } else if (strcmp(prp->desc->prb, "connect-request") == 0) { > + skstate = BPF_TCP_SYN_SENT; > + have_iphdr = 0; > + skarg_maybe_null = 0; > + } else { > + have_iphdr = 0; > + skarg_maybe_null = 0; > + } > + } > + From alan.maguire at oracle.com Thu Jul 3 20:23:46 2025 From: alan.maguire at oracle.com (Alan Maguire) Date: Thu, 3 Jul 2025 21:23:46 +0100 Subject: [DTrace-devel] [PATCH] test/utils: add more reliable "get remote address" approach In-Reply-To: References: <20250703113345.1273604-1-alan.maguire@oracle.com> <08bae006-f438-4134-a811-609b3fda69d1@oracle.com> 
<58cb8219-337e-42ed-ae17-1c4e77894a9c@oracle.com> Message-ID: <077a08c1-e999-4782-9269-62a269d76f65@oracle.com> On 03/07/2025 20:03, Kris Van Hees wrote: > On Thu, Jul 03, 2025 at 07:41:41PM +0100, Alan Maguire wrote: >> On 03/07/2025 19:26, Kris Van Hees wrote: >>> On Thu, Jul 03, 2025 at 07:02:57PM +0100, Alan Maguire wrote: >>>> On 03/07/2025 18:06, Eugene Loh wrote: >>>>> On 7/3/25 12:59, Alan Maguire wrote: >>>>> >>>>>> On 03/07/2025 17:43, Eugene Loh wrote: >>>>>> >>>>>>> I tested and it looks good (modulo the OL8 UEK6 issue mentioned in the >>>>>>> patch 3/4 feedback). >>>>>>> >>>>>> Sorry I couldn't find that issue; is this the 5.15 problem with the ip >>>>>> send probes? >>>>> >>>>> ??? dtrace: failed to compile script /dev/stdin: >>>>> ??? ".../build/dlibs/5.2/tcp.d", line 177: failed to resolve type of >>>>> inet_ntoa arg#1 (ipaddr_t *): >>>>> ??? Unknown type name >>>>> >>>> >>>> Ah, sorry yep I have a fix for that one in the next round. Basically we >>>> need to add it to the core set of typedefs and add a type for a pointer >>>> to ipaddr_t; we can't rely on the #pragma to include net.d unfortunately. >>> >>> Why can't we rely on the pragma? That is how e.g. the ip provider manages >>> this I believe? >>> >> >> Unfortunately the #pragma include doesn't do enough; it just defines a >> type for ipaddr_t , not a type for a _pointer_ to an ipaddr_t , which is >> what we need as a parameter to inet_ntoa(). I tried adding the ipaddr_t >> typedef to net.d and doing the pointer lookup/addition but that doesn't >> work either. Seems we need the core typedef + pointer addition or we hit >> this failure. > > Actually, if you move 'typedef __be32 ipaddr_t;' from ip.d to net.d, > you should be set. That is what I did in my priliminary tcp provider impl. > I do believe that works. Either way, we use inet_ntoa() in the ip.d > translators and that works with that typedef in the file, so this really ought > to work. 
> Yep, I tried that in the v2 patch series; Eugene hit the undefined error in one test and I now hit it consistently for all tcp/ip tests unfortunately with "typedef __be32 ipaddr_t;" in net.d. My assumption (probably wrong) is that the include of the library does happen but nothing triggers the pointer type generation for "ipaddr *" in the CTF dict. If there was a way to force that type generation at the .d file level that would be great, not sure I see a way currently tho. Alan >>> I'd really rather not add a type like this to the core set of typedefs we >>> can avoid it, because it really isn't a core type. >>> >> >> I can't see another way round this currently unfortunately. >> >> Alan From kris.van.hees at oracle.com Thu Jul 3 20:59:44 2025 From: kris.van.hees at oracle.com (Kris Van Hees) Date: Thu, 3 Jul 2025 16:59:44 -0400 Subject: [DTrace-devel] [PATCH] test/utils: add more reliable "get remote address" approach In-Reply-To: <077a08c1-e999-4782-9269-62a269d76f65@oracle.com> References: <20250703113345.1273604-1-alan.maguire@oracle.com> <08bae006-f438-4134-a811-609b3fda69d1@oracle.com> <58cb8219-337e-42ed-ae17-1c4e77894a9c@oracle.com> <077a08c1-e999-4782-9269-62a269d76f65@oracle.com> Message-ID: On Thu, Jul 03, 2025 at 09:23:46PM +0100, Alan Maguire wrote: > On 03/07/2025 20:03, Kris Van Hees wrote: > > On Thu, Jul 03, 2025 at 07:41:41PM +0100, Alan Maguire wrote: > >> On 03/07/2025 19:26, Kris Van Hees wrote: > >>> On Thu, Jul 03, 2025 at 07:02:57PM +0100, Alan Maguire wrote: > >>>> On 03/07/2025 18:06, Eugene Loh wrote: > >>>>> On 7/3/25 12:59, Alan Maguire wrote: > >>>>> > >>>>>> On 03/07/2025 17:43, Eugene Loh wrote: > >>>>>> > >>>>>>> I tested and it looks good (modulo the OL8 UEK6 issue mentioned in the > >>>>>>> patch 3/4 feedback). > >>>>>>> > >>>>>> Sorry I couldn't find that issue; is this the 5.15 problem with the ip > >>>>>> send probes? > >>>>> > >>>>> ??? dtrace: failed to compile script /dev/stdin: > >>>>> ??? 
".../build/dlibs/5.2/tcp.d", line 177: failed to resolve type of > >>>>> inet_ntoa arg#1 (ipaddr_t *): > >>>>> ??? Unknown type name > >>>>> > >>>> > >>>> Ah, sorry yep I have a fix for that one in the next round. Basically we > >>>> need to add it to the core set of typedefs and add a type for a pointer > >>>> to ipaddr_t; we can't rely on the #pragma to include net.d unfortunately. > >>> > >>> Why can't we rely on the pragma? That is how e.g. the ip provider manages > >>> this I believe? > >>> > >> > >> Unfortunately the #pragma include doesn't do enough; it just defines a > >> type for ipaddr_t , not a type for a _pointer_ to an ipaddr_t , which is > >> what we need as a parameter to inet_ntoa(). I tried adding the ipaddr_t > >> typedef to net.d and doing the pointer lookup/addition but that doesn't > >> work either. Seems we need the core typedef + pointer addition or we hit > >> this failure. > > > > Actually, if you move 'typedef __be32 ipaddr_t;' from ip.d to net.d, > > you should be set. That is what I did in my priliminary tcp provider impl. > > I do believe that works. Either way, we use inet_ntoa() in the ip.d > > translators and that works with that typedef in the file, so this really ought > > to work. > Yep, I tried that in the v2 patch series; Eugene hit the undefined error > in one test and I now hit it consistently for all tcp/ip tests > unfortunately with "typedef __be32 ipaddr_t;" in net.d. > > My assumption (probably wrong) is that the include of the library does > happen but nothing triggers the pointer type generation for "ipaddr *" > in the CTF dict. If there was a way to force that type generation at the > .d file level that would be great, not sure I see a way currently tho. Well, like I said, it does work for ip.d so I don't see why this would be any different. I'll have a look and see if I can figure something out. 
Kris From kris.van.hees at oracle.com Thu Jul 3 22:36:27 2025 From: kris.van.hees at oracle.com (Kris Van Hees) Date: Thu, 3 Jul 2025 18:36:27 -0400 Subject: [DTrace-devel] [PATCH] test/utils: add more reliable "get remote address" approach In-Reply-To: References: <20250703113345.1273604-1-alan.maguire@oracle.com> <08bae006-f438-4134-a811-609b3fda69d1@oracle.com> <58cb8219-337e-42ed-ae17-1c4e77894a9c@oracle.com> <077a08c1-e999-4782-9269-62a269d76f65@oracle.com> Message-ID: On Thu, Jul 03, 2025 at 04:59:44PM -0400, Kris Van Hees wrote: > On Thu, Jul 03, 2025 at 09:23:46PM +0100, Alan Maguire wrote: > > On 03/07/2025 20:03, Kris Van Hees wrote: > > > On Thu, Jul 03, 2025 at 07:41:41PM +0100, Alan Maguire wrote: > > >> On 03/07/2025 19:26, Kris Van Hees wrote: > > >>> On Thu, Jul 03, 2025 at 07:02:57PM +0100, Alan Maguire wrote: > > >>>> On 03/07/2025 18:06, Eugene Loh wrote: > > >>>>> On 7/3/25 12:59, Alan Maguire wrote: > > >>>>> > > >>>>>> On 03/07/2025 17:43, Eugene Loh wrote: > > >>>>>> > > >>>>>>> I tested and it looks good (modulo the OL8 UEK6 issue mentioned in the > > >>>>>>> patch 3/4 feedback). > > >>>>>>> > > >>>>>> Sorry I couldn't find that issue; is this the 5.15 problem with the ip > > >>>>>> send probes? > > >>>>> > > >>>>> ??? dtrace: failed to compile script /dev/stdin: > > >>>>> ??? ".../build/dlibs/5.2/tcp.d", line 177: failed to resolve type of > > >>>>> inet_ntoa arg#1 (ipaddr_t *): > > >>>>> ??? Unknown type name > > >>>>> > > >>>> > > >>>> Ah, sorry yep I have a fix for that one in the next round. Basically we > > >>>> need to add it to the core set of typedefs and add a type for a pointer > > >>>> to ipaddr_t; we can't rely on the #pragma to include net.d unfortunately. > > >>> > > >>> Why can't we rely on the pragma? That is how e.g. the ip provider manages > > >>> this I believe? 
> > >>> > > >> > > >> Unfortunately the #pragma include doesn't do enough; it just defines a > > >> type for ipaddr_t , not a type for a _pointer_ to an ipaddr_t , which is > > >> what we need as a parameter to inet_ntoa(). I tried adding the ipaddr_t > > >> typedef to net.d and doing the pointer lookup/addition but that doesn't > > >> work either. Seems we need the core typedef + pointer addition or we hit > > >> this failure. > > > > > > Actually, if you move 'typedef __be32 ipaddr_t;' from ip.d to net.d, > > > you should be set. That is what I did in my priliminary tcp provider impl. > > > I do believe that works. Either way, we use inet_ntoa() in the ip.d > > > translators and that works with that typedef in the file, so this really ought > > > to work. > > > Yep, I tried that in the v2 patch series; Eugene hit the undefined error > > in one test and I now hit it consistently for all tcp/ip tests > > unfortunately with "typedef __be32 ipaddr_t;" in net.d. > > > > My assumption (probably wrong) is that the include of the library does > > happen but nothing triggers the pointer type generation for "ipaddr *" > > in the CTF dict. If there was a way to force that type generation at the > > .d file level that would be great, not sure I see a way currently tho. > > Well, like I said, it does work for ip.d so I don't see why this would be > any different. I'll have a look and see if I can figure something out. Looking into this more, I think the problem is simply that you did not sync all the dlibs for the various kernel versions with the updated ip.d, net.d, and tcp.d files. So, if the kernel on the OL8 instance you test on does not have your change, it will fail. Also, I do not understand why you removed the pragma #pragma D depends_on provider tcp from tcp.d. 
From alan.maguire at oracle.com Fri Jul 4 17:23:35 2025 From: alan.maguire at oracle.com (Alan Maguire) Date: Fri, 4 Jul 2025 18:23:35 +0100 Subject: [DTrace-devel] [PATCH v2] test/utils: add more reliable "get remote address" approach Message-ID: <20250704172335.98854-1-alan.maguire@oracle.com> The current approach of looking for remote addresses is brittle and fails in many environments; it checks the default route gateway and looks for open ports in the TCP case. We can however achieve the same goal reliably by creating a network namespace on the system and configuring either IPv4 or IPv6 addresses on the namespaced and local veth interfaces that support communication between namespaces. If a tcp port is required, start sshd to listen on that port. Teardown of network namespaces is managed in the toplevel runtest.sh to ensure that network namespaces are removed after test completion for all cases: success, failure and timeout. Move the get_remote.sh script to test/utils also as it seems a more natural location. One issue - this cannot be run on a local system with a VPN running as the VPN connection is pretty aggressive in disconnecting/reconnecting when spotting a link-up event associated with the global netns side of the veth. However in my experience the remote IP tests do not work reliably in that environment anyway. 
Reviewed-by: Eugene Loh Signed-off-by: Alan Maguire --- runtest.sh | 2 + test/unittest/ip/get.ipv4remote.pl | 87 --------------------- test/unittest/ip/get.ipv6remote.pl | 70 ----------------- test/unittest/ip/tst.ipv4remoteicmp.sh | 12 +-- test/unittest/ip/tst.ipv4remotetcp.sh | 27 +++---- test/unittest/ip/tst.ipv4remoteudp.sh | 10 +-- test/unittest/ip/tst.ipv6remoteicmp.sh | 18 ++--- test/unittest/tcp/tst.ipv4remotetcp.sh | 26 ++---- test/unittest/tcp/tst.ipv4remotetcpstate.sh | 31 +++----- test/unittest/udp/tst.ipv4remoteudp.sh | 6 +- test/utils/get_remote.sh | 71 +++++++++++++++++ 11 files changed, 123 insertions(+), 237 deletions(-) delete mode 100755 test/unittest/ip/get.ipv4remote.pl delete mode 100755 test/unittest/ip/get.ipv6remote.pl create mode 100755 test/utils/get_remote.sh diff --git a/runtest.sh b/runtest.sh index 156e7dec..9f06a499 100755 --- a/runtest.sh +++ b/runtest.sh @@ -1473,6 +1473,8 @@ for dt in $dtrace; do log "\n" + test/utils/get_remote.sh cleanup + if [[ -n $regression ]]; then # If regtesting, we run a second time, with intermediate results # displayed, and output redirected to a per-test, per-dtrace diff --git a/test/unittest/ip/get.ipv4remote.pl b/test/unittest/ip/get.ipv4remote.pl deleted file mode 100755 index 3cc47d01..00000000 --- a/test/unittest/ip/get.ipv4remote.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/perl -w -# -# Oracle Linux DTrace. -# Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved. -# Licensed under the Universal Permissive License v 1.0 as shown at -# http://oss.oracle.com/licenses/upl. -# - -# -# get.ipv4remote.pl [tcpport] -# -# Find an IPv4 reachable remote host using both ip(8) and ping(8). -# If a tcpport is specified, return a host that is also listening on this -# TCP port. Print the local address and the remote address, or an -# error message if no suitable remote host was found. Exit status is 0 if -# a host was found. (Note: the only host we check is the gateway. 
Nobody -# responds to broadcast pings these days, and portscanning the local net is -# unfriendly.) -# - -use strict; -use IO::Socket; - -my $TIMEOUT = 3; -my $tcpport = @ARGV == 1 ? $ARGV[0] : 0; - -# -# Determine gateway IP address -# - -my $local = ""; -my $remote = ""; -my $responsive = ""; -my $up; -open IP, '/sbin/ip -o -4 route show |' or die "Couldn't run ip route show: $!\n"; -while () { - next unless /^default /; - - if (/via (\S+)/) { - $remote = $1; - } -} -close IP; -die "Could not determine gateway router IP address" if $remote eq ""; - -open IP, "/sbin/ip -o route get to $remote |" or die "Couldn't run ip route get: $!\n"; -while () { - next unless /^$remote /; - if (/src (\S+)/) { - $local = $1; - } -} -close IP; -die "Could not determine local IP address" if $local eq ""; - -# -# See if the rmote host responds to an icmp echo. -# -open PING, "/bin/ping -n -s 56 -w $TIMEOUT $remote |" or - die "Couldn't run ping: $!\n"; -while () { - if (/bytes from (.*): /) { - my $addr = $1; - - if ($tcpport != 0) { - # - # Test TCP - # - my $socket = IO::Socket::INET->new( - Proto => "tcp", - PeerAddr => $addr, - PeerPort => $tcpport, - Timeout => $TIMEOUT, - ); - next unless $socket; - close $socket; - } - - $responsive = $addr; - last; - } -} -close PING; -die "Can't find a remote host for testing: No suitable response from " . - "$remote\n" if $responsive eq ""; - -print "$local $responsive\n"; diff --git a/test/unittest/ip/get.ipv6remote.pl b/test/unittest/ip/get.ipv6remote.pl deleted file mode 100755 index b2136c5b..00000000 --- a/test/unittest/ip/get.ipv6remote.pl +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/perl -w -# -# Oracle Linux DTrace. -# Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved. -# Licensed under the Universal Permissive License v 1.0 as shown at -# http://oss.oracle.com/licenses/upl. -# - -# -# get.ipv6remote.pl -# -# Find an IPv6 reachable remote host using both ip(8) and ping(8). 
-# Print the local address and the remote address, or print nothing if either -# no IPv6 interfaces or remote hosts were found. (Remote IPv6 testing is -# considered optional, and so not finding another IPv6 host is not an error -# state we need to log.) Exit status is 0 if a host was found. -# - -use strict; -use IO::Socket; - -my $TIMEOUT = 3; # connection timeout - -# possible paths for ping6 -$ENV{'PATH'} = "/bin:/usr/bin:/sbin:/usr/sbin:$ENV{'PATH'}"; - -# -# Determine local IP address -# -my $local = ""; -my $remote = ""; -my $responsive = ""; -my $up; -open IP, '/sbin/ip -o -6 route show |' or die "Couldn't run ip route show: $!\n"; -while () { - next unless /^default /; - - if (/via (\S+)/) { - $remote = $1; - } -} -close IP; -die "Could not determine gateway router IPv6 address" if $remote eq ""; - -open IP, "/sbin/ip -o route get to $remote |" or die "Couldn't run ip route get: $!\n"; -while () { - next unless /^$remote /; - if (/src (\S+)/) { - $local = $1; - } -} -close IP; -die "Could not determine local IPv6 address" if $local eq ""; - -# -# Find the first remote host that responds to an icmp echo, -# which isn't a local address. -# -open PING, "ping6 -n -s 56 -w $TIMEOUT $remote 2>/dev/null |" or - die "Couldn't run ping: $!\n"; -while () { - if (/bytes from (.*): /) { - $responsive = $1; - last; - } -} -close PING; -exit 2 if $responsive eq ""; - -print "$local $responsive\n"; diff --git a/test/unittest/ip/tst.ipv4remoteicmp.sh b/test/unittest/ip/tst.ipv4remoteicmp.sh index c165cbdc..033dae16 100755 --- a/test/unittest/ip/tst.ipv4remoteicmp.sh +++ b/test/unittest/ip/tst.ipv4remoteicmp.sh @@ -1,7 +1,7 @@ #!/bin/bash # # Oracle Linux DTrace. -# Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at # http://oss.oracle.com/licenses/upl. # @@ -13,9 +13,7 @@ # # 1. 
A change to the ip stack breaking expected probe behavior, # which is the reason we are testing. -# 2. No physical network interface is plumbed and up. -# 3. The subnet gateway is not reachable. -# 4. An unrelated ICMP between these hosts was traced by accident. +# 2. An unrelated ICMP between these hosts was traced by accident. # if (( $# != 1 )); then @@ -25,18 +23,20 @@ fi dtrace=$1 testdir="$(dirname $_test)" -getaddr=$testdir/get.ipv4remote.pl +getaddr=$testdir/../../utils/get_remote.sh if [[ ! -x $getaddr ]]; then echo "could not find or execute sub program: $getaddr" >&2 exit 3 fi -set -- $($getaddr) + +set -- $($getaddr ipv4) source="$1" dest="$2" if [[ $? -ne 0 ]] || [[ -z $dest ]]; then exit 67 fi + $dtrace $dt_flags -c "$testdir/perlping.pl icmp $dest" -qs /dev/stdin <&2 exit 3 fi -for port in $tcpports ; do - res=`$getaddr $port 2>/dev/null` - if (( $? == 0 )); then - read s d <<< $res - tcpport=$port - source=$s - dest=$d - break - fi -done -if [ -z $tcpport ]; then +set -- $($getaddr ipv4 $tcpport) +source="$1" +dest="$2" + +if [[ $? -ne 0 ]] || [[ -z $dest ]]; then exit 67 fi diff --git a/test/unittest/ip/tst.ipv4remoteudp.sh b/test/unittest/ip/tst.ipv4remoteudp.sh index 3d25e1f5..08e32740 100755 --- a/test/unittest/ip/tst.ipv4remoteudp.sh +++ b/test/unittest/ip/tst.ipv4remoteudp.sh @@ -1,7 +1,7 @@ #!/bin/bash # # Oracle Linux DTrace. -# Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at # http://oss.oracle.com/licenses/upl. # @@ -13,9 +13,7 @@ # # 1. A change to the ip stack breaking expected probe behavior, # which is the reason we are testing. -# 2. No physical network interface is plumbed and up. -# 3. The gateway is not reachable and listening on rpcbind. -# 4. An unlikely race causes the unlocked global send/receive +# 2. 
An unlikely race causes the unlocked global send/receive # variables to be corrupted. # # This test sends a UDP message using ping and checks that at least the @@ -31,13 +29,13 @@ fi dtrace=$1 testdir="$(dirname $_test)" -getaddr=$testdir/get.ipv4remote.pl +getaddr=$testdir/../../utils/get_remote.sh if [[ ! -x $getaddr ]]; then echo "could not find or execute sub program: $getaddr" >&2 exit 3 fi -set -- $($getaddr) +set -- $($getaddr ipv4) source="$1" dest="$2" if [[ $? -ne 0 ]] || [[ -z $dest ]]; then diff --git a/test/unittest/ip/tst.ipv6remoteicmp.sh b/test/unittest/ip/tst.ipv6remoteicmp.sh index 90fd48b4..d0eef77f 100755 --- a/test/unittest/ip/tst.ipv6remoteicmp.sh +++ b/test/unittest/ip/tst.ipv6remoteicmp.sh @@ -1,7 +1,7 @@ #!/bin/bash # # Oracle Linux DTrace. -# Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at # http://oss.oracle.com/licenses/upl. # @@ -19,7 +19,7 @@ # # @@tags: unstable -# possible paths for ping6 +# possible paths for ping export PATH=/bin:/usr/bin:/sbin:/usr/sbin:$PATH if (( $# != 1 )); then @@ -29,24 +29,24 @@ fi dtrace=$1 testdir="$(dirname $_test)" -getaddr=$testdir/get.ipv6remote.pl +getaddr=$testdir/../../utils/get_remote.sh if [[ ! -x $getaddr ]]; then echo "could not find or execute sub program: $getaddr" >&2 exit 3 fi -set -- $($getaddr) + +set -- $($getaddr ipv6) source="$1" dest="$2" + if [[ $? -ne 0 ]] || [[ -z $dest ]]; then echo -n "Could not find a local IPv6 interface and a remote IPv6 " >&2 echo "host. Aborting test." 
>&2 exit 67 fi -nolinkdest="$(printf "%s" "$dest" | sed 's,%.*,,')" - -$dtrace $dt_flags -c "ping6 -c 6 $dest" -qs /dev/stdin <ip_saddr == "$source" && args[2]->ip_daddr == "$nolinkdest" && +/args[2]->ip_saddr == "$source" && args[2]->ip_daddr == "$dest" && args[5]->ipv6_nexthdr == IPPROTO_ICMPV6 && args[2]->ip_plength > 32/ { printf("1 ip:::send ("); @@ -64,7 +64,7 @@ ip:::send } ip:::receive -/args[2]->ip_saddr == "$nolinkdest" && args[2]->ip_daddr == "$source" && +/args[2]->ip_saddr == "$dest" && args[2]->ip_daddr == "$source" && args[5]->ipv6_nexthdr == IPPROTO_ICMPV6 && args[2]->ip_plength > 32/ { printf("2 ip:::receive ("); diff --git a/test/unittest/tcp/tst.ipv4remotetcp.sh b/test/unittest/tcp/tst.ipv4remotetcp.sh index 333760a1..78ae369d 100755 --- a/test/unittest/tcp/tst.ipv4remotetcp.sh +++ b/test/unittest/tcp/tst.ipv4remotetcp.sh @@ -2,7 +2,7 @@ # # Oracle Linux DTrace. -# Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2017, 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at # http://oss.oracle.com/licenses/upl. @@ -13,9 +13,7 @@ # # 1. A change to the tcp stack breaking expected probe behavior, # which is the reason we are testing. -# 2. No physical network interface is plumbed and up. -# 3. No other hosts on this subnet are reachable and listening on ssh. -# 4. An unlikely race causes the unlocked global send/receive +# 2. An unlikely race causes the unlocked global send/receive # variables to be corrupted. # # This test performs a TCP connection and checks that at least the @@ -32,9 +30,8 @@ fi dtrace=$1 testdir="$(dirname $_test)" -getaddr=$testdir/../ip/get.ipv4remote.pl -tcpports="22 80" -tcpport="" +getaddr=$testdir/../../utils/get_remote.sh +tcpport="22" dest="" if [[ ! -x $getaddr ]]; then @@ -42,18 +39,11 @@ if [[ ! -x $getaddr ]]; then exit 3 fi -for port in $tcpports ; do - res=`$getaddr $port 2>/dev/null` - if (( $? 
== 0 )); then - read s d <<< $res - tcpport=$port - source=$s - dest=$d - break - fi -done +set -- $($getaddr ipv4 $tcpport) +source="$1" +dest="$2" -if [[ -z $tcpport ]]; then +if [[ $? -ne 0 ]] || [[ -z $dest ]]; then exit 67 fi diff --git a/test/unittest/tcp/tst.ipv4remotetcpstate.sh b/test/unittest/tcp/tst.ipv4remotetcpstate.sh index 74fb4ce3..ea0eeec0 100755 --- a/test/unittest/tcp/tst.ipv4remotetcpstate.sh +++ b/test/unittest/tcp/tst.ipv4remotetcpstate.sh @@ -2,7 +2,7 @@ # # Oracle Linux DTrace. -# Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2017, 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at # http://oss.oracle.com/licenses/upl. @@ -17,8 +17,7 @@ # # 1. A change to the ip stack breaking expected probe behavior, # which is the reason we are testing. -# 2. The remote ssh service is not online. -# 3. An unlikely race causes the unlocked global send/receive +# 2. An unlikely race causes the unlocked global send/receive # variables to be corrupted. # # This test performs a TCP connection to the ssh service (port 22) and @@ -40,29 +39,21 @@ fi dtrace=$1 testdir="$(dirname $_test)" -getaddr=$testdir/../ip/get.ipv4remote.pl +getaddr=$testdir/../../utils/get_remote.sh client=$testdir/../ip/client.ip.pl -tcpports="22 80" -tcpport="" -dest="" +tcpport="22" if [[ ! -x $getaddr ]]; then echo "could not find or execute sub program: $getaddr" >&2 exit 3 fi -for port in $tcpports ; do - res=`$getaddr $port 2>/dev/null` - if (( $? == 0 )); then - read s d <<< $res - tcpport=$port - source=$s - dest=$d - break - fi -done - -if [ -z $tcpport ]; then - exit 67 + +set -- $($getaddr ipv4 $tcpport) +source="$1" +dest="$2" + +if [[ $? 
-ne 0 ]] || [[ -z $dest ]]; then + exit 67 fi diff --git a/test/unittest/udp/tst.ipv4remoteudp.sh b/test/unittest/udp/tst.ipv4remoteudp.sh index 1c5f2a9a..89309cfc 100755 --- a/test/unittest/udp/tst.ipv4remoteudp.sh +++ b/test/unittest/udp/tst.ipv4remoteudp.sh @@ -2,7 +2,7 @@ # # Oracle Linux DTrace. -# Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2017, 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at # http://oss.oracle.com/licenses/upl. @@ -34,14 +34,14 @@ fi dtrace=$1 testdir="$(dirname $_test)" -getaddr=$testdir/../ip/get.ipv4remote.pl +getaddr=$testdir/../../utils/get_remote.sh port=31337 if [[ ! -x $getaddr ]]; then echo "could not find or execute sub program: $getaddr" >&2 exit 3 fi -read source dest <<<`$getaddr 2>/dev/null` +read source dest <<<`$getaddr ipv4 2>/dev/null` if (( $? != 0 )) || [[ -z $dest ]]; then exit 67 fi diff --git a/test/utils/get_remote.sh b/test/utils/get_remote.sh new file mode 100755 index 00000000..d8a4d450 --- /dev/null +++ b/test/utils/get_remote.sh @@ -0,0 +1,71 @@ +#!/bin/bash +# +# Oracle Linux DTrace. +# Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at +# http://oss.oracle.com/licenses/upl. +# + +# +# get_remote.sh ipv4|ipv6|cleanup [tcpport] +# +# Create (or cleanup) a network namespace with either IPv4 or IPv6 +# address associated. +# +# Print the local address and the remote address, or an +# error message if a failure occurred during setup. +# +# If tcpport is specified, start sshd on that port. +# +# Exit status is 0 if all succeeded. 
+# + +cmd=$1 +tcpport=$2 + +prefix=$(basename $tmpdir) +netns=${prefix}ns +veth1=${prefix}v1 +veth2=${prefix}v2 +mtu=1500 + +set -e + +case $cmd in +cleanup) pids=$(ip netns pids ${netns} 2>/dev/null) + if [[ -n "$pids" ]]; then + kill -TERM $pids + fi + ip netns del ${netns} 2>/dev/null + exit 0 + ;; + ipv4) veth1_addr=192.168.168.1 + veth2_addr=192.168.168.2 + prefixlen=24 + family= + ;; + ipv6) veth1_addr=fd::1 + veth2_addr=fd::2 + prefixlen=64 + family=-6 + ;; + *) echo "Unexpected cmd $cmd" >2 + exit 1 + ;; +esac + +ip netns add $netns +ip link add dev $veth1 mtu $mtu netns $netns type veth \ + peer name $veth2 mtu $mtu +ip netns exec $netns ip $family addr add ${veth1_addr}/$prefixlen dev $veth1 +ip netns exec $netns ip link set $veth1 up +ip addr add ${veth2_addr}/${prefixlen} dev $veth2 +ip link set $veth2 up + +if [[ -n "$tcpport" ]]; then + sshd=$(which sshd) + ip netns exec $netns $sshd -p $tcpport & +fi + +echo "$veth2_addr $veth1_addr" +exit 0 -- 2.39.3