[DTrace-devel] [PATCH] Copy fprobes entry args with BPF helper function

eugene.loh at oracle.com eugene.loh at oracle.com
Wed Mar 19 06:32:28 UTC 2025


From: Eugene Loh <eugene.loh at oracle.com>

With commit a6b626a89 ("Fix fprobe/kprobe selection"), fprobes were
effectively turned on.  Unfortunately, with this fix, some tests like
test/unittest/stack/tst.stack_fbt.sh encountered problems on UEK7
since the BPF verifier would complain about the prototypes of some of
the probe arguments.  E.g., when loading arg3 in fprobe_trampoline()
from fbt::vfs_write:entry from %r8+24, the BPF verifier complains

        func 'vfs_write' arg3 type INT is not a struct
        invalid bpf_context access off=24 size=8

We can bypass this problem by using a BPF helper function to copy the
arguments onto the BPF stack and then load the arguments into mstate
from there.

There is also a BPF get_func_arg() helper function, but it is not
introduced until 5.17 -- that is, it appears after UEK7.  See Linux
commit f92c1e1 ("bpf: Add get_func_[arg|ret|arg_cnt] helpers").

While the already mentioned test signals the problem and the fix, we
also add an additional test that actually checks the correctness of
the arguments.

Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
 libdtrace/dt_prov_fbt.c                     |  14 ++-
 test/unittest/fbtprovider/tst.entryargs2.r  |  29 ++++++
 test/unittest/fbtprovider/tst.entryargs2.sh | 105 ++++++++++++++++++++
 3 files changed, 147 insertions(+), 1 deletion(-)
 create mode 100644 test/unittest/fbtprovider/tst.entryargs2.r
 create mode 100755 test/unittest/fbtprovider/tst.entryargs2.sh

diff --git a/libdtrace/dt_prov_fbt.c b/libdtrace/dt_prov_fbt.c
index 8aa53d643..50fa0d9dc 100644
--- a/libdtrace/dt_prov_fbt.c
+++ b/libdtrace/dt_prov_fbt.c
@@ -285,8 +285,20 @@ static int fprobe_trampoline(dt_pcb_t *pcb, uint_t exitlbl)
 	if (strcmp(pcb->pcb_probe->desc->prb, "entry") == 0) {
 		int	i;
 
+		/*
+		 * We want to copy entry args from %r8 to %r7 (plus offsets).
+		 * Unfortunately, for fprobes, the BPF verifier can reject
+		 * certain argument types.  We work around this by copying
+		 * the arguments onto the BPF stack and loading them from there.
+		 */
+		emit(dlp, BPF_MOV_REG(BPF_REG_1, BPF_REG_FP));
+		emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, DT_TRAMP_SP_SLOT(prp->argc - 1)));
+		emit(dlp, BPF_MOV_IMM(BPF_REG_2, 8 * prp->argc));
+		emit(dlp, BPF_MOV_REG(BPF_REG_3, BPF_REG_8));
+		emit(dlp, BPF_CALL_HELPER(dtp->dt_bpfhelper[BPF_FUNC_probe_read_kernel]));
+
 		for (i = 0; i < prp->argc; i++) {
-			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_8, i * 8));
+			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_FP, DT_TRAMP_SP_SLOT(prp->argc - 1) + i * 8));
 			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(i), BPF_REG_0));
 		}
 	} else {
diff --git a/test/unittest/fbtprovider/tst.entryargs2.r b/test/unittest/fbtprovider/tst.entryargs2.r
new file mode 100644
index 000000000..efc4685f9
--- /dev/null
+++ b/test/unittest/fbtprovider/tst.entryargs2.r
@@ -0,0 +1,29 @@
+mode READ           : no
+mode WRITE          : yes
+mode LSEEK          : yes
+mode PREAD          : yes
+mode PWRITE         : yes
+mode WRITER         : yes
+mode CAN_READ       : no
+mode CAN_WRITE      : yes
+mode OPENED         : yes
+buf: =========================
+count: 8
+pos: 20
+abcdefghijklmnopqrst========CDEFGHIJKLMNOPQRSTUVWXYZ0123456789
+
+mode READ           : yes
+mode WRITE          : yes
+mode LSEEK          : yes
+mode PREAD          : yes
+mode PWRITE         : yes
+mode WRITER         : yes
+mode CAN_READ       : yes
+mode CAN_WRITE      : yes
+mode OPENED         : yes
+buf: =========================
+count: 8
+pos: 20
+abcdefghijklmnopqrst========CDEFGHIJKLMNOPQRSTUVWXYZ0123456789
+
+success
diff --git a/test/unittest/fbtprovider/tst.entryargs2.sh b/test/unittest/fbtprovider/tst.entryargs2.sh
new file mode 100755
index 000000000..f5b435f56
--- /dev/null
+++ b/test/unittest/fbtprovider/tst.entryargs2.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+#
+# Another test of entry args.
+#
+
+dtrace=$1
+CC=${CC:-/usr/bin/gcc}
+
+# Set up test directory.
+
+DIRNAME=$tmpdir/entryargs2.$$.$RANDOM
+mkdir -p $DIRNAME
+cd $DIRNAME
+
+# Make the trigger.
+
+cat << EOF > main.c
+#include <stdio.h>
+#include <fcntl.h>  // open()
+#include <unistd.h> // lseek(), write(), close()
+
+int main(int c, char **v) {
+	int fd = open("tmp.txt", c == 1 ? O_WRONLY : O_RDWR);
+
+	if (fd == -1)
+		return 1;
+
+	/* Move the offset, then write to the file. */
+	/* (We will overwrite some "middle section" of the file with "========".) */
+	lseek(fd, 20, SEEK_SET);
+	write(fd, "=========================", 8);
+	close(fd);
+
+	return 0;
+}
+EOF
+
+# Build the trigger.           FIXME do consistent with Sam's changes.
+
+$CC $test_cppflags $test_ldflags main.c
+if [ $? -ne 0 ]; then
+	echo "failed to link final executable" >&2
+	exit 1
+fi
+
+# Prepare the D script.
+
+cat << EOF > D.d
+/* these definitions come from kernel header include/linux/fs.h */
+#define FMODE_READ	(1 << 0)
+#define FMODE_WRITE	(1 << 1)
+#define FMODE_LSEEK	(1 << 2)
+#define FMODE_PREAD	(1 << 3)
+#define FMODE_PWRITE	(1 << 4)
+
+#define FMODE_WRITER	(1 << 16)
+#define FMODE_CAN_READ	(1 << 17)
+#define FMODE_CAN_WRITE	(1 << 18)
+#define FMODE_OPENED	(1 << 19)
+
+fbt::vfs_write:entry
+/pid == \$target/
+{
+	mode = ((struct file *)arg0)->f_mode;
+	printf("mode READ           : %s\n", mode & FMODE_READ      ? "yes" : "no");
+	printf("mode WRITE          : %s\n", mode & FMODE_WRITE     ? "yes" : "no");
+	printf("mode LSEEK          : %s\n", mode & FMODE_LSEEK     ? "yes" : "no");
+	printf("mode PREAD          : %s\n", mode & FMODE_PREAD     ? "yes" : "no");
+	printf("mode PWRITE         : %s\n", mode & FMODE_PWRITE    ? "yes" : "no");
+	printf("mode WRITER         : %s\n", mode & FMODE_WRITER    ? "yes" : "no");
+	printf("mode CAN_READ       : %s\n", mode & FMODE_CAN_READ  ? "yes" : "no");
+	printf("mode CAN_WRITE      : %s\n", mode & FMODE_CAN_WRITE ? "yes" : "no");
+	printf("mode OPENED         : %s\n", mode & FMODE_OPENED    ? "yes" : "no");
+
+	printf("buf: %s\n", stringof(copyinstr(arg1)));
+	printf("count: %d\n", arg2);
+	printf("pos: %d", *((loff_t *)arg3));
+	exit(0);
+}
+EOF
+
+# Run the D script and trigger twice, once with O_WRONLY and then O_RDWR.
+
+for args in "" "dummy"; do
+
+	# Prepare the file to be (over)written.
+	rm -f tmp.txt
+	echo abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 > tmp.txt
+
+	# Run the D script and trigger.
+	$dtrace $dt_flags -c "./a.out $args" -Cqs D.d
+
+        # Report the output file.
+	cat tmp.txt
+	echo
+
+done
+
+echo success
+exit 0
-- 
2.43.5




More information about the DTrace-devel mailing list