[DTrace-devel] [PATCH 2/5] Add -xcpu support to dtrace provider

eugene.loh at oracle.com eugene.loh at oracle.com
Tue Sep 5 04:11:39 UTC 2023


From: Eugene Loh <eugene.loh at oracle.com>

Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
 libdtrace/dt_impl.h                    |   1 +
 libdtrace/dt_prov_dtrace.c             |   4 +-
 libdtrace/dt_work.c                    | 169 +++++++++++++++++++++++--
 test/unittest/options/tst.cpu-BEGIN.sh |  20 +++
 test/unittest/options/tst.cpu-END.sh   |  20 +++
 5 files changed, 204 insertions(+), 10 deletions(-)
 create mode 100755 test/unittest/options/tst.cpu-BEGIN.sh
 create mode 100755 test/unittest/options/tst.cpu-END.sh

diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
index 674905b9..597081d1 100644
--- a/libdtrace/dt_impl.h
+++ b/libdtrace/dt_impl.h
@@ -403,6 +403,7 @@ struct dtrace_hdl {
 	uint_t dt_stopped;	/* boolean:  set once tracing is stopped */
 	processorid_t dt_beganon; /* CPU that executed BEGIN probe (if any) */
 	processorid_t dt_endedon; /* CPU that executed END probe (if any) */
+	pid_t dt_beginendchild;	/* process running BEGIN and END probes */
 	uint_t dt_oflags;	/* dtrace open-time options (see dtrace.h) */
 	uint_t dt_cflags;	/* dtrace compile-time options (see dtrace.h) */
 	uint_t dt_dflags;	/* dtrace link-time options (see dtrace.h) */
diff --git a/libdtrace/dt_prov_dtrace.c b/libdtrace/dt_prov_dtrace.c
index a8ea66d6..a76534f8 100644
--- a/libdtrace/dt_prov_dtrace.c
+++ b/libdtrace/dt_prov_dtrace.c
@@ -208,7 +208,9 @@ static int attach(dtrace_hdl_t *dtp, const dt_probe_t *prp, int bpf_fd)
 		int	fd, rc = -1;
 
 		/* get a uprobe specification for this probe */
-		spec = uprobe_spec(getpid(), prp->desc->prb);
+		spec = uprobe_spec(dtp->dt_beginendchild ?
+				   dtp->dt_beginendchild : getpid(),
+				   prp->desc->prb);
 		if (spec == NULL)
 			return -ENOENT;
 
diff --git a/libdtrace/dt_work.c b/libdtrace/dt_work.c
index 1bb6104c..02cdfa47 100644
--- a/libdtrace/dt_work.c
+++ b/libdtrace/dt_work.c
@@ -19,15 +19,16 @@
 #include <port.h>
 #include <linux/perf_event.h>
 #include <sys/epoll.h>
+#include <sys/prctl.h>
 #include <valgrind/valgrind.h>
 
 void
-BEGIN_probe(void)
+BEGIN_probe(int signum, siginfo_t *si, void *uc)
 {
 }
 
 void
-END_probe(void)
+END_probe(int signum, siginfo_t *si, void *uc)
 {
 }
 
@@ -131,6 +132,90 @@ dtrace_status(dtrace_hdl_t *dtp)
 	return DTRACE_STATUS_OKAY;
 }
 
+/*
+ * Pin the calling process to the given CPU.  Meant to run in the forked
+ * BEGIN/END child: on any failure the process terminates with status 1,
+ * using _exit() so that atexit handlers and stdio buffers inherited from
+ * the parent are left alone.
+ */
+static void
+mychild_bind_to_cpu(size_t cpu)
+{
+	int		ncpus = 1024;
+	cpu_set_t	*mask;
+	size_t		size;
+
+	/*
+	 * Our guess may be narrower than the kernel's CPU mask; then
+	 * sched_getaffinity() fails with EINVAL and we double and retry.
+	 * errno is read only right after a failure, never a stale value.
+	 * (The CPU count is also available from /proc/cpuinfo, sysconf(3)
+	 * _SC_NPROCESSORS_CONF/_ONLN, or /sys/devices/system/cpu/.)
+	 */
+	for (;;) {
+		mask = CPU_ALLOC(ncpus);
+		if (mask == NULL)
+			_exit(1);
+		size = CPU_ALLOC_SIZE(ncpus);
+		if (sched_getaffinity(0, size, mask) == 0)
+			break;
+		if (errno != EINVAL)
+			_exit(1);
+		CPU_FREE(mask);
+		ncpus *= 2;
+	}
+
+	/* Restrict the mask to the requested CPU and apply it. */
+	CPU_ZERO_S(size, mask);
+	CPU_SET_S(cpu, size, mask);
+	if (sched_setaffinity(0, size, mask) != 0)
+		_exit(1);
+
+	CPU_FREE(mask);
+}
+
+/* Child only: have SIGUSR1 run BEGIN_probe() and SIGUSR2 run END_probe(). */
+static void
+mychild_sethandlers(void)
+{
+	struct sigaction	act = { .sa_flags = SA_SIGINFO };
+
+	/* Complain on stderr: stdout is what the tests capture and compare. */
+	act.sa_sigaction = &BEGIN_probe;
+	if (sigaction(SIGUSR1, &act, NULL) != 0)
+		fprintf(stderr, "ERROR cannot set handler\n");
+
+	act.sa_sigaction = &END_probe;
+	if (sigaction(SIGUSR2, &act, NULL) != 0)
+		fprintf(stderr, "ERROR cannot set handler\n");
+}
+
+/*
+ * Body of the forked BEGIN/END child: wait for SIGUSR1 (BEGIN_probe), ack on
+ * fd, wait for SIGUSR2 (END_probe), then terminate.  Does not return.
+ */
+static void
+mychild(int fd)
+{
+	int	dummy = 1234;
+
+	mychild_sethandlers();
+	pause();		/* until a handler has run: SIGUSR1 expected */
+	if (write(fd, &dummy, sizeof(dummy)) != (ssize_t)sizeof(dummy))
+		_exit(1);	/* no ack possible; the parent times out */
+	pause();		/* until a handler has run: SIGUSR2 expected */
+	/* _exit(), not exit(): leave the parent's atexit/stdio state alone. */
+	_exit(0);
+}
+
+/* Current CLOCK_MONOTONIC reading, in whole milliseconds. */
+static unsigned long long
+elapsed_msecs() {
+	struct timespec	now;
+	clock_gettime(CLOCK_MONOTONIC, &now);
+	return now.tv_nsec / 1000000 + now.tv_sec * 1000ull;
+}
+
 int
 dtrace_go(dtrace_hdl_t *dtp, uint_t cflags)
 {
@@ -138,10 +223,42 @@ dtrace_go(dtrace_hdl_t *dtp, uint_t cflags)
 	struct epoll_event	ev;
 	dtrace_optval_t		lockmem = dtp->dt_options[DTRACEOPT_LOCKMEM];
 	struct rlimit		rl;
+	int			begin_end_child_fds[2];
 
 	if (dtp->dt_active)
 		return dt_set_errno(dtp, EINVAL);
 
+	/*
+	 * Fork a child for the BEGIN and END probes if -xcpu is used.
+	 */
+	if (dtp->dt_options[DTRACEOPT_CPU] != DTRACEOPT_UNSET) {
+		pid_t child;
+
+		pipe(begin_end_child_fds);
+
+		/* FIXME: Do we have to worry about an offline CPU? */
+		child = fork();
+
+		if (child == -1) {
+			/* FIXME: emit an error message? */
+			return -1;
+		}
+		if (child == 0) {
+			/* Make sure we die when the parent does. */
+			if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0) != 0) {
+				/* FIXME: emit an error message? */
+				exit(1);
+			}
+
+			mychild_bind_to_cpu(dtp->dt_options[DTRACEOPT_CPU]);
+			mychild(begin_end_child_fds[1]);
+			/* Do not return. */
+		}
+		dtp->dt_beginendchild = child;
+	} else {
+		dtp->dt_beginendchild = 0;
+	}
+
 	/*
 	 * Set the locked-memory limit if so directed by the user.
 	 */
@@ -200,10 +317,42 @@ dtrace_go(dtrace_hdl_t *dtp, uint_t cflags)
 	if (dt_aggregate_go(dtp) == -1)
 		return -1;
 
-	if (RUNNING_ON_VALGRIND)
-		VALGRIND_NON_SIMD_CALL0(BEGIN_probe);
-	else
-		BEGIN_probe();
+	if (dtp->dt_beginendchild) {
+		/*
+		 * We forked a child to run on a specific CPU for
+		 * BEGIN and END probes.
+		 */
+		int flags, dummy;
+		unsigned long long timeout;
+
+		/* Make the read end of the pipe nonblocking. */
+		flags = fcntl(begin_end_child_fds[0], F_GETFL, 0);
+		if (flags == -1) {
+			/* FIXME: better diagnosability? */
+			printf("ERROR: could not get flags\n");
+			return -1;
+		}
+		flags |= O_NONBLOCK;
+		if (fcntl(begin_end_child_fds[0], F_SETFL, flags) == -1) {
+			/* FIXME: better diagnosability? */
+			printf("ERROR: could not set flags\n");
+			return -1;
+		}
+
+		/* Signal the child to BEGIN. */
+		kill(dtp->dt_beginendchild, SIGUSR1);
+
+		/* Wait for the child to ack. */
+		timeout = elapsed_msecs() + 2000;
+		while (read(begin_end_child_fds[0], &dummy, 4) <= 0) {
+			usleep(100000);
+			if (elapsed_msecs() > timeout)
+				return -1;
+		}
+	} else if (RUNNING_ON_VALGRIND) {
+		VALGRIND_NON_SIMD_CALL3(BEGIN_probe, 0, NULL, NULL);
+	} else
+		BEGIN_probe(0, NULL, NULL);
 
 	dtp->dt_active = 1;
 	dtp->dt_beganon = dt_state_get_beganon(dtp);
@@ -233,10 +382,12 @@ dtrace_stop(dtrace_hdl_t *dtp)
 	if (dt_state_get_activity(dtp) < DT_ACTIVITY_DRAINING)
 		dt_state_set_activity(dtp, DT_ACTIVITY_DRAINING);
 
-	if (RUNNING_ON_VALGRIND)
-		VALGRIND_NON_SIMD_CALL0(END_probe);
+	if (dtp->dt_beginendchild)
+		kill(dtp->dt_beginendchild, SIGUSR2);
+	else if (RUNNING_ON_VALGRIND)
+		VALGRIND_NON_SIMD_CALL3(END_probe, 0, NULL, NULL);
 	else
-		END_probe();
+		END_probe(0, NULL, NULL);
 
 	dtp->dt_stopped = 1;
 	dtp->dt_endedon = dt_state_get_endedon(dtp);
diff --git a/test/unittest/options/tst.cpu-BEGIN.sh b/test/unittest/options/tst.cpu-BEGIN.sh
new file mode 100755
index 00000000..0bacfea0
--- /dev/null
+++ b/test/unittest/options/tst.cpu-BEGIN.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+#
+
+dtrace=$1
+
+nerr=0
+for cpu0 in `awk '/^processor[ 	]*: [0-9]*$/ {print $3}' /proc/cpuinfo`; do
+	cpu=`$dtrace $dt_flags -xcpu=$cpu0 -qn 'BEGIN { trace(cpu); exit(0); }'`
+	echo expected cpu $cpu0 got cpu $cpu
+	# Quoted: empty output from a failed dtrace must count as an error.
+	[ "$(echo $cpu)" = "$cpu0" ] || nerr=$(($nerr + 1))
+done
+
+# Collapse the count to 0/1: exit statuses wrap modulo 256.
+exit $(($nerr > 0))
diff --git a/test/unittest/options/tst.cpu-END.sh b/test/unittest/options/tst.cpu-END.sh
new file mode 100755
index 00000000..32080d94
--- /dev/null
+++ b/test/unittest/options/tst.cpu-END.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+#
+
+dtrace=$1
+
+nerr=0
+for cpu0 in `awk '/^processor[ 	]*: [0-9]*$/ {print $3}' /proc/cpuinfo`; do
+	cpu=`$dtrace $dt_flags -xcpu=$cpu0 -qn 'BEGIN { exit(0) } END { trace(cpu); }'`
+	echo expected cpu $cpu0 got cpu $cpu
+	# Quoted: empty output from a failed dtrace must count as an error.
+	[ "$(echo $cpu)" = "$cpu0" ] || nerr=$(($nerr + 1))
+done
+
+# Collapse the count to 0/1: exit statuses wrap modulo 256.
+exit $(($nerr > 0))
-- 
2.18.4




More information about the DTrace-devel mailing list