[DTrace-devel] [PATCH 2/5] Add -xcpu support to dtrace provider
eugene.loh at oracle.com
eugene.loh at oracle.com
Tue Sep 5 04:11:39 UTC 2023
From: Eugene Loh <eugene.loh at oracle.com>
Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
libdtrace/dt_impl.h | 1 +
libdtrace/dt_prov_dtrace.c | 4 +-
libdtrace/dt_work.c | 169 +++++++++++++++++++++++--
test/unittest/options/tst.cpu-BEGIN.sh | 20 +++
test/unittest/options/tst.cpu-END.sh | 20 +++
5 files changed, 204 insertions(+), 10 deletions(-)
create mode 100755 test/unittest/options/tst.cpu-BEGIN.sh
create mode 100755 test/unittest/options/tst.cpu-END.sh
diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
index 674905b9..597081d1 100644
--- a/libdtrace/dt_impl.h
+++ b/libdtrace/dt_impl.h
@@ -403,6 +403,7 @@ struct dtrace_hdl {
uint_t dt_stopped; /* boolean: set once tracing is stopped */
processorid_t dt_beganon; /* CPU that executed BEGIN probe (if any) */
processorid_t dt_endedon; /* CPU that executed END probe (if any) */
+ pid_t dt_beginendchild; /* process running BEGIN and END probes */
uint_t dt_oflags; /* dtrace open-time options (see dtrace.h) */
uint_t dt_cflags; /* dtrace compile-time options (see dtrace.h) */
uint_t dt_dflags; /* dtrace link-time options (see dtrace.h) */
diff --git a/libdtrace/dt_prov_dtrace.c b/libdtrace/dt_prov_dtrace.c
index a8ea66d6..a76534f8 100644
--- a/libdtrace/dt_prov_dtrace.c
+++ b/libdtrace/dt_prov_dtrace.c
@@ -208,7 +208,9 @@ static int attach(dtrace_hdl_t *dtp, const dt_probe_t *prp, int bpf_fd)
int fd, rc = -1;
/* get a uprobe specification for this probe */
- spec = uprobe_spec(getpid(), prp->desc->prb);
+ spec = uprobe_spec(dtp->dt_beginendchild ?
+ dtp->dt_beginendchild : getpid(),
+ prp->desc->prb);
if (spec == NULL)
return -ENOENT;
diff --git a/libdtrace/dt_work.c b/libdtrace/dt_work.c
index 1bb6104c..02cdfa47 100644
--- a/libdtrace/dt_work.c
+++ b/libdtrace/dt_work.c
@@ -19,15 +19,16 @@
#include <port.h>
#include <linux/perf_event.h>
#include <sys/epoll.h>
+#include <sys/prctl.h>
#include <valgrind/valgrind.h>
void
-BEGIN_probe(void)
+BEGIN_probe(int signum, siginfo_t *si, void *uc)
{
}
void
-END_probe(void)
+END_probe(int signum, siginfo_t *si, void *uc)
{
}
@@ -131,6 +132,90 @@ dtrace_status(dtrace_hdl_t *dtp)
return DTRACE_STATUS_OKAY;
}
+/*
+ * Pin the calling process to the single CPU 'cpu'.
+ *
+ * The affinity mask is sized by probing sched_getaffinity(): while that call
+ * fails with EINVAL, the mask is doubled and the call is retried.
+ *
+ * Other ways of determining the number of CPUs available on the system:
+ * - inspecting the contents of /proc/cpuinfo
+ * - using sysconf(3) to obtain _SC_NPROCESSORS_CONF and _SC_NPROCESSORS_ONLN
+ * - inspecting the list of CPU directories under /sys/devices/system/cpu/
+ *
+ * There is no error return: any failure terminates the calling process with
+ * exit status 1.
+ */
+static void
+mychild_bind_to_cpu(size_t cpu)
+{
+	int		ncpus = 1024;
+	size_t		size;
+	cpu_set_t	*mask;
+
+	for (;;) {
+		size = CPU_ALLOC_SIZE(ncpus);
+		mask = CPU_ALLOC(ncpus);
+		if (mask == NULL) {
+			/* FIXME: some other failure mode? */
+			exit(1);
+		}
+
+		/*
+		 * Decide on the return value alone.  errno is only meaningful
+		 * after a failed call; after a successful one it may still hold
+		 * a stale value left behind by some earlier, unrelated failure.
+		 */
+		if (sched_getaffinity(0, size, mask) == 0)
+			break;
+		if (errno != EINVAL) {
+			/* FIXME: some other failure mode? */
+			exit(1);
+		}
+
+		/* The mask was rejected as too small: grow it and try again. */
+		CPU_FREE(mask);
+		ncpus *= 2;
+	}
+
+	/* Reduce the mask to the requested CPU only. */
+	CPU_ZERO_S(size, mask);
+	CPU_SET_S(cpu, size, mask);
+
+	/* Set my affinity. */
+	if (sched_setaffinity(0, size, mask) != 0) {
+		/* FIXME: some other failure mode? */
+		exit(1);
+	}
+
+	CPU_FREE(mask);
+}
+
+/*
+ * Install the signal handlers used by the BEGIN/END child process:
+ * SIGUSR1 invokes BEGIN_probe() and SIGUSR2 invokes END_probe().
+ *
+ * A failure to install a handler is reported on stderr (not stdout, so the
+ * message cannot get mixed into the trace output that consumers read from
+ * stdout).  The function still returns normally in that case.
+ */
+static void
+mychild_sethandlers(void)
+{
+	struct sigaction	act;
+
+	memset(&act, 0, sizeof(act));
+
+	/* The probe functions use the three-argument handler signature. */
+	act.sa_flags = SA_SIGINFO;
+
+	act.sa_sigaction = &BEGIN_probe;
+	if (sigaction(SIGUSR1, &act, NULL) != 0)
+		fprintf(stderr, "ERROR cannot set handler\n");
+
+	act.sa_sigaction = &END_probe;
+	if (sigaction(SIGUSR2, &act, NULL) != 0)
+		fprintf(stderr, "ERROR cannot set handler\n");
+}
+
+/*
+ * Body of the forked BEGIN/END child.  Does not return.
+ *
+ * Sequence:
+ *   1. wait for SIGUSR1, whose handler is BEGIN_probe();
+ *   2. acknowledge by writing one int to 'fd';
+ *   3. wait for SIGUSR2, whose handler is END_probe();
+ *   4. exit(0).
+ *
+ * SIGUSR1 and SIGUSR2 are kept blocked except while waiting in sigsuspend(),
+ * which swaps the signal mask and waits as one atomic step.  A signal sent
+ * early therefore stays pending until the child is ready for it.  With a bare
+ * pause(), a signal arriving just before the call would be handled and the
+ * child would then wait forever; one arriving before the handlers are
+ * installed would terminate the child through the default action.
+ *
+ * The child exits with status 1 if the signal mask cannot be set up or the
+ * acknowledgement cannot be written.
+ */
+static void
+mychild(int fd)
+{
+	int		dummy = 1234;
+	sigset_t	usr, wait_begin, wait_end;
+
+	/* Block both signals; wait_begin receives the previous mask. */
+	sigemptyset(&usr);
+	sigaddset(&usr, SIGUSR1);
+	sigaddset(&usr, SIGUSR2);
+	if (sigprocmask(SIG_BLOCK, &usr, &wait_begin) != 0)
+		exit(1);
+
+	/*
+	 * Build the two masks to wait with: the previous mask, adjusted so
+	 * that only the signal being waited for is let through.
+	 */
+	wait_end = wait_begin;
+	sigdelset(&wait_begin, SIGUSR1);
+	sigaddset(&wait_begin, SIGUSR2);
+	sigaddset(&wait_end, SIGUSR1);
+	sigdelset(&wait_end, SIGUSR2);
+
+	mychild_sethandlers();
+
+	/* Wait for USR1 and BEGIN_probe(). */
+	sigsuspend(&wait_begin);
+
+	/* Notify parent that BEGIN_probe() triggered. */
+	while (write(fd, &dummy, sizeof(dummy)) == -1) {
+		/* Retry if merely interrupted; otherwise give up. */
+		if (errno != EINTR)
+			exit(1);
+	}
+
+	/* Wait for USR2 and END_probe(). */
+	sigsuspend(&wait_end);
+
+	exit(0);
+}
+
+/*
+ * Return the current CLOCK_MONOTONIC reading, converted to milliseconds.
+ * The value is intended for computing intervals and deadlines by comparing
+ * two readings.
+ *
+ * Returns 0 if the clock cannot be read, rather than computing a result from
+ * an uninitialized timespec.
+ */
+static unsigned long long
+elapsed_msecs(void)
+{
+	struct timespec	now;
+
+	if (clock_gettime(CLOCK_MONOTONIC, &now) != 0)
+		return 0;
+
+	return now.tv_sec * 1000ull + now.tv_nsec / 1000000;
+}
+
int
dtrace_go(dtrace_hdl_t *dtp, uint_t cflags)
{
@@ -138,10 +223,42 @@ dtrace_go(dtrace_hdl_t *dtp, uint_t cflags)
struct epoll_event ev;
dtrace_optval_t lockmem = dtp->dt_options[DTRACEOPT_LOCKMEM];
struct rlimit rl;
+ int begin_end_child_fds[2];
if (dtp->dt_active)
return dt_set_errno(dtp, EINVAL);
+ /*
+ * Fork a child for the BEGIN and END probes if -xcpu is used.
+ */
+ if (dtp->dt_options[DTRACEOPT_CPU] != DTRACEOPT_UNSET) {
+ pid_t child;
+
+ pipe(begin_end_child_fds);
+
+ /* FIXME: Do we have to worry about an offline CPU? */
+ child = fork();
+
+ if (child == -1) {
+ /* FIXME: emit an error message? */
+ return -1;
+ }
+ if (child == 0) {
+ /* Make sure we die when the parent does. */
+ if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0) != 0) {
+ /* FIXME: emit an error message? */
+ exit(1);
+ }
+
+ mychild_bind_to_cpu(dtp->dt_options[DTRACEOPT_CPU]);
+ mychild(begin_end_child_fds[1]);
+ /* Do not return. */
+ }
+ dtp->dt_beginendchild = child;
+ } else {
+ dtp->dt_beginendchild = 0;
+ }
+
/*
* Set the locked-memory limit if so directed by the user.
*/
@@ -200,10 +317,42 @@ dtrace_go(dtrace_hdl_t *dtp, uint_t cflags)
if (dt_aggregate_go(dtp) == -1)
return -1;
- if (RUNNING_ON_VALGRIND)
- VALGRIND_NON_SIMD_CALL0(BEGIN_probe);
- else
- BEGIN_probe();
+ if (dtp->dt_beginendchild) {
+ /*
+ * We forked a child to run on a specific CPU for
+ * BEGIN and END probes.
+ */
+ int flags, dummy;
+ unsigned long long timeout;
+
+ /* Make the read end of the pipe nonblocking. */
+ flags = fcntl(begin_end_child_fds[0], F_GETFL, 0);
+ if (flags == -1) {
+ /* FIXME: better diagnosability? */
+ printf("ERROR: could not get flags\n");
+ return -1;
+ }
+ flags |= O_NONBLOCK;
+ if (fcntl(begin_end_child_fds[0], F_SETFL, flags) == -1) {
+ /* FIXME: better diagnosability? */
+ printf("ERROR: could not set flags\n");
+ return -1;
+ }
+
+ /* Signal the child to BEGIN. */
+ kill(dtp->dt_beginendchild, SIGUSR1);
+
+ /* Wait for the child to ack. */
+ timeout = elapsed_msecs() + 2000;
+ while (read(begin_end_child_fds[0], &dummy, 4) <= 0) {
+ usleep(100000);
+ if (elapsed_msecs() > timeout)
+ return -1;
+ }
+ } else if (RUNNING_ON_VALGRIND) {
+ VALGRIND_NON_SIMD_CALL3(BEGIN_probe, 0, NULL, NULL);
+ } else
+ BEGIN_probe(0, NULL, NULL);
dtp->dt_active = 1;
dtp->dt_beganon = dt_state_get_beganon(dtp);
@@ -233,10 +382,12 @@ dtrace_stop(dtrace_hdl_t *dtp)
if (dt_state_get_activity(dtp) < DT_ACTIVITY_DRAINING)
dt_state_set_activity(dtp, DT_ACTIVITY_DRAINING);
- if (RUNNING_ON_VALGRIND)
- VALGRIND_NON_SIMD_CALL0(END_probe);
+ if (dtp->dt_beginendchild)
+ kill(dtp->dt_beginendchild, SIGUSR2);
+ else if (RUNNING_ON_VALGRIND)
+ VALGRIND_NON_SIMD_CALL3(END_probe, 0, NULL, NULL);
else
- END_probe();
+ END_probe(0, NULL, NULL);
dtp->dt_stopped = 1;
dtp->dt_endedon = dt_state_get_endedon(dtp);
diff --git a/test/unittest/options/tst.cpu-BEGIN.sh b/test/unittest/options/tst.cpu-BEGIN.sh
new file mode 100755
index 00000000..0bacfea0
--- /dev/null
+++ b/test/unittest/options/tst.cpu-BEGIN.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+#
+
+dtrace=$1
+
+# For every CPU listed in /proc/cpuinfo, run dtrace with -xcpu=N and check
+# that the BEGIN probe reports having run on CPU N.
+nerr=0
+ncpu=0
+# "processor" may be followed by spaces or a tab before the colon.
+for cpu0 in `awk '/^processor[ \t]*: [0-9]*$/ {print $3}' /proc/cpuinfo`; do
+	ncpu=$(($ncpu + 1))
+	cpu=`$dtrace $dt_flags -xcpu=$cpu0 -qn 'BEGIN { trace(cpu); exit(0); }'`
+	# Let word splitting strip the whitespace around the traced value.
+	cpu=`echo $cpu`
+	echo expected cpu $cpu0 got cpu $cpu
+	# Quote both sides: if dtrace printed nothing, an unquoted $cpu would
+	# turn this into a syntax error and the failure would go uncounted.
+	if [ "$cpu" != "$cpu0" ]; then
+		nerr=$(($nerr + 1))
+	fi
+done
+
+# A run that checked no CPU at all must not count as a pass.
+if [ $ncpu -eq 0 ]; then
+	echo no CPUs found in /proc/cpuinfo
+	exit 1
+fi
+
+# Exit statuses wrap modulo 256, so do not return the raw error count.
+if [ $nerr -ne 0 ]; then
+	exit 1
+fi
+exit 0
diff --git a/test/unittest/options/tst.cpu-END.sh b/test/unittest/options/tst.cpu-END.sh
new file mode 100755
index 00000000..32080d94
--- /dev/null
+++ b/test/unittest/options/tst.cpu-END.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+#
+
+dtrace=$1
+
+# For every CPU listed in /proc/cpuinfo, run dtrace with -xcpu=N and check
+# that the END probe reports having run on CPU N.
+nerr=0
+ncpu=0
+# "processor" may be followed by spaces or a tab before the colon.
+for cpu0 in `awk '/^processor[ \t]*: [0-9]*$/ {print $3}' /proc/cpuinfo`; do
+	ncpu=$(($ncpu + 1))
+	cpu=`$dtrace $dt_flags -xcpu=$cpu0 -qn 'BEGIN { exit(0) } END { trace(cpu); }'`
+	# Let word splitting strip the whitespace around the traced value.
+	cpu=`echo $cpu`
+	echo expected cpu $cpu0 got cpu $cpu
+	# Quote both sides: if dtrace printed nothing, an unquoted $cpu would
+	# turn this into a syntax error and the failure would go uncounted.
+	if [ "$cpu" != "$cpu0" ]; then
+		nerr=$(($nerr + 1))
+	fi
+done
+
+# A run that checked no CPU at all must not count as a pass.
+if [ $ncpu -eq 0 ]; then
+	echo no CPUs found in /proc/cpuinfo
+	exit 1
+fi
+
+# Exit statuses wrap modulo 256, so do not return the raw error count.
+if [ $nerr -ne 0 ]; then
+	exit 1
+fi
+exit 0
--
2.18.4
More information about the DTrace-devel
mailing list