[DTrace-devel] [PATCH 2/5] Add -xcpu support to dtrace provider
Kris Van Hees
kris.van.hees at oracle.com
Wed Dec 20 16:42:14 UTC 2023
Preliminary comment...
On Tue, Sep 05, 2023 at 12:11:39AM -0400, eugene.loh at oracle.com wrote:
>
> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
> ---
> libdtrace/dt_impl.h | 1 +
> libdtrace/dt_prov_dtrace.c | 4 +-
> libdtrace/dt_work.c | 169 +++++++++++++++++++++++--
> test/unittest/options/tst.cpu-BEGIN.sh | 20 +++
> test/unittest/options/tst.cpu-END.sh | 20 +++
> 5 files changed, 204 insertions(+), 10 deletions(-)
> create mode 100755 test/unittest/options/tst.cpu-BEGIN.sh
> create mode 100755 test/unittest/options/tst.cpu-END.sh
>
> diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
> index 674905b9..597081d1 100644
> --- a/libdtrace/dt_impl.h
> +++ b/libdtrace/dt_impl.h
> @@ -403,6 +403,7 @@ struct dtrace_hdl {
> uint_t dt_stopped; /* boolean: set once tracing is stopped */
> processorid_t dt_beganon; /* CPU that executed BEGIN probe (if any) */
> processorid_t dt_endedon; /* CPU that executed END probe (if any) */
> + pid_t dt_beginendchild; /* process running BEGIN and END probes */
> uint_t dt_oflags; /* dtrace open-time options (see dtrace.h) */
> uint_t dt_cflags; /* dtrace compile-time options (see dtrace.h) */
> uint_t dt_dflags; /* dtrace link-time options (see dtrace.h) */
> diff --git a/libdtrace/dt_prov_dtrace.c b/libdtrace/dt_prov_dtrace.c
> index a8ea66d6..a76534f8 100644
> --- a/libdtrace/dt_prov_dtrace.c
> +++ b/libdtrace/dt_prov_dtrace.c
> @@ -208,7 +208,9 @@ static int attach(dtrace_hdl_t *dtp, const dt_probe_t *prp, int bpf_fd)
> int fd, rc = -1;
>
> /* get a uprobe specification for this probe */
> - spec = uprobe_spec(getpid(), prp->desc->prb);
> + spec = uprobe_spec(dtp->dt_beginendchild ?
> + dtp->dt_beginendchild : getpid(),
> + prp->desc->prb);
> if (spec == NULL)
> return -ENOENT;
>
> diff --git a/libdtrace/dt_work.c b/libdtrace/dt_work.c
> index 1bb6104c..02cdfa47 100644
> --- a/libdtrace/dt_work.c
> +++ b/libdtrace/dt_work.c
> @@ -19,15 +19,16 @@
> #include <port.h>
> #include <linux/perf_event.h>
> #include <sys/epoll.h>
> +#include <sys/prctl.h>
> #include <valgrind/valgrind.h>
>
> void
> -BEGIN_probe(void)
> +BEGIN_probe(int signum, siginfo_t *si, void *uc)
> {
> }
>
> void
> -END_probe(void)
> +END_probe(int signum, siginfo_t *si, void *uc)
> {
> }
>
> @@ -131,6 +132,90 @@ dtrace_status(dtrace_hdl_t *dtp)
> return DTRACE_STATUS_OKAY;
> }
>
> +static
> +void mychild_bind_to_cpu(size_t cpu) {
> + int ncpus;
> + cpu_set_t *mask;
> + size_t size;
> +
> + /*
> + * Grow the mask as needed.
> + *
> + * Other ways of determining the number of CPUs available on the system:
> + * - inspecting the contents of /proc/cpuinfo
> + * - using sysconf(3) to obtain _SC_NPROCESSORS_CONF and _SC_NPROCESSORS_ONLN
> + * - inspecting the list of CPU directories under /sys/devices/system/cpu/
> + */
Can't this use the dt_conf info about cpuinfo rather than having more code
that seems to do the same (determining how many CPUs we have - and which)?
> + ncpus = 1024;
> + while ((mask = CPU_ALLOC(ncpus)) != NULL &&
> + sched_getaffinity(0, CPU_ALLOC_SIZE(ncpus), mask) != 0 &&
> + errno == EINVAL) {
> + CPU_FREE(mask);
> + errno = 0;
> + ncpus *= 2;
> + }
> + if (mask == NULL || (errno != 0 && errno != EINVAL)) {
> + /* FIXME: some other failure mode? */
> + exit(1);
> + }
> +
> + /* Set the CPU mask. */
> + size = CPU_ALLOC_SIZE(ncpus);
> + CPU_ZERO_S(size, mask);
> + CPU_SET_S(cpu, size, mask);
> +
> + /* Set my affinity. */
> + if (sched_setaffinity(0, size, mask) != 0) {
> + /* FIXME: some other failure mode? */
> + exit(1);
> + }
> +
> + /* Free the mask. */
> + CPU_FREE(mask);
> +}
> +
> +static void
> +mychild_sethandlers() {
> + struct sigaction act;
> +
> + memset(&act, 0, sizeof(struct sigaction));
> + act.sa_flags = SA_SIGINFO;
> +
> + act.sa_sigaction = &BEGIN_probe;
> + if (sigaction(SIGUSR1, &act, NULL))
> + printf("ERROR cannot set handler\n");
> +
> + act.sa_sigaction = &END_probe;
> + if (sigaction(SIGUSR2, &act, NULL))
> + printf("ERROR cannot set handler\n");
> +}
> +
> +static void
> +mychild(int fd) {
> + int dummy = 1234;
> +
> + mychild_sethandlers();
> +
> + /* Wait for USR1 and BEGIN_probe(). */
> + pause();
> +
> + /* Notify parent that BEGIN_probe() triggered. */
> + write(fd, &dummy, sizeof(dummy));
> +
> + /* Wait for USR2 and END_probe(). */
> + pause();
> +
> + exit(0);
> +}
> +
> +static unsigned long long
> +elapsed_msecs() {
> + struct timespec tstruct;
> +
> + clock_gettime(CLOCK_MONOTONIC, &tstruct);
> + return tstruct.tv_sec * 1000ull + tstruct.tv_nsec / 1000000;
> +}
> +
> int
> dtrace_go(dtrace_hdl_t *dtp, uint_t cflags)
> {
> @@ -138,10 +223,42 @@ dtrace_go(dtrace_hdl_t *dtp, uint_t cflags)
> struct epoll_event ev;
> dtrace_optval_t lockmem = dtp->dt_options[DTRACEOPT_LOCKMEM];
> struct rlimit rl;
> + int begin_end_child_fds[2];
>
> if (dtp->dt_active)
> return dt_set_errno(dtp, EINVAL);
>
> + /*
> + * Fork a child for the BEGIN and END probes if -xcpu is used.
> + */
> + if (dtp->dt_options[DTRACEOPT_CPU] != DTRACEOPT_UNSET) {
> + pid_t child;
> +
> + pipe(begin_end_child_fds);
> +
> + /* FIXME: Do we have to worry about an offline CPU? */
> + child = fork();
> +
> + if (child == -1) {
> + /* FIXME: emit an error message? */
> + return -1;
> + }
> + if (child == 0) {
> + /* Make sure we die when the parent does. */
> + if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0) != 0) {
> + /* FIXME: emit an error message? */
> + exit(1);
> + }
> +
> + mychild_bind_to_cpu(dtp->dt_options[DTRACEOPT_CPU]);
> + mychild(begin_end_child_fds[1]);
> + /* Do not return. */
> + }
> + dtp->dt_beginendchild = child;
> + } else {
> + dtp->dt_beginendchild = 0;
> + }
> +
> /*
> * Set the locked-memory limit if so directed by the user.
> */
> @@ -200,10 +317,42 @@ dtrace_go(dtrace_hdl_t *dtp, uint_t cflags)
> if (dt_aggregate_go(dtp) == -1)
> return -1;
>
> - if (RUNNING_ON_VALGRIND)
> - VALGRIND_NON_SIMD_CALL0(BEGIN_probe);
> - else
> - BEGIN_probe();
> + if (dtp->dt_beginendchild) {
> + /*
> + * We forked a child to run on a specific CPU for
> + * BEGIN and END probes.
> + */
> + int flags, dummy;
> + unsigned long long timeout;
> +
> + /* Make the read end of the pipe nonblocking. */
> + flags = fcntl(begin_end_child_fds[0], F_GETFL, 0);
> + if (flags == -1) {
> + /* FIXME: better diagnosibility? */
> + printf("ERROR: could not get flags\n");
> + return -1;
> + }
> + flags |= O_NONBLOCK;
> + if (fcntl(begin_end_child_fds[0], F_SETFL, flags) == -1) {
> + /* FIXME: better diagnosibility? */
> + printf("ERROR: could not set flags\n");
> + return -1;
> + }
> +
> + /* Signal the child to BEGIN. */
> + kill(dtp->dt_beginendchild, SIGUSR1);
> +
> + /* Wait for the child to ack. */
> + timeout = elapsed_msecs() + 2000;
> + while (read(begin_end_child_fds[0], &dummy, 4) <= 0) {
> + usleep(100000);
> + if (elapsed_msecs() > timeout)
> + return -1;
> + }
> + } else if (RUNNING_ON_VALGRIND) {
> + VALGRIND_NON_SIMD_CALL3(BEGIN_probe, 0, NULL, NULL);
> + } else
> + BEGIN_probe(0, NULL, NULL);
>
> dtp->dt_active = 1;
> dtp->dt_beganon = dt_state_get_beganon(dtp);
> @@ -233,10 +382,12 @@ dtrace_stop(dtrace_hdl_t *dtp)
> if (dt_state_get_activity(dtp) < DT_ACTIVITY_DRAINING)
> dt_state_set_activity(dtp, DT_ACTIVITY_DRAINING);
>
> - if (RUNNING_ON_VALGRIND)
> - VALGRIND_NON_SIMD_CALL0(END_probe);
> + if (dtp->dt_beginendchild)
> + kill(dtp->dt_beginendchild, SIGUSR2);
> + else if (RUNNING_ON_VALGRIND)
> + VALGRIND_NON_SIMD_CALL3(END_probe, 0, NULL, NULL);
> else
> - END_probe();
> + END_probe(0, NULL, NULL);
>
> dtp->dt_stopped = 1;
> dtp->dt_endedon = dt_state_get_endedon(dtp);
> diff --git a/test/unittest/options/tst.cpu-BEGIN.sh b/test/unittest/options/tst.cpu-BEGIN.sh
> new file mode 100755
> index 00000000..0bacfea0
> --- /dev/null
> +++ b/test/unittest/options/tst.cpu-BEGIN.sh
> @@ -0,0 +1,20 @@
> +#!/bin/bash
> +#
> +# Oracle Linux DTrace.
> +# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> +# Licensed under the Universal Permissive License v 1.0 as shown at
> +# http://oss.oracle.com/licenses/upl.
> +#
> +
> +dtrace=$1
> +
> +nerr=0
> +for cpu0 in `awk '/^processor[ ]*: [0-9]*$/ {print $3}' /proc/cpuinfo`; do
> + cpu=`$dtrace $dt_flags -xcpu=$cpu0 -qn 'BEGIN { trace(cpu); exit(0); }'`
> + echo expected cpu $cpu0 got cpu $cpu
> + if [ $cpu != $cpu0 ]; then
> + nerr=$(($nerr + 1))
> + fi
> +done
> +
> +exit $nerr
> diff --git a/test/unittest/options/tst.cpu-END.sh b/test/unittest/options/tst.cpu-END.sh
> new file mode 100755
> index 00000000..32080d94
> --- /dev/null
> +++ b/test/unittest/options/tst.cpu-END.sh
> @@ -0,0 +1,20 @@
> +#!/bin/bash
> +#
> +# Oracle Linux DTrace.
> +# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> +# Licensed under the Universal Permissive License v 1.0 as shown at
> +# http://oss.oracle.com/licenses/upl.
> +#
> +
> +dtrace=$1
> +
> +nerr=0
> +for cpu0 in `awk '/^processor[ ]*: [0-9]*$/ {print $3}' /proc/cpuinfo`; do
> + cpu=`$dtrace $dt_flags -xcpu=$cpu0 -qn 'BEGIN { exit(0) } END { trace(cpu); }'`
> + echo expected cpu $cpu0 got cpu $cpu
> + if [ $cpu != $cpu0 ]; then
> + nerr=$(($nerr + 1))
> + fi
> +done
> +
> +exit $nerr
> --
> 2.18.4
>
>
More information about the DTrace-devel
mailing list