[DTrace-devel] [PATCH 2/5] Add -xcpu support to dtrace provider

Kris Van Hees kris.van.hees at oracle.com
Wed Dec 20 16:42:14 UTC 2023


Preliminary comment...

On Tue, Sep 05, 2023 at 12:11:39AM -0400, eugene.loh at oracle.com wrote:
> 
> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
> ---
>  libdtrace/dt_impl.h                    |   1 +
>  libdtrace/dt_prov_dtrace.c             |   4 +-
>  libdtrace/dt_work.c                    | 169 +++++++++++++++++++++++--
>  test/unittest/options/tst.cpu-BEGIN.sh |  20 +++
>  test/unittest/options/tst.cpu-END.sh   |  20 +++
>  5 files changed, 204 insertions(+), 10 deletions(-)
>  create mode 100755 test/unittest/options/tst.cpu-BEGIN.sh
>  create mode 100755 test/unittest/options/tst.cpu-END.sh
> 
> diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
> index 674905b9..597081d1 100644
> --- a/libdtrace/dt_impl.h
> +++ b/libdtrace/dt_impl.h
> @@ -403,6 +403,7 @@ struct dtrace_hdl {
>  	uint_t dt_stopped;	/* boolean:  set once tracing is stopped */
>  	processorid_t dt_beganon; /* CPU that executed BEGIN probe (if any) */
>  	processorid_t dt_endedon; /* CPU that executed END probe (if any) */
> +	pid_t dt_beginendchild;	/* process running BEGIN and END probes */
>  	uint_t dt_oflags;	/* dtrace open-time options (see dtrace.h) */
>  	uint_t dt_cflags;	/* dtrace compile-time options (see dtrace.h) */
>  	uint_t dt_dflags;	/* dtrace link-time options (see dtrace.h) */
> diff --git a/libdtrace/dt_prov_dtrace.c b/libdtrace/dt_prov_dtrace.c
> index a8ea66d6..a76534f8 100644
> --- a/libdtrace/dt_prov_dtrace.c
> +++ b/libdtrace/dt_prov_dtrace.c
> @@ -208,7 +208,9 @@ static int attach(dtrace_hdl_t *dtp, const dt_probe_t *prp, int bpf_fd)
>  		int	fd, rc = -1;
>  
>  		/* get a uprobe specification for this probe */
> -		spec = uprobe_spec(getpid(), prp->desc->prb);
> +		spec = uprobe_spec(dtp->dt_beginendchild ?
> +				   dtp->dt_beginendchild : getpid(),
> +				   prp->desc->prb);
>  		if (spec == NULL)
>  			return -ENOENT;
>  
> diff --git a/libdtrace/dt_work.c b/libdtrace/dt_work.c
> index 1bb6104c..02cdfa47 100644
> --- a/libdtrace/dt_work.c
> +++ b/libdtrace/dt_work.c
> @@ -19,15 +19,16 @@
>  #include <port.h>
>  #include <linux/perf_event.h>
>  #include <sys/epoll.h>
> +#include <sys/prctl.h>
>  #include <valgrind/valgrind.h>
>  
>  void
> -BEGIN_probe(void)
> +BEGIN_probe(int signum, siginfo_t *si, void *uc)
>  {
>  }
>  
>  void
> -END_probe(void)
> +END_probe(int signum, siginfo_t *si, void *uc)
>  {
>  }
>  
> @@ -131,6 +132,90 @@ dtrace_status(dtrace_hdl_t *dtp)
>  	return DTRACE_STATUS_OKAY;
>  }
>  
> +static
> +void mychild_bind_to_cpu(size_t cpu) {
> +	int ncpus;
> +	cpu_set_t *mask;
> +	size_t size;
> +
> +	/*
> +	 * Grow the mask as needed.
> +	 *
> +	 * Other ways of determining the number of CPUs available on the system:
> +	 * - inspecting the contents of /proc/cpuinfo
> +	 * - using sysconf(3) to obtain _SC_NPROCESSORS_CONF and _SC_NPROCESSORS_ONLN
> +	 * - inspecting the list of CPU directories under /sys/devices/system/cpu/
> +	 */

Can't this use the dt_conf info about cpuinfo rather than having more code
that seems to do the same (determining how many CPUs we have - and which)?

> +	ncpus = 1024;
> +	while ((mask = CPU_ALLOC(ncpus)) != NULL &&
> +	       sched_getaffinity(0, CPU_ALLOC_SIZE(ncpus), mask) != 0 &&
> +	       errno == EINVAL) {
> +		CPU_FREE(mask);
> +		errno = 0;
> +		ncpus *= 2;
> +	}
> +	if (mask == NULL || (errno != 0 && errno != EINVAL)) {
> +		/* FIXME: some other failure mode? */
> +		exit(1);
> +	}
> +
> +	/* Set the CPU mask. */
> +	size = CPU_ALLOC_SIZE(ncpus);
> +	CPU_ZERO_S(size, mask);
> +	CPU_SET_S(cpu, size, mask);
> +
> +	/* Set my affinity. */
> +	if (sched_setaffinity(0, size, mask) != 0) {
> +		/* FIXME: some other failure mode? */
> +		exit(1);
> +	}
> +
> +	/* Free the mask. */
> +	CPU_FREE(mask);
> +}
> +
> +static void
> +mychild_sethandlers() {
> +        struct sigaction act;
> +
> +        memset(&act, 0, sizeof(struct sigaction));
> +        act.sa_flags = SA_SIGINFO;
> +
> +        act.sa_sigaction = &BEGIN_probe;
> +        if (sigaction(SIGUSR1, &act, NULL))
> +                printf("ERROR cannot set handler\n");
> +
> +        act.sa_sigaction = &END_probe;
> +        if (sigaction(SIGUSR2, &act, NULL))
> +                printf("ERROR cannot set handler\n");
> +}
> +
> +static void
> +mychild(int fd) {
> +	int dummy = 1234;
> +
> +	mychild_sethandlers();
> +
> +	/* Wait for USR1 and BEGIN_probe(). */
> +	pause();
> +
> +	/* Notify parent that BEGIN_probe() triggered. */
> +	write(fd, &dummy, sizeof(dummy));
> +
> +	/* Wait for USR2 and END_probe(). */
> +	pause();
> +
> +	exit(0);
> +}
> +
> +static unsigned long long
> +elapsed_msecs() {
> +	struct timespec tstruct;
> +
> +	clock_gettime(CLOCK_MONOTONIC, &tstruct);
> +	return tstruct.tv_sec * 1000ull + tstruct.tv_nsec / 1000000;
> +}
> +
>  int
>  dtrace_go(dtrace_hdl_t *dtp, uint_t cflags)
>  {
> @@ -138,10 +223,42 @@ dtrace_go(dtrace_hdl_t *dtp, uint_t cflags)
>  	struct epoll_event	ev;
>  	dtrace_optval_t		lockmem = dtp->dt_options[DTRACEOPT_LOCKMEM];
>  	struct rlimit		rl;
> +	int			begin_end_child_fds[2];
>  
>  	if (dtp->dt_active)
>  		return dt_set_errno(dtp, EINVAL);
>  
> +	/*
> +	 * Fork a child for the BEGIN and END probes if -xcpu is used.
> +	 */
> +	if (dtp->dt_options[DTRACEOPT_CPU] != DTRACEOPT_UNSET) {
> +		pid_t child;
> +
> +		pipe(begin_end_child_fds);
> +
> +		/* FIXME: Do we have to worry about an offline CPU? */
> +		child = fork();
> +
> +		if (child == -1) {
> +			/* FIXME: emit an error message? */
> +			return -1;
> +		}
> +		if (child == 0) {
> +			/* Make sure we die when the parent does. */
> +			if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0) != 0) {
> +				/* FIXME: emit an error message? */
> +				exit(1);
> +			}
> +
> +			mychild_bind_to_cpu(dtp->dt_options[DTRACEOPT_CPU]);
> +			mychild(begin_end_child_fds[1]);
> +			/* Do not return. */
> +		}
> +		dtp->dt_beginendchild = child;
> +	} else {
> +		dtp->dt_beginendchild = 0;
> +	}
> +
>  	/*
>  	 * Set the locked-memory limit if so directed by the user.
>  	 */
> @@ -200,10 +317,42 @@ dtrace_go(dtrace_hdl_t *dtp, uint_t cflags)
>  	if (dt_aggregate_go(dtp) == -1)
>  		return -1;
>  
> -	if (RUNNING_ON_VALGRIND)
> -		VALGRIND_NON_SIMD_CALL0(BEGIN_probe);
> -	else
> -		BEGIN_probe();
> +	if (dtp->dt_beginendchild) {
> +		/*
> +		 * We forked a child to run on a specific CPU for
> +		 * BEGIN and END probes.
> +		 */
> +		int flags, dummy;
> +		unsigned long long timeout;
> +
> +		/* Make the read end of the pipe nonblocking. */
> +		flags = fcntl(begin_end_child_fds[0], F_GETFL, 0);
> +		if (flags == -1) {
> +			/* FIXME: better diagnosibility? */
> +			printf("ERROR: could not get flags\n");
> +			return -1;
> +		}
> +		flags |= O_NONBLOCK;
> +		if (fcntl(begin_end_child_fds[0], F_SETFL, flags) == -1) {
> +			/* FIXME: better diagnosibility? */
> +			printf("ERROR: could not set flags\n");
> +			return -1;
> +		}
> +
> +		/* Signal the child to BEGIN. */
> +		kill(dtp->dt_beginendchild, SIGUSR1);
> +
> +		/* Wait for the child to ack. */
> +		timeout = elapsed_msecs() + 2000;
> +		while (read(begin_end_child_fds[0], &dummy, 4) <= 0) {
> +			usleep(100000);
> +			if (elapsed_msecs() > timeout)
> +				return -1;
> +		}
> +	} else if (RUNNING_ON_VALGRIND) {
> +		VALGRIND_NON_SIMD_CALL3(BEGIN_probe, 0, NULL, NULL);
> +	} else
> +		BEGIN_probe(0, NULL, NULL);
>  
>  	dtp->dt_active = 1;
>  	dtp->dt_beganon = dt_state_get_beganon(dtp);
> @@ -233,10 +382,12 @@ dtrace_stop(dtrace_hdl_t *dtp)
>  	if (dt_state_get_activity(dtp) < DT_ACTIVITY_DRAINING)
>  		dt_state_set_activity(dtp, DT_ACTIVITY_DRAINING);
>  
> -	if (RUNNING_ON_VALGRIND)
> -		VALGRIND_NON_SIMD_CALL0(END_probe);
> +	if (dtp->dt_beginendchild)
> +		kill(dtp->dt_beginendchild, SIGUSR2);
> +	else if (RUNNING_ON_VALGRIND)
> +		VALGRIND_NON_SIMD_CALL3(END_probe, 0, NULL, NULL);
>  	else
> -		END_probe();
> +		END_probe(0, NULL, NULL);
>  
>  	dtp->dt_stopped = 1;
>  	dtp->dt_endedon = dt_state_get_endedon(dtp);
> diff --git a/test/unittest/options/tst.cpu-BEGIN.sh b/test/unittest/options/tst.cpu-BEGIN.sh
> new file mode 100755
> index 00000000..0bacfea0
> --- /dev/null
> +++ b/test/unittest/options/tst.cpu-BEGIN.sh
> @@ -0,0 +1,20 @@
> +#!/bin/bash
> +#
> +# Oracle Linux DTrace.
> +# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> +# Licensed under the Universal Permissive License v 1.0 as shown at
> +# http://oss.oracle.com/licenses/upl.
> +#
> +
> +dtrace=$1
> +
> +nerr=0
> +for cpu0 in `awk '/^processor[ 	]*: [0-9]*$/ {print $3}' /proc/cpuinfo`; do
> +	cpu=`$dtrace $dt_flags -xcpu=$cpu0 -qn 'BEGIN { trace(cpu); exit(0); }'`
> +	echo expected cpu $cpu0 got cpu $cpu
> +	if [ $cpu != $cpu0 ]; then
> +		nerr=$(($nerr + 1))
> +	fi
> +done
> +
> +exit $nerr
> diff --git a/test/unittest/options/tst.cpu-END.sh b/test/unittest/options/tst.cpu-END.sh
> new file mode 100755
> index 00000000..32080d94
> --- /dev/null
> +++ b/test/unittest/options/tst.cpu-END.sh
> @@ -0,0 +1,20 @@
> +#!/bin/bash
> +#
> +# Oracle Linux DTrace.
> +# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> +# Licensed under the Universal Permissive License v 1.0 as shown at
> +# http://oss.oracle.com/licenses/upl.
> +#
> +
> +dtrace=$1
> +
> +nerr=0
> +for cpu0 in `awk '/^processor[ 	]*: [0-9]*$/ {print $3}' /proc/cpuinfo`; do
> +	cpu=`$dtrace $dt_flags -xcpu=$cpu0 -qn 'BEGIN { exit(0) } END { trace(cpu); }'`
> +	echo expected cpu $cpu0 got cpu $cpu
> +	if [ $cpu != $cpu0 ]; then
> +		nerr=$(($nerr + 1))
> +	fi
> +done
> +
> +exit $nerr
> -- 
> 2.18.4
> 
> 



More information about the DTrace-devel mailing list