[DTrace-devel] [PATCH v2 5/5] Add -xcpu support for general providers

Kris Van Hees kvanhees at kvh-deb-bpf.us.oracle.com
Tue Jan 23 17:06:19 UTC 2024


On Thu, Dec 21, 2023 at 09:46:53PM -0500, eugene.loh at oracle.com wrote:
> 
> Add code to dt_cg_tramp_prologue() to filter on the CPU requested with
> -xcpu, if any.  It then calls the new dt_cg_tramp_prologue_cpu()
> function, which assumes any needed CPU filtering has already been done.
> 
> Providers that handle -xcpu specially can bypass that filtering
> by calling the new dt_cg_tramp_prologue_cpu() function.
> 
> The new function simply calls
> dt_cg_tramp_prologue_act(pcb, DT_ACTIVITY_ACTIVE).  The _act
> function is called directly by only one provider:  dtrace.
> 
> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>

Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>

> ---
>  libdtrace/dt_cg.c                        |  28 ++++-
>  libdtrace/dt_cg.h                        |   1 +
>  libdtrace/dt_prov_cpc.c                  |   4 +-
>  libdtrace/dt_prov_profile.c              |   4 +-
>  test/unittest/options/tst.cpu-syscall.sh | 151 +++++++++++++++++++++++
>  5 files changed, 181 insertions(+), 7 deletions(-)
>  create mode 100755 test/unittest/options/tst.cpu-syscall.sh
> 
> diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> index 7ae16db8..c8e80abb 100644
> --- a/libdtrace/dt_cg.c
> +++ b/libdtrace/dt_cg.c
> @@ -356,10 +356,32 @@ dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act)
>  	}
>  }
>  
> +void
> +dt_cg_tramp_prologue_cpu(dt_pcb_t *pcb) {
> +	dt_cg_tramp_prologue_act(pcb, DT_ACTIVITY_ACTIVE);
> +}
> +
>  void
>  dt_cg_tramp_prologue(dt_pcb_t *pcb)
>  {
> -	dt_cg_tramp_prologue_act(pcb, DT_ACTIVITY_ACTIVE);
> +	dtrace_hdl_t	*dtp = pcb->pcb_hdl;
> +
> +	/* Check if we are on the specified CPU (if any). */
> +	if (dtp->dt_options[DTRACEOPT_CPU] != DTRACEOPT_UNSET) {
> +		dt_irlist_t	*dlp = &pcb->pcb_ir;
> +
> +		emit(dlp,  BPF_MOV_REG(BPF_REG_8, BPF_REG_1));
> +
> +		emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_get_smp_processor_id));
> +		emit(dlp,  BPF_BRANCH_IMM(BPF_JNE, BPF_REG_0,
> +					  dtp->dt_options[DTRACEOPT_CPU],
> +					  pcb->pcb_exitlbl));
> +
> +		emit(dlp,  BPF_MOV_REG(BPF_REG_1, BPF_REG_8));
> +	}
> +
> +	/* Call the rest of the prologue code generation. */
> +	dt_cg_tramp_prologue_cpu(pcb);
>  }
>  
>  /*
> @@ -506,8 +528,8 @@ dt_cg_tramp_copy_args_from_regs(dt_pcb_t *pcb, int called)
>   * So put the PC in both arg0 and arg1, test the PC, and then zero out
>   * either arg0 or arg1, as apropriate.
>   *
> - * The caller must ensure that %r7 and %r8 contain the values set by
> - * the dt_cg_tramp_prologue*() functions.
> + * The caller must ensure that %r7 and %r8 contain the values set by the
> + * dt_cg_tramp_prologue*() functions.
>   */
>  void
>  dt_cg_tramp_copy_pc_from_regs(dt_pcb_t *pcb)
> diff --git a/libdtrace/dt_cg.h b/libdtrace/dt_cg.h
> index 95bf507f..50dd256b 100644
> --- a/libdtrace/dt_cg.h
> +++ b/libdtrace/dt_cg.h
> @@ -21,6 +21,7 @@ extern void dt_cg(dt_pcb_t *, dt_node_t *);
>  extern void dt_cg_xsetx(dt_irlist_t *, dt_ident_t *, uint_t, int, uint64_t);
>  extern dt_irnode_t *dt_cg_node_alloc(uint_t, struct bpf_insn);
>  extern void dt_cg_tramp_prologue_act(dt_pcb_t *pcb, dt_activity_t act);
> +extern void dt_cg_tramp_prologue_cpu(dt_pcb_t *pcb);
>  extern void dt_cg_tramp_prologue(dt_pcb_t *pcb);
>  extern void dt_cg_tramp_clear_regs(dt_pcb_t *pcb);
>  extern void dt_cg_tramp_copy_regs(dt_pcb_t *pcb);
> diff --git a/libdtrace/dt_prov_cpc.c b/libdtrace/dt_prov_cpc.c
> index 9e0f5542..93968950 100644
> --- a/libdtrace/dt_prov_cpc.c
> +++ b/libdtrace/dt_prov_cpc.c
> @@ -395,10 +395,10 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
>  	int		i;
>  	dt_irlist_t	*dlp = &pcb->pcb_ir;
>  
> -	dt_cg_tramp_prologue(pcb);
> +	dt_cg_tramp_prologue_cpu(pcb);
>  
>  	/*
> -	 * After the dt_cg_tramp_prologue() call, we have:
> +	 * After the dt_cg_tramp_prologue_cpu() call, we have:
>  	 *				//     (%r7 = dctx->mst)
>  	 *				//     (%r8 = dctx->ctx)
>  	 */
> diff --git a/libdtrace/dt_prov_profile.c b/libdtrace/dt_prov_profile.c
> index f3f3bf23..967ca8c2 100644
> --- a/libdtrace/dt_prov_profile.c
> +++ b/libdtrace/dt_prov_profile.c
> @@ -220,10 +220,10 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
>  	int		i;
>  	dt_irlist_t	*dlp = &pcb->pcb_ir;
>  
> -	dt_cg_tramp_prologue(pcb);
> +	dt_cg_tramp_prologue_cpu(pcb);
>  
>  	/*
> -	 * After the dt_cg_tramp_prologue() call, we have:
> +	 * After the dt_cg_tramp_prologue_cpu() call, we have:
>  	 *				//     (%r7 = dctx->mst)
>  	 *				//     (%r8 = dctx->ctx)
>  	 */
> diff --git a/test/unittest/options/tst.cpu-syscall.sh b/test/unittest/options/tst.cpu-syscall.sh
> new file mode 100755
> index 00000000..8713e9d4
> --- /dev/null
> +++ b/test/unittest/options/tst.cpu-syscall.sh
> @@ -0,0 +1,151 @@
> +#!/bin/bash
> +#
> +# Oracle Linux DTrace.
> +# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> +# Licensed under the Universal Permissive License v 1.0 as shown at
> +# http://oss.oracle.com/licenses/upl.
> +#
> +
> +dtrace=$1
> +
> +DIRNAME="$tmpdir/cpu-syscall.$$.$RANDOM"
> +mkdir -p $DIRNAME
> +cd $DIRNAME
> +
> +#
> +# Make trigger code and compile it.
> +#
> +
> +cat << EOF > main.c
> +#define _GNU_SOURCE
> +#include <sched.h>
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +
> +#include <errno.h>
> +
> +int ncpus;
> +cpu_set_t *mask;
> +size_t size;
> +
> +/*
> + * Grow the CPU mask as needed.
> + *
> + * Other ways of determining the number of CPUs available on the system:
> + * - inspecting the contents of /proc/cpuinfo
> + * - using sysconf(3) to obtain _SC_NPROCESSORS_CONF and _SC_NPROCESSORS_ONLN
> + * - inspecting the list of CPU directories under /sys/devices/system/cpu/
> + */
> +int grow_mask() {
> +	ncpus = 1024;
> +	while ((mask = CPU_ALLOC(ncpus)) != NULL &&
> +	       sched_getaffinity(0, CPU_ALLOC_SIZE(ncpus), mask) != 0 &&
> +	       errno == EINVAL) {
> +		CPU_FREE(mask);
> +		errno = 0;
> +		ncpus *= 2;
> +	}
> +	if (mask == NULL || (errno != 0 && errno != EINVAL))
> +		return 1;
> +	size = CPU_ALLOC_SIZE(ncpus);
> +	return 0;
> +}
> +
> +int bind_to_cpu(size_t cpu) {
> +	CPU_ZERO_S(size, mask);
> +	CPU_SET_S(cpu, size, mask);
> +	if (sched_setaffinity(0, size, mask) != 0)
> +		return 1;
> +	return 0;
> +}
> +
> +int main(int c, char **v) {
> +	int i, fd = open("/dev/null", O_WRONLY);
> +
> +	if (fd == -1)
> +		return 1;
> +
> +	if (grow_mask() != 0)	/* grow_mask() returns 1 (not -1) on failure */
> +		return 1;
> +
> +	/* Loop over the CPU ids passed in argv[1..]. */
> +	for (i = 1; i < c; i++) {
> +		/* Bind to CPU; bind_to_cpu() returns 1 (not -1) on failure. */
> +		if (bind_to_cpu(atol(v[i])) != 0)
> +			return 1;
> +
> +		/* Call write() from that CPU so the probe fires there. */
> +		write(fd, &i, sizeof(i));
> +	}
> +
> +	close(fd);
> +	CPU_FREE(mask);
> +
> +	return 0;
> +}
> +EOF
> +
> +gcc main.c
> +if [ $? -ne 0 ]; then
> +	echo ERROR compilation failed
> +	exit 1
> +fi
> +
> +#
> +# Get CPU list and form expected-results file.
> +#
> +
> +cpulist=`awk '/^processor[ 	]: [0-9]*$/ { print $3 }' /proc/cpuinfo`
> +echo $cpulist
> +
> +echo > expect.txt
> +for cpu in $cpulist; do
> +	echo $cpu >> expect.txt
> +done
> +
> +#
> +# Run DTrace (without -xcpu).  Check that all CPUs appear.
> +#
> +
> +$dtrace $dt_flags \
> +    -qn 'syscall::write:entry { printf("%d\n", cpu); }' \
> +    -c "./a.out $cpulist" | sort | uniq > actual.txt
> +if [ ${PIPESTATUS[0]} -ne 0 ]; then	# $? would only report uniq's status
> +	echo ERROR dtrace failed
> +	exit 1
> +fi
> +
> +if ! diff -q expect.txt actual.txt > /dev/null; then
> +	echo ERROR did not see expected CPUs in baseline case
> +	echo expect $cpulist
> +	echo actual:
> +	cat actual.txt
> +	exit 1
> +fi
> +
> +#
> +# Run DTrace (with -xcpu).  Check that only the specified CPU appears.
> +#
> +
> +nerr=0
> +for cpu0 in $cpulist; do
> +	$dtrace $dt_flags -xcpu=$cpu0 \
> +	    -qn 'syscall::write:entry { printf("%d\n", cpu); }' \
> +	    -c "./a.out $cpulist" | sort | uniq > actual.txt
> +	echo > expect.txt
> +	echo $cpu0 >> expect.txt
> +
> +	if ! diff -q expect.txt actual.txt > /dev/null; then
> +		echo ERROR did not see expected CPUs in cpu $cpu0 case, saw:
> +		cat actual.txt
> +		nerr=$(($nerr + 1))
> +	fi
> +done
> +
> +exit $nerr
> -- 
> 2.18.4
> 
> 



More information about the DTrace-devel mailing list