[DTrace-devel] [PATCH v2 01/12] proc: use a rawtp for the proc:::exit probe

Kris Van Hees kris.van.hees at oracle.com
Mon Jan 8 19:38:58 UTC 2024


On Mon, Jan 08, 2024 at 01:29:55PM -0500, Eugene Loh via DTrace-devel wrote:
> Looks good, I think.  A few minor things:
> 
> The scratch space for reading a struct member is at %fp + DT_STK_SPILL(0),
> but this is in the trampoline.  So, shouldn't it be at DT_TRAMP_SLOT(0)? 
> Also, with this patch, the proc provider will be accessing struct members at
> three different places.  So, it may make sense to use some sort of
> get_member() or deref_r3() function (as in the ip and io providers).  (Or,
> off course, combine them all.)

Ah yes, thank for catching that (the stack slot).  I'll fix that in the patch.

Yes, proc provider could use get_member() and deref_r3() functions but those
do not exist yet at the point of this series (and delaying this patch will
then impact other tests that depend on proc:::exit, etc...).

So I think that such consolidation is something that perhaps should be done
afterwards, especially since we have some other cleanup/consoliation that can
be done for providers now that we have more infrastructure built for it.

> Why do we use /bin/kill in tst.signal.sh but kill in all other tests?

Because in many shells, kill is a built-in command and we need to be abke to
detect the command getting issued with a probe.
> 
> On 1/6/24 13:44, Kris Van Hees via DTrace-devel wrote:
> > Also fix up tst.exitkilled.sh to not depend on pr_psargs (for which we
> > do not have an translator implementation yet).
> > 
> > Also fix up tst.signal.sh to not depend on pr_psargs and account for the
> > fact that when a process does not handle SIGUSR1, the kernel will deliver
> > it as a SIGKILL.
> > 
> > Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> > Reviewed-by: Eugene Loh <eugene.loh at oracle.com>
> > ---
> >   libdtrace/dt_prov_proc.c             | 64 +++++++++++++++++++++++-----
> >   test/unittest/proc/tst.exitkilled.sh | 25 +++++++----
> >   test/unittest/proc/tst.signal.sh     | 33 ++++++++------
> >   3 files changed, 91 insertions(+), 31 deletions(-)
> > 
> > diff --git a/libdtrace/dt_prov_proc.c b/libdtrace/dt_prov_proc.c
> > index 42698e3a..b59aa6a3 100644
> > --- a/libdtrace/dt_prov_proc.c
> > +++ b/libdtrace/dt_prov_proc.c
> > @@ -1,6 +1,6 @@
> >   /*
> >    * Oracle Linux DTrace.
> > - * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> > + * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
> >    * Licensed under the Universal Permissive License v 1.0 as shown at
> >    * http://oss.oracle.com/licenses/upl.
> >    *
> > @@ -204,10 +204,10 @@ static void enable(dtrace_hdl_t *dtp, dt_probe_t *prp)
> >   	} else if (strcmp(prp->desc->prb, "exit") == 0 ||
> >   		   strcmp(prp->desc->prb, "lwp-exit") == 0) {
> >   		pd.id = DTRACE_IDNONE;
> > -		pd.prv = "fbt";
> > +		pd.prv = "rawtp";
> >   		pd.mod = "";
> > -		pd.fun = "taskstats_exit";
> > -		pd.prb = "entry";
> > +		pd.fun = "";
> > +		pd.prb = "sched_process_exit";
> >   		uprp = dt_probe_lookup(dtp, &pd);
> >   		assert(uprp != NULL);
> > @@ -323,8 +323,13 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> >   		int		rc = 0;
> >   		uint_t		lbl_out = dt_irlist_label(dlp);
> > -		emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(1)));
> > -		emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
> > +		/* Only fire this probe for the task group leader. */
> > +		emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_get_current_pid_tgid));
> > +		emit(dlp,  BPF_MOV_REG(BPF_REG_1, BPF_REG_0));
> > +		emit(dlp,  BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 32));
> > +		emit(dlp,  BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32));
> > +		emit(dlp,  BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32));
> > +		emit(dlp,  BPF_BRANCH_REG(BPF_JNE, BPF_REG_0, BPF_REG_1, exitlbl));
> >   		if (!cfp)
> >   			longjmp(yypcb->pcb_jmpbuf, EDT_NOCTF);
> > @@ -342,9 +347,9 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> >   		/*
> >   		 * This implements:
> >   		 *	%r1 = tsk->exit_code
> > -		 *	if ((%r1 & 0x7f) == 0) args[0] = 1;
> > -		 *	else if (%r1 & 0x80) args[0] = 3;
> > -		 *	else args[0] = 2;
> > +		 *	if ((%r1 & 0x7f) == 0) args[0] = 1;	// CLD_EXITED
> > +		 *	else if (%r1 & 0x80) args[0] = 3;	// CLD_DUMPED
> > +		 *	else args[0] = 2;			// CLD_KILLED
> >   		 */
> >   		emit(dlp,  BPF_MOV_REG(BPF_REG_1, BPF_REG_FP));
> >   		emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, DT_STK_SPILL(0)));
> > @@ -381,7 +386,46 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> >   		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(2)));
> >   		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
> >   	} else if (strcmp(prp->desc->prb, "signal-handle") == 0) {
> > -		/* All three arguments are already in their proper place. */
> > +		int	off;
> > +		size_t	sz;
> > +		uint_t	lbl_keep = dt_irlist_label(dlp);
> > +
> > +		/*
> > +		 * Getting the signal number right is a bit tricky because any
> > +		 * unhandled signal triggers a SIGKILL to be delivered (to kill
> > +		 * the task) while reporting the actual signal number in the
> > +		 * signal struct for the task.  The real signal number is then
> > +		 * to be found in task->signal->group_exit_code.
> > +		 *
> > +		 * So. if arg0 is SIGKILL, we look at group_exit_code, and if
> > +		 * it is > 0, use that value as signal number.
> > +		 */
> > +		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(0)));
> > +		emit(dlp, BPF_BRANCH_IMM(BPF_JNE, BPF_REG_0, SIGKILL, lbl_keep));
> > +
> > +		emit(dlp, BPF_CALL_HELPER(BPF_FUNC_get_current_task));
> > +		off = dt_cg_ctf_offsetof("struct task_struct", "signal", &sz, 0);
> > +		emit(dlp, BPF_MOV_REG(BPF_REG_3, BPF_REG_0));
> > +		emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, off));
> > +		emit(dlp, BPF_MOV_REG(BPF_REG_1, BPF_REG_FP));
> > +		emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, DT_STK_SPILL(0)));
> > +		emit(dlp, BPF_MOV_IMM(BPF_REG_2, sz));
> > +		emit(dlp, BPF_CALL_HELPER(BPF_FUNC_probe_read));
> > +		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_3, BPF_REG_FP, DT_STK_SPILL(0)));
> > +		off = dt_cg_ctf_offsetof("struct signal_struct", "group_exit_code", &sz, 0);
> > +		emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, off));
> > +		emit(dlp, BPF_MOV_REG(BPF_REG_1, BPF_REG_FP));
> > +		emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, DT_STK_SPILL(0)));
> > +		emit(dlp, BPF_MOV_IMM(BPF_REG_2, sz));
> > +		emit(dlp, BPF_CALL_HELPER(BPF_FUNC_probe_read));
> > +		emit(dlp, BPF_LOAD(BPF_W, BPF_REG_0, BPF_REG_FP, DT_STK_SPILL(0)));
> > +		emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, lbl_keep));
> > +		emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
> > +
> > +		emitl(dlp, lbl_keep,
> > +			   BPF_NOP());
> > +
> > +		/* All three arguments are in their proper place. */
> >   	} else if (strcmp(prp->desc->prb, "signal-send") == 0) {
> >   		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_7, DMST_ARG(1)));
> >   		emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
> > diff --git a/test/unittest/proc/tst.exitkilled.sh b/test/unittest/proc/tst.exitkilled.sh
> > index 57334136..94051e22 100755
> > --- a/test/unittest/proc/tst.exitkilled.sh
> > +++ b/test/unittest/proc/tst.exitkilled.sh
> > @@ -7,21 +7,28 @@
> >   #
> >   # This script tests that the proc:::exit probe fires with the correct argument
> >   # when the process is killed.
> > -#
> > -# If this fails, the script will run indefinitely; it relies on the harness
> > -# to time it out.
> > -#
> > -# @@xfail: dtv2
> >   script()
> >   {
> >   	$dtrace $dt_flags -s /dev/stdin <<EOF
> > +	syscall::execve:entry
> > +	/copyinstr((uintptr_t)args[1][0]) == "sleep" && args[1][1] &&
> > +	 copyinstr((uintptr_t)args[1][1]) == "10000"/
> > +	{
> > +		kill_pid = pid;
> > +	}
> > +
> >   	proc:::exit
> > -	/curpsinfo->pr_ppid == $child &&
> > -	    curpsinfo->pr_psargs == "$longsleep" && args[0] == CLD_KILLED/
> > +	/curpsinfo->pr_ppid == $child && kill_pid == pid &&
> > +	 args[0] == CLD_KILLED/
> >   	{
> >   		exit(0);
> >   	}
> > +
> > +	tick-5s
> > +	{
> > +		exit(124);
> > +	}
> >   EOF
> >   }
> > @@ -29,9 +36,11 @@ sleeper()
> >   {
> >   	while true; do
> >   		$longsleep &
> > +		target=$!
> >   		disown %+
> >   		sleep 1
> > -		kill -9 $!
> > +		kill -9 $target
> > +		sleep 1
> >   	done
> >   }
> > diff --git a/test/unittest/proc/tst.signal.sh b/test/unittest/proc/tst.signal.sh
> > index 2d672257..bc1b2530 100755
> > --- a/test/unittest/proc/tst.signal.sh
> > +++ b/test/unittest/proc/tst.signal.sh
> > @@ -7,29 +7,34 @@
> >   #
> >   # This script tests that the proc:::signal-send and proc:::signal-handle
> >   # probes fire correctly and with the correct arguments.
> > -#
> > -# If this fails, the script will run indefinitely; it relies on the harness
> > -# to time it out.
> > -#
> > -# @@xfail: dtv2
> >   script()
> >   {
> >   	$dtrace $dt_flags -s /dev/stdin <<EOF
> > +	syscall::execve:entry
> > +	/copyinstr((uintptr_t)args[1][0]) == "sleep" && args[1][1] &&
> > +	 copyinstr((uintptr_t)args[1][1]) == "10000"/
> > +	{
> > +		sig_pid = pid;
> > +	}
> > +
> >   	proc:::signal-send
> >   	/execname == "kill" && curpsinfo->pr_ppid == $child &&
> > -	    args[1]->pr_psargs == "$longsleep" && args[2] == SIGUSR1/
> > +	 sig_pid == args[1]->pr_pid && args[2] != SIGUSR1/
> >   	{
> > -		/*
> > -		 * This is guaranteed to not race with signal-handle.
> > -		 */
> > -		target = args[1]->pr_pid;
> > +		/* Wrong signal being sent. */
> > +		exit(1);
> >   	}
> >   	proc:::signal-handle
> > -	/target == pid && args[0] == SIGUSR1/
> > +	/sig_pid == pid/
> >   	{
> > -		exit(0);
> > +		exit(args[0] == SIGUSR1 ? 0 : 1);
> > +	}
> > +
> > +	tick-5s
> > +	{
> > +		exit(124);
> >   	}
> >   EOF
> >   }
> > @@ -38,9 +43,11 @@ sleeper()
> >   {
> >   	while true; do
> >   		$longsleep &
> > +		target=$!
> >   		disown %+
> >   		sleep 1
> > -		/bin/kill -USR1 $!
> > +		/bin/kill -USR1 $target
> > +		sleep 1
> >   	done
> >   }
> 
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel



More information about the DTrace-devel mailing list