[DTrace-devel] [PATCH v2 2/2] Implement the lockstat provider

Kris Van Hees kris.van.hees at oracle.com
Wed May 24 06:01:45 UTC 2023


On Tue, May 23, 2023 at 07:05:22PM -0400, Eugene Loh via DTrace-devel wrote:
> I assume this is a preliminary post and a later version will have test
> updates.
> 
> Fun fact:  lockstat is not documented in DTv1?
> 
> I'm puzzled whether the 8 get_cpuinfo(dtp, dlp, exitlbl) calls are really
> needed?  Can the trampoline be called for an unsupported probe?

I moved it to the top so it only occurs once.

> The more compact code is great.  It helps expose the bigger picture and
> separate reasoning about codegen from reasoning about instrumentation. 
> There are a few more opportunities to consider:
> 
> This appears 4x:
> 
> +                       /* Store the start time. */
> +                       emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
> +                       emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6,
> offsetof(dt_bpf_cpuinfo_t, lockstat_XXXX), BPF_REG_0));
> 
> This appears 3x:
> 
>     emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
>     emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6,
> offsetof(dt_bpf_cpuinfo_t, lockstat_stime)));
>     emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, exitlbl));
>     emit(dlp, BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1));
>     emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_0));
> 
> Perhaps adaptive-acquire-error can be combined with
> adaptive-[acquire|release]?
> 
> Perhaps rw-acquire and spin-acquire can be combined?
> 
> It looks like rw-spin and spin-spin have lots of overlap.

For now, I'd like to keep these separate because it makes it easier to
follow what they are doing.  Also... for newer kernels (6.x) we will be
able to use tracepoints for some of this and that will yield quite
different code.

> On 5/22/23 18:52, Kris Van Hees via DTrace-devel wrote:
> > diff --git a/libdtrace/dt_prov_lockstat.c b/libdtrace/dt_prov_lockstat.c
> > +
> > +/*
> > + * Get a reference to the cpuinfo structure for the current CPI.
> 
> CPI -> CPU
> 
> > + *
> > + * Clobbers %r0 through %r5
> > + * Stores pointer to cpuinfo struct in %r6
> > + */
> > +static void get_cpuinfo(dtrace_hdl_t *dtp, dt_irlist_t *dlp, uint_t exitlbl)
> > +{
> > +	dt_ident_t	*idp = dt_dlib_get_map(dtp, "cpuinfo");
> > +
> > +	assert(idp != NULL);
> > +	dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
> > +	emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> > +	emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
> > +	emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
> > +	emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> > +	emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
> > +	emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
> > +}
> > +
> > +/*
> > + * Copy the lock address from args[n] into the per-CPU cpuinfo structure
> > + * referened by %r6.
> 
> referenced
> 
> > + *
> > + * Clobbers %r1
> > + */
> > +static void copy_lockaddr_into_cpuinfo(dt_irlist_t *dlp, int n)
> > +{
> > +	emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(n)));
> > +	emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
> > +}
> > +
> > +/*
> > + * Copy the lock address from the per-CPU cpuinfo structure referenced by %r6
> > + * into args[n], and reset the lock address in the per-CPU cpuinfo structure
> > + * to 0.  If lbl is not DT_LBL_NONE, it will be used to label the instruction
> > + * that resets the lock addrss in cpuinfo.
> 
> address
> 
> > + *
> > + * Clobbers %r1
> > + */
> > +static void copy_lockaddr_from_cpuinfo(dt_irlist_t *dlp, int n, uint_t lbl)
> > +{
> > +	emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
> > +	emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(n), BPF_REG_1));
> > +
> > +	emitl(dlp, lbl,
> > +		   BPF_STORE_IMM(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
> > +}
> > +
> > +/*
> > + * Generate a BPF trampoline for a SDT probe.
> > + *
> > + * The trampoline function is called when a SDT probe triggers, and it must
> > + * satisfy the following prototype:
> > + *
> > + *	int dt_lockstat(void *data)
> > + *
> > + * The trampoline will populate a dt_dctx_t struct and then call the function
> > + * that implements the compiled D clause.  It returns the value that it gets
> > + * back from that function.
> > + */
> > +static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> > +{
> > +	dtrace_hdl_t	*dtp = pcb->pcb_hdl;
> > +	dt_irlist_t	*dlp = &pcb->pcb_ir;
> > +	dt_probe_t	*prp = pcb->pcb_probe;
> > +	dt_probe_t	*uprp = pcb->pcb_parent_probe;
> > +
> > +	assert(uprp != NULL);
> > +
> > +	if (strcmp(prp->desc->prb, "adaptive-acquire") == 0 ||
> > +	    strcmp(prp->desc->prb, "adaptive-release") == 0) {
> > +		get_cpuinfo(dtp, dlp, exitlbl);
> > +
> > +		if (strcmp(uprp->desc->prb, "entry") == 0) {
> > +			copy_lockaddr_into_cpuinfo(dlp, 0);
> > +			return 1;
> > +		} else {
> > +			copy_lockaddr_from_cpuinfo(dlp, 0, DT_LBL_NONE);
> > +			return 0;
> > +		}
> > +	} else if (strcmp(prp->desc->prb, "adaptive-acquire-error") == 0) {
> > +		get_cpuinfo(dtp, dlp, exitlbl);
> > +
> > +		if (strcmp(uprp->desc->prb, "entry") == 0) {
> > +			copy_lockaddr_into_cpuinfo(dlp, 0);
> > +			return 1;
> > +		} else {
> > +			/*
> > +			 * args[1] is already set by the underlying probe, but
> > +			 * we only report the adative-acquire-error probe if
> 
> adaptive
> 
> > +			 * the value is not 0.
> > +			 */
> > +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(1)));
> > +			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, exitlbl));
> > +			copy_lockaddr_from_cpuinfo(dlp, 0, DT_LBL_NONE);
> > +			return 0;
> > +		}
> > +	} else if (strcmp(prp->desc->prb, "adaptive-block") == 0) {
> > +		get_cpuinfo(dtp, dlp, exitlbl);
> > +
> > +		/*
> > +		 * - mutex_lock:entry inits lockstat_btime (0) and stores lock.
> > +		 * - schedule_preempt_disabled:entry sets lockstat_bfrom
> > +		 * - schedule_preempt_disabled:return increments lockstat_bfrom
> > +		 * - mutex_lock:return sets the adaptive-block arguments
> > +		 */
> 
> Unnecessary/unneeded comment block?

For now, I think it is worth leaving it in because it gives some info on what
the interaction of the 4 probes is to implement this one lockstat probe.

> > +		if (strcmp(uprp->desc->prb, "entry") == 0) {
> > +			if (strcmp(uprp->desc->fun, "mutex_lock") == 0) {
> > +				copy_lockaddr_into_cpuinfo(dlp, 0);
> > +
> > +				/* Initialize lockstat_btime. */
> > +				emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_btime), 0));
> > +			} else {
> > +				/* Store the start time. */
> > +				emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
> > +				emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_bfrom), BPF_REG_0));
> > +			}
> > +
> > +			return 1;
> > +		} else {
> > +			if (strcmp(uprp->desc->fun, "mutex_lock") != 0) {
> > +				/* Increment the block time. */
> > +				emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
> > +				emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_bfrom)));
> > +				emit(dlp, BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1));
> > +				emit(dlp, BPF_XADD_REG(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_btime), BPF_REG_0));
> > +
> > +				return 1;
> > +			} else {
> > +				/*
> > +				 * If lockstat_btime = 0, bail.
> > +				 * Otherwise arg1 = lockstat_btime.
> > +				 */
> > +				emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_btime)));
> > +				emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, exitlbl));
> > +				emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_1));
> > +
> > +				copy_lockaddr_from_cpuinfo(dlp, 0, DT_LBL_NONE);
> > +				return 0;
> > +			}
> > +		}
> > +	} else if (strcmp(prp->desc->prb, "adaptive-spin") == 0) {
> > +		get_cpuinfo(dtp, dlp, exitlbl);
> > +
> > +		/*
> > +		 * - mutex_lock:entry stores lock and inits lockstat_stime (0).
> > +		 * - _raw_spin_lock:entry sets lockstat_stime
> > +		 * - mutex_lock:return sets the adaptive-spin arguments
> > +		 */
> > +		if (strcmp(uprp->desc->prb, "entry") == 0) {
> > +			if (strcmp(uprp->desc->fun, "mutex_lock") == 0) {
> > +				copy_lockaddr_into_cpuinfo(dlp, 0);
> > +
> > +				/* Initialize lockstat_stime. */
> > +				emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime), 0));
> > +			} else {
> > +				/* Store the start time in lockstat_stime. */
> > +				emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
> > +				emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime), BPF_REG_0));
> > +			}
> > +
> > +			return 1;
> > +		} else {
> > +			/*
> > +			 * If lockstat_stime is 0, bail.
> > +			 * Otherwise, arg1 = time - lockstat_stime.
> > +			 */
> > +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
> > +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime)));
> > +			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, exitlbl));
> > +			emit(dlp, BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1));
> > +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_0));
> > +
> > +			copy_lockaddr_from_cpuinfo(dlp, 0, DT_LBL_NONE);
> > +			return 0;
> > +		}
> > +	} else if (strcmp(prp->desc->prb, "rw-acquire") == 0) {
> > +		get_cpuinfo(dtp, dlp, exitlbl);
> > +
> > +		if (strcmp(uprp->desc->prb, "entry") == 0) {
> > +			copy_lockaddr_into_cpuinfo(dlp, 0);
> > +			return 1;
> > +		} else {
> > +			int	kind = 1;	/* reader (default) */
> > +			uint_t	lbl_reset = dt_irlist_label(dlp);;
> > +
> > +			if (strstr(uprp->desc->fun, "_write_") != NULL)
> > +				kind = 0;	/* writer */
> > +
> > +			if (strstr(uprp->desc->fun, "_trylock") != NULL) {
> > +				/* The return value (arg1) must be 1. */
> > +				emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(1)));
> > +				emit(dlp,  BPF_BRANCH_IMM(BPF_JNE, BPF_REG_1, 1, lbl_reset));
> > +			}
> > +
> > +			/* Set arg1 = kind. */
> > +			emit(dlp,  BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(1), kind));
> > +
> > +			copy_lockaddr_from_cpuinfo(dlp, 0, lbl_reset);
> > +			return 0;
> > +		}
> > +	} else if (strcmp(prp->desc->prb, "rw-spin") == 0) {
> > +		get_cpuinfo(dtp, dlp, exitlbl);
> > +
> > +		/*
> > +		 * - *_lock_slowpath:entry stores lock and sets lockstat_stime
> > +		 * - *_lock_slowpath:return sets the rw-spin arguments
> > +		 */
> 
> Did you want rw-spin or lock_slowpath here?  Anyhow, again, the comment
> block doesn't contribute much over the code and can be omitted IMHO.

It is probably a bit of overkill but I want to assist as much as possible with
the further development of this code (with incremental improvements) and for
that the extra verbosity may be useful.

> > +		if (strcmp(uprp->desc->prb, "entry") == 0) {
> > +			copy_lockaddr_into_cpuinfo(dlp, 0);
> > +
> > +			/* Store the start time in lockstat_stime. */
> > +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
> > +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime), BPF_REG_0));
> > +
> > +			return 1;
> > +		} else {
> > +			/*
> > +			 * If lockstat_stime is 0, bail.
> > +			 * Otherwise, arg1 = time - lockstat_stime.
> > +			 */
> > +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
> > +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime)));
> > +			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, exitlbl));
> > +			emit(dlp, BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1));
> > +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_0));
> > +
> > +			copy_lockaddr_from_cpuinfo(dlp, 0, DT_LBL_NONE);
> > +			return 0;
> > +		}
> > +	} else if (strcmp(prp->desc->prb, "spin-acquire") == 0) {
> > +		get_cpuinfo(dtp, dlp, exitlbl);
> > +
> > +		if (strcmp(uprp->desc->prb, "entry") == 0) {
> > +			copy_lockaddr_into_cpuinfo(dlp, 0);
> > +			return 1;
> > +		} else {
> > +			uint_t	lbl_reset = dt_irlist_label(dlp);;
> > +
> > +			if (strstr(uprp->desc->fun, "_trylock") != NULL) {
> > +				/* The return value (arg1) must be 1. */
> > +				emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(1)));
> > +				emit(dlp,  BPF_BRANCH_IMM(BPF_JNE, BPF_REG_1, 1, lbl_reset));
> > +			}
> > +
> > +			copy_lockaddr_from_cpuinfo(dlp, 0, lbl_reset);
> > +			return 0;
> > +		}
> > +	} else if (strcmp(prp->desc->prb, "spin-spin") == 0) {
> > +		get_cpuinfo(dtp, dlp, exitlbl);
> > +
> > +		/*
> > +		 * - *_lock_slowpath:entry stores lock and sets lockstat_stime
> > +		 * - *_lock_slowpath:return sets the rw-spin arguments
> > +		 */
> 
> Is the comment block right?  (Or, again, even needed?)

Yes and yes :)

> > +		if (strcmp(uprp->desc->prb, "entry") == 0) {
> > +			copy_lockaddr_into_cpuinfo(dlp, 0);
> > +
> > +			/* Store the start time in lockstat_stime. */
> > +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
> > +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime), BPF_REG_0));
> > +
> > +			return 1;
> > +		} else {
> > +			/*
> > +			 * If lockstat_stime is 0, bail.
> > +			 * Otherwise, arg1 = time - lockstat_stime.
> > +			 */
> > +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
> > +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime)));
> > +			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, exitlbl));
> > +			emit(dlp, BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1));
> > +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_0));
> > +
> > +			copy_lockaddr_from_cpuinfo(dlp, 0, DT_LBL_NONE);
> > +			return 0;
> > +		}
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +dt_provimpl_t	dt_lockstat = {
> > +	.name		= prvname,
> > +	.prog_type	= BPF_PROG_TYPE_UNSPEC,
> > +	.populate	= &populate,
> > +	.enable		= &dt_sdt_enable,
> > +	.trampoline	= &trampoline,
> > +	.probe_info	= &dt_sdt_probe_info,
> > +	.destroy	= &dt_sdt_destroy,
> > +};
> > diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
> > index 252a73d4..8face769 100644
> > --- a/libdtrace/dt_provider.h
> > +++ b/libdtrace/dt_provider.h
> > @@ -70,6 +70,7 @@ typedef struct dt_provimpl {
> >   extern dt_provimpl_t dt_dtrace;
> >   extern dt_provimpl_t dt_cpc;
> >   extern dt_provimpl_t dt_fbt;
> > +extern dt_provimpl_t dt_lockstat;
> >   extern dt_provimpl_t dt_proc;
> >   extern dt_provimpl_t dt_profile;
> >   extern dt_provimpl_t dt_rawtp;
> 
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel



More information about the DTrace-devel mailing list