[DTrace-devel] [PATCH 6/7] Implement the lockstat provider

Eugene Loh eugene.loh at oracle.com
Sat May 13 18:20:36 UTC 2023


Again, a patch implementing a new provider needs tests, covering 
"dtrace -l" and "dtrace -lv" listings, probe firings, and probe arguments.

Trampoline code generation returns either 1 or 0.  When it returns 1, 
we see an explicit "return 1".  When it returns 0, however, there is 
only a comment "... and continue", and the reader has to scroll through 
a ton of code to discover that what comes next is nothing more than 
"return 0".  Why not just say "return 0" there, so that whoever is 
reading the code can see that the code path is done?  The reader needs 
to know what "return 0" signifies anyhow, and the comment "and 
continue" suggests that more work happens inside this function, which 
is not the case.
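
Concretely, each such branch could end with something like this (a 
sketch, reusing the lines already in the patch):

	/* Clear the lock address. */
	emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_0,
	    offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));

	return 0;

The fall-through "return 0" at the bottom of trampoline() would then 
only be reached for probe names the function does not handle.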

I'll put some other high-level comments in a separate message, but for 
now just some small comments below...

On 5/9/23 18:32, Kris Van Hees via DTrace-devel wrote:
> diff --git a/libdtrace/dt_prov_lockstat.c b/libdtrace/dt_prov_lockstat.c
> new file mode 100644
> index 00000000..2cfb7915
> --- /dev/null
> +++ b/libdtrace/dt_prov_lockstat.c
> @@ -0,0 +1,725 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + *
> + * The 'lockstat' SDT provider for DTrace specific probes.

Hyphenate "DTrace-specific".

> + */
> +#include <assert.h>
> +#include <errno.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <sys/ioctl.h>
> +#include <linux/bpf.h>
> +#include <linux/perf_event.h>
> +#include <sys/stat.h>
> +#include <sys/types.h>
> +
> +#include <bpf_asm.h>
> +
> +#include "dt_dctx.h"
> +#include "dt_cg.h"
> +#include "dt_bpf.h"
> +#include "dt_provider.h"
> +#include "dt_probe.h"
> +#include "dt_pt_regs.h"
> +
> +static const char		prvname[] = "lockstat";
> +static const char		modname[] = "vmlinux";
> +
> +/*
> + * The lockstat-provider probes make use of probes that are already provided by
> + * other providers.  As such, the lockstat probes are 'dependent probes'
> + * because they depend on underlying probes to get triggered and they also
> + * depend on argument data provided by the underlying probe to manufacture
> + * their own arguments.
> + *
> + * As a type of SDT probes, lockstat probes are defined with a signature (list
> + * of arguments - possibly empty) that may use translator support to provide
> + * the actual argument values.  Therefore, obtaining the value of arguments for
> + * a probe goes through two layers of processing:
> + *
> + *  (1) the arguments of the underlying probe are reworked to match the
> + *	expected layout of raw arguments for the lockstat probe
> + *  (2) an argument mapping table (and supporting translators) is used to get
> + *	the value of an arguument based on the raw variable data of the

arguument, also check the proc provider

> + *	lockstat probe
> + *
> + * To accomplish this, lockstat probes generate a trampoline that rewrites the
> + * arguments of the underlying probe.  (The dependent probe support code in the
> + * underlying probe saves the arguments of the underying probe in the mstate

underying, also check the proc provider

> + * before executing the trampoline and clauses of the dependent probe, and it
> + * restores them afterwards in case there are multiple dependent probes.)
> + *
> + * Because lockstat probes dependent on an underlying probe that may be too
> + * generic, the trampoline code can include a pre-condition (much like a
> + * predicate) that can bypass execution unless the condition is met.
> + */
> +
> +/*
> + * Probe dependencies
> + *
> + * Lockstat probes are implemented based on probes made available by other
> + * providers.  THe probe dependency table associates each lockstat probe with

THe

> + * one or more probe specifications (possibly containing wildcards).  Each
> + * matching probe will have the lockstat probe added as a dependent probe.
> + */
> +typedef struct probe_dep {
> +	const char		*name;			/* probe name */
> +	dtrace_probespec_t	spec;			/* spec type */
> +	const char		*str;			/* spec string */
> +} probe_dep_t;
> +
> +static probe_dep_t	probes[] = {
> +	{ "adaptive-acquire",
> +	   DTRACE_PROBESPEC_FUNC,	"fbt::mutex_lock" },
> +	{ "adaptive-acquire-error",
> +	   DTRACE_PROBESPEC_FUNC,	"fbt::mutex_lock" },
> +	{ "adaptive-block",
> +	   DTRACE_PROBESPEC_FUNC,	"fbt::mutex_lock" },
> +	{ "adaptive-block",
> +	   DTRACE_PROBESPEC_FUNC,	"fbt::schedule_preempt_disabled" },
> +	{ "adaptive-release",
> +	   DTRACE_PROBESPEC_FUNC,	"fbt::mutex_unlock" },
> +	{ "adaptive-spin",
> +	   DTRACE_PROBESPEC_FUNC,	"fbt::mutex_lock" },
> +	{ "adaptive-spin",
> +	   DTRACE_PROBESPEC_NAME,	"fbt::_raw_spin_lock:entry" },
> +	{ "rw-acquire",
> +	   DTRACE_PROBESPEC_FUNC,	"fbt::_raw_read_lock*" },
> +	{ "rw-acquire",
> +	   DTRACE_PROBESPEC_FUNC,	"fbt::_raw_read_trylock*" },
> +	{ "rw-acquire",
> +	   DTRACE_PROBESPEC_FUNC,	"fbt::_raw_write_lock*" },
> +	{ "rw-acquire",
> +	   DTRACE_PROBESPEC_FUNC,	"fbt::_raw_write_trylock*" },
> +	{ "rw-release",
> +	   DTRACE_PROBESPEC_FUNC,	"fbt::_raw_read_unlock*" },
> +	{ "rw-release",
> +	   DTRACE_PROBESPEC_FUNC,	"fbt::_raw_write_unlock*" },
> +	{ "rw-spin",
> +	   DTRACE_PROBESPEC_FUNC,	"fbt::queued_read_lock_slowpath" },
> +	{ "rw-spin",
> +	   DTRACE_PROBESPEC_FUNC,	"fbt::queued_write_lock_slowpath" },
> +	{ "spin-acquire",
> +	   DTRACE_PROBESPEC_FUNC,	"fbt::_raw_spin_lock*" },
> +	{ "spin-acquire",
> +	   DTRACE_PROBESPEC_FUNC,	"fbt::_raw_spin_trylock*" },
> +	{ "spin-release",
> +	   DTRACE_PROBESPEC_FUNC,	"fbt::_raw_spin_unlock*" },
> +	{ "spin-spin",
> +	   DTRACE_PROBESPEC_FUNC,	"fbt::queued_spin_lock_*" },
> +	{ "spin-spin",
> +	   DTRACE_PROBESPEC_FUNC,	"fbt::native_queued_spin_lock_*" },
> +};
> +
> +/*
> + * Probe signature specifications
> + *
> + * This table *must* group the arguments of probes.  I.e. the arguments of a
> + * given probe must be listed in consecutive records.
> + * A single probe entry that mentions only name of the probe indicates a probe

mentions only name
->
mentions only the name

> + * that provides no arguments.
> + */
> +typedef struct probe_arg {
> +	const char	*name;			/* name of probe */
> +	int		argno;			/* argument number */
> +	dt_argdesc_t	argdesc;		/* argument description */
> +} probe_arg_t;
> +
> +static probe_arg_t probe_args[] = {
> +	{ "adaptive-acquire", 0, { 0, 0, "struct mutex *" } },
> +	{ "adaptive-acquire-error", 0, { 0, 0, "struct mutex *" } },
> +	{ "adaptive-acquire-error", 1, { 1, 0, "int" } },
> +	{ "adaptive-block", 0, { 0, 0, "struct mutex *" } },
> +	{ "adaptive-block", 1, { 1, 0, "uint64_t" } },
> +	{ "adaptive-release", 0, { 0, 0, "struct mutex *" } },
> +	{ "adaptive-spin", 0, { 0, 0, "struct mutex *" } },
> +	{ "adaptive-spin", 1, { 1, 0, "uint64_t" } },
> +	{ "rw-acquire", 0, { 0, 0, "struct rwlock *" } },
> +	{ "rw-acquire", 1, { 1, 0, "int" } },
> +	{ "rw-release", 0, { 0, 0, "struct rwlock *" } },
> +	{ "rw-release", 1, { 1, 0, "int" } },
> +	{ "rw-spin", 0, { 0, 0, "struct rwlock *" } },
> +	{ "rw-spin", 1, { 1, 0, "uint64_t" } },
> +	{ "rw-spin", 2, { 2, 0, "int" } },
> +	{ "spin-acquire", 0, { 0, 0, "spinlock_t *" } },
> +	{ "spin-release", 0, { 0, 0, "spinlock_t *" } },
> +	{ "spin-spin", 0, { 0, 0, "spinlock_t *" } },
> +	{ "spin-spin", 1, { 1, 0, "uint64_t" } },
> +};
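
Since probe_info() below depends on this grouping requirement, it might 
be worth asserting it cheaply somewhere, e.g. in populate().  A rough 
sketch using only the fields defined above (the else-branch check is 
only partial: it would not catch a probe name that reappears later in 
the table):

	for (i = 1; i < ARRAY_SIZE(probe_args); i++) {
		if (strcmp(probe_args[i].name,
			   probe_args[i - 1].name) == 0)
			assert(probe_args[i].argno ==
			       probe_args[i - 1].argno + 1);
		else
			assert(probe_args[i].argno == 0);
	}
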
> +
> +static const dtrace_pattr_t	pattr = {
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
> +};
> +
> +/*
> + * Provide all the "lockstat" SDT probes.
> + */
> +static int populate(dtrace_hdl_t *dtp)
> +{
> +	dt_provider_t	*prv;
> +	int		i;
> +	int		n = 0;
> +
> +	prv = dt_provider_create(dtp, prvname, &dt_lockstat, &pattr);
> +	if (prv == NULL)
> +		return 0;
> +
> +	/*
> +	 * Create "lockstat" probes based on the probe_args list.  Since each
> +	 * probe will have at least one entry (with argno == 0), we can use
> +	 * those entries to identify the probe names.
> +	 */
> +	for (i = 0; i < ARRAY_SIZE(probe_args); i++) {
> +		probe_arg_t	*arg = &probe_args[i];
> +
> +		if (arg->argno == 0 &&
> +		    dt_probe_insert(dtp, prv, prvname, modname, "", arg->name,
> +				    NULL))
> +			n++;
> +	}
> +
> +	return n;
> +}
> +
> +static int add_dependency(dtrace_hdl_t *dtp, dt_probe_t *uprp, void *arg)
> +{
> +	dt_probe_t	*prp = arg;
> +
> +	dt_probe_add_dependent(dtp, uprp, prp);
> +	dt_probe_enable(dtp, uprp);
> +
> +	return 0;
> +}
> +
> +static void enable(dtrace_hdl_t *dtp, dt_probe_t *prp)
> +{
> +	int	i;
> +
> +	for (i = 0; i < ARRAY_SIZE(probes); i++) {
> +		probe_dep_t		*dep = &probes[i];
> +		dtrace_probedesc_t	pd;
> +
> +		if (strcmp(prp->desc->prb, dep->name) != 0)
> +			continue;
> +
> +		if (dtrace_str2desc(dtp, dep->spec, dep->str, &pd) == -1)
> +			return;
> +
> +		dt_probe_iter(dtp, &pd, add_dependency, NULL, prp);
> +
> +		free((void *)pd.prv);
> +		free((void *)pd.mod);
> +		free((void *)pd.fun);
> +		free((void *)pd.prb);
> +	}
> +
> +	/*
> +	 * Finally, ensure we're in the list of enablings as well.
> +	 * (This ensures that, among other things, the probes map
> +	 * gains entries for us.)

I'm having trouble understanding "the probes map gains entries for us."

> +	 */
> +	if (!dt_in_list(&dtp->dt_enablings, prp))
> +		dt_list_append(&dtp->dt_enablings, prp);
> +}
> +
> +/*
> + * Generate a BPF trampoline for a SDT probe.
> + *
> + * The trampoline function is called when a SDT probe triggers, and it must
> + * satisfy the following prototype:
> + *
> + *	int dt_lockstat(void *data)
> + *
> + * The trampoline will populate a dt_dctx_t struct and then call the function
> + * that implements the compiled D clause.  It returns the value that it gets
> + * back from that function.
> + */
> +#define IS_SPIN      (1 << 0)
> +#define IS_READ      (1 << 1)
> +#define IS_WRITE     (1 << 2)
> +#define IS_MUTEX     (1 << 5)

What are the IS_* macros for?  They do not appear to be used anywhere 
in this file.

> +
> +static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> +{
> +	dtrace_hdl_t	*dtp = pcb->pcb_hdl;
> +	dt_irlist_t	*dlp = &pcb->pcb_ir;
> +	dt_probe_t	*prp = pcb->pcb_probe;
> +	dt_probe_t	*uprp = pcb->pcb_parent_probe;
> +	dt_ident_t	*idp;
> +
> +	assert(uprp != NULL);
> +
> +	if (strcmp(prp->desc->prb, "adaptive-acquire") == 0 ||
> +	    strcmp(prp->desc->prb, "adaptive-release") == 0) {
> +		if (strcmp(uprp->desc->prb, "entry") == 0) {
> +			/* Get the (pre-CPU) cpuinfo struct. */
> +			idp = dt_dlib_get_map(dtp, "cpuinfo");
> +			assert(idp != NULL);
> +			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
> +			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> +			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
> +			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
> +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> +			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
> +
> +			/* Store the lock address. */
> +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
> +
> +			return 1;
> +		} else {
> +			/* Get the (pre-CPU) cpuinfo struct. */
> +			idp = dt_dlib_get_map(dtp, "cpuinfo");
> +			assert(idp != NULL);
> +			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
> +			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> +			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
> +			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
> +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> +			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
> +
> +			/* Set arg0 = stored lock. */
> +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
> +
> +			/* Clear the lock address and continue. */
> +			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
> +		}
> +	} else if (strcmp(prp->desc->prb, "adaptive-acquire-error") == 0) {
> +		if (strcmp(uprp->desc->prb, "entry") == 0) {
> +			/* Get the (pre-CPU) cpuinfo struct. */
> +			idp = dt_dlib_get_map(dtp, "cpuinfo");
> +			assert(idp != NULL);
> +			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
> +			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> +			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
> +			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
> +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> +			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
> +
> +			/* Store the lock address. */
> +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
> +
> +			return 1;
> +		} else {
> +			/* Get the (pre-CPU) cpuinfo struct. */
> +			idp = dt_dlib_get_map(dtp, "cpuinfo");
> +			assert(idp != NULL);
> +			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
> +			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> +			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
> +			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
> +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> +			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
> +
> +			/* Set arg1 = underlying arg0. */
> +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_1));
> +
> +			/* Set arg0 = stored lock. */
> +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
> +
> +			/* Clear the lock address and continue. */
> +			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
> +		}
> +	} else if (strcmp(prp->desc->prb, "adaptive-block") == 0) {
> +		/*
> +		 * - mutex_lock:entry inits lockstat_btime (0) and stores lock.
> +		 * - schedule_preempt_disabled:entry sets lockstat_bfrom
> +		 * - schedule_preempt_disabled:return increments lockstat_bfrom
> +		 * - mutex_lock:return sets the adaptive-block arguments
> +		 */
> +		if (strcmp(uprp->desc->prb, "entry") == 0) {
> +			/* Get the (pre-CPU) cpuinfo struct. */
> +			idp = dt_dlib_get_map(dtp, "cpuinfo");
> +			assert(idp != NULL);
> +			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
> +			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> +			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
> +			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
> +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> +			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
> +
> +			if (strcmp(uprp->desc->fun, "mutex_lock") == 0) {
> +				/* Store the lock address. */
> +				emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
> +				emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
> +
> +				/* Initialize lockstat_btime. */
> +				emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_btime), 0));
> +			} else {
> +				/* Store the start time. */
> +				emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
> +				emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
> +				emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_bfrom), BPF_REG_0));
> +			}
> +
> +			return 1;
> +		} else {
> +			/* Get the (pre-CPU) cpuinfo struct. */
> +			idp = dt_dlib_get_map(dtp, "cpuinfo");
> +			assert(idp != NULL);
> +			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
> +			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> +			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
> +			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
> +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> +			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
> +
> +			if (strcmp(uprp->desc->fun, "mutex_lock") != 0) {
> +				/* Increment the block time. */
> +				emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
> +				emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
> +				emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_bfrom)));
> +				emit(dlp, BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1));
> +				emit(dlp, BPF_XADD_REG(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_btime), BPF_REG_0));
> +
> +				return 1;
> +			} else {
> +				/*
> +				 * If lockstat_btime = 0, bail.
> +				 * Otherwise arg1 = lockstat_btime.
> +				 */
> +				emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_btime)));
> +				emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, exitlbl));
> +				emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_1));
> +
> +				/* Set arg0 = stored lock */
> +				emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
> +				emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
> +
> +				/* Clear the lock address and continue. */
> +				emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
> +			}
> +		}
> +	} else if (strcmp(prp->desc->prb, "adaptive-spin") == 0) {
> +		/*
> +		 * - mutex_lock:entry stores lock and inits lockstat_stime (0).
> +		 * - _raw_spin_lock:entry sets lockstat_stime
> +		 * - mutex_lock:return sets the adaptive-spin arguments
> +		 */
> +		if (strcmp(uprp->desc->prb, "entry") == 0) {
> +			/* Get the (pre-CPU) cpuinfo struct. */
> +			idp = dt_dlib_get_map(dtp, "cpuinfo");
> +			assert(idp != NULL);
> +			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
> +			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> +			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
> +			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
> +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> +			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
> +
> +			if (strcmp(uprp->desc->fun, "mutex_lock") == 0) {
> +				/* Store the lock address. */
> +				emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
> +				emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
> +
> +				/* Initialize lockstat_stime. */
> +				emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_stime), 0));
> +			} else {
> +				/* Store the start time in lockstat_stime. */
> +				emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
> +				emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
> +				emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime), BPF_REG_0));
> +			}
> +
> +			return 1;
> +		} else {
> +			/* Get the (pre-CPU) cpuinfo struct. */
> +			idp = dt_dlib_get_map(dtp, "cpuinfo");
> +			assert(idp != NULL);
> +			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
> +			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> +			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
> +			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
> +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> +			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
> +
> +			/*
> +			 * If lockstat_stime is 0, bail.
> +			 * Otherwise, arg1 = time - lockstat_stime.
> +			 */
> +			emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
> +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
> +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime)));
> +			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, exitlbl));
> +			emit(dlp, BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1));
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_0));
> +
> +			/* Set arg0 = stored lock */
> +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
> +
> +			/* Clear the lock address and continue. */
> +			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
> +		}
> +	} else if (strcmp(prp->desc->prb, "rw-acquire") == 0) {
> +		if (strcmp(uprp->desc->prb, "entry") == 0) {
> +			/* Get the (pre-CPU) cpuinfo struct. */
> +			idp = dt_dlib_get_map(dtp, "cpuinfo");
> +			assert(idp != NULL);
> +			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
> +			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> +			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
> +			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
> +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> +			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
> +
> +			/* Store the lock address. */
> +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
> +
> +			return 1;
> +		} else {
> +			uint_t	lbl_reset = dt_irlist_label(dlp);;
> +			int	kind = 1;	/* reader (default) */
> +
> +			if (strstr(uprp->desc->fun, "_write_") != NULL)
> +				kind = 0;	/* writer */
> +
> +			/* Get the (pre-CPU) cpuinfo struct. */
> +			idp = dt_dlib_get_map(dtp, "cpuinfo");
> +			assert(idp != NULL);
> +			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
> +			emit(dlp,  BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> +			emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
> +			emit(dlp,  BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
> +			emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> +			emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
> +
> +			if (strstr(uprp->desc->fun, "_trylock") != NULL) {
> +				/* The return value (arg1) must be 1. */
> +				emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(1)));
> +				emit(dlp,  BPF_BRANCH_IMM(BPF_JNE, BPF_REG_1, 1, lbl_reset));
> +			}
> +
> +			/* Set arg0 = stored lock, arg1 = kind. */
> +			emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
> +			emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
> +			emit(dlp,  BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(1), kind));
> +
> +			/* Clear the lock address and continue. */
> +			emitl(dlp, lbl_reset,
> +				   BPF_STORE_IMM(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
> +		}
> +	} else if (strcmp(prp->desc->prb, "rw-spin") == 0) {
> +		/*
> +		 * - *_lock_slowpath:entry stores lock and sets lockstat_stime
> +		 * - *_lock_slowpath:return sets the rw-spin arguments
> +		 */

Does it make more sense here to refer to _lock_slowpath or to rw-spin?

> +		if (strcmp(uprp->desc->prb, "entry") == 0) {
> +			/* Get the (pre-CPU) cpuinfo struct. */
> +			idp = dt_dlib_get_map(dtp, "cpuinfo");
> +			assert(idp != NULL);
> +			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
> +			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> +			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
> +			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
> +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> +			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
> +
> +			/* Store the lock address. */
> +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
> +
> +			/* Store the start time in lockstat_stime. */
> +			emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
> +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime), BPF_REG_0));
> +
> +			return 1;
> +		} else {
> +			/* Get the (pre-CPU) cpuinfo struct. */
> +			idp = dt_dlib_get_map(dtp, "cpuinfo");
> +			assert(idp != NULL);
> +			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
> +			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> +			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
> +			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
> +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> +			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
> +
> +			/*
> +			 * If lockstat_stime is 0, bail.
> +			 * Otherwise, arg1 = time - lockstat_stime.
> +			 */
> +			emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
> +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
> +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime)));
> +			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, exitlbl));
> +			emit(dlp, BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1));
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_0));
> +
> +			/* Set arg0 = stored lock */
> +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
> +
> +			/* Clear the lock address and continue. */
> +			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
> +		}
> +	} else if (strcmp(prp->desc->prb, "spin-acquire") == 0) {
> +		if (strcmp(uprp->desc->prb, "entry") == 0) {
> +			/* Get the (pre-CPU) cpuinfo struct. */
> +			idp = dt_dlib_get_map(dtp, "cpuinfo");
> +			assert(idp != NULL);
> +			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
> +			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> +			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
> +			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
> +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> +			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
> +
> +			/* Store the lock address. */
> +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
> +
> +			return 1;
> +		} else {
> +			uint_t	lbl_reset = dt_irlist_label(dlp);;
> +
> +			/* Get the (pre-CPU) cpuinfo struct. */
> +			idp = dt_dlib_get_map(dtp, "cpuinfo");
> +			assert(idp != NULL);
> +			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
> +			emit(dlp,  BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> +			emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
> +			emit(dlp,  BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
> +			emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> +			emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
> +
> +			if (strstr(uprp->desc->fun, "_trylock") != NULL) {
> +				/* The return value (arg1) must be 1. */
> +				emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(1)));
> +				emit(dlp,  BPF_BRANCH_IMM(BPF_JNE, BPF_REG_1, 1, lbl_reset));
> +			}
> +
> +			/* Set arg0 = stored lock. */
> +			emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
> +			emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
> +
> +			/* Clear the lock address and continue. */
> +			emitl(dlp, lbl_reset,
> +				   BPF_STORE_IMM(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
> +		}
> +	} else if (strcmp(prp->desc->prb, "spin-spin") == 0) {
> +		/*
> +		 * - *_lock_slowpath:entry stores lock and sets lockstat_stime
> +		 * - *_lock_slowpath:return sets the rw-spin arguments
> +		 */
> +		if (strcmp(uprp->desc->prb, "entry") == 0) {
> +			/* Get the (pre-CPU) cpuinfo struct. */
> +			idp = dt_dlib_get_map(dtp, "cpuinfo");
> +			assert(idp != NULL);
> +			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
> +			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> +			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
> +			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
> +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> +			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
> +
> +			/* Store the lock address. */
> +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
> +
> +			/* Store the start time in lockstat_stime. */
> +			emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
> +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime), BPF_REG_0));
> +
> +			return 1;
> +		} else {
> +			/* Get the (pre-CPU) cpuinfo struct. */
> +			idp = dt_dlib_get_map(dtp, "cpuinfo");
> +			assert(idp != NULL);
> +			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
> +			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> +			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
> +			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
> +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
> +			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
> +
> +			/*
> +			 * If lockstat_stime is 0, bail.
> +			 * Otherwise, arg1 = time - lockstat_stime.
> +			 */
> +			emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
> +			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
> +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime)));
> +			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, exitlbl));
> +			emit(dlp, BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1));
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_0));
> +
> +			/* Set arg0 = stored lock */
> +			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
> +			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
> +
> +			/* Clear the lock address and continue. */
> +			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
> +		}
> +	}
> +
> +	return 0;
> +}
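
A few more nits while I'm here:  the "(pre-CPU)" in the recurring 
comment should be "(per-CPU)"; there are stray double semicolons after 
the two dt_irlist_label() calls; the block comment in the spin-spin 
branch still says it "sets the rw-spin arguments"; and in the big 
comment at the top of the file, "Because lockstat probes dependent on 
an underlying probe" should be "depend on".

More substantively, the cpuinfo-lookup sequence is emitted over a dozen 
times in this function.  Factoring it into a small helper would shrink 
trampoline() considerably (and give the comment typo a single home).  
Something along these lines, untested and with a made-up name:

static void
emit_get_cpuinfo(dtrace_hdl_t *dtp, dt_irlist_t *dlp, uint_t exitlbl)
{
	dt_ident_t	*idp = dt_dlib_get_map(dtp, "cpuinfo");

	/*
	 * Leave the per-CPU cpuinfo pointer in %r0, branching to
	 * exitlbl if the map lookup fails.
	 */
	assert(idp != NULL);
	dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
	emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
	emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
	emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
	emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
	emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
}
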
> +
> +static int probe_info(dtrace_hdl_t *dtp, const dt_probe_t *prp,
> +		      int *argcp, dt_argdesc_t **argvp)
> +{
> +	int		i;
> +	int		pidx = -1;
> +	int		argc = 0;
> +	dt_argdesc_t	*argv = NULL;
> +
> +	for (i = 0; i < ARRAY_SIZE(probe_args); i++) {
> +		probe_arg_t	*arg = &probe_args[i];
> +
> +		if (strcmp(arg->name, prp->desc->prb) == 0) {
> +			if (pidx == -1) {
> +				pidx = i;
> +
> +				if (arg->argdesc.native == NULL)
> +					break;
> +			}
> +
> +			argc++;
> +		}
> +	}
> +
> +	if (argc == 0)
> +		goto done;
> +
> +	argv = dt_zalloc(dtp, argc * sizeof(dt_argdesc_t));
> +	if (!argv)
> +		return -ENOMEM;
> +
> +	for (i = pidx; i < pidx + argc; i++) {
> +		probe_arg_t	*arg = &probe_args[i];
> +
> +		argv[arg->argno] = arg->argdesc;
> +	}
> +
> +done:
> +	*argcp = argc;
> +	*argvp = argv;
> +
> +	return 0;
> +}
> +
> +dt_provimpl_t	dt_lockstat = {
> +	.name		= prvname,
> +	.prog_type	= BPF_PROG_TYPE_UNSPEC,
> +	.populate	= &populate,
> +	.enable		= &enable,
> +	.trampoline	= &trampoline,
> +	.probe_info	= &probe_info,
> +};
> diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
> index d45244fe..51a691cc 100644
> --- a/libdtrace/dt_provider.h
> +++ b/libdtrace/dt_provider.h
> @@ -81,6 +81,7 @@ typedef struct dt_provimpl {
>   extern dt_provimpl_t dt_dtrace;
>   extern dt_provimpl_t dt_cpc;
>   extern dt_provimpl_t dt_fbt;
> +extern dt_provimpl_t dt_lockstat;
>   extern dt_provimpl_t dt_proc;
>   extern dt_provimpl_t dt_profile;
>   extern dt_provimpl_t dt_rawtp;


