[DTrace-devel] [PATCH 6/7] Implement the lockstat provider

Kris Van Hees kris.van.hees at oracle.com
Tue May 9 22:32:53 UTC 2023


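Implement the 'lockstat' SDT provider on top of existing fbt probes.
The provider publishes adaptive (mutex), spin, and rwlock probes for
lock acquisition, release, blocking, and spinning.  Per-CPU scratch
fields added to the cpuinfo map carry the lock address and timing data
between the entry and return probes of the underlying kernel functions.
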
Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
---
 libdtrace/Build              |   2 +
 libdtrace/dt_bpf_maps.h      |   8 +-
 libdtrace/dt_open.c          |   1 +
 libdtrace/dt_prov_lockstat.c | 725 +++++++++++++++++++++++++++++++++++
 libdtrace/dt_provider.h      |   1 +
 5 files changed, 735 insertions(+), 2 deletions(-)
 create mode 100644 libdtrace/dt_prov_lockstat.c

diff --git a/libdtrace/Build b/libdtrace/Build
index 299cb128..3ebdb5a5 100644
--- a/libdtrace/Build
+++ b/libdtrace/Build
@@ -49,6 +49,7 @@ libdtrace-build_SOURCES = dt_aggregate.c \
 			  dt_prov_cpc.c \
 			  dt_prov_dtrace.c \
 			  dt_prov_fbt.c \
+			  dt_prov_lockstat.c \
 			  dt_prov_proc.c \
 			  dt_prov_profile.c \
 			  dt_prov_rawtp.c \
@@ -94,6 +95,7 @@ dt_prov_cpc.c_CFLAGS := -Wno-pedantic
 dt_prov_dtrace.c_CFLAGS := -Wno-pedantic
 dt_prov_fbt.c_CFLAGS := -Wno-pedantic
+dt_prov_lockstat.c_CFLAGS := -Wno-pedantic
 dt_prov_proc.c_CFLAGS := -Wno-pedantic
 dt_prov_profile.c_CFLAGS := -Wno-pedantic
 dt_prov_rawtp.c_CFLAGS := -Wno-pedantic
 dt_prov_sched.c_CFLAGS := -Wno-pedantic
diff --git a/libdtrace/dt_bpf_maps.h b/libdtrace/dt_bpf_maps.h
index 68b34b82..0dd36b16 100644
--- a/libdtrace/dt_bpf_maps.h
+++ b/libdtrace/dt_bpf_maps.h
@@ -34,8 +34,12 @@ struct dt_bpf_specs {
 typedef struct dt_bpf_cpuinfo	dt_bpf_cpuinfo_t;
 struct dt_bpf_cpuinfo {
 	cpuinfo_t	ci;
-	uint64_t	buf_drops;
-	uint64_t	agg_drops;
+	uint64_t	buf_drops;	/* CPU principal drop counter */
+	uint64_t	agg_drops;	/* CPU aggregation drop counter */
+	uint64_t	lockstat_lock;	/* lockstat: lock being traced */
+	uint64_t	lockstat_bfrom;	/* lockstat: block time start */
+	uint64_t	lockstat_btime;	/* lockstat: block time */
+	uint64_t	lockstat_stime;	/* lockstat: spin time */
 };
 
 #ifdef  __cplusplus
diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
index 04364807..d71cc4be 100644
--- a/libdtrace/dt_open.c
+++ b/libdtrace/dt_open.c
@@ -67,6 +67,7 @@ static const dt_provimpl_t *dt_providers[] = {
 	&dt_dtrace,		/* list dt_dtrace first */
 	&dt_cpc,
 	&dt_fbt,
+	&dt_lockstat,
 	&dt_proc,
 	&dt_profile,
 	&dt_rawtp,
diff --git a/libdtrace/dt_prov_lockstat.c b/libdtrace/dt_prov_lockstat.c
new file mode 100644
index 00000000..2cfb7915
--- /dev/null
+++ b/libdtrace/dt_prov_lockstat.c
@@ -0,0 +1,725 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ *
+ * The 'lockstat' SDT provider for DTrace-specific probes.
+ */
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <linux/bpf.h>
+#include <linux/perf_event.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <bpf_asm.h>
+
+#include "dt_dctx.h"
+#include "dt_cg.h"
+#include "dt_bpf.h"
+#include "dt_provider.h"
+#include "dt_probe.h"
+#include "dt_pt_regs.h"
+
+static const char		prvname[] = "lockstat";
+static const char		modname[] = "vmlinux";
+
+/*
+ * The lockstat-provider probes are built on probes that other providers
+ * already supply.  As such, lockstat probes are 'dependent probes': they
+ * depend on an underlying probe to be triggered, and on argument data
+ * provided by that underlying probe to manufacture their own arguments.
+ *
+ * As SDT probes, lockstat probes are defined with a signature (a list
+ * of arguments - possibly empty) that may use translator support to provide
+ * the actual argument values.  Therefore, obtaining the value of arguments for
+ * a probe goes through two layers of processing:
+ *
+ *  (1) the arguments of the underlying probe are reworked to match the
+ *	expected layout of raw arguments for the lockstat probe
+ *  (2) an argument mapping table (and supporting translators) is used to get
+ *	the value of an argument based on the raw variable data of the
+ *	lockstat probe
+ *
+ * To accomplish this, lockstat probes generate a trampoline that rewrites the
+ * arguments of the underlying probe.  (The dependent probe support code in the
+ * underlying probe saves the arguments of the underlying probe in the mstate
+ * before executing the trampoline and clauses of the dependent probe, and it
+ * restores them afterwards in case there are multiple dependent probes.)
+ *
+ * Because lockstat probes depend on an underlying probe that may be too
+ * generic, the trampoline code can include a pre-condition (much like a
+ * predicate) that can bypass execution unless the condition is met.
+ */
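+
+/*
+ * For example (a sketch of the flow, not code): for adaptive-acquire on top
+ * of fbt::mutex_lock, step (1) is handled by the trampoline below: at
+ * mutex_lock:entry it saves the 'struct mutex *' argument in the per-CPU
+ * cpuinfo scratch area, and at mutex_lock:return it copies the saved pointer
+ * into DMST_ARG(0) so that the lockstat probe sees the lock as its raw arg0.
+ * Step (2) then uses the probe_args table below to present that raw value as
+ * a 'struct mutex *'.
+ */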
+
+/*
+ * Probe dependencies
+ *
+ * Lockstat probes are implemented based on probes made available by other
+ * providers.  The probe dependency table associates each lockstat probe with
+ * one or more probe specifications (possibly containing wildcards).  Each
+ * matching probe will have the lockstat probe added as a dependent probe.
+ */
+typedef struct probe_dep {
+	const char		*name;			/* probe name */
+	dtrace_probespec_t	spec;			/* spec type */
+	const char		*str;			/* spec string */
+} probe_dep_t;
+
+static probe_dep_t	probes[] = {
+	{ "adaptive-acquire",
+	   DTRACE_PROBESPEC_FUNC,	"fbt::mutex_lock" },
+	{ "adaptive-acquire-error",
+	   DTRACE_PROBESPEC_FUNC,	"fbt::mutex_lock" },
+	{ "adaptive-block",
+	   DTRACE_PROBESPEC_FUNC,	"fbt::mutex_lock" },
+	{ "adaptive-block",
+	   DTRACE_PROBESPEC_FUNC,	"fbt::schedule_preempt_disabled" },
+	{ "adaptive-release",
+	   DTRACE_PROBESPEC_FUNC,	"fbt::mutex_unlock" },
+	{ "adaptive-spin",
+	   DTRACE_PROBESPEC_FUNC,	"fbt::mutex_lock" },
+	{ "adaptive-spin",
+	   DTRACE_PROBESPEC_NAME,	"fbt::_raw_spin_lock:entry" },
+	{ "rw-acquire",
+	   DTRACE_PROBESPEC_FUNC,	"fbt::_raw_read_lock*" },
+	{ "rw-acquire",
+	   DTRACE_PROBESPEC_FUNC,	"fbt::_raw_read_trylock*" },
+	{ "rw-acquire",
+	   DTRACE_PROBESPEC_FUNC,	"fbt::_raw_write_lock*" },
+	{ "rw-acquire",
+	   DTRACE_PROBESPEC_FUNC,	"fbt::_raw_write_trylock*" },
+	{ "rw-release",
+	   DTRACE_PROBESPEC_FUNC,	"fbt::_raw_read_unlock*" },
+	{ "rw-release",
+	   DTRACE_PROBESPEC_FUNC,	"fbt::_raw_write_unlock*" },
+	{ "rw-spin",
+	   DTRACE_PROBESPEC_FUNC,	"fbt::queued_read_lock_slowpath" },
+	{ "rw-spin",
+	   DTRACE_PROBESPEC_FUNC,	"fbt::queued_write_lock_slowpath" },
+	{ "spin-acquire",
+	   DTRACE_PROBESPEC_FUNC,	"fbt::_raw_spin_lock*" },
+	{ "spin-acquire",
+	   DTRACE_PROBESPEC_FUNC,	"fbt::_raw_spin_trylock*" },
+	{ "spin-release",
+	   DTRACE_PROBESPEC_FUNC,	"fbt::_raw_spin_unlock*" },
+	{ "spin-spin",
+	   DTRACE_PROBESPEC_FUNC,	"fbt::queued_spin_lock_*" },
+	{ "spin-spin",
+	   DTRACE_PROBESPEC_FUNC,	"fbt::native_queued_spin_lock_*" },
+};
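+
+/*
+ * Note that one lockstat probe may depend on several underlying probes:
+ * e.g. adaptive-spin above depends on both fbt::mutex_lock (entry and
+ * return, via the function spec) and fbt::_raw_spin_lock:entry.
+ */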
+
+/*
+ * Probe signature specifications
+ *
+ * This table *must* group the arguments of probes, i.e. the arguments of a
+ * given probe must be listed in consecutive records.
+ * A single entry that mentions only the name of the probe indicates a probe
+ * that provides no arguments.
+ */
+typedef struct probe_arg {
+	const char	*name;			/* name of probe */
+	int		argno;			/* argument number */
+	dt_argdesc_t	argdesc;		/* argument description */
+} probe_arg_t;
+
+static probe_arg_t probe_args[] = {
+	{ "adaptive-acquire", 0, { 0, 0, "struct mutex *" } },
+	{ "adaptive-acquire-error", 0, { 0, 0, "struct mutex *" } },
+	{ "adaptive-acquire-error", 1, { 1, 0, "int" } },
+	{ "adaptive-block", 0, { 0, 0, "struct mutex *" } },
+	{ "adaptive-block", 1, { 1, 0, "uint64_t" } },
+	{ "adaptive-release", 0, { 0, 0, "struct mutex *" } },
+	{ "adaptive-spin", 0, { 0, 0, "struct mutex *" } },
+	{ "adaptive-spin", 1, { 1, 0, "uint64_t" } },
+	{ "rw-acquire", 0, { 0, 0, "struct rwlock *" } },
+	{ "rw-acquire", 1, { 1, 0, "int" } },
+	{ "rw-release", 0, { 0, 0, "struct rwlock *" } },
+	{ "rw-release", 1, { 1, 0, "int" } },
+	{ "rw-spin", 0, { 0, 0, "struct rwlock *" } },
+	{ "rw-spin", 1, { 1, 0, "uint64_t" } },
+	{ "rw-spin", 2, { 2, 0, "int" } },
+	{ "spin-acquire", 0, { 0, 0, "spinlock_t *" } },
+	{ "spin-release", 0, { 0, 0, "spinlock_t *" } },
+	{ "spin-spin", 0, { 0, 0, "spinlock_t *" } },
+	{ "spin-spin", 1, { 1, 0, "uint64_t" } },
+};
+
+static const dtrace_pattr_t	pattr = {
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+};
+
+/*
+ * Provide all the "lockstat" SDT probes.
+ */
+static int populate(dtrace_hdl_t *dtp)
+{
+	dt_provider_t	*prv;
+	int		i;
+	int		n = 0;
+
+	prv = dt_provider_create(dtp, prvname, &dt_lockstat, &pattr);
+	if (prv == NULL)
+		return 0;
+
+	/*
+	 * Create "lockstat" probes based on the probe_args list.  Since each
+	 * probe will have at least one entry (with argno == 0), we can use
+	 * those entries to identify the probe names.
+	 */
+	for (i = 0; i < ARRAY_SIZE(probe_args); i++) {
+		probe_arg_t	*arg = &probe_args[i];
+
+		if (arg->argno == 0 &&
+		    dt_probe_insert(dtp, prv, prvname, modname, "", arg->name,
+				    NULL))
+			n++;
+	}
+
+	return n;
+}
+
+static int add_dependency(dtrace_hdl_t *dtp, dt_probe_t *uprp, void *arg)
+{
+	dt_probe_t	*prp = arg;
+
+	dt_probe_add_dependent(dtp, uprp, prp);
+	dt_probe_enable(dtp, uprp);
+
+	return 0;
+}
+
+static void enable(dtrace_hdl_t *dtp, dt_probe_t *prp)
+{
+	int	i;
+
+	for (i = 0; i < ARRAY_SIZE(probes); i++) {
+		probe_dep_t		*dep = &probes[i];
+		dtrace_probedesc_t	pd;
+
+		if (strcmp(prp->desc->prb, dep->name) != 0)
+			continue;
+
+		if (dtrace_str2desc(dtp, dep->spec, dep->str, &pd) == -1)
+			return;
+
+		dt_probe_iter(dtp, &pd, add_dependency, NULL, prp);
+
+		free((void *)pd.prv);
+		free((void *)pd.mod);
+		free((void *)pd.fun);
+		free((void *)pd.prb);
+	}
+
+	/*
+	 * Finally, ensure we're in the list of enablings as well.
+	 * (This ensures that, among other things, the probes map
+	 * gains entries for us.)
+	 */
+	if (!dt_in_list(&dtp->dt_enablings, prp))
+		dt_list_append(&dtp->dt_enablings, prp);
+}
+
+/*
+ * Generate a BPF trampoline for an SDT probe.
+ *
+ * The trampoline function is called when an SDT probe triggers, and it must
+ * satisfy the following prototype:
+ *
+ *	int dt_lockstat(void *data)
+ *
+ * The trampoline will populate a dt_dctx_t struct and then call the function
+ * that implements the compiled D clause.  It returns the value that it gets
+ * back from that function.
+ */
+#define IS_SPIN      (1 << 0)
+#define IS_READ      (1 << 1)
+#define IS_WRITE     (1 << 2)
+#define IS_MUTEX     (1 << 5)
+
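+/*
+ * Every branch below starts by looking up this CPU's dt_bpf_cpuinfo_t in the
+ * 'cpuinfo' map, leaving the pointer in BPF_REG_0 (or bailing to exitlbl if
+ * the lookup fails).  A hypothetical helper factoring out that repeated
+ * sequence would look like this (a sketch only, not part of this patch):
+ *
+ *	static void emit_cpuinfo_lookup(dtrace_hdl_t *dtp, dt_irlist_t *dlp,
+ *					uint_t exitlbl)
+ *	{
+ *		dt_ident_t	*idp = dt_dlib_get_map(dtp, "cpuinfo");
+ *
+ *		assert(idp != NULL);
+ *		dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+ *		emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+ *		emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+ *		emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+ *		emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+ *		emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+ *	}
+ */
+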
+static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
+{
+	dtrace_hdl_t	*dtp = pcb->pcb_hdl;
+	dt_irlist_t	*dlp = &pcb->pcb_ir;
+	dt_probe_t	*prp = pcb->pcb_probe;
+	dt_probe_t	*uprp = pcb->pcb_parent_probe;
+	dt_ident_t	*idp;
+
+	assert(uprp != NULL);
+
+	if (strcmp(prp->desc->prb, "adaptive-acquire") == 0 ||
+	    strcmp(prp->desc->prb, "adaptive-release") == 0) {
+		if (strcmp(uprp->desc->prb, "entry") == 0) {
+			/* Get the (per-CPU) cpuinfo struct. */
+			idp = dt_dlib_get_map(dtp, "cpuinfo");
+			assert(idp != NULL);
+			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+			/* Store the lock address. */
+			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
+			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
+
+			return 1;
+		} else {
+			/* Get the (per-CPU) cpuinfo struct. */
+			idp = dt_dlib_get_map(dtp, "cpuinfo");
+			assert(idp != NULL);
+			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+			/* Set arg0 = stored lock. */
+			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
+			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
+
+			/* Clear the lock address and continue. */
+			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
+		}
+	} else if (strcmp(prp->desc->prb, "adaptive-acquire-error") == 0) {
+		if (strcmp(uprp->desc->prb, "entry") == 0) {
+			/* Get the (per-CPU) cpuinfo struct. */
+			idp = dt_dlib_get_map(dtp, "cpuinfo");
+			assert(idp != NULL);
+			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+			/* Store the lock address. */
+			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
+			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
+
+			return 1;
+		} else {
+			/* Get the (per-CPU) cpuinfo struct. */
+			idp = dt_dlib_get_map(dtp, "cpuinfo");
+			assert(idp != NULL);
+			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+			/* Set arg1 = underlying arg0. */
+			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
+			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_1));
+
+			/* Set arg0 = stored lock. */
+			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
+			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
+
+			/* Clear the lock address and continue. */
+			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
+		}
+	} else if (strcmp(prp->desc->prb, "adaptive-block") == 0) {
+		/*
+		 * - mutex_lock:entry inits lockstat_btime (0) and stores lock.
+		 * - schedule_preempt_disabled:entry sets lockstat_bfrom
+		 * - schedule_preempt_disabled:return increments lockstat_btime
+		 * - mutex_lock:return sets the adaptive-block arguments
+		 */
+		if (strcmp(uprp->desc->prb, "entry") == 0) {
+			/* Get the (per-CPU) cpuinfo struct. */
+			idp = dt_dlib_get_map(dtp, "cpuinfo");
+			assert(idp != NULL);
+			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+			if (strcmp(uprp->desc->fun, "mutex_lock") == 0) {
+				/* Store the lock address. */
+				emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
+				emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
+
+				/* Initialize lockstat_btime. */
+				emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_btime), 0));
+			} else {
+				/* Store the start time. */
+				emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
+				emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
+				emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_bfrom), BPF_REG_0));
+			}
+
+			return 1;
+		} else {
+			/* Get the (per-CPU) cpuinfo struct. */
+			idp = dt_dlib_get_map(dtp, "cpuinfo");
+			assert(idp != NULL);
+			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+			if (strcmp(uprp->desc->fun, "mutex_lock") != 0) {
+				/* Increment the block time. */
+				emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
+				emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
+				emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_bfrom)));
+				emit(dlp, BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1));
+				emit(dlp, BPF_XADD_REG(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_btime), BPF_REG_0));
+
+				return 1;
+			} else {
+				/*
+				 * If lockstat_btime = 0, bail.
+				 * Otherwise arg1 = lockstat_btime.
+				 */
+				emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_btime)));
+				emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, exitlbl));
+				emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_1));
+
+				/* Set arg0 = stored lock */
+				emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
+				emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
+
+				/* Clear the lock address and continue. */
+				emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
+			}
+		}
+	} else if (strcmp(prp->desc->prb, "adaptive-spin") == 0) {
+		/*
+		 * - mutex_lock:entry stores lock and inits lockstat_stime (0).
+		 * - _raw_spin_lock:entry sets lockstat_stime
+		 * - mutex_lock:return sets the adaptive-spin arguments
+		 */
+		if (strcmp(uprp->desc->prb, "entry") == 0) {
+			/* Get the (per-CPU) cpuinfo struct. */
+			idp = dt_dlib_get_map(dtp, "cpuinfo");
+			assert(idp != NULL);
+			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+			if (strcmp(uprp->desc->fun, "mutex_lock") == 0) {
+				/* Store the lock address. */
+				emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
+				emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
+
+				/* Initialize lockstat_stime. */
+				emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_stime), 0));
+			} else {
+				/* Store the start time in lockstat_stime. */
+				emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
+				emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
+				emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime), BPF_REG_0));
+			}
+
+			return 1;
+		} else {
+			/* Get the (per-CPU) cpuinfo struct. */
+			idp = dt_dlib_get_map(dtp, "cpuinfo");
+			assert(idp != NULL);
+			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+			/*
+			 * If lockstat_stime is 0, bail.
+			 * Otherwise, arg1 = time - lockstat_stime.
+			 */
+			emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
+			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
+			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime)));
+			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, exitlbl));
+			emit(dlp, BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1));
+			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_0));
+
+			/* Set arg0 = stored lock */
+			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
+			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
+
+			/* Clear the lock address and continue. */
+			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
+		}
+	} else if (strcmp(prp->desc->prb, "rw-acquire") == 0) {
+		if (strcmp(uprp->desc->prb, "entry") == 0) {
+			/* Get the (per-CPU) cpuinfo struct. */
+			idp = dt_dlib_get_map(dtp, "cpuinfo");
+			assert(idp != NULL);
+			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+			/* Store the lock address. */
+			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
+			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
+
+			return 1;
+		} else {
+			uint_t	lbl_reset = dt_irlist_label(dlp);
+			int	kind = 1;	/* reader (default) */
+
+			if (strstr(uprp->desc->fun, "_write_") != NULL)
+				kind = 0;	/* writer */
+
+			/* Get the (per-CPU) cpuinfo struct. */
+			idp = dt_dlib_get_map(dtp, "cpuinfo");
+			assert(idp != NULL);
+			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+			emit(dlp,  BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+			emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+			emit(dlp,  BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+			emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+			emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+			if (strstr(uprp->desc->fun, "_trylock") != NULL) {
+				/* The return value (arg1) must be 1. */
+				emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(1)));
+				emit(dlp,  BPF_BRANCH_IMM(BPF_JNE, BPF_REG_1, 1, lbl_reset));
+			}
+
+			/* Set arg0 = stored lock, arg1 = kind. */
+			emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
+			emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
+			emit(dlp,  BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(1), kind));
+
+			/* Clear the lock address and continue. */
+			emitl(dlp, lbl_reset,
+				   BPF_STORE_IMM(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
+		}
+	} else if (strcmp(prp->desc->prb, "rw-spin") == 0) {
+		/*
+		 * - *_lock_slowpath:entry stores lock and sets lockstat_stime
+		 * - *_lock_slowpath:return sets the rw-spin arguments
+		 */
+		if (strcmp(uprp->desc->prb, "entry") == 0) {
+			/* Get the (per-CPU) cpuinfo struct. */
+			idp = dt_dlib_get_map(dtp, "cpuinfo");
+			assert(idp != NULL);
+			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+			/* Store the lock address. */
+			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
+			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
+
+			/* Store the start time in lockstat_stime. */
+			emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
+			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
+			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime), BPF_REG_0));
+
+			return 1;
+		} else {
+			/* Get the (per-CPU) cpuinfo struct. */
+			idp = dt_dlib_get_map(dtp, "cpuinfo");
+			assert(idp != NULL);
+			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+			/*
+			 * If lockstat_stime is 0, bail.
+			 * Otherwise, arg1 = time - lockstat_stime.
+			 */
+			emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
+			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
+			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime)));
+			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, exitlbl));
+			emit(dlp, BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1));
+			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_0));
+
+			/* Set arg0 = stored lock */
+			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
+			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
+
+			/* Clear the lock address and continue. */
+			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
+		}
+	} else if (strcmp(prp->desc->prb, "spin-acquire") == 0) {
+		if (strcmp(uprp->desc->prb, "entry") == 0) {
+			/* Get the (per-CPU) cpuinfo struct. */
+			idp = dt_dlib_get_map(dtp, "cpuinfo");
+			assert(idp != NULL);
+			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+			/* Store the lock address. */
+			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
+			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
+
+			return 1;
+		} else {
+			uint_t	lbl_reset = dt_irlist_label(dlp);
+
+			/* Get the (per-CPU) cpuinfo struct. */
+			idp = dt_dlib_get_map(dtp, "cpuinfo");
+			assert(idp != NULL);
+			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+			emit(dlp,  BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+			emit(dlp,  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+			emit(dlp,  BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+			emit(dlp,  BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+			emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+			if (strstr(uprp->desc->fun, "_trylock") != NULL) {
+				/* The return value (arg1) must be 1. */
+				emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(1)));
+				emit(dlp,  BPF_BRANCH_IMM(BPF_JNE, BPF_REG_1, 1, lbl_reset));
+			}
+
+			/* Set arg0 = stored lock. */
+			emit(dlp,  BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
+			emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
+
+			/* Clear the lock address and continue. */
+			emitl(dlp, lbl_reset,
+				   BPF_STORE_IMM(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
+		}
+	} else if (strcmp(prp->desc->prb, "spin-spin") == 0) {
+		/*
+		 * - *_lock_slowpath:entry stores lock and sets lockstat_stime
+		 * - *_lock_slowpath:return sets the spin-spin arguments
+		 */
+		if (strcmp(uprp->desc->prb, "entry") == 0) {
+			/* Get the (per-CPU) cpuinfo struct. */
+			idp = dt_dlib_get_map(dtp, "cpuinfo");
+			assert(idp != NULL);
+			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+			/* Store the lock address. */
+			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
+			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
+
+			/* Store the start time in lockstat_stime. */
+			emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
+			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
+			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime), BPF_REG_0));
+
+			return 1;
+		} else {
+			/* Get the (per-CPU) cpuinfo struct. */
+			idp = dt_dlib_get_map(dtp, "cpuinfo");
+			assert(idp != NULL);
+			dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+			emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+			emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+			/*
+			 * If lockstat_stime is 0, bail.
+			 * Otherwise, arg1 = time - lockstat_stime.
+			 */
+			emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
+			emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
+			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime)));
+			emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, exitlbl));
+			emit(dlp, BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1));
+			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_0));
+
+			/* Set arg0 = stored lock */
+			emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
+			emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
+
+			/* Clear the lock address and continue. */
+			emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
+		}
+	}
+
+	return 0;
+}
+
+static int probe_info(dtrace_hdl_t *dtp, const dt_probe_t *prp,
+		      int *argcp, dt_argdesc_t **argvp)
+{
+	int		i;
+	int		pidx = -1;
+	int		argc = 0;
+	dt_argdesc_t	*argv = NULL;
+
+	for (i = 0; i < ARRAY_SIZE(probe_args); i++) {
+		probe_arg_t	*arg = &probe_args[i];
+
+		if (strcmp(arg->name, prp->desc->prb) == 0) {
+			if (pidx == -1) {
+				pidx = i;
+
+				if (arg->argdesc.native == NULL)
+					break;
+			}
+
+			argc++;
+		}
+	}
+
+	if (argc == 0)
+		goto done;
+
+	argv = dt_zalloc(dtp, argc * sizeof(dt_argdesc_t));
+	if (!argv)
+		return -ENOMEM;
+
+	for (i = pidx; i < pidx + argc; i++) {
+		probe_arg_t	*arg = &probe_args[i];
+
+		argv[arg->argno] = arg->argdesc;
+	}
+
+done:
+	*argcp = argc;
+	*argvp = argv;
+
+	return 0;
+}
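+
+/*
+ * Example (sketch): for the rw-spin probe, the loop above finds three
+ * consecutive probe_args[] records, so probe_info() returns argc = 3 with
+ * argv[0] = "struct rwlock *", argv[1] = "uint64_t", and argv[2] = "int".
+ */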
+
+dt_provimpl_t	dt_lockstat = {
+	.name		= prvname,
+	.prog_type	= BPF_PROG_TYPE_UNSPEC,
+	.populate	= &populate,
+	.enable		= &enable,
+	.trampoline	= &trampoline,
+	.probe_info	= &probe_info,
+};
diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
index d45244fe..51a691cc 100644
--- a/libdtrace/dt_provider.h
+++ b/libdtrace/dt_provider.h
@@ -81,6 +81,7 @@ typedef struct dt_provimpl {
 extern dt_provimpl_t dt_dtrace;
 extern dt_provimpl_t dt_cpc;
 extern dt_provimpl_t dt_fbt;
+extern dt_provimpl_t dt_lockstat;
 extern dt_provimpl_t dt_proc;
 extern dt_provimpl_t dt_profile;
 extern dt_provimpl_t dt_rawtp;
-- 
2.40.1