[DTrace-devel] [PATCH 6/7] Implement the lockstat provider
Kris Van Hees
kris.van.hees at oracle.com
Tue May 9 22:32:53 UTC 2023
Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
---
libdtrace/Build | 2 +
libdtrace/dt_bpf_maps.h | 8 +-
libdtrace/dt_open.c | 1 +
libdtrace/dt_prov_lockstat.c | 725 +++++++++++++++++++++++++++++++++++
libdtrace/dt_provider.h | 1 +
5 files changed, 735 insertions(+), 2 deletions(-)
create mode 100644 libdtrace/dt_prov_lockstat.c
diff --git a/libdtrace/Build b/libdtrace/Build
index 299cb128..3ebdb5a5 100644
--- a/libdtrace/Build
+++ b/libdtrace/Build
@@ -49,6 +49,7 @@ libdtrace-build_SOURCES = dt_aggregate.c \
dt_prov_cpc.c \
dt_prov_dtrace.c \
dt_prov_fbt.c \
+ dt_prov_lockstat.c \
dt_prov_proc.c \
dt_prov_profile.c \
dt_prov_rawtp.c \
@@ -94,6 +95,7 @@ dt_prov_cpc.c_CFLAGS := -Wno-pedantic
dt_prov_dtrace.c_CFLAGS := -Wno-pedantic
dt_prov_fbt.c_CFLAGS := -Wno-pedantic
dt_prov_proc.c_CFLAGS := -Wno-pedantic
+dt_prov_lockstat.c_CFLAGS := -Wno-pedantic
dt_prov_profile.c_CFLAGS := -Wno-pedantic
dt_prov_rawtp.c_CFLAGS := -Wno-pedantic
dt_prov_sched.c_CFLAGS := -Wno-pedantic
diff --git a/libdtrace/dt_bpf_maps.h b/libdtrace/dt_bpf_maps.h
index 68b34b82..0dd36b16 100644
--- a/libdtrace/dt_bpf_maps.h
+++ b/libdtrace/dt_bpf_maps.h
@@ -34,8 +34,12 @@ struct dt_bpf_specs {
typedef struct dt_bpf_cpuinfo dt_bpf_cpuinfo_t;
struct dt_bpf_cpuinfo {
cpuinfo_t ci;
- uint64_t buf_drops;
- uint64_t agg_drops;
+ uint64_t buf_drops; /* CPU principal drop counter */
+ uint64_t agg_drops; /* CPU aggregation drop counter */
+ uint64_t lockstat_lock; /* lockstat: lock being traced */
+ uint64_t lockstat_bfrom; /* lockstat: block time start */
+ uint64_t lockstat_btime; /* lockstat: block time */
+ uint64_t lockstat_stime; /* lockstat: spin time */
};
#ifdef __cplusplus
diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
index 04364807..d71cc4be 100644
--- a/libdtrace/dt_open.c
+++ b/libdtrace/dt_open.c
@@ -67,6 +67,7 @@ static const dt_provimpl_t *dt_providers[] = {
&dt_dtrace, /* list dt_dtrace first */
&dt_cpc,
&dt_fbt,
+ &dt_lockstat,
&dt_proc,
&dt_profile,
&dt_rawtp,
diff --git a/libdtrace/dt_prov_lockstat.c b/libdtrace/dt_prov_lockstat.c
new file mode 100644
index 00000000..2cfb7915
--- /dev/null
+++ b/libdtrace/dt_prov_lockstat.c
@@ -0,0 +1,725 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ *
+ * The 'lockstat' SDT provider for DTrace specific probes.
+ */
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <linux/bpf.h>
+#include <linux/perf_event.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <bpf_asm.h>
+
+#include "dt_dctx.h"
+#include "dt_cg.h"
+#include "dt_bpf.h"
+#include "dt_provider.h"
+#include "dt_probe.h"
+#include "dt_pt_regs.h"
+
+static const char prvname[] = "lockstat";
+static const char modname[] = "vmlinux";
+
+/*
+ * The lockstat-provider probes make use of probes that are already provided by
+ * other providers. As such, the lockstat probes are 'dependent probes'
+ * because they depend on underlying probes to get triggered and they also
+ * depend on argument data provided by the underlying probe to manufacture
+ * their own arguments.
+ *
+ * As a type of SDT probes, lockstat probes are defined with a signature (list
+ * of arguments - possibly empty) that may use translator support to provide
+ * the actual argument values. Therefore, obtaining the value of arguments for
+ * a probe goes through two layers of processing:
+ *
+ * (1) the arguments of the underlying probe are reworked to match the
+ * expected layout of raw arguments for the lockstat probe
+ * (2) an argument mapping table (and supporting translators) is used to get
+ * the value of an argument based on the raw variable data of the
+ * lockstat probe
+ *
+ * To accomplish this, lockstat probes generate a trampoline that rewrites the
+ * arguments of the underlying probe. (The dependent probe support code in the
+ * underlying probe saves the arguments of the underlying probe in the mstate
+ * before executing the trampoline and clauses of the dependent probe, and it
+ * restores them afterwards in case there are multiple dependent probes.)
+ *
+ * Because lockstat probes depend on an underlying probe that may be too
+ * generic, the trampoline code can include a pre-condition (much like a
+ * predicate) that can bypass execution unless the condition is met.
+ */
+
+/*
+ * Probe dependencies
+ *
+ * Lockstat probes are implemented based on probes made available by other
+ * providers.  The probe dependency table associates each lockstat probe with
+ * one or more probe specifications (possibly containing wildcards). Each
+ * matching probe will have the lockstat probe added as a dependent probe.
+ */
+typedef struct probe_dep {
+ const char *name; /* probe name */
+ dtrace_probespec_t spec; /* spec type */
+ const char *str; /* spec string */
+} probe_dep_t;
+
+static probe_dep_t probes[] = {
+ { "adaptive-acquire",
+ DTRACE_PROBESPEC_FUNC, "fbt::mutex_lock" },
+ { "adaptive-acquire-error",
+ DTRACE_PROBESPEC_FUNC, "fbt::mutex_lock" },
+ { "adaptive-block",
+ DTRACE_PROBESPEC_FUNC, "fbt::mutex_lock" },
+ { "adaptive-block",
+ DTRACE_PROBESPEC_FUNC, "fbt::schedule_preempt_disabled" },
+ { "adaptive-release",
+ DTRACE_PROBESPEC_FUNC, "fbt::mutex_unlock" },
+ { "adaptive-spin",
+ DTRACE_PROBESPEC_FUNC, "fbt::mutex_lock" },
+ { "adaptive-spin",
+ DTRACE_PROBESPEC_NAME, "fbt::_raw_spin_lock:entry" },
+ { "rw-acquire",
+ DTRACE_PROBESPEC_FUNC, "fbt::_raw_read_lock*" },
+ { "rw-acquire",
+ DTRACE_PROBESPEC_FUNC, "fbt::_raw_read_trylock*" },
+ { "rw-acquire",
+ DTRACE_PROBESPEC_FUNC, "fbt::_raw_write_lock*" },
+ { "rw-acquire",
+ DTRACE_PROBESPEC_FUNC, "fbt::_raw_write_trylock*" },
+ { "rw-release",
+ DTRACE_PROBESPEC_FUNC, "fbt::_raw_read_unlock*" },
+ { "rw-release",
+ DTRACE_PROBESPEC_FUNC, "fbt::_raw_write_unlock*" },
+ { "rw-spin",
+ DTRACE_PROBESPEC_FUNC, "fbt::queued_read_lock_slowpath" },
+ { "rw-spin",
+ DTRACE_PROBESPEC_FUNC, "fbt::queued_write_lock_slowpath" },
+ { "spin-acquire",
+ DTRACE_PROBESPEC_FUNC, "fbt::_raw_spin_lock*" },
+ { "spin-acquire",
+ DTRACE_PROBESPEC_FUNC, "fbt::_raw_spin_trylock*" },
+ { "spin-release",
+ DTRACE_PROBESPEC_FUNC, "fbt::_raw_spin_unlock*" },
+ { "spin-spin",
+ DTRACE_PROBESPEC_FUNC, "fbt::queued_spin_lock_*" },
+ { "spin-spin",
+ DTRACE_PROBESPEC_FUNC, "fbt::native_queued_spin_lock_*" },
+};
+
+/*
+ * Probe signature specifications
+ *
+ * This table *must* group the arguments of probes. I.e. the arguments of a
+ * given probe must be listed in consecutive records.
+ * A single probe entry that mentions only the name of the probe indicates a probe
+ * that provides no arguments.
+ */
+typedef struct probe_arg {
+ const char *name; /* name of probe */
+ int argno; /* argument number */
+ dt_argdesc_t argdesc; /* argument description */
+} probe_arg_t;
+
+static probe_arg_t probe_args[] = {
+ { "adaptive-acquire", 0, { 0, 0, "struct mutex *" } },
+ { "adaptive-acquire-error", 0, { 0, 0, "struct mutex *" } },
+ { "adaptive-acquire-error", 1, { 1, 0, "int" } },
+ { "adaptive-block", 0, { 0, 0, "struct mutex *" } },
+ { "adaptive-block", 1, { 1, 0, "uint64_t" } },
+ { "adaptive-release", 0, { 0, 0, "struct mutex *" } },
+ { "adaptive-spin", 0, { 0, 0, "struct mutex *" } },
+ { "adaptive-spin", 1, { 1, 0, "uint64_t" } },
+ { "rw-acquire", 0, { 0, 0, "struct rwlock *" } },
+ { "rw-acquire", 1, { 1, 0, "int" } },
+ { "rw-release", 0, { 0, 0, "struct rwlock *" } },
+ { "rw-release", 1, { 1, 0, "int" } },
+ { "rw-spin", 0, { 0, 0, "struct rwlock *" } },
+ { "rw-spin", 1, { 1, 0, "uint64_t" } },
+ { "rw-spin", 2, { 2, 0, "int" } },
+ { "spin-acquire", 0, { 0, 0, "spinlock_t *" } },
+ { "spin-release", 0, { 0, 0, "spinlock_t *" } },
+ { "spin-spin", 0, { 0, 0, "spinlock_t *" } },
+ { "spin-spin", 1, { 1, 0, "uint64_t" } },
+};
+
+static const dtrace_pattr_t pattr = {
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+};
+
+/*
+ * Provide all the "lockstat" SDT probes.
+ */
+static int populate(dtrace_hdl_t *dtp)
+{
+ dt_provider_t *prv;
+ int i;
+ int n = 0;
+
+ prv = dt_provider_create(dtp, prvname, &dt_lockstat, &pattr);
+ if (prv == NULL)
+ return 0;
+
+ /*
+ * Create "lockstat" probes based on the probe_args list. Since each
+ * probe will have at least one entry (with argno == 0), we can use
+ * those entries to identify the probe names.
+ */
+ for (i = 0; i < ARRAY_SIZE(probe_args); i++) {
+ probe_arg_t *arg = &probe_args[i];
+
+ if (arg->argno == 0 &&
+ dt_probe_insert(dtp, prv, prvname, modname, "", arg->name,
+ NULL))
+ n++;
+ }
+
+ return n;
+}
+
+static int add_dependency(dtrace_hdl_t *dtp, dt_probe_t *uprp, void *arg)
+{
+ dt_probe_t *prp = arg;
+
+ dt_probe_add_dependent(dtp, uprp, prp);
+ dt_probe_enable(dtp, uprp);
+
+ return 0;
+}
+
+static void enable(dtrace_hdl_t *dtp, dt_probe_t *prp)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(probes); i++) {
+ probe_dep_t *dep = &probes[i];
+ dtrace_probedesc_t pd;
+
+ if (strcmp(prp->desc->prb, dep->name) != 0)
+ continue;
+
+ if (dtrace_str2desc(dtp, dep->spec, dep->str, &pd) == -1)
+ return;
+
+ dt_probe_iter(dtp, &pd, add_dependency, NULL, prp);
+
+ free((void *)pd.prv);
+ free((void *)pd.mod);
+ free((void *)pd.fun);
+ free((void *)pd.prb);
+ }
+
+ /*
+ * Finally, ensure we're in the list of enablings as well.
+ * (This ensures that, among other things, the probes map
+ * gains entries for us.)
+ */
+ if (!dt_in_list(&dtp->dt_enablings, prp))
+ dt_list_append(&dtp->dt_enablings, prp);
+}
+
+/*
+ * Generate a BPF trampoline for a SDT probe.
+ *
+ * The trampoline function is called when a SDT probe triggers, and it must
+ * satisfy the following prototype:
+ *
+ * int dt_lockstat(void *data)
+ *
+ * The trampoline will populate a dt_dctx_t struct and then call the function
+ * that implements the compiled D clause. It returns the value that it gets
+ * back from that function.
+ */
+#define IS_SPIN (1 << 0)
+#define IS_READ (1 << 1)
+#define IS_WRITE (1 << 2)
+#define IS_MUTEX (1 << 5)
+
+static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
+{
+ dtrace_hdl_t *dtp = pcb->pcb_hdl;
+ dt_irlist_t *dlp = &pcb->pcb_ir;
+ dt_probe_t *prp = pcb->pcb_probe;
+ dt_probe_t *uprp = pcb->pcb_parent_probe;
+ dt_ident_t *idp;
+
+ assert(uprp != NULL);
+
+ if (strcmp(prp->desc->prb, "adaptive-acquire") == 0 ||
+ strcmp(prp->desc->prb, "adaptive-release") == 0) {
+ if (strcmp(uprp->desc->prb, "entry") == 0) {
+			/* Get the (per-CPU) cpuinfo struct. */
+ idp = dt_dlib_get_map(dtp, "cpuinfo");
+ assert(idp != NULL);
+ dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+ /* Store the lock address. */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
+
+ return 1;
+ } else {
+			/* Get the (per-CPU) cpuinfo struct. */
+ idp = dt_dlib_get_map(dtp, "cpuinfo");
+ assert(idp != NULL);
+ dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+ /* Set arg0 = stored lock. */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
+
+ /* Clear the lock address and continue. */
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
+ }
+ } else if (strcmp(prp->desc->prb, "adaptive-acquire-error") == 0) {
+ if (strcmp(uprp->desc->prb, "entry") == 0) {
+			/* Get the (per-CPU) cpuinfo struct. */
+ idp = dt_dlib_get_map(dtp, "cpuinfo");
+ assert(idp != NULL);
+ dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+ /* Store the lock address. */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
+
+ return 1;
+ } else {
+			/* Get the (per-CPU) cpuinfo struct. */
+ idp = dt_dlib_get_map(dtp, "cpuinfo");
+ assert(idp != NULL);
+ dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+ /* Set arg1 = underlying arg0. */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_1));
+
+ /* Set arg0 = stored lock. */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
+
+ /* Clear the lock address and continue. */
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
+ }
+ } else if (strcmp(prp->desc->prb, "adaptive-block") == 0) {
+ /*
+ * - mutex_lock:entry inits lockstat_btime (0) and stores lock.
+ * - schedule_preempt_disabled:entry sets lockstat_bfrom
+		 * - schedule_preempt_disabled:return adds the elapsed block time to lockstat_btime
+ * - mutex_lock:return sets the adaptive-block arguments
+ */
+ if (strcmp(uprp->desc->prb, "entry") == 0) {
+			/* Get the (per-CPU) cpuinfo struct. */
+ idp = dt_dlib_get_map(dtp, "cpuinfo");
+ assert(idp != NULL);
+ dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+ if (strcmp(uprp->desc->fun, "mutex_lock") == 0) {
+ /* Store the lock address. */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
+
+ /* Initialize lockstat_btime. */
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_btime), 0));
+ } else {
+ /* Store the start time. */
+ emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_bfrom), BPF_REG_0));
+ }
+
+ return 1;
+ } else {
+			/* Get the (per-CPU) cpuinfo struct. */
+ idp = dt_dlib_get_map(dtp, "cpuinfo");
+ assert(idp != NULL);
+ dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+ if (strcmp(uprp->desc->fun, "mutex_lock") != 0) {
+ /* Increment the block time. */
+ emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_bfrom)));
+ emit(dlp, BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1));
+ emit(dlp, BPF_XADD_REG(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_btime), BPF_REG_0));
+
+ return 1;
+ } else {
+ /*
+ * If lockstat_btime = 0, bail.
+ * Otherwise arg1 = lockstat_btime.
+ */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_btime)));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, exitlbl));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_1));
+
+ /* Set arg0 = stored lock */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
+
+ /* Clear the lock address and continue. */
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
+ }
+ }
+ } else if (strcmp(prp->desc->prb, "adaptive-spin") == 0) {
+ /*
+ * - mutex_lock:entry stores lock and inits lockstat_stime (0).
+ * - _raw_spin_lock:entry sets lockstat_stime
+ * - mutex_lock:return sets the adaptive-spin arguments
+ */
+ if (strcmp(uprp->desc->prb, "entry") == 0) {
+			/* Get the (per-CPU) cpuinfo struct. */
+ idp = dt_dlib_get_map(dtp, "cpuinfo");
+ assert(idp != NULL);
+ dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+ if (strcmp(uprp->desc->fun, "mutex_lock") == 0) {
+ /* Store the lock address. */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
+
+ /* Initialize lockstat_stime. */
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_stime), 0));
+ } else {
+ /* Store the start time in lockstat_stime. */
+ emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime), BPF_REG_0));
+ }
+
+ return 1;
+ } else {
+			/* Get the (per-CPU) cpuinfo struct. */
+ idp = dt_dlib_get_map(dtp, "cpuinfo");
+ assert(idp != NULL);
+ dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+ /*
+ * If lockstat_stime is 0, bail.
+ * Otherwise, arg1 = time - lockstat_stime.
+ */
+ emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime)));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, exitlbl));
+ emit(dlp, BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_0));
+
+ /* Set arg0 = stored lock */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
+
+ /* Clear the lock address and continue. */
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
+ }
+ } else if (strcmp(prp->desc->prb, "rw-acquire") == 0) {
+ if (strcmp(uprp->desc->prb, "entry") == 0) {
+			/* Get the (per-CPU) cpuinfo struct. */
+ idp = dt_dlib_get_map(dtp, "cpuinfo");
+ assert(idp != NULL);
+ dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+ /* Store the lock address. */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
+
+ return 1;
+ } else {
+			uint_t lbl_reset = dt_irlist_label(dlp);
+ int kind = 1; /* reader (default) */
+
+ if (strstr(uprp->desc->fun, "_write_") != NULL)
+ kind = 0; /* writer */
+
+			/* Get the (per-CPU) cpuinfo struct. */
+ idp = dt_dlib_get_map(dtp, "cpuinfo");
+ assert(idp != NULL);
+ dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+ if (strstr(uprp->desc->fun, "_trylock") != NULL) {
+ /* The return value (arg1) must be 1. */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(1)));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JNE, BPF_REG_1, 1, lbl_reset));
+ }
+
+ /* Set arg0 = stored lock, arg1 = kind. */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(1), kind));
+
+ /* Clear the lock address and continue. */
+ emitl(dlp, lbl_reset,
+ BPF_STORE_IMM(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
+ }
+ } else if (strcmp(prp->desc->prb, "rw-spin") == 0) {
+ /*
+ * - *_lock_slowpath:entry stores lock and sets lockstat_stime
+ * - *_lock_slowpath:return sets the rw-spin arguments
+ */
+ if (strcmp(uprp->desc->prb, "entry") == 0) {
+			/* Get the (per-CPU) cpuinfo struct. */
+ idp = dt_dlib_get_map(dtp, "cpuinfo");
+ assert(idp != NULL);
+ dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+ /* Store the lock address. */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
+
+ /* Store the start time in lockstat_stime. */
+ emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime), BPF_REG_0));
+
+ return 1;
+ } else {
+			/* Get the (per-CPU) cpuinfo struct. */
+ idp = dt_dlib_get_map(dtp, "cpuinfo");
+ assert(idp != NULL);
+ dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+ /*
+ * If lockstat_stime is 0, bail.
+ * Otherwise, arg1 = time - lockstat_stime.
+ */
+ emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime)));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, exitlbl));
+ emit(dlp, BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_0));
+
+ /* Set arg0 = stored lock */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
+
+ /* Clear the lock address and continue. */
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
+ }
+ } else if (strcmp(prp->desc->prb, "spin-acquire") == 0) {
+ if (strcmp(uprp->desc->prb, "entry") == 0) {
+			/* Get the (per-CPU) cpuinfo struct. */
+ idp = dt_dlib_get_map(dtp, "cpuinfo");
+ assert(idp != NULL);
+ dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+ /* Store the lock address. */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
+
+ return 1;
+ } else {
+			uint_t lbl_reset = dt_irlist_label(dlp);
+
+			/* Get the (per-CPU) cpuinfo struct. */
+ idp = dt_dlib_get_map(dtp, "cpuinfo");
+ assert(idp != NULL);
+ dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+ if (strstr(uprp->desc->fun, "_trylock") != NULL) {
+ /* The return value (arg1) must be 1. */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(1)));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JNE, BPF_REG_1, 1, lbl_reset));
+ }
+
+ /* Set arg0 = stored lock. */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
+
+ /* Clear the lock address and continue. */
+ emitl(dlp, lbl_reset,
+ BPF_STORE_IMM(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
+ }
+ } else if (strcmp(prp->desc->prb, "spin-spin") == 0) {
+ /*
+ * - *_lock_slowpath:entry stores lock and sets lockstat_stime
+		 * - *_lock_slowpath:return sets the spin-spin arguments
+ */
+ if (strcmp(uprp->desc->prb, "entry") == 0) {
+			/* Get the (per-CPU) cpuinfo struct. */
+ idp = dt_dlib_get_map(dtp, "cpuinfo");
+ assert(idp != NULL);
+ dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+ /* Store the lock address. */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(0)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_0, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), BPF_REG_1));
+
+ /* Store the start time in lockstat_stime. */
+ emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime), BPF_REG_0));
+
+ return 1;
+ } else {
+			/* Get the (per-CPU) cpuinfo struct. */
+ idp = dt_dlib_get_map(dtp, "cpuinfo");
+ assert(idp != NULL);
+ dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
+ emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
+ emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
+
+ /*
+ * If lockstat_stime is 0, bail.
+ * Otherwise, arg1 = time - lockstat_stime.
+ */
+ emit(dlp, BPF_MOV_REG(BPF_REG_6, BPF_REG_0));
+ emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_stime)));
+ emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, exitlbl));
+ emit(dlp, BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(1), BPF_REG_0));
+
+ /* Set arg0 = stored lock */
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_lock)));
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_1));
+
+ /* Clear the lock address and continue. */
+ emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_6, offsetof(dt_bpf_cpuinfo_t, lockstat_lock), 0));
+ }
+ }
+
+ return 0;
+}
+
+static int probe_info(dtrace_hdl_t *dtp, const dt_probe_t *prp,
+ int *argcp, dt_argdesc_t **argvp)
+{
+ int i;
+ int pidx = -1;
+ int argc = 0;
+ dt_argdesc_t *argv = NULL;
+
+ for (i = 0; i < ARRAY_SIZE(probe_args); i++) {
+ probe_arg_t *arg = &probe_args[i];
+
+ if (strcmp(arg->name, prp->desc->prb) == 0) {
+ if (pidx == -1) {
+ pidx = i;
+
+ if (arg->argdesc.native == NULL)
+ break;
+ }
+
+ argc++;
+ }
+ }
+
+ if (argc == 0)
+ goto done;
+
+ argv = dt_zalloc(dtp, argc * sizeof(dt_argdesc_t));
+ if (!argv)
+ return -ENOMEM;
+
+ for (i = pidx; i < pidx + argc; i++) {
+ probe_arg_t *arg = &probe_args[i];
+
+ argv[arg->argno] = arg->argdesc;
+ }
+
+done:
+ *argcp = argc;
+ *argvp = argv;
+
+ return 0;
+}
+
+dt_provimpl_t dt_lockstat = {
+ .name = prvname,
+ .prog_type = BPF_PROG_TYPE_UNSPEC,
+ .populate = &populate,
+ .enable = &enable,
+ .trampoline = &trampoline,
+ .probe_info = &probe_info,
+};
diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
index d45244fe..51a691cc 100644
--- a/libdtrace/dt_provider.h
+++ b/libdtrace/dt_provider.h
@@ -81,6 +81,7 @@ typedef struct dt_provimpl {
extern dt_provimpl_t dt_dtrace;
extern dt_provimpl_t dt_cpc;
extern dt_provimpl_t dt_fbt;
+extern dt_provimpl_t dt_lockstat;
extern dt_provimpl_t dt_proc;
extern dt_provimpl_t dt_profile;
extern dt_provimpl_t dt_rawtp;
--
2.40.1
More information about the DTrace-devel
mailing list