[DTrace-devel] [PATCH 6/7] Implement the lockstat provider

Eugene Loh eugene.loh at oracle.com
Sat May 13 19:32:58 UTC 2023


The trampoline() function is over 400 lines long.  I think it can be 
much shorter and much more readable.  The issue right now is that there 
are a number of intricate instruction sequences to generate.  Since they 
are opaque, there are comments to explain them.  Perhaps all that code 
was "easily generated" via cut-and-paste, but the human cost of reading 
and maintaining the code is rather high.

Importantly, however, these sequences repeat.  A lot.  So there is 
tremendous potential for consolidation.

A prime candidate is the instruction sequence
         /* Get the (pre-CPU) cpuinfo struct. */
         idp = dt_dlib_get_map(dtp, "cpuinfo");
         assert(idp != NULL);
         dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id);
         emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
         emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE));
         emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0));
         emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem));
         emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl));
It is not trivial and yet appears verbatim 16 times. Consolidation can 
save over 150 lines.  There are all sorts of ways this replication can 
be avoided.  For example, codegen for each probe has an entry branch and 
a return branch, and this cpuinfo computation is replicated for each;  
just executing it once for the entire "if (entry) else" pair already 
saves a factor-of-two.  But each probe code itself replicates this code. 
So, just generate this sequence of instructions once, at the top of 
trampoline(), and then rip out the 16 replicas.

Regarding its comment, what even does "pre-CPU" mean?  How about 
replacing the comment with:
         /* %r0 = bpf_map_lookup_elem("cpuinfo", 0); */

Anyhow, there are other code sequences, some of which are repeated only 
2-3 times but others 8x.  Plus, some are minor variations of each 
other.  Even when a sequence is only 2 instructions long, it's tedious 
and error-prone to check the same sequence 8 or more times.

How about defining some macros that:
*)  do the tedious code generation
*)  are so clearly named that comments are not necessary
*)  can be reused many times.

This separates verification of codegen sequences from verification of 
the logic.

Here is a quick attempt to show what I mean:

/* %r0 = bpf_map_lookup_elem("cpuinfo", 0); */
#define INIT_CPUINFO \
     idp = dt_dlib_get_map(dtp, "cpuinfo"); \
     assert(idp != NULL); \
     dt_cg_xsetx(dlp, idp, DT_LBL_NONE, BPF_REG_1, idp->di_id); \
     emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP)); \
     emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_BASE)); \
     emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_2, 0, 0)); \
     emit(dlp, BPF_CALL_HELPER(BPF_FUNC_map_lookup_elem)); \
     emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_0, 0, exitlbl)); \
     reg_cpuinfo = BPF_REG_0;

/*
 * cpuinfo->member = arg<argno>;
 * Copy a probe argument (from the mstate at %r7) into the given
 * cpuinfo field.  Clobbers %r1.
 * Note: every line of a multi-line macro needs a '\' continuation;
 * the mail-wrapped version dropped one after the BPF_LOAD.
 */
#define CPUINFO_EQUAL_ARG(member, argno) \
     emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(argno))); \
     emit(dlp, BPF_STORE(BPF_DW, reg_cpuinfo, offsetof(dt_bpf_cpuinfo_t, member), BPF_REG_1));

/*
 * cpuinfo->member = constant;
 * Store an immediate value into the given cpuinfo field.
 * (Rejoined onto one continuation line; the mail wrapping had split
 * the emit() call across lines without a '\'.)
 */
#define CPUINFO_EQUAL_CONSTANT(member, constant) \
     emit(dlp, BPF_STORE_IMM(BPF_DW, reg_cpuinfo, offsetof(dt_bpf_cpuinfo_t, member), constant));

/*
 * cpuinfo->member = bpf_ktime_get_ns();
 * The helper call clobbers %r0-%r5, so first move the cpuinfo pointer
 * to the callee-saved %r6 and update reg_cpuinfo to match.
 * (Restored the missing '\' on the wrapped final BPF_STORE line.)
 */
#define CPUINFO_EQUAL_TIME(member) \
     emit(dlp, BPF_MOV_REG(BPF_REG_6, reg_cpuinfo)); \
     reg_cpuinfo = BPF_REG_6; \
     emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns)); \
     emit(dlp, BPF_STORE(BPF_DW, reg_cpuinfo, offsetof(dt_bpf_cpuinfo_t, member), BPF_REG_0));

/*
 * arg<argno> = cpuinfo->member;
 * Copy a cpuinfo field into a probe argument slot in the mstate (%r7).
 * Clobbers %r1.  Fixes the typo dt_cpf_cpuinfo_t -> dt_bpf_cpuinfo_t
 * and the missing '\' on the wrapped BPF_LOAD line.
 */
#define ARG_EQUAL_CPUINFO(argno, member) \
     emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, reg_cpuinfo, offsetof(dt_bpf_cpuinfo_t, member))); \
     emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(argno), BPF_REG_1));

/*
 * arg<argno1> = arg<argno2>;
 * Copy one probe-argument slot to another via %r1.
 * Fixes a duplicated "BPF_DW," argument in the BPF_LOAD (which gave it
 * one operand too many) and the missing '\' continuations.
 */
#define ARG_EQUAL_ARG(argno1, argno2) \
     emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(argno2))); \
     emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(argno1), BPF_REG_1));

/*
 * arg<argno> = constant;
 * Store an immediate value into a probe-argument slot in the mstate.
 * (Rejoined the mail-wrapped emit() call onto one continuation line.)
 */
#define ARG_EQUAL_CONSTANT(argno, constant) \
     emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(argno), constant));

/*
 * arg<argno> = bpf_ktime_get_ns() - cpuinfo->member;
 * Compute the elapsed time since the timestamp recorded in the given
 * cpuinfo field and store it in a probe-argument slot.  If the stored
 * timestamp is 0 (never set), branch to exitlbl instead.  The helper
 * call clobbers %r0-%r5, so the cpuinfo pointer is first parked in the
 * callee-saved %r6.  Clobbers %r0 and %r1.
 * (Restored the '\' continuations that were missing from the
 * BPF_CALL_HELPER line onward, which had truncated the macro after
 * two statements.)
 */
#define ARG_EQUAL_TIME_SINCE(argno, member) \
     emit(dlp, BPF_MOV_REG(BPF_REG_6, reg_cpuinfo)); \
     reg_cpuinfo = BPF_REG_6; \
     emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns)); \
     emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, reg_cpuinfo, offsetof(dt_bpf_cpuinfo_t, member))); \
     emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, exitlbl)); \
     emit(dlp, BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1)); \
     emit(dlp, BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(argno), BPF_REG_0));

/*
 * if (cpuinfo->member == 0) goto exitlbl;
 * Load the given cpuinfo field and bail out of the trampoline if it
 * is zero.  Clobbers %r1.
 * (Restored the '\' lost when the BPF_LOAD was mail-wrapped.)
 */
#define IF_CPUINFO_IS_ZERO_EXIT(member) \
     emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, reg_cpuinfo, offsetof(dt_bpf_cpuinfo_t, member))); \
     emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, BPF_REG_1, 0, exitlbl));

/*
 * Generate the lockstat trampoline code for the probe in pcb, branching
 * to exitlbl when the probe should not fire.  The underlying (parent)
 * probe is either the entry or the return of the instrumented locking
 * function; state is carried between the two halves in the per-CPU
 * cpuinfo record.
 *
 * Returns 1 when only the underlying-probe bookkeeping was emitted (the
 * lockstat probe itself does not fire here) and 0 when the probe fires.
 *
 * Fixes vs. the posted draft: doubled ';;' after the dt_irlist_label()
 * calls removed, and the inline block-time sequence now uses
 * reg_cpuinfo (per the macro convention) instead of hard-coding
 * BPF_REG_0.
 */
static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
{
     dtrace_hdl_t   *dtp = pcb->pcb_hdl;
     dt_irlist_t    *dlp = &pcb->pcb_ir;
     dt_probe_t     *prp = pcb->pcb_probe;
     dt_probe_t     *uprp = pcb->pcb_parent_probe;
     dt_ident_t     *idp;
     int            reg_cpuinfo = -1;  /* register holding the cpuinfo ptr */

     assert(uprp != NULL);

     /* %r0 = bpf_map_lookup_elem("cpuinfo", 0); exits on NULL. */
     INIT_CPUINFO

     if (strcmp(prp->desc->prb, "adaptive-acquire") == 0 ||
         strcmp(prp->desc->prb, "adaptive-release") == 0) {
         if (strcmp(uprp->desc->prb, "entry") == 0) {
             /* Remember the lock; fire on the return probe. */
             CPUINFO_EQUAL_ARG(lockstat_lock, 0)
             return 1;
         } else {
             /* arg0 = saved lock; clear the saved lock. */
             ARG_EQUAL_CPUINFO(0, lockstat_lock)
             CPUINFO_EQUAL_CONSTANT(lockstat_lock, 0)
             return 0;
         }
     } else if (strcmp(prp->desc->prb, "adaptive-acquire-error") == 0) {
         if (strcmp(uprp->desc->prb, "entry") == 0) {
             CPUINFO_EQUAL_ARG(lockstat_lock, 0)
             return 1;
         } else {
             /* arg1 = return value; arg0 = saved lock; clear it. */
             ARG_EQUAL_ARG(1, 0)
             ARG_EQUAL_CPUINFO(0, lockstat_lock)
             CPUINFO_EQUAL_CONSTANT(lockstat_lock, 0)
             return 0;
         }
     } else if (strcmp(prp->desc->prb, "adaptive-block") == 0) {
         if (strcmp(uprp->desc->prb, "entry") == 0) {
             if (strcmp(uprp->desc->fun, "mutex_lock") == 0) {
                 /* Outer mutex_lock entry: record lock, reset btime. */
                 CPUINFO_EQUAL_ARG(lockstat_lock, 0)
                 CPUINFO_EQUAL_CONSTANT(lockstat_btime, 0)
             } else {
                 /* Inner blocking call entry: stamp the block start. */
                 CPUINFO_EQUAL_TIME(lockstat_bfrom)
             }
             return 1;
         } else {
             if (strcmp(uprp->desc->fun, "mutex_lock") != 0) {
                 /* Inner return: btime += now - bfrom. */
                 emit(dlp, BPF_MOV_REG(BPF_REG_6, reg_cpuinfo));
                 reg_cpuinfo = BPF_REG_6;
                 emit(dlp, BPF_CALL_HELPER(BPF_FUNC_ktime_get_ns));
                 emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, reg_cpuinfo, offsetof(dt_bpf_cpuinfo_t, lockstat_bfrom)));
                 emit(dlp, BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1));
                 emit(dlp, BPF_XADD_REG(BPF_DW, reg_cpuinfo, offsetof(dt_bpf_cpuinfo_t, lockstat_btime), BPF_REG_0));

                 return 1;
             } else {
                 /* Outer return: fire only if we actually blocked. */
                 IF_CPUINFO_IS_ZERO_EXIT(lockstat_btime)
                 ARG_EQUAL_CPUINFO(1, lockstat_btime)
                 ARG_EQUAL_CPUINFO(0, lockstat_lock)
                 CPUINFO_EQUAL_CONSTANT(lockstat_lock, 0)
                 return 0;
             }
         }
     } else if (strcmp(prp->desc->prb, "adaptive-spin") == 0) {
         if (strcmp(uprp->desc->prb, "entry") == 0) {
             if (strcmp(uprp->desc->fun, "mutex_lock") == 0) {
                 CPUINFO_EQUAL_ARG(lockstat_lock, 0)
                 CPUINFO_EQUAL_CONSTANT(lockstat_stime, 0)
             } else {
                 /* Stamp the spin start time. */
                 CPUINFO_EQUAL_TIME(lockstat_stime)
             }
             return 1;
         } else {
             /* arg1 = spin duration; arg0 = saved lock; clear it. */
             ARG_EQUAL_TIME_SINCE(1, lockstat_stime)
             ARG_EQUAL_CPUINFO(0, lockstat_lock)
             CPUINFO_EQUAL_CONSTANT(lockstat_lock, 0)
             return 0;
         }
     } else if (strcmp(prp->desc->prb, "rw-acquire") == 0) {
         if (strcmp(uprp->desc->prb, "entry") == 0) {
             CPUINFO_EQUAL_ARG(lockstat_lock, 0)
             return 1;
         } else {
             uint_t    lbl_reset = dt_irlist_label(dlp);
             int       kind = 1;    /* reader (default) */

             if (strstr(uprp->desc->fun, "_write_") != NULL)
                 kind = 0;    /* writer */

             if (strstr(uprp->desc->fun, "_trylock") != NULL) {
                 /* Trylock: only fire if the return value (arg1) is 1. */
                 emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(1)));
                 emit(dlp, BPF_BRANCH_IMM(BPF_JNE, BPF_REG_1, 1, lbl_reset));
             }

             ARG_EQUAL_CPUINFO(0, lockstat_lock)

             /* Set arg1 = kind (0 = writer, 1 = reader). */
             emit(dlp, BPF_STORE_IMM(BPF_DW, BPF_REG_7, DMST_ARG(1), kind));

             emitl(dlp, lbl_reset, BPF_NOP());
             CPUINFO_EQUAL_CONSTANT(lockstat_lock, 0)
             return 0;
         }
     } else if (strcmp(prp->desc->prb, "rw-spin") == 0) {
         if (strcmp(uprp->desc->prb, "entry") == 0) {
             CPUINFO_EQUAL_ARG(lockstat_lock, 0)
             CPUINFO_EQUAL_TIME(lockstat_stime)
             return 1;
         } else {
             ARG_EQUAL_TIME_SINCE(1, lockstat_stime)
             ARG_EQUAL_CPUINFO(0, lockstat_lock)
             CPUINFO_EQUAL_CONSTANT(lockstat_lock, 0)
             return 0;
         }
     } else if (strcmp(prp->desc->prb, "spin-acquire") == 0) {
         if (strcmp(uprp->desc->prb, "entry") == 0) {
             CPUINFO_EQUAL_ARG(lockstat_lock, 0)
             return 1;
         } else {
             uint_t    lbl_reset = dt_irlist_label(dlp);

             if (strstr(uprp->desc->fun, "_trylock") != NULL) {
                 /* Trylock: only fire if the return value (arg1) is 1. */
                 emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_1, BPF_REG_7, DMST_ARG(1)));
                 emit(dlp, BPF_BRANCH_IMM(BPF_JNE, BPF_REG_1, 1, lbl_reset));
             }

             ARG_EQUAL_CPUINFO(0, lockstat_lock)
             emitl(dlp, lbl_reset, BPF_NOP());
             CPUINFO_EQUAL_CONSTANT(lockstat_lock, 0)
             return 0;
         }
     } else if (strcmp(prp->desc->prb, "spin-spin") == 0) {
         if (strcmp(uprp->desc->prb, "entry") == 0) {
             CPUINFO_EQUAL_ARG(lockstat_lock, 0)
             CPUINFO_EQUAL_TIME(lockstat_stime)
             return 1;
         } else {
             ARG_EQUAL_TIME_SINCE(1, lockstat_stime)
             ARG_EQUAL_CPUINFO(0, lockstat_lock)
             CPUINFO_EQUAL_CONSTANT(lockstat_lock, 0)
             return 0;
         }
     }

     return 0;
}




More information about the DTrace-devel mailing list