[DTrace-devel] [PATCH] Implement linking in of precompiled BPF code
Eugene Loh
eugene.loh at oracle.com
Wed Apr 8 15:31:19 PDT 2020
On 04/08/2020 08:00 AM, Kris Van Hees wrote:
> Some functionality for D programs is provided as function written in
> C and precompiled into BPF instructions (at DTrace build time). These
> functions are available as an ELF object (bpf_dlib.o). WHen a compiled
WHen -> When
> D program requires the use of a precompiled function, that function and
> all its dependencies are linked into the program. When a function is
> used more than once, only one copy will be linked in (obviously).
Remove "obviously"? (Not necessary and sounds funny.)
> The linking of code (implemented as a resolving of relocations that
> reference as-yet undefined symbols) takes place right after the D code
> has been compiled into a DIFO. It involves a three-phase approach:
>
> 1. We perform a recursive walk of the dependency tree encoded in the
> relocation tables. The main program lists the functions it depends
> on in the relocation tables, and each function in term lists all of
> its dependencies in its own relocation table. As we follow the chain
> of dependent functions, we calculate the total number of instructions
> that the final linked program will provide (sum of all unique
> functions), the total number of relocations that the final linked
> program will provide, and the offset of the entry point into each
> function is recorded in the identifier for each of these functions.
>
> 2. We again perform a recursive walk of the dependency tree encoded in
> the relocation tables. This time, we copy the instructions of each
> function, along with its relocations (after adjusting the offset
> based on the new entry address for the function, and setting the
> data to the entry point of the called function). We also construct
> a combined string table.
>
> 3. Finally we process the combined relocation table and patch the code
> accordingly.
>
> A disassembler listing of the linked program can be requested by using
> the -xdisasm=n option along with -S, where bit 2 is set in 'n'.
So you want to stick with the terminology set by DT_TREEDUMP_PASS.
Okay. But FWIW/IMHO, that makes stuff like the above sentence,
referencing a user-visible option, confusing. A lot of people think bit
0 is 0x1, bit 2 is 0x4, etc. Might it be shorter and at least as
correct just to say, "by using -S with -xdisasm=2." If they are
otherwise using -xdisasm, they'll know they can bit-or other choices in
as well.
> An earlier prototype of the BPF code linking process was implemented by
> Eugene Loh.
Thanks for the credit. The current version is quite different, and I
don't understand it well enough (yet) to give a careful review. Just a
few minor things below.
> Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
> ---
> libdtrace/dt_bpf.c | 16 +--
> libdtrace/dt_cc.c | 248 +++++++++++++++++++++++++++++++++++++++++++
> libdtrace/dt_dlibs.c | 45 +++++++-
> libdtrace/dt_impl.h | 8 +-
> 4 files changed, 302 insertions(+), 15 deletions(-)
>
> diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
> index 2c90411b..97d33ae2 100644
> --- a/libdtrace/dt_bpf.c
> +++ b/libdtrace/dt_bpf.c
> @@ -211,15 +211,15 @@ dt_bpf_reloc_prog(dtrace_hdl_t *dtp, const dt_probe_t *prp,
> uint32_t val = 0;
>
> switch (idp->di_kind) {
> - case DT_IDENT_FUNC:
> + case DT_IDENT_FUNC: /* built-in function */
> continue;
> - case DT_IDENT_PTR:
> + case DT_IDENT_PTR: /* BPF map */
> if (rp->dofr_type == R_BPF_64_64)
> text[ioff].src_reg = BPF_PSEUDO_MAP_FD;
>
> val = idp->di_id;
> break;
> - case DT_IDENT_SCALAR:
> + case DT_IDENT_SCALAR: /* built-in constant */
> switch (idp->di_id) {
> case DT_CONST_EPID:
> val = epid;
> @@ -230,7 +230,7 @@ dt_bpf_reloc_prog(dtrace_hdl_t *dtp, const dt_probe_t *prp,
> }
>
> break;
> - case DT_IDENT_SYMBOL:
> + case DT_IDENT_SYMBOL: /* BPF function (pre-compiled) */
> continue;
> }
>
> @@ -289,9 +289,10 @@ dt_bpf_load_prog(dtrace_hdl_t *dtp, const dt_probe_t *prp,
> char *p, *q;
>
> rc = dt_bpf_error(dtp,
> - "BPF program load for '%s:%s:%s:%s' failed: %s\n",
> - pdp->prv, pdp->mod, pdp->fun, pdp->prb,
> - strerror(errno));
> + "BPF program load for '%s:%s:%s:%s' failed: "
> + "%s\n",
> + pdp->prv, pdp->mod, pdp->fun, pdp->prb,
> + strerror(errno));
>
> /*
> * If there is BPF verifier output, print it with a "BPF: "
> @@ -350,7 +351,6 @@ dt_bpf_stmt(dtrace_hdl_t *dtp, dtrace_prog_t *pgp, dtrace_stmtdesc_t *sdp,
> void *data)
> {
> dtrace_probedesc_t *pdp = &sdp->dtsd_ecbdesc->dted_probe;
> - dtrace_actdesc_t *ap = sdp->dtsd_action;
> dtrace_probeinfo_t pip;
> dt_probe_t *prp;
> int fd, rc;
> diff --git a/libdtrace/dt_cc.c b/libdtrace/dt_cc.c
> index c8c6dd0c..fe7082e8 100644
> --- a/libdtrace/dt_cc.c
> +++ b/libdtrace/dt_cc.c
> @@ -2178,6 +2178,238 @@ out:
> return (err ? NULL : rv);
> }
>
> +static uint_t
> +dt_link_layout(dtrace_hdl_t *dtp, const dtrace_difo_t *dp, uint_t *insc,
> + uint_t *relc)
> +{
> + uint_t pc = *insc;
> + uint_t len = dp->dtdo_brelen;
> + const dof_relodesc_t *rp = dp->dtdo_breltab;
> +
> + (*insc) += dp->dtdo_len;
> + (*relc) += len;
> + for (; len != 0; len--, rp++) {
> + char *name = &dp->dtdo_strtab[rp->dofr_name];
> + dt_ident_t *idp = dt_dlib_get_func(dtp, name);
> + dtrace_difo_t *rdp;
> +
> + if (idp == NULL || /* not found */
> + idp->di_kind != DT_IDENT_SYMBOL || /* not interesting */
Would it be possible to be not much more verbose than "not interesting"
and yet more illuminating? Here and the other like occurrences of "not
interesting."
> + idp->di_flags & DT_IDFLG_REF) /* already seen */
> + continue;
> +
> + idp->di_flags |= DT_IDFLG_REF;
> +
> + rdp = dt_dlib_get_func_difo(idp);
> + idp->di_id = dt_link_layout(dtp, rdp, insc, relc);
> + }
> +
> + return pc;
> +}
> +
> +static uint_t
> +dt_link_construct(dtrace_hdl_t *dtp, dtrace_difo_t *dp,
> + const dtrace_difo_t *sdp, dt_strtab_t *stab,
> + uint_t *pcp, uint_t *rcp)
> +{
> + uint_t pc = *pcp;
> + uint_t rc = *rcp;
> + uint_t vlen = sdp->dtdo_varlen;
> + dtrace_difv_t *vp = sdp->dtdo_vartab;
> + uint_t len = sdp->dtdo_brelen;
> + const dof_relodesc_t *rp = sdp->dtdo_breltab;
> + dof_relodesc_t *nrp = &dp->dtdo_breltab[rc];
> +
> + /*
> + * Make sure there is enough room in the destination instruction buffer
> + * and then copy the executable code for the current function into the
> + * final instruction buffer at the given program counter (instruction
> + * index).
> + */
> + assert(pc + sdp->dtdo_len <= dp->dtdo_len);
> + memcpy(dp->dtdo_buf + pc, sdp->dtdo_buf,
> + sdp->dtdo_len * sizeof(struct bpf_insn));
> + (*pcp) += sdp->dtdo_len;
> +
> + /*
> + * Populate the (new) string table with any variable names for this
> + * DIFO.
> + */
> + for (; vlen != 0; vlen--, vp++) {
> + const char *name = &sdp->dtdo_strtab[vp->dtdv_name];
> +
> + vp->dtdv_name = dt_strtab_insert(stab, name);
> + }
> +
> + /*
> + * Next we copy the relocation table. We do that before we run through
> + * it to process dependencies because we want relocations to be in
> + * strict ascending order by offset in the final relocation table. The
> + * disassembler depends on that, as does the relocation resolver
> + * implementation.
> + */
> + (*rcp) += len;
> + for (; len != 0; len--, rp++, nrp++) {
> + const char *name = &sdp->dtdo_strtab[rp->dofr_name];
> + dt_ident_t *idp = dt_dlib_get_func(dtp, name);
> +
> + nrp->dofr_name = dt_strtab_insert(stab, name);
> + nrp->dofr_type = rp->dofr_type;
> + nrp->dofr_offset = rp->dofr_offset +
> + pc * sizeof(struct bpf_insn);
> + nrp->dofr_data = rp->dofr_data;
> +
> + if (idp == NULL || /* not found */
> + idp->di_kind != DT_IDENT_SYMBOL) /* not interesting */
> + continue;
> +
> + nrp->dofr_data = idp->di_id; /* set value */
> + }
> +
> + /*
> + * Now go through the relocations one more, performing the actual
one -> once
> + * work of add executable code (and relocations) for dependencies.
add->adding
> + */
> + len = sdp->dtdo_brelen;
> + rp = sdp->dtdo_breltab;
> + nrp = &dp->dtdo_breltab[rc];
> + for (; len != 0; len--, rp++, nrp++) {
> + char *name = &sdp->dtdo_strtab[rp->dofr_name];
> + dt_ident_t *idp = dt_dlib_get_func(dtp, name);
> + dtrace_difo_t *rdp;
> +
> + if (idp == NULL || /* not found */
> + idp->di_kind != DT_IDENT_SYMBOL || /* not interesting */
> + idp->di_flags & DT_IDFLG_REF) /* already seen */
> + continue;
> +
> + idp->di_flags |= DT_IDFLG_REF;
> +
> + rdp = dt_dlib_get_func_difo(idp);
> + idp->di_id = dt_link_construct(dtp, dp, rdp, stab, pcp, rcp);
> + nrp->dofr_data = idp->di_id; /* set value */
> + }
> +
> + return pc;
> +}
> +
> +static uint_t
> +dt_link_resolve(dtrace_hdl_t *dtp, dtrace_difo_t *dp)
> +{
> + struct bpf_insn *buf = dp->dtdo_buf;
> + uint_t len = dp->dtdo_brelen;
> + const dof_relodesc_t *rp = dp->dtdo_breltab;
> +
> + for (; len != 0; len--, rp++) {
> + uint_t idx;
> +
> + /*
> + * We are only relocating call instructions, so ignore any
> + * relocations of a type other than R_BPF_64_32 (cannot refer
> + * to a call instruction), and also ignore any relocations for
> + * non-call instructions.
> + */
> + if (rp->dofr_type != R_BPF_64_32)
> + continue;
> +
> + idx = rp->dofr_offset / sizeof(struct bpf_insn);
> + if (!BPF_IS_CALL(buf[idx]))
> + continue;
> +
> + buf[idx].imm = rp->dofr_data - idx - 1;
> + }
> +}
> +
> +static int
> +dt_link_stmt(dtrace_hdl_t *dtp, dtrace_prog_t *pgp, dtrace_stmtdesc_t *sdp,
> + void *data)
> +{
> + uint_t insc = 0;
> + uint_t relc = 0;
> + dtrace_difo_t *dp = sdp->dtsd_action->dtad_difo;
> + dtrace_difo_t *fdp;
> + dt_strtab_t *stab;
> +
> + /*
> + * Determine the layout of the final (linked) DIFO, and calculate the
> + * total isntruction and relocation record counts..
> + */
> + dt_link_layout(dtp, dp, &insc, &relc);
> + dt_dlib_reset(dtp, B_TRUE);
> +
> + /*
> + * Allocate memory for constructing the final DIFO.
> + */
> + fdp = dt_zalloc(dtp, sizeof(dtrace_difo_t));
> + if (fdp == NULL)
> + goto fail;
> + fdp->dtdo_buf = dt_calloc(dtp, insc, sizeof(struct bpf_insn));
> + if (fdp->dtdo_buf == NULL)
> + goto fail;
> + fdp->dtdo_len = insc;
> + fdp->dtdo_breltab = dt_calloc(dtp, relc, sizeof(dof_relodesc_t));
> + if (fdp->dtdo_breltab == NULL)
> + goto fail;
> + fdp->dtdo_brelen = relc;
> +
> + /*
> + * Construct the final DIFO (instruction buffer, relocation table, and
> + * string table.
> + */
> + insc = relc = 0;
> + stab = dt_strtab_create(BUFSIZ);
> + if (stab == NULL)
> + goto fail;
> +
> + dt_link_construct(dtp, fdp, dp, stab, &insc, &relc);
> + dt_dlib_reset(dtp, B_FALSE);
> +
> + /*
> + * Replace the program DIFO instruction buffer, BPF relocation table,
> + * and string table with the new versions.
> + */
> + dt_free(dtp, dp->dtdo_buf);
> + dp->dtdo_buf = fdp->dtdo_buf;
> + dt_free(dtp, dp->dtdo_strtab);
> + dp->dtdo_len = fdp->dtdo_len;
> + dt_free(dtp, dp->dtdo_breltab);
> + dp->dtdo_breltab = fdp->dtdo_breltab;
> + dp->dtdo_brelen = fdp->dtdo_brelen;
> +
> + /*
> + * Write out the new string table.
> + */
> + dp->dtdo_strlen = dt_strtab_size(stab);
> + dp->dtdo_strtab = dt_zalloc(dtp, fdp->dtdo_strlen);
> + if (dp->dtdo_strtab == NULL)
> + goto fail;
> + dt_strtab_write(stab, (dt_strtab_write_f *)dt_strtab_copystr,
> + dp->dtdo_strtab);
> + dt_strtab_destroy(stab);
> +
> + /*
> + * Resolve the function relocation records.
> + */
> + dt_link_resolve(dtp, dp);
> +
> + dt_free(dtp, fdp);
> +
> + return 0;
> +
> +fail:
> + dt_free(dtp, fdp->dtdo_breltab);
> + dt_free(dtp, fdp->dtdo_buf);
> + dt_free(dtp, fdp);
> +
> + return dt_set_errno(dtp, EDT_NOMEM);
> +}
> +
> +static int
> +dt_link(dtrace_hdl_t *dtp, dtrace_prog_t *pgp)
> +{
> + return dtrace_stmt_iter(dtp, pgp, dt_link_stmt, NULL);
> +}
> +
> dtrace_prog_t *
> dtrace_program_strcompile(dtrace_hdl_t *dtp, const char *s,
> dtrace_probespec_t spec, uint_t cflags, int argc, char *const argv[])
> @@ -2192,6 +2424,14 @@ dtrace_program_strcompile(dtrace_hdl_t *dtp, const char *s,
> if (cflags & DTRACE_C_DIFV && DT_DISASM(dtp, 1))
> dt_dis_program(dtp, rv, stderr);
>
> + if (dt_link(dtp, rv) != 0) {
> + dt_program_destroy(dtp, rv);
> + return NULL;
> + }
> +
> + if (cflags & DTRACE_C_DIFV && DT_DISASM(dtp, 2))
> + dt_dis_program(dtp, rv, stderr);
> +
> return rv;
> }
>
> @@ -2209,6 +2449,14 @@ dtrace_program_fcompile(dtrace_hdl_t *dtp, FILE *fp,
> if (cflags & DTRACE_C_DIFV && DT_DISASM(dtp, 1))
> dt_dis_program(dtp, rv, stderr);
>
> + if (dt_link(dtp, rv) != 0) {
> + dt_program_destroy(dtp, rv);
> + return NULL;
> + }
> +
> + if (cflags & DTRACE_C_DIFV && DT_DISASM(dtp, 2))
> + dt_dis_program(dtp, rv, stderr);
> +
> return rv;
> }
>
> diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
> index c3b2b902..58e01221 100644
> --- a/libdtrace/dt_dlibs.c
> +++ b/libdtrace/dt_dlibs.c
> @@ -111,11 +111,9 @@ dt_dlib_add_sym(dtrace_hdl_t *dtp, const char *name, int kind, void *data)
> {
> dt_idhash_t *dhp = dtp->dt_bpfsyms;
> dt_ident_t *idp;
> - uint_t id = DT_IDENT_UNDEF;
>
> - dt_idhash_nextid(dhp, &id);
> - idp = dt_idhash_insert(dhp, name, kind, DT_IDFLG_BPF, id, dt_bpf_attr,
> - DT_VERS_2_0, NULL, NULL, 0);
> + idp = dt_idhash_insert(dhp, name, kind, DT_IDFLG_BPF, DT_IDENT_UNDEF,
> + dt_bpf_attr, DT_VERS_2_0, NULL, NULL, 0);
> dt_ident_set_data(idp, data);
>
> return idp;
> @@ -172,6 +170,45 @@ dt_dlib_get_var(dtrace_hdl_t *dtp, const char *name)
> return dt_dlib_get_sym(dtp, name, DT_IDENT_SCALAR);
> }
>
> +/*
> + * Return the DIFO for an external symbol.
> + */
> +dtrace_difo_t *
> +dt_dlib_get_func_difo(const dt_ident_t *idp)
> +{
> + assert(idp->di_kind == DT_IDENT_SYMBOL);
> + assert(idp->di_data != NULL);
> +
> + return ((dt_bpf_func_t *)idp->di_data)->difo;
> +}
> +
> +/*
> + * Reset the reference bit on BPF external symbols.
> + */
> +static int
> +dt_dlib_idreset(dt_idhash_t *dhp, dt_ident_t *idp, boolean_t *ref_only)
> +{
> + if (idp->di_kind == DT_IDENT_SYMBOL) {
> + idp->di_flags &= ~DT_IDFLG_REF;
> +
> + if (!*ref_only)
> + idp->di_id = DT_IDENT_UNDEF;
> + }
> + return 0;
> +}
> +
> +/*
> + * Reset all BPF external symbols. This removes the reference bit from the
> + * identifier flags to indicate that the symbols are not referenced by any
> + * code anymore.
> + */
> +void
> +dt_dlib_reset(dtrace_hdl_t *dtp, boolean_t ref_only)
> +{
> + dt_idhash_iter(dtp->dt_bpfsyms, (dt_idhash_f *)dt_dlib_idreset,
> + &ref_only);
> +}
> +
> /*
> * Compare function (used by qsort) to compare two BPF functions based on their
> * offset in the .text section. We are sorting in ascending offset order.
> diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
> index ca8b7851..efc87134 100644
> --- a/libdtrace/dt_impl.h
> +++ b/libdtrace/dt_impl.h
> @@ -568,9 +568,9 @@ enum {
> EDT_DIFVERS, /* library has newer DIF version than driver */
> EDT_BADAGG, /* unrecognized aggregating action */
> EDT_FIO, /* error occurred while reading from input stream */
> - EDT_BPFINVAL, /* invalid DIF program */
> - EDT_BPFSIZE, /* invalid DIF size */
> - EDT_BPFFAULT, /* failed to copyin DIF program */
> + EDT_BPFINVAL, /* invalid BPF program */
> + EDT_BPFSIZE, /* BPF program too large */
> + EDT_BPFFAULT, /* BPF program with invalid pointer */
> EDT_BPF, /* BPF error */
> EDT_BADPROBE, /* bad probe description */
> EDT_BADPGLOB, /* bad probe description globbing pattern */
> @@ -705,6 +705,8 @@ extern void dt_dlib_init(dtrace_hdl_t *dtp);
> extern dt_ident_t *dt_dlib_get_func(dtrace_hdl_t *, const char *);
> extern dt_ident_t *dt_dlib_get_map(dtrace_hdl_t *, const char *);
> extern dt_ident_t *dt_dlib_get_var(dtrace_hdl_t *, const char *);
> +extern dtrace_difo_t *dt_dlib_get_func_difo(const dt_ident_t *);
> +extern void dt_dlib_reset(dtrace_hdl_t *dtp, boolean_t);
> extern int dt_load_libs(dtrace_hdl_t *dtp);
>
> extern void *dt_compile(dtrace_hdl_t *dtp, int context,
More information about the DTrace-devel
mailing list