[DTrace-devel] [PATCH] Implement linking in of precompiled BPF code

Eugene Loh eugene.loh at oracle.com
Wed Apr 8 15:31:19 PDT 2020


On 04/08/2020 08:00 AM, Kris Van Hees wrote:

> Some functionality for D programs is provided as function written in
> C and precompiled into BPF instructions (at DTrace build time).  These
> functions are available as an ELF object (bpf_dlib.o).  WHen a compiled

WHen -> When

> D program requires the use of a precompiled function, that function and
> all its dependencies are linked into the program.  When a function is
> used more than once, only one copy will be linked in (obviously).

Remove "obviously"?  (Not necessary and sounds funny.)

> The linking of code (implemented as a resolving of relocations that
> reference as-yet undefined symbols) takes place right after the D code
> has been compiled into a DIFO.  It involves a three-phase approach:
>
> 1. We perform a recursive walk of the dependency tree encoded in the
>     relocation tables.  The main program lists the functions it depends
>     on in the relocation tables, and each function in term lists all of
>     its dependencies in its own relocation table.  As we follow the chain
>     of dependent functions, we calculate the total number of instructions
>     that the final linked program will provide (sum of all unique
>     functions), the total number of relocations that the final linked
>     program will provide, and the offset of the entry point into each
>     function is recorded in the identifier for each of these functions.
>
> 2. We again perform a recursive walk of the dependency tree encoded in
>     the relocation tables.  This time, we copy the instructions of each
>     function, along with its relocations (after adjusting the offset
>     based on the new entry address for the function, and setting the
>     data to the entry point of the called function).  We also construct
>     a combined string table.
>
> 3. Finally we process the combined relocation table and patch the code
>     accordingly.
>
> A disassembler listing of the linked program can be requested by using
> the -xdisasm=n option along with -S, where bit 2 is set in 'n'.

So you want to stick with the terminology set by DT_TREEDUMP_PASS. 
Okay.  But FWIW/IMHO, that makes stuff like the above sentence, 
referencing a user-visible option, confusing.  A lot of people think bit 
0 is 0x1, bit 2 is 0x4, etc.  Might it be shorter and at least as 
correct just to say, "by using -S with -xdisasm=2."  If they are 
otherwise using -xdisasm, they'll know they can bit-or other choices in 
as well.

> An earlier prototype of the BPF code linking process was implemented by
> Eugene Loh.

Thanks for the credit.  The current version is quite different, and I 
don't understand it well enough (yet) to give a careful review. Just a 
few minor things below.

> Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
> ---
>   libdtrace/dt_bpf.c   |  16 +--
>   libdtrace/dt_cc.c    | 248 +++++++++++++++++++++++++++++++++++++++++++
>   libdtrace/dt_dlibs.c |  45 +++++++-
>   libdtrace/dt_impl.h  |   8 +-
>   4 files changed, 302 insertions(+), 15 deletions(-)
>
> diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
> index 2c90411b..97d33ae2 100644
> --- a/libdtrace/dt_bpf.c
> +++ b/libdtrace/dt_bpf.c
> @@ -211,15 +211,15 @@ dt_bpf_reloc_prog(dtrace_hdl_t *dtp, const dt_probe_t *prp,
>   		uint32_t	val = 0;
>   
>   		switch (idp->di_kind) {
> -		case DT_IDENT_FUNC:
> +		case DT_IDENT_FUNC:	/* built-in function */
>   			continue;
> -		case DT_IDENT_PTR:
> +		case DT_IDENT_PTR:	/* BPF map */
>   			if (rp->dofr_type == R_BPF_64_64)
>   				text[ioff].src_reg = BPF_PSEUDO_MAP_FD;
>   
>   			val = idp->di_id;
>   			break;
> -		case DT_IDENT_SCALAR:
> +		case DT_IDENT_SCALAR:	/* built-in constant */
>   			switch (idp->di_id) {
>   			case DT_CONST_EPID:
>   				val = epid;
> @@ -230,7 +230,7 @@ dt_bpf_reloc_prog(dtrace_hdl_t *dtp, const dt_probe_t *prp,
>   			}
>   
>   			break;
> -		case DT_IDENT_SYMBOL:
> +		case DT_IDENT_SYMBOL:	/* BPF function (pre-compiled) */
>   			continue;
>   		}
>   
> @@ -289,9 +289,10 @@ dt_bpf_load_prog(dtrace_hdl_t *dtp, const dt_probe_t *prp,
>   		char				*p, *q;
>   
>   		rc = dt_bpf_error(dtp,
> -			"BPF program load for '%s:%s:%s:%s' failed: %s\n",
> -			pdp->prv, pdp->mod, pdp->fun, pdp->prb,
> -			strerror(errno));
> +				  "BPF program load for '%s:%s:%s:%s' failed: "
> +				  "%s\n",
> +				  pdp->prv, pdp->mod, pdp->fun, pdp->prb,
> +				  strerror(errno));
>   
>   		/*
>   		 * If there is BPF verifier output, print it with a "BPF: "
> @@ -350,7 +351,6 @@ dt_bpf_stmt(dtrace_hdl_t *dtp, dtrace_prog_t *pgp, dtrace_stmtdesc_t *sdp,
>   	    void *data)
>   {
>   	dtrace_probedesc_t	*pdp = &sdp->dtsd_ecbdesc->dted_probe;
> -	dtrace_actdesc_t	*ap = sdp->dtsd_action;
>   	dtrace_probeinfo_t	pip;
>   	dt_probe_t		*prp;
>   	int			fd, rc;
> diff --git a/libdtrace/dt_cc.c b/libdtrace/dt_cc.c
> index c8c6dd0c..fe7082e8 100644
> --- a/libdtrace/dt_cc.c
> +++ b/libdtrace/dt_cc.c
> @@ -2178,6 +2178,238 @@ out:
>   	return (err ? NULL : rv);
>   }
>   
> +static uint_t
> +dt_link_layout(dtrace_hdl_t *dtp, const dtrace_difo_t *dp, uint_t *insc,
> +	       uint_t *relc)
> +{
> +	uint_t			pc = *insc;
> +	uint_t			len = dp->dtdo_brelen;
> +	const dof_relodesc_t	*rp = dp->dtdo_breltab;
> +
> +	(*insc) += dp->dtdo_len;
> +	(*relc) += len;
> +	for (; len != 0; len--, rp++) {
> +		char            *name = &dp->dtdo_strtab[rp->dofr_name];
> +		dt_ident_t      *idp = dt_dlib_get_func(dtp, name);
> +		dtrace_difo_t   *rdp;
> +
> +		if (idp == NULL ||			/* not found */
> +		    idp->di_kind != DT_IDENT_SYMBOL ||	/* not interesting */

Would it be possible to be not much more verbose than "not interesting" 
and yet more illuminating?  Here and the other like occurrences of "not 
interesting."

> +		    idp->di_flags & DT_IDFLG_REF)       /* already seen */
> +			continue;
> +
> +		idp->di_flags |= DT_IDFLG_REF;
> +
> +		rdp = dt_dlib_get_func_difo(idp);
> +		idp->di_id = dt_link_layout(dtp, rdp, insc, relc);
> +	}
> +
> +	return pc;
> +}
> +
> +static uint_t
> +dt_link_construct(dtrace_hdl_t *dtp, dtrace_difo_t *dp,
> +		  const dtrace_difo_t *sdp, dt_strtab_t *stab,
> +		  uint_t *pcp, uint_t *rcp)
> +{
> +	uint_t			pc = *pcp;
> +	uint_t			rc = *rcp;
> +	uint_t			vlen = sdp->dtdo_varlen;
> +	dtrace_difv_t		*vp = sdp->dtdo_vartab;
> +	uint_t			len = sdp->dtdo_brelen;
> +	const dof_relodesc_t	*rp = sdp->dtdo_breltab;
> +	dof_relodesc_t		*nrp = &dp->dtdo_breltab[rc];
> +
> +	/*
> +	 * Make sure there is enough room in the destination instruction buffer
> +	 * and then copy the executable code for the current function into the
> +	 * final instruction buffer at the given program counter (instruction
> +	 * index).
> +	 */
> +	assert(pc + sdp->dtdo_len <= dp->dtdo_len);
> +	memcpy(dp->dtdo_buf + pc, sdp->dtdo_buf,
> +		sdp->dtdo_len * sizeof(struct bpf_insn));
> +	(*pcp) += sdp->dtdo_len;
> +
> +	/*
> +	 * Populate the (new) string table with any variable names for this
> +	 * DIFO.
> +	 */
> +	for (; vlen != 0; vlen--, vp++) {
> +		const char	*name = &sdp->dtdo_strtab[vp->dtdv_name];
> +
> +		vp->dtdv_name = dt_strtab_insert(stab, name);
> +	}
> +
> +	/*
> +	 * Next we copy the relocation table.  We do that before we run through
> +	 * it to process dependencies because we want relocations to be in
> +	 * strict ascending order by offset in the final relocation table.  The
> +	 * disassembler depends on that, as does the relocation resolver
> +	 * implementation.
> +	 */
> +	(*rcp) += len;
> +	for (; len != 0; len--, rp++, nrp++) {
> +		const char	*name = &sdp->dtdo_strtab[rp->dofr_name];
> +		dt_ident_t	*idp = dt_dlib_get_func(dtp, name);
> +
> +		nrp->dofr_name = dt_strtab_insert(stab, name);
> +		nrp->dofr_type = rp->dofr_type;
> +		nrp->dofr_offset = rp->dofr_offset +
> +				   pc * sizeof(struct bpf_insn);
> +		nrp->dofr_data = rp->dofr_data;
> +
> +		if (idp == NULL ||			/* not found */
> +		    idp->di_kind != DT_IDENT_SYMBOL)	/* not interesting */
> +			continue;
> +
> +		nrp->dofr_data = idp->di_id;		/* set value */
> +	}
> +
> +	/*
> +	 * Now go through the relocations one more, performing the actual
one -> once
> +	 * work of add executable code (and relocations) for dependencies.
add->adding
> +	 */
> +	len = sdp->dtdo_brelen;
> +	rp = sdp->dtdo_breltab;
> +	nrp = &dp->dtdo_breltab[rc];
> +	for (; len != 0; len--, rp++, nrp++) {
> +		char            *name = &sdp->dtdo_strtab[rp->dofr_name];
> +		dt_ident_t      *idp = dt_dlib_get_func(dtp, name);
> +		dtrace_difo_t   *rdp;
> +
> +		if (idp == NULL ||			/* not found */
> +		    idp->di_kind != DT_IDENT_SYMBOL ||	/* not interesting */
> +		    idp->di_flags & DT_IDFLG_REF)       /* already seen */
> +			continue;
> +
> +		idp->di_flags |= DT_IDFLG_REF;
> +
> +		rdp = dt_dlib_get_func_difo(idp);
> +		idp->di_id = dt_link_construct(dtp, dp, rdp, stab, pcp, rcp);
> +		nrp->dofr_data = idp->di_id;		/* set value */
> +	}
> +
> +	return pc;
> +}
> +
> +static uint_t
> +dt_link_resolve(dtrace_hdl_t *dtp, dtrace_difo_t *dp)
> +{
> +	struct bpf_insn		*buf = dp->dtdo_buf;
> +	uint_t			len = dp->dtdo_brelen;
> +	const dof_relodesc_t	*rp = dp->dtdo_breltab;
> +
> +	for (; len != 0; len--, rp++) {
> +		uint_t	idx;
> +
> +		/*
> +		 * We are only relocating call instructions, so ignore any
> +		 * relocations of a type other than R_BPF_64_32 (cannot refer
> +		 * to a call instruction), and also ignore any relocations for
> +		 * non-call instructions.
> +		 */
> +		if (rp->dofr_type != R_BPF_64_32)
> +			continue;
> +
> +		idx = rp->dofr_offset / sizeof(struct bpf_insn);
> +		if (!BPF_IS_CALL(buf[idx]))
> +			continue;
> +
> +		buf[idx].imm = rp->dofr_data - idx - 1;
> +	}
> +}
> +
> +static int
> +dt_link_stmt(dtrace_hdl_t *dtp, dtrace_prog_t *pgp, dtrace_stmtdesc_t *sdp,
> +	     void *data)
> +{
> +	uint_t		insc = 0;
> +	uint_t		relc = 0;
> +	dtrace_difo_t	*dp = sdp->dtsd_action->dtad_difo;
> +	dtrace_difo_t	*fdp;
> +	dt_strtab_t	*stab;
> +
> +	/*
> +	 * Determine the layout of the final (linked) DIFO, and calculate the
> +	 * total isntruction and relocation record counts..
> +	 */
> +	dt_link_layout(dtp, dp, &insc, &relc);
> +	dt_dlib_reset(dtp, B_TRUE);
> +
> +	/*
> +	 * Allocate memory for constructing the final DIFO.
> +	 */
> +	fdp = dt_zalloc(dtp, sizeof(dtrace_difo_t));
> +	if (fdp == NULL)
> +		goto fail;
> +	fdp->dtdo_buf = dt_calloc(dtp, insc, sizeof(struct bpf_insn));
> +	if (fdp->dtdo_buf == NULL)
> +		goto fail;
> +	fdp->dtdo_len = insc;
> +	fdp->dtdo_breltab = dt_calloc(dtp, relc, sizeof(dof_relodesc_t));
> +	if (fdp->dtdo_breltab == NULL)
> +		goto fail;
> +	fdp->dtdo_brelen = relc;
> +
> +	/*
> +	 * Construct the final DIFO (instruction buffer, relocation table, and
> +	 * string table.
> +	 */
> +	insc = relc = 0;
> +	stab = dt_strtab_create(BUFSIZ);
> +	if (stab == NULL)
> +		goto fail;
> +
> +	dt_link_construct(dtp, fdp, dp, stab, &insc, &relc);
> +	dt_dlib_reset(dtp, B_FALSE);
> +
> +	/*
> +	 * Replace the program DIFO instruction buffer, BPF relocation table,
> +	 * and string table with the new versions.
> +	 */
> +	dt_free(dtp, dp->dtdo_buf);
> +	dp->dtdo_buf = fdp->dtdo_buf;
> +	dt_free(dtp, dp->dtdo_strtab);
> +	dp->dtdo_len = fdp->dtdo_len;
> +	dt_free(dtp, dp->dtdo_breltab);
> +	dp->dtdo_breltab = fdp->dtdo_breltab;
> +	dp->dtdo_brelen = fdp->dtdo_brelen;
> +
> +	/*
> +	 * Write out the new string table.
> +	 */
> +	dp->dtdo_strlen = dt_strtab_size(stab);
> +	dp->dtdo_strtab = dt_zalloc(dtp, fdp->dtdo_strlen);
> +	if (dp->dtdo_strtab == NULL)
> +		goto fail;
> +	dt_strtab_write(stab, (dt_strtab_write_f *)dt_strtab_copystr,
> +			dp->dtdo_strtab);
> +	dt_strtab_destroy(stab);
> +
> +	/*
> +	 * Resolve the function relocation records.
> +	 */
> +	dt_link_resolve(dtp, dp);
> +
> +	dt_free(dtp, fdp);
> +
> +	return 0;
> +
> +fail:
> +	dt_free(dtp, fdp->dtdo_breltab);
> +	dt_free(dtp, fdp->dtdo_buf);
> +	dt_free(dtp, fdp);
> +
> +	return dt_set_errno(dtp, EDT_NOMEM);
> +}
> +
> +static int
> +dt_link(dtrace_hdl_t *dtp, dtrace_prog_t *pgp)
> +{
> +	return dtrace_stmt_iter(dtp, pgp, dt_link_stmt, NULL);
> +}
> +
>   dtrace_prog_t *
>   dtrace_program_strcompile(dtrace_hdl_t *dtp, const char *s,
>       dtrace_probespec_t spec, uint_t cflags, int argc, char *const argv[])
> @@ -2192,6 +2424,14 @@ dtrace_program_strcompile(dtrace_hdl_t *dtp, const char *s,
>   	if (cflags & DTRACE_C_DIFV && DT_DISASM(dtp, 1))
>   		dt_dis_program(dtp, rv, stderr);
>   
> +	if (dt_link(dtp, rv) != 0) {
> +		dt_program_destroy(dtp, rv);
> +		return NULL;
> +	}
> +
> +	if (cflags & DTRACE_C_DIFV && DT_DISASM(dtp, 2))
> +		dt_dis_program(dtp, rv, stderr);
> +
>   	return rv;
>   }
>   
> @@ -2209,6 +2449,14 @@ dtrace_program_fcompile(dtrace_hdl_t *dtp, FILE *fp,
>   	if (cflags & DTRACE_C_DIFV && DT_DISASM(dtp, 1))
>   		dt_dis_program(dtp, rv, stderr);
>   
> +	if (dt_link(dtp, rv) != 0) {
> +		dt_program_destroy(dtp, rv);
> +		return NULL;
> +	}
> +
> +	if (cflags & DTRACE_C_DIFV && DT_DISASM(dtp, 2))
> +		dt_dis_program(dtp, rv, stderr);
> +
>   	return rv;
>   }
>   
> diff --git a/libdtrace/dt_dlibs.c b/libdtrace/dt_dlibs.c
> index c3b2b902..58e01221 100644
> --- a/libdtrace/dt_dlibs.c
> +++ b/libdtrace/dt_dlibs.c
> @@ -111,11 +111,9 @@ dt_dlib_add_sym(dtrace_hdl_t *dtp, const char *name, int kind, void *data)
>   {
>   	dt_idhash_t	*dhp = dtp->dt_bpfsyms;
>   	dt_ident_t	*idp;
> -	uint_t		id = DT_IDENT_UNDEF;
>   
> -	dt_idhash_nextid(dhp, &id);
> -	idp = dt_idhash_insert(dhp, name, kind, DT_IDFLG_BPF, id, dt_bpf_attr,
> -			       DT_VERS_2_0, NULL, NULL, 0);
> +	idp = dt_idhash_insert(dhp, name, kind, DT_IDFLG_BPF, DT_IDENT_UNDEF,
> +			       dt_bpf_attr, DT_VERS_2_0, NULL, NULL, 0);
>   	dt_ident_set_data(idp, data);
>   
>   	return idp;
> @@ -172,6 +170,45 @@ dt_dlib_get_var(dtrace_hdl_t *dtp, const char *name)
>   	return dt_dlib_get_sym(dtp, name, DT_IDENT_SCALAR);
>   }
>   
> +/*
> + * Return the DIFO for an external symbol.
> + */
> +dtrace_difo_t *
> +dt_dlib_get_func_difo(const dt_ident_t *idp)
> +{
> +	assert(idp->di_kind == DT_IDENT_SYMBOL);
> +	assert(idp->di_data != NULL);
> +
> +	return ((dt_bpf_func_t *)idp->di_data)->difo;
> +}
> +
> +/*
> + * Reset the reference bit on BPF external symbols.
> + */
> +static int
> +dt_dlib_idreset(dt_idhash_t *dhp, dt_ident_t *idp, boolean_t *ref_only)
> +{
> +	if (idp->di_kind == DT_IDENT_SYMBOL) {
> +		idp->di_flags &= ~DT_IDFLG_REF;
> +
> +		if (!*ref_only)
> +			idp->di_id = DT_IDENT_UNDEF;
> +	}
> +	return 0;
> +}
> +
> +/*
> + * Reset all BPF external symbols.  This removes the reference bit from the
> + * identifier flags to indicate that the symbols are not referenced by any
> + * code anymore.
> + */
> +void
> +dt_dlib_reset(dtrace_hdl_t *dtp, boolean_t ref_only)
> +{
> +	dt_idhash_iter(dtp->dt_bpfsyms, (dt_idhash_f *)dt_dlib_idreset,
> +		       &ref_only);
> +}
> +
>   /*
>    * Compare function (used by qsort) to compare two BPF functions based on their
>    * offset in the .text section.  We are sorting in ascending offset order.
> diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
> index ca8b7851..efc87134 100644
> --- a/libdtrace/dt_impl.h
> +++ b/libdtrace/dt_impl.h
> @@ -568,9 +568,9 @@ enum {
>   	EDT_DIFVERS,		/* library has newer DIF version than driver */
>   	EDT_BADAGG,		/* unrecognized aggregating action */
>   	EDT_FIO,		/* error occurred while reading from input stream */
> -	EDT_BPFINVAL,		/* invalid DIF program */
> -	EDT_BPFSIZE,		/* invalid DIF size */
> -	EDT_BPFFAULT,		/* failed to copyin DIF program */
> +	EDT_BPFINVAL,		/* invalid BPF program */
> +	EDT_BPFSIZE,		/* BPF program too large */
> +	EDT_BPFFAULT,		/* BPF program with invalid pointer */
>   	EDT_BPF,		/* BPF error */
>   	EDT_BADPROBE,		/* bad probe description */
>   	EDT_BADPGLOB,		/* bad probe description globbing pattern */
> @@ -705,6 +705,8 @@ extern void dt_dlib_init(dtrace_hdl_t *dtp);
>   extern dt_ident_t *dt_dlib_get_func(dtrace_hdl_t *, const char *);
>   extern dt_ident_t *dt_dlib_get_map(dtrace_hdl_t *, const char *);
>   extern dt_ident_t *dt_dlib_get_var(dtrace_hdl_t *, const char *);
> +extern dtrace_difo_t *dt_dlib_get_func_difo(const dt_ident_t *);
> +extern void dt_dlib_reset(dtrace_hdl_t *dtp, boolean_t);
>   extern int dt_load_libs(dtrace_hdl_t *dtp);
>   
>   extern void *dt_compile(dtrace_hdl_t *dtp, int context,




More information about the DTrace-devel mailing list