[DTrace-devel] [PATCH v5 1/6] usdt: get arg types and xlations into DTrace from the DOF

Kris Van Hees kris.van.hees at oracle.com
Tue Nov 5 16:56:33 UTC 2024


On Tue, Nov 05, 2024 at 12:06:03AM +0000, Nick Alcock wrote:
> This change propagates native and xlated arg types and mapping info all
> the way from the DOF, through the parser and dtprobed and the DOF stash,
> into DTrace, where they end up in the pid_probespec_t for USDT probes.
> We don't do anything with them once they get there in this commit: no
> user-visible impacts expected.
> 
> We bump the DOF_PARSED_VERSION since we add three new types of record to the
> dof_parser_t, all optional, covering native and xlated args and arg mapping
> tables; to make life easier for consumers we emit them in a defined order
> (documented in dof_parser.h), and add arg counts to the DIT_PROBE record
> that precedes them indicating which will be present and how many args are in
> them.  This means we retain the property that you can always tell which
> records within a provider are coming next purely from records you already
> have: there's no need to make decisions once the records turn up.  The DOF
> stash hardly changes: all that happens is that the parsed data written to
> each per-probe file gains some extra types of record (it can have
> DIT_ARGS_NATIVE, DIT_ARGS_XLAT and DIT_ARGS_MAP entries following the
> DIT_PROBE record now).
> 
> As usual with DOF_PARSED_VERSION bumps, DTraces that cross this change will
> refuse to read probes added by dtprobeds that are on the other side of it.
> (Restarting dtprobed will reparse all the probes in the stash to match the
> new layout, and newly-started DTraces will pick that up -- so this only
> affects running DTraces picking up new probes, which DTrace cannot do yet:
> so no visible effects are expected.)
> 
> The code is a bit repetitive, with nargs, xargs and arg mapping all handled
> separately even though the code is very similar.  In future maybe we could
> unify them (but my attempts led to code that was much harder to read than
> the original).
> 
> Signed-off-by: Nick Alcock <nick.alcock at oracle.com>

Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>

> ---
>  dtprobed/dof_stash.c   |  15 +++--
>  dtprobed/dtprobed.c    |  10 ++-
>  include/dtrace/pid.h   |   7 ++
>  libcommon/dof_parser.c | 150 +++++++++++++++++++++++++++++++----------
>  libcommon/dof_parser.h |  64 ++++++++++++++++--
>  libdtrace/dt_pid.c     |  59 ++++++++++++++++
>  6 files changed, 256 insertions(+), 49 deletions(-)
> 
> diff --git a/dtprobed/dof_stash.c b/dtprobed/dof_stash.c
> index 0e639076f655..8bf465678217 100644
> --- a/dtprobed/dof_stash.c
> +++ b/dtprobed/dof_stash.c
> @@ -59,8 +59,10 @@
>   *    startup if the dof_parsed_t structure changed.  The file consists of a
>   *    uint64_t version number (DOF_PARSED_VERSION), then a stream of
>   *    dof_parsed_t records, the first of type DIT_PROVIDER, the second
> - *    DIT_PROBE, then as many struct dof_parsed_t's of type DIT_TRACEPOINT as
> - *    the DIT_PROBE record specifies.
> + *    DIT_PROBE, then optionally some DIT_*ARGS records (if the count of native
> + *    args in the probe is >0, you get a DIT_NATIVE_ARGS: if the count of xlated
> + *    args is >0, you get DIT_XLAT_ARGS and DIT_MAP_ARGS), then as many struct
> + *    dof_parsed_t's of type DIT_TRACEPOINT as the DIT_PROBE record specifies.
>   *
>   * /run/dtrace/probes/: Per-probe info, written by dtprobed, read by DTrace.
>   *
> @@ -640,9 +642,14 @@ dof_stash_write_parsed(pid_t pid, dev_t dev, ino_t ino, dt_list_t *accum)
>  			break;
>  		}
>  
> -		/* Tracepoint: add to already-open file.  */
> +		/* Args info or tracepoint: add to already-open file.  */
> +		case DIT_ARGS_NATIVE:
> +		case DIT_ARGS_XLAT:
> +		case DIT_ARGS_MAP:
>  		case DIT_TRACEPOINT:
> -			assert(state == DIT_PROBE || state == DIT_TRACEPOINT);
> +			assert(state == DIT_PROBE || state == DIT_ARGS_NATIVE ||
> +			       state == DIT_ARGS_XLAT || state == DIT_ARGS_MAP ||
> +			       state == DIT_TRACEPOINT);
>  			state = accump->parsed->type;
>  
>  			if (write_chunk(parsed_fd, accump->parsed, accump->parsed->size) < 0)
> diff --git a/dtprobed/dtprobed.c b/dtprobed/dtprobed.c
> index f4c6be1e045d..2ca39b26f88a 100644
> --- a/dtprobed/dtprobed.c
> +++ b/dtprobed/dtprobed.c
> @@ -791,16 +791,20 @@ process_dof(pid_t pid, int out, int in, dev_t dev, ino_t inum, dev_t exec_dev,
>  		if (dof_stash_push_parsed(&accum, probe) < 0)
>  			goto oom;
>  
> -		for (j = 0; j < probe->probe.ntp; j++) {
> +		j = 0;
> +		do {
>  			dof_parsed_t *tp = dof_read(pid, in);
>  
>  			errmsg = "no tracepoints in a probe, or parse state corrupt";
> -			if (!tp || tp->type != DIT_TRACEPOINT)
> +			if (!tp || tp->type == DIT_PROVIDER || tp->type == DIT_PROBE)
>  				goto err;
>  
>  			if (dof_stash_push_parsed(&accum, tp) < 0)
>  				goto oom;
> -		}
> +
> +			if (tp->type == DIT_TRACEPOINT)
> +				j++;
> +		} while (j < probe->probe.ntp);
>  	}
>  
>  	if (!reparsing)
> diff --git a/include/dtrace/pid.h b/include/dtrace/pid.h
> index 0129674adf0c..25bfacfdbfe2 100644
> --- a/include/dtrace/pid.h
> +++ b/include/dtrace/pid.h
> @@ -35,6 +35,13 @@ typedef struct pid_probespec {
>  	ino_t pps_inum;				/* object inode */
>  	char *pps_fn;				/* object full filename */
>  	uint64_t pps_off;			/* probe offset (in object) */
> +	int pps_nargc;				/* number of native args */
> +	int pps_xargc;				/* number of xlated and mapped args */
> +	char *pps_nargv;			/* array of native args */
> +	size_t pps_nargvlen;			/* (high estimate of) length of array */
> +	char *pps_xargv;			/* array of xlated args */
> +	size_t pps_xargvlen;			/* (high estimate of) length of array */
> +	int8_t *pps_argmap;			/* mapped arg indexes */
>  
>  	/*
>  	 * Fields below this point do not apply to underlying probes.
> diff --git a/libcommon/dof_parser.c b/libcommon/dof_parser.c
> index d6631a1afdcb..1792a8bfcc84 100644
> --- a/libcommon/dof_parser.c
> +++ b/libcommon/dof_parser.c
> @@ -807,10 +807,11 @@ validate_provider(int out, dof_hdr_t *dof, dof_sec_t *sec)
>  		}
>  
>  		dt_dbg_dof("      Probe %d %s:%s:%s:%s with %d offsets, "
> -			   "%d is-enabled offsets\n", j,
> +			   "%d is-enabled offsets, %i args, %i nargs, argidx %i\n", j,
>  			   strtab + prov->dofpv_name, "",
>  			   strtab + prb->dofpr_func, strtab + prb->dofpr_name,
> -			   prb->dofpr_noffs, prb->dofpr_nenoffs);
> +			   prb->dofpr_noffs, prb->dofpr_nenoffs,
> +			   prb->dofpr_xargc, prb->dofpr_nargc, prb->dofpr_argidx);
>  	}
>  
>  	return 0;
> @@ -879,12 +880,26 @@ validate_probe(int out, dtrace_helper_probedesc_t *dhpb)
>  	return 0;
>  }
>  
> +static size_t
> +strings_len(const char *strtab, size_t count)
> +{
> +	size_t len = 0;
> +
> +	for (; count > 0; count--) {
> +		size_t this_len = strlen(strtab) + 1;
> +
> +		len += this_len;
> +		strtab += this_len;
> +	}
> +	return len;
> +}
> +
>  static void
>  emit_probe(int out, dtrace_helper_probedesc_t *dhpb)
>  {
>  	int		i;
> -	dof_parsed_t	*probe_msg;
> -	size_t		probe_msg_size;
> +	dof_parsed_t	*msg;
> +	size_t		msg_size;
>  	char		*ptr;
>  
>  	dt_dbg_dof("      Creating probe %s:%s:%s:%s\n", dhpb->dthpb_prov,
> @@ -893,35 +908,106 @@ emit_probe(int out, dtrace_helper_probedesc_t *dhpb)
>  	if (validate_probe(out, dhpb) != 0)
>  		return;
>  
> -	probe_msg_size = offsetof(dof_parsed_t, probe.name) +
> -	    strlen(dhpb->dthpb_mod) + 1 + strlen(dhpb->dthpb_func) + 1 +
> -	    strlen(dhpb->dthpb_name) + 1;
> +	/*
> +	 * Compute the size of all the optional elements first, to fill out the
> +	 * flags.
> +	 */
>  
> -	probe_msg = malloc(probe_msg_size);
> -	if (!probe_msg) {
> -		dof_error(out, ENOMEM, "Out of memory allocating probe");
> -		return;
> -	}
> +	msg_size = offsetof(dof_parsed_t, probe.name) +
> +		   strlen(dhpb->dthpb_mod) + 1 +
> +		   strlen(dhpb->dthpb_func) + 1 +
> +		   strlen(dhpb->dthpb_name) + 1;
>  
> -	memset(probe_msg, 0, probe_msg_size);
> +	msg = malloc(msg_size);
> +	if (!msg)
> +		goto oom;
>  
> -	probe_msg->size = probe_msg_size;
> -	probe_msg->type = DIT_PROBE;
> -	probe_msg->probe.ntp = dhpb->dthpb_noffs + dhpb->dthpb_nenoffs;
> -	ptr = stpcpy(probe_msg->probe.name, dhpb->dthpb_mod);
> +	memset(msg, 0, msg_size);
> +
> +	msg->size = msg_size;
> +	msg->type = DIT_PROBE;
> +	msg->probe.ntp = dhpb->dthpb_noffs + dhpb->dthpb_nenoffs;
> +	msg->probe.nargc = dhpb->dthpb_nargc;
> +	msg->probe.xargc = dhpb->dthpb_xargc;
> +
> +	ptr = stpcpy(msg->probe.name, dhpb->dthpb_mod);
>  	ptr++;
>  	ptr = stpcpy(ptr, dhpb->dthpb_func);
>  	ptr++;
>  	strcpy(ptr, dhpb->dthpb_name);
> -	dof_parser_write_one(out, probe_msg, probe_msg_size);
> +	dof_parser_write_one(out, msg, msg_size);
>  
> -	free(probe_msg);
> +	free(msg);
>  
> -	/* XXX TODO translated args
> -	   pp->ftp_nargs = dhpb->dthpb_xargc;
> -	   pp->ftp_xtypes = dhpb->dthpb_xtypes;
> -	   pp->ftp_ntypes = dhpb->dthpb_ntypes;
> -	*/
> +	/*
> +	 * Emit info on all native and translated args in turn.
> +	 *
> +	 * FIXME: this code is a bit repetitious.
> +	 *
> +	 * First native args (if any).
> +	 */
> +
> +	if (dhpb->dthpb_nargc > 0) {
> +		size_t	nargs_size;
> +
> +		nargs_size = strings_len(dhpb->dthpb_ntypes, dhpb->dthpb_nargc);
> +		msg_size = offsetof(dof_parsed_t, nargs.args) + nargs_size;
> +
> +		msg = malloc(msg_size);
> +		if (!msg)
> +			goto oom;
> +
> +		memset(msg, 0, msg_size);
> +
> +		msg->size = msg_size;
> +		msg->type = DIT_ARGS_NATIVE;
> +		memcpy(msg->nargs.args, dhpb->dthpb_ntypes, nargs_size);
> +		dof_parser_write_one(out, msg, msg_size);
> +
> +		free(msg);
> +
> +		/* Then translated args. */
> +
> +		if (dhpb->dthpb_xargc > 0) {
> +			size_t	xargs_size, map_size;
> +
> +			xargs_size = strings_len(dhpb->dthpb_xtypes,
> +						 dhpb->dthpb_xargc);
> +			msg_size = offsetof(dof_parsed_t, xargs.args) +
> +				   xargs_size;
> +
> +			msg = malloc(msg_size);
> +			if (!msg)
> +				goto oom;
> +
> +			memset(msg, 0, msg_size);
> +
> +			msg->size = msg_size;
> +			msg->type = DIT_ARGS_XLAT;
> +			memcpy(msg->xargs.args, dhpb->dthpb_xtypes, xargs_size);
> +			dof_parser_write_one(out, msg, msg_size);
> +
> +			free(msg);
> +
> +			/* Then the mapping table. */
> +
> +			map_size = dhpb->dthpb_xargc * sizeof(int8_t);
> +			msg_size = offsetof(dof_parsed_t, argmap.argmap) +
> +				   map_size;
> +
> +			msg = malloc(msg_size);
> +			if (!msg)
> +				goto oom;
> +
> +			memset(msg, 0, msg_size);
> +
> +			msg->size = msg_size;
> +			msg->type = DIT_ARGS_MAP;
> +			memcpy(msg->argmap.argmap, dhpb->dthpb_args, map_size);
> +			dof_parser_write_one(out, msg, msg_size);
> +			free(msg);
> +		}
> +	}
>  
>  	/*
>  	 * Emit info on each tracepoint in turn.
> @@ -938,18 +1024,10 @@ emit_probe(int out, dtrace_helper_probedesc_t *dhpb)
>  	for (i = 0; i < dhpb->dthpb_nenoffs; i++)
>  		emit_tp(out, dhpb->dthpb_base, dhpb->dthpb_enoffs[i], 1);
>  
> -	/*
> -	 * XXX later:
> -	 * If the arguments are shuffled around we set the argument remapping
> -	 * table. Later, when the probe fires, we only remap the arguments
> -	 * if the table is non-NULL.
> -	 *
> -	for (i = 0; i < dhpb->dthpb_xargc; i++) {
> -		if (dhpb->dthpb_args[i] != i) {
> -			pp->ftp_argmap = dhpb->dthpb_args;
> -			break;
> -		}
> -	} */
> +	return;
> + oom:
> +	dof_error(out, ENOMEM, "Out of memory allocating %zi bytes for probe",
> +		  msg_size);
>  }
>  
>  static void
> diff --git a/libcommon/dof_parser.h b/libcommon/dof_parser.h
> index d3a6836f15fd..8f42d00551e3 100644
> --- a/libcommon/dof_parser.h
> +++ b/libcommon/dof_parser.h
> @@ -15,10 +15,19 @@
>  #include <dtrace/helpers.h>
>  
>  /*
> - * Result of DOF probe parsing.  We receive a provider info structure, followed
> - * by N probe info structures each of which is followed by possibly many
> - * tracepoint info structures, all tagged.  Things not yet used for probe
> - * creation (like args, translated types, etc) are not returned.
> + * Result of DOF probe parsing.  The order of elements in the parsed stream
> + * is:
> + *
> + * DIT_PROVIDER (at least 1, which contains...)
> + *   DIT_PROBE (at least 1, each of which has...)
> + *     DIT_ARGS_NATIVE (1, optional)
> + *     DIT_ARGS_XLAT (1, optional)
> + *     DIT_ARGS_MAP (1, optional)
> + *     DIT_TRACEPOINT (any number >= 1)
> + *
> + * The dof_parsed.provider.flags word indicates the presence of the
> + * various optional args records in the following stream (you can rely on
> + * them if it simplifies things, but you don't have to).
>   *
>   * On error, a DIT_ERR structure is returned with an error message.
>   */
> @@ -27,7 +36,10 @@ typedef enum dof_parsed_info {
>  	DIT_PROVIDER = 0,
>  	DIT_PROBE = 1,
>  	DIT_TRACEPOINT = 2,
> -	DIT_ERR = 3
> +	DIT_ERR = 3,
> +	DIT_ARGS_NATIVE = 4,
> +	DIT_ARGS_XLAT = 5,
> +	DIT_ARGS_MAP = 6,
>  } dof_parsed_info_t;
>  
>  /*
> @@ -37,7 +49,7 @@ typedef enum dof_parsed_info {
>   * start which is the version of the dof_parseds within it.  The data flowing
>   * over the stream from the seccomped parser has no such prefix.
>   */
> -#define DOF_PARSED_VERSION 1
> +#define DOF_PARSED_VERSION 2
>  
>  typedef struct dof_parsed {
>  	/*
> @@ -59,18 +71,58 @@ typedef struct dof_parsed {
>  			 */
>  			char name[1];
>  		} provider;
> +
>  		struct dpi_probe_info {
>  			/*
>  			 * Number of tracepoints that follow.
>  			 */
>  			size_t ntp;
>  
> +			/*
> +			 * Number of native arguments that follow (if > 0, a
> +			 * DIT_ARGS_NATIVE will be received).
> +			 */
> +			size_t nargc;
> +
> +			/*
> +			 * Number of xlated arguments that follow (if > 0, a
> +			 * DIT_ARGS_XLAT and DIT_ARGS_MAP will be received).
> +			 */
> +			size_t xargc;
> +
>  			/*
>  			 * Probe module, function, and name (\0-separated).
>  			 */
>  			char name[1];
>  		} probe;
>  
> +		/* V2+ only.  */
> +		struct dpi_probe_args_native_info {
> +			/*
> +			 * Array of native args.  nargc in length.
> +			 */
> +			char args[1];
> +		} nargs;
> +
> +		/* V2+ only.  */
> +		struct dpi_probe_args_xlat_info {
> +			/*
> +			 * Array of translated args.  xargc in length.
> +			 */
> +			char args[1];
> +		} xargs;
> +
> +		/*
> +		 * V2+ only.
> +		 */
> +		struct dpi_probe_args_map_info {
> +			/*
> +			 * Mapping from native arg index to xlated arg index.
> +			 * xargc in length.
> +			 */
> +			int8_t argmap[1];
> +		} argmap;
> +
>  		struct dpi_tracepoint_info {
>  			/*
>  			 * Offset of this tracepoint.
> diff --git a/libdtrace/dt_pid.c b/libdtrace/dt_pid.c
> index 41a87955ec1b..71ac1754d343 100644
> --- a/libdtrace/dt_pid.c
> +++ b/libdtrace/dt_pid.c
> @@ -866,6 +866,9 @@ dt_pid_create_usdt_probes_proc(dtrace_hdl_t *dtp, dt_proc_t *dpr,
>  		uint64_t *dof_version;
>  		char *prv, *mod, *fun, *prb;
>  		dof_parsed_t *provider, *probe;
> +		ssize_t nargvlen = 0, xargvlen = 0;
> +		char *nargv = NULL, *xargv = NULL;
> +		int8_t *argmap = NULL;
>  
>  		/*
>  		 * Regular files only: in particular, skip . and ..,
> @@ -917,6 +920,47 @@ dt_pid_create_usdt_probes_proc(dtrace_hdl_t *dtp, dt_proc_t *dpr,
>  		p += probe->size;
>  		seen_size += probe->size;
>  
> +		/*
> +		 * Assume the order given in dof_parser.h, for simplicity.
> +		 */
> +		if (probe->probe.nargc > 0) {
> +			dof_parsed_t *args = (dof_parsed_t *) p;
> +
> +			if (!validate_dof_record(path, args, DIT_ARGS_NATIVE,
> +						 dof_buf_size, seen_size))
> +				goto parse_err;
> +
> +			nargv = args->nargs.args;
> +			nargvlen = args->size - offsetof(dof_parsed_t, nargs.args);
> +			assert(nargvlen >= 0);
> +
> +			p += args->size;
> +			seen_size += args->size;
> +		}
> +		if (probe->probe.xargc > 0) {
> +			dof_parsed_t *args = (dof_parsed_t *) p;
> +
> +			if (!validate_dof_record(path, args, DIT_ARGS_XLAT,
> +						 dof_buf_size, seen_size))
> +				goto parse_err;
> +
> +			xargv = args->xargs.args;
> +			xargvlen = args->size - offsetof(dof_parsed_t, xargs.args);
> +			assert(xargvlen >= 0);
> +
> +			p += args->size;
> +			seen_size += args->size;
> +
> +			if (!validate_dof_record(path, args, DIT_ARGS_MAP,
> +						 dof_buf_size, seen_size))
> +				goto parse_err;
> +
> +			argmap = args->argmap.argmap;
> +
> +			p += args->size;
> +			seen_size += args->size;
> +		}
> +
>  		/*
>  		 * Now the parsed DOF for this probe's tracepoints.
>  		 */
> @@ -967,6 +1011,21 @@ dt_pid_create_usdt_probes_proc(dtrace_hdl_t *dtp, dt_proc_t *dpr,
>  			psp.pps_off = tp->tracepoint.addr - pmp->pr_file->first_segment->pr_vaddr;
>  			psp.pps_nameoff = 0;
>  
> +			if (nargv) {
> +				psp.pps_nargc = probe->probe.nargc;
> +				psp.pps_nargvlen = nargvlen;
> +				psp.pps_nargv = nargv;
> +			}
> +
> +			if (xargv) {
> +				psp.pps_xargc = probe->probe.xargc;
> +				psp.pps_xargvlen = xargvlen;
> +				psp.pps_xargv = xargv;
> +			}
> +
> +			if (argmap)
> +				psp.pps_argmap = argmap;
> +
>  			dt_dprintf("providing %s:%s:%s:%s for pid %d\n", psp.pps_prv,
>  				   psp.pps_mod, psp.pps_fun, psp.pps_prb, psp.pps_pid);
>  			if (pvp->impl->provide_probe(dtp, &psp) < 0) {
> -- 
> 2.46.0.278.g36e3a12567
> 



More information about the DTrace-devel mailing list