[DTrace-devel] [PATCH v4 1/5] usdt: get arg types and xlations into DTrace from the DOF

Kris Van Hees kris.van.hees at oracle.com
Sat Nov 2 00:14:49 UTC 2024


On Fri, Nov 01, 2024 at 07:46:04PM -0400, Kris Van Hees via DTrace-devel wrote:
> Two comments below (one tiny thing, and one explanatory comment that I *think*
> I have a better suggestion for, but you may want to rephrase it).

Actually, another issue (see below).

> On Fri, Nov 01, 2024 at 03:57:08PM +0000, Nick Alcock via DTrace-devel wrote:
> > This change propagates native and xlated arg types and mapping info all
> > the way from the DOF, through the parser and dtprobed and the DOF stash,
> > into DTrace, where they end up in the pid_probespec_t for USDT probes.
> > We don't do anything with them once they get there in this commit: no
> > user-visible impacts expected.
> > 
> > We bump the DOF_PARSED_VERSION since we add three new types of record to the
> > dof_parser_t, all optional, covering native and xlated args and arg mapping
> > tables; to make life easier for consumers we emit them in a defined order
> > (documented in dof_parser.h), and add arg counts to the DIT_PROBE record
> > that precedes them indicating which will be present and how many args are in
> > them.  This means we retain the property that you can always tell which
> > records within a provider are coming next purely from records you already
> > have: there's no need to make decisions once the records turn up.  The DOF
> > stash hardly changes: all that happens is that the parsed data written to
> > each per-probe file gains some extra types of record (it can have
> > DIT_ARGS_NATIVE, DIT_ARGS_XLAT and DIT_ARGS_MAP entries following the
> > DIT_PROBE record now).
> > 
> > As usual with DOF_PARSED_VERSION bumps, DTraces that cross this change will
> > refuse to read probes added by dtprobeds that are on the other side of it.
> > (Restarting dtprobed will reparse all the probes in the stash to match the
> > new layout, and newly-started DTraces will pick that up -- so this only
> > affects running DTraces picking up new probes, which DTrace cannot do yet:
> > so no visible effects are expected.)
> > 
> > The code is a bit repetitive, with nargs, xargs and arg mapping all handled
> > separately even though the code is very similar.  In future maybe we could
> > unify them (but my attempts led to code that was much harder to read than
> > the original).
> > 
> > Signed-off-by: Nick Alcock <nick.alcock at oracle.com>
> > ---
> >  dtprobed/dof_stash.c   |  15 +++--
> >  dtprobed/dtprobed.c    |  10 ++-
> >  include/dtrace/pid.h   |   7 ++
> >  libcommon/dof_parser.c | 150 +++++++++++++++++++++++++++++++----------
> >  libcommon/dof_parser.h |  64 ++++++++++++++++--
> >  libdtrace/dt_pid.c     |  59 ++++++++++++++++
> >  6 files changed, 256 insertions(+), 49 deletions(-)
> > 
> > diff --git a/dtprobed/dof_stash.c b/dtprobed/dof_stash.c
> > index 0e639076f655..8bf465678217 100644
> > --- a/dtprobed/dof_stash.c
> > +++ b/dtprobed/dof_stash.c
> > @@ -59,8 +59,10 @@
> >   *    startup if the dof_parsed_t structure changed.  The file consists of a
> >   *    uint64_t version number (DOF_PARSED_VERSION), then a stream of
> >   *    dof_parsed_t records, the first of type DIT_PROVIDER, the second
> > - *    DIT_PROBE, then as many struct dof_parsed_t's of type DIT_TRACEPOINT as
> > - *    the DIT_PROBE record specifies.
> > + *    DIT_PROBE, then optionally some DIT_*ARGS records (if the count of native
> > + *    args in the probe is >0, you get a DIT_NATIVE_ARGS: if the count of xlated
> > + *    args is >0, you get DIT_XLAT_ARGS and DIT_MAP_ARGS), then as many struct
> > + *    dof_parsed_t's of type DIT_TRACEPOINT as the DIT_PROBE record specifies.
> >   *
> >   * /run/dtrace/probes/: Per-probe info, written by dtprobed, read by DTrace.
> >   *
> > @@ -640,9 +642,14 @@ dof_stash_write_parsed(pid_t pid, dev_t dev, ino_t ino, dt_list_t *accum)
> >  			break;
> >  		}
> >  
> > -		/* Tracepoint: add to already-open file.  */
> > +		/* Args info or tracepoint: add to already-open file.  */
> > +		case DIT_ARGS_NATIVE:
> > +		case DIT_ARGS_XLAT:
> > +		case DIT_ARGS_MAP:
> >  		case DIT_TRACEPOINT:
> > -			assert(state == DIT_PROBE || state == DIT_TRACEPOINT);
> > +			assert(state == DIT_PROBE || state == DIT_ARGS_NATIVE ||
> > +			       state == DIT_ARGS_XLAT || state == DIT_ARGS_MAP ||
> > +			       state == DIT_TRACEPOINT);
> >  			state = accump->parsed->type;
> >  
> >  			if (write_chunk(parsed_fd, accump->parsed, accump->parsed->size) < 0)
> > diff --git a/dtprobed/dtprobed.c b/dtprobed/dtprobed.c
> > index f4c6be1e045d..2ca39b26f88a 100644
> > --- a/dtprobed/dtprobed.c
> > +++ b/dtprobed/dtprobed.c
> > @@ -791,16 +791,20 @@ process_dof(pid_t pid, int out, int in, dev_t dev, ino_t inum, dev_t exec_dev,
> >  		if (dof_stash_push_parsed(&accum, probe) < 0)
> >  			goto oom;
> >  
> > -		for (j = 0; j < probe->probe.ntp; j++) {
> > +		j = 0;
> > +		do {
> >  			dof_parsed_t *tp = dof_read(pid, in);
> >  
> >  			errmsg = "no tracepoints in a probe, or parse state corrupt";
> > -			if (!tp || tp->type != DIT_TRACEPOINT)
> > +			if (!tp || tp->type == DIT_PROVIDER || tp->type == DIT_PROBE)
> >  				goto err;
> >  
> >  			if (dof_stash_push_parsed(&accum, tp) < 0)
> >  				goto oom;
> > -		}
> > +
> > +			if (tp->type == DIT_TRACEPOINT)
> > +				j++;
> > +		} while (j < probe->probe.ntp);
> >  	}
> >  
> >  	if (!reparsing)
> > diff --git a/include/dtrace/pid.h b/include/dtrace/pid.h
> > index 0129674adf0c..25bfacfdbfe2 100644
> > --- a/include/dtrace/pid.h
> > +++ b/include/dtrace/pid.h
> > @@ -35,6 +35,13 @@ typedef struct pid_probespec {
> >  	ino_t pps_inum;				/* object inode */
> >  	char *pps_fn;				/* object full filename */
> >  	uint64_t pps_off;			/* probe offset (in object) */
> > +	int pps_nargc;				/* number of native args */
> > +	int pps_xargc;				/* number of xlated and mapped args */
> > +	char *pps_nargv;			/* array of native args */
> > +	size_t pps_nargvlen;			/* (high estimate of) length of array */
> > +	char *pps_xargv;			/* array of xlated args */
> > +	size_t pps_xargvlen;			/* (high estimate of) length of array */
> > +	int8_t *pps_argmap;			/* mapped arg indexes */
> >  
> >  	/*
> >  	 * Fields below this point do not apply to underlying probes.
> > diff --git a/libcommon/dof_parser.c b/libcommon/dof_parser.c
> > index d6631a1afdcb..dd3a6cd2b887 100644
> > --- a/libcommon/dof_parser.c
> > +++ b/libcommon/dof_parser.c
> > @@ -807,10 +807,11 @@ validate_provider(int out, dof_hdr_t *dof, dof_sec_t *sec)
> >  		}
> >  
> >  		dt_dbg_dof("      Probe %d %s:%s:%s:%s with %d offsets, "
> > -			   "%d is-enabled offsets\n", j,
> > +			   "%d is-enabled offsets, %i args, %i nargs, argidx %i\n", j,
> >  			   strtab + prov->dofpv_name, "",
> >  			   strtab + prb->dofpr_func, strtab + prb->dofpr_name,
> > -			   prb->dofpr_noffs, prb->dofpr_nenoffs);
> > +			   prb->dofpr_noffs, prb->dofpr_nenoffs,
> > +			   prb->dofpr_xargc, prb->dofpr_nargc, prb->dofpr_argidx);
> >  	}
> >  
> >  	return 0;
> > @@ -879,12 +880,26 @@ validate_probe(int out, dtrace_helper_probedesc_t *dhpb)
> >  	return 0;
> >  }
> >  
> > +static size_t
> > +strings_len(const char *strtab, size_t count)
> > +{
> > +	size_t len = 0;
> > +
> > +	for (; count > 0; count--) {
> > +		size_t this_len = strlen(strtab) + 1;
> > +
> > +		len += this_len;
> > +		strtab += this_len;
> > +	}
> > +	return len;
> > +}

What if the strtab blob does not contain enough strings?  There is no check
here to ensure that you do not start reading past the end of data?

> > +
> >  static void
> >  emit_probe(int out, dtrace_helper_probedesc_t *dhpb)
> >  {
> >  	int		i;
> > -	dof_parsed_t	*probe_msg;
> > -	size_t		probe_msg_size;
> > +	dof_parsed_t	*msg;
> > +	size_t		msg_size;
> >  	char		*ptr;
> >  
> >  	dt_dbg_dof("      Creating probe %s:%s:%s:%s\n", dhpb->dthpb_prov,
> > @@ -893,35 +908,106 @@ emit_probe(int out, dtrace_helper_probedesc_t *dhpb)
> >  	if (validate_probe(out, dhpb) != 0)
> >  		return;
> >  
> > -	probe_msg_size = offsetof(dof_parsed_t, probe.name) +
> > -	    strlen(dhpb->dthpb_mod) + 1 + strlen(dhpb->dthpb_func) + 1 +
> > -	    strlen(dhpb->dthpb_name) + 1;
> > +	/*
> > +	 * Compute the size of all the optional elements first, to fill out the
> > +	 * flags.
> > +	 */
> >  
> > -	probe_msg = malloc(probe_msg_size);
> > -	if (!probe_msg) {
> > -		dof_error(out, ENOMEM, "Out of memory allocating probe");
> > -		return;
> > -	}
> > +	msg_size = offsetof(dof_parsed_t, probe.name) +
> > +		   strlen(dhpb->dthpb_mod) + 1 +
> > +		   strlen(dhpb->dthpb_func) + 1 +
> > +		   strlen(dhpb->dthpb_name) + 1;
> >  
> > -	memset(probe_msg, 0, probe_msg_size);
> > +	msg = malloc(msg_size);
> > +	if (!msg)
> > +		goto oom;
> >  
> > -	probe_msg->size = probe_msg_size;
> > -	probe_msg->type = DIT_PROBE;
> > -	probe_msg->probe.ntp = dhpb->dthpb_noffs + dhpb->dthpb_nenoffs;
> > -	ptr = stpcpy(probe_msg->probe.name, dhpb->dthpb_mod);
> > +	memset(msg, 0, msg_size);
> > +
> > +	msg->size = msg_size;
> > +	msg->type = DIT_PROBE;
> > +	msg->probe.ntp = dhpb->dthpb_noffs + dhpb->dthpb_nenoffs;
> > +	msg->probe.nargc = dhpb->dthpb_nargc;
> > +	msg->probe.xargc = dhpb->dthpb_xargc;
> > +
> > +	ptr = stpcpy(msg->probe.name, dhpb->dthpb_mod);
> >  	ptr++;
> >  	ptr = stpcpy(ptr, dhpb->dthpb_func);
> >  	ptr++;
> >  	strcpy(ptr, dhpb->dthpb_name);
> > -	dof_parser_write_one(out, probe_msg, probe_msg_size);
> > +	dof_parser_write_one(out, msg, msg_size);
> >  
> > -	free(probe_msg);
> > +	free(msg);
> >  
> > -	/* XXX TODO translated args
> > -	   pp->ftp_nargs = dhpb->dthpb_xargc;
> > -	   pp->ftp_xtypes = dhpb->dthpb_xtypes;
> > -	   pp->ftp_ntypes = dhpb->dthpb_ntypes;
> > -	*/
> > +	/*
> > +	 * Emit info on all native and translated args in turn.
> > +	 *
> > +	 * FIXME: this code is a bit repetitious.
> > +	 *
> > +	 * First native args (if any).
> > +	 */
> > +
> > +	if (dhpb->dthpb_nargc > 0) {
> > +		size_t	nargs_size;
> > +
> > +		nargs_size = strings_len(dhpb->dthpb_ntypes, dhpb->dthpb_nargc);

No validation that there are nargc type strings.

> > +		msg_size = offsetof(dof_parsed_t, nargs.args) + nargs_size;
> > +
> > +		msg = malloc(msg_size);
> > +		if (!msg)
> > +			goto oom;
> > +
> > +		memset(msg, 0, msg_size);
> > +
> > +		msg->size = msg_size;
> > +		msg->type = DIT_ARGS_NATIVE;
> > +		memcpy(msg->nargs.args, dhpb->dthpb_ntypes, nargs_size);
> > +		dof_parser_write_one(out, msg, msg_size);
> > +
> > +		free(msg);
> > +
> > +		/* Then translated args. */
> > +
> > +		if (dhpb->dthpb_xargc > 0) {
> > +			size_t	xargs_size, map_size;
> > +
> > +			xargs_size = strings_len(dhpb->dthpb_xtypes,
> > +						 dhpb->dthpb_xargc);

No validation that there are nargc type strings.

> > +			msg_size = offsetof(dof_parsed_t, xargs.args) +
> > +				   xargs_size;
> > +
> > +			msg = malloc(msg_size);
> > +			if (!msg)
> > +				goto oom;
> > +
> > +			memset(msg, 0, msg_size);
> > +
> > +			msg->size = msg_size;
> > +			msg->type = DIT_ARGS_XLAT;
> > +			memcpy(msg->xargs.args, dhpb->dthpb_xtypes, xargs_size);
> > +			dof_parser_write_one(out, msg, msg_size);
> > +
> > +			free(msg);
> > +
> > +			/* Then the remapping table. */
> 
> remapping -> mapping
> 
> > +
> > +			map_size = dhpb->dthpb_xargc * sizeof(int8_t);
> > +			msg_size = offsetof(dof_parsed_t, argmap.argmap) +
> > +				   map_size;
> > +
> > +			msg = malloc(msg_size);
> > +			if (!msg)
> > +				goto oom;
> > +
> > +			memset(msg, 0, msg_size);
> > +
> > +			msg->size = msg_size;
> > +			msg->type = DIT_ARGS_MAP;
> > +			memcpy(msg->argmap.argmap, dhpb->dthpb_args, map_size);

Is there any validation anywhere that there are map_size bytes to read from
dhpb->dthpb_args?  Is there any validation anywhere here (or in a later patch)
that the entries are valid (between 0 and nargc)?

> > +			dof_parser_write_one(out, msg, msg_size);
> > +			free(msg);
> > +		}
> > +	}
> >  
> >  	/*
> >  	 * Emit info on each tracepoint in turn.
> > @@ -938,18 +1024,10 @@ emit_probe(int out, dtrace_helper_probedesc_t *dhpb)
> >  	for (i = 0; i < dhpb->dthpb_nenoffs; i++)
> >  		emit_tp(out, dhpb->dthpb_base, dhpb->dthpb_enoffs[i], 1);
> >  
> > -	/*
> > -	 * XXX later:
> > -	 * If the arguments are shuffled around we set the argument remapping
> > -	 * table. Later, when the probe fires, we only remap the arguments
> > -	 * if the table is non-NULL.
> > -	 *
> > -	for (i = 0; i < dhpb->dthpb_xargc; i++) {
> > -		if (dhpb->dthpb_args[i] != i) {
> > -			pp->ftp_argmap = dhpb->dthpb_args;
> > -			break;
> > -		}
> > -	} */
> > +	return;
> > + oom:
> > +	dof_error(out, ENOMEM, "Out of memory allocating %zi bytes for probe",
> > +		  msg_size);
> >  }
> >  
> >  static void
> > diff --git a/libcommon/dof_parser.h b/libcommon/dof_parser.h
> > index d3a6836f15fd..8f42d00551e3 100644
> > --- a/libcommon/dof_parser.h
> > +++ b/libcommon/dof_parser.h
> > @@ -15,10 +15,19 @@
> >  #include <dtrace/helpers.h>
> >  
> >  /*
> > - * Result of DOF probe parsing.  We receive a provider info structure, followed
> > - * by N probe info structures each of which is followed by possibly many
> > - * tracepoint info structures, all tagged.  Things not yet used for probe
> > - * creation (like args, translated types, etc) are not returned.
> > + * Result of DOF probe parsing.  The order of elements in the parsed stream
> > + * is:
> > + *
> > + * DIT_PROVIDER (at least 1, which contains...)
> > + *   DIT_PROBE (at least 1, each of which has...)
> > + *     DIT_ARGS_NATIVE (1, optional)
> > + *     DIT_ARGS_XLAT (1, optional)
> > + *     DIT_ARGS_MAP (1, optional)
> > + *     DIT_TRACEPOINT (any number >= 1)
> > + *
> > + * The dof_parsed.provider.flags word indicates the presence of the
> > + * various optional args records in the following stream (you can rely on
> > + * them if it simplifies things, but you don't have to).
> >   *
> >   * On error, a DIT_ERR structure is returned with an error message.
> >   */
> > @@ -27,7 +36,10 @@ typedef enum dof_parsed_info {
> >  	DIT_PROVIDER = 0,
> >  	DIT_PROBE = 1,
> >  	DIT_TRACEPOINT = 2,
> > -	DIT_ERR = 3
> > +	DIT_ERR = 3,
> > +	DIT_ARGS_NATIVE = 4,
> > +	DIT_ARGS_XLAT = 5,
> > +	DIT_ARGS_MAP = 6,
> >  } dof_parsed_info_t;
> >  
> >  /*
> > @@ -37,7 +49,7 @@ typedef enum dof_parsed_info {
> >   * start which is the version of the dof_parseds within it.  The data flowing
> >   * over the stream from the seccomped parser has no such prefix.
> >   */
> > -#define DOF_PARSED_VERSION 1
> > +#define DOF_PARSED_VERSION 2
> >  
> >  typedef struct dof_parsed {
> >  	/*
> > @@ -59,18 +71,58 @@ typedef struct dof_parsed {
> >  			 */
> >  			char name[1];
> >  		} provider;
> > +
> >  		struct dpi_probe_info {
> >  			/*
> >  			 * Number of tracepoints that follow.
> >  			 */
> >  			size_t ntp;
> >  
> > +			/*
> > +			 * Number of native arguments that follow (if > 0, a
> > +			 * DIT_ARGS_NATIVE will be received).
> > +			 */
> > +			size_t nargc;
> > +
> > +			/*
> > +			 * Number of xlated arguments that follow (if > 0, a
> > +			 * DIT_ARGS_XLAT and DIT_ARGS_MAP will be received).
> > +			 */
> > +			size_t xargc;
> > +
> >  			/*
> >  			 * Probe module, function, and name (\0-separated).
> >  			 */
> >  			char name[1];
> >  		} probe;
> >  
> > +		/* V2+ only.  */
> > +		struct dpi_probe_args_native_info {
> > +			/*
> > +			 * Array of native args.  nargc in length.
> > +			 */
> > +			char args[1];
> > +		} nargs;
> > +
> > +		/* V2+ only.  */
> > +		struct dpi_probe_args_xlat_info {
> > +			/*
> > +			 * Array of translated args.  xargc in length.
> > +			 */
> > +			char args[1];
> > +		} xargs;
> > +
> > +		/*
> > +		 * V2+ only.
> > +		 */
> > +		struct dpi_probe_args_map_info {
> > +			/*
> > +			 * Mapping from native arg index to xlated arg index.
> > +			 * xargc in length.
> 
> This comment is confusing...  I think it might be better served by really
> spelling out which is which.  I.e. each of the xargc elements specifies the
> native arg index that the translated arg gets its value from.
> 
> > +			 */
> > +			int8_t argmap[1];
> > +		} argmap;
> > +
> >  		struct dpi_tracepoint_info {
> >  			/*
> >  			 * Offset of this tracepoint.
> > diff --git a/libdtrace/dt_pid.c b/libdtrace/dt_pid.c
> > index 41a87955ec1b..71ac1754d343 100644
> > --- a/libdtrace/dt_pid.c
> > +++ b/libdtrace/dt_pid.c
> > @@ -866,6 +866,9 @@ dt_pid_create_usdt_probes_proc(dtrace_hdl_t *dtp, dt_proc_t *dpr,
> >  		uint64_t *dof_version;
> >  		char *prv, *mod, *fun, *prb;
> >  		dof_parsed_t *provider, *probe;
> > +		ssize_t nargvlen = 0, xargvlen = 0;
> > +		char *nargv = NULL, *xargv = NULL;
> > +		int8_t *argmap = NULL;
> >  
> >  		/*
> >  		 * Regular files only: in particular, skip . and ..,
> > @@ -917,6 +920,47 @@ dt_pid_create_usdt_probes_proc(dtrace_hdl_t *dtp, dt_proc_t *dpr,
> >  		p += probe->size;
> >  		seen_size += probe->size;
> >  
> > +		/*
> > +		 * Assume the order given in dof_parser.h, for simplicity.
> > +		 */
> > +		if (probe->probe.nargc > 0) {
> > +			dof_parsed_t *args = (dof_parsed_t *) p;
> > +
> > +			if (!validate_dof_record(path, args, DIT_ARGS_NATIVE,
> > +						 dof_buf_size, seen_size))
> > +				goto parse_err;
> > +
> > +			nargv = args->nargs.args;
> > +			nargvlen = args->size - offsetof(dof_parsed_t, nargs.args);
> > +			assert(nargvlen >= 0);
> > +
> > +			p += args->size;
> > +			seen_size += args->size;
> > +		}
> > +		if (probe->probe.xargc > 0) {
> > +			dof_parsed_t *args = (dof_parsed_t *) p;
> > +
> > +			if (!validate_dof_record(path, args, DIT_ARGS_XLAT,
> > +						 dof_buf_size, seen_size))
> > +				goto parse_err;
> > +
> > +			xargv = args->xargs.args;
> > +			xargvlen = args->size - offsetof(dof_parsed_t, xargs.args);
> > +			assert(xargvlen >= 0);
> > +
> > +			p += args->size;
> > +			seen_size += args->size;
> > +
> > +			if (!validate_dof_record(path, args, DIT_ARGS_MAP,
> > +						 dof_buf_size, seen_size))
> > +				goto parse_err;
> > +
> > +			argmap = args->argmap.argmap;
> > +
> > +			p += args->size;
> > +			seen_size += args->size;
> > +		}
> > +
> >  		/*
> >  		 * Now the parsed DOF for this probe's tracepoints.
> >  		 */
> > @@ -967,6 +1011,21 @@ dt_pid_create_usdt_probes_proc(dtrace_hdl_t *dtp, dt_proc_t *dpr,
> >  			psp.pps_off = tp->tracepoint.addr - pmp->pr_file->first_segment->pr_vaddr;
> >  			psp.pps_nameoff = 0;
> >  
> > +			if (nargv) {
> > +				psp.pps_nargc = probe->probe.nargc;
> > +				psp.pps_nargvlen = nargvlen;
> > +				psp.pps_nargv = nargv;
> > +			}
> > +
> > +			if (xargv) {
> > +				psp.pps_xargc = probe->probe.xargc;
> > +				psp.pps_xargvlen = xargvlen;
> > +				psp.pps_xargv = xargv;
> > +			}
> > +
> > +			if (argmap)
> > +				psp.pps_argmap = argmap;
> > +
> >  			dt_dprintf("providing %s:%s:%s:%s for pid %d\n", psp.pps_prv,
> >  				   psp.pps_mod, psp.pps_fun, psp.pps_prb, psp.pps_pid);
> >  			if (pvp->impl->provide_probe(dtp, &psp) < 0) {
> > -- 
> > 2.46.0.278.g36e3a12567
> > 
> > 
> > _______________________________________________
> > DTrace-devel mailing list
> > DTrace-devel at oss.oracle.com
> > https://oss.oracle.com/mailman/listinfo/dtrace-devel
> 
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel



More information about the DTrace-devel mailing list