[DTrace-devel] [PATCH v4 1/5] usdt: get arg types and xlations into DTrace from the DOF
Kris Van Hees
kris.van.hees at oracle.com
Sat Nov 2 00:14:49 UTC 2024
On Fri, Nov 01, 2024 at 07:46:04PM -0400, Kris Van Hees via DTrace-devel wrote:
> Two comments below (one tiny thing, and one explanatory comment that I *think*
> I have a better suggestion for, but you may want to rephrase it).
Actually, another issue (see below).
> On Fri, Nov 01, 2024 at 03:57:08PM +0000, Nick Alcock via DTrace-devel wrote:
> > This change propagates native and xlated arg types and mapping info all
> > the way from the DOF, through the parser and dtprobed and the DOF stash,
> > into DTrace, where they end up in the pid_probespec_t for USDT probes.
> > We don't do anything with them once they get there in this commit: no
> > user-visible impacts expected.
> >
> > We bump the DOF_PARSED_VERSION since we add three new types of record to the
> > dof_parser_t, all optional, covering native and xlated args and arg mapping
> > tables; to make life easier for consumers we emit them in a defined order
> > (documented in dof_parser.h), and add arg counts to the DIT_PROBE record
> > that precedes them indicating which will be present and how many args are in
> > them. This means we retain the property that you can always tell which
> > records within a provider are coming next purely from records you already
> > have: there's no need to make decisions once the records turn up. The DOF
> > stash hardly changes: all that happens is that the parsed data written to
> > each per-probe file gains some extra types of record (it can have
> > DIT_ARGS_NATIVE, DIT_ARGS_XLAT and DIT_ARGS_MAP entries following the
> > DIT_PROBE record now).
> >
> > As usual with DOF_PARSED_VERSION bumps, DTraces that cross this change will
> > refuse to read probes added by dtprobeds that are on the other side of it.
> > (Restarting dtprobed will reparse all the probes in the stash to match the
> > new layout, and newly-started DTraces will pick that up -- so this only
> > affects running DTraces picking up new probes, which DTrace cannot do yet:
> > so no visible effects are expected.)
> >
> > The code is a bit repetitive, with nargs, xargs and arg mapping all handled
> > separately even though the code is very similar. In future maybe we could
> > unify them (but my attempts led to code that was much harder to read than
> > the original).
> >
> > Signed-off-by: Nick Alcock <nick.alcock at oracle.com>
> > ---
> > dtprobed/dof_stash.c | 15 +++--
> > dtprobed/dtprobed.c | 10 ++-
> > include/dtrace/pid.h | 7 ++
> > libcommon/dof_parser.c | 150 +++++++++++++++++++++++++++++++----------
> > libcommon/dof_parser.h | 64 ++++++++++++++++--
> > libdtrace/dt_pid.c | 59 ++++++++++++++++
> > 6 files changed, 256 insertions(+), 49 deletions(-)
> >
> > diff --git a/dtprobed/dof_stash.c b/dtprobed/dof_stash.c
> > index 0e639076f655..8bf465678217 100644
> > --- a/dtprobed/dof_stash.c
> > +++ b/dtprobed/dof_stash.c
> > @@ -59,8 +59,10 @@
> > * startup if the dof_parsed_t structure changed. The file consists of a
> > * uint64_t version number (DOF_PARSED_VERSION), then a stream of
> > * dof_parsed_t records, the first of type DIT_PROVIDER, the second
> > - * DIT_PROBE, then as many struct dof_parsed_t's of type DIT_TRACEPOINT as
> > - * the DIT_PROBE record specifies.
> > + * DIT_PROBE, then optionally some DIT_*ARGS records (if the count of native
> > + * args in the probe is >0, you get a DIT_NATIVE_ARGS: if the count of xlated
> > + * args is >0, you get DIT_XLAT_ARGS and DIT_MAP_ARGS), then as many struct
> > + * dof_parsed_t's of type DIT_TRACEPOINT as the DIT_PROBE record specifies.
> > *
> > * /run/dtrace/probes/: Per-probe info, written by dtprobed, read by DTrace.
> > *
> > @@ -640,9 +642,14 @@ dof_stash_write_parsed(pid_t pid, dev_t dev, ino_t ino, dt_list_t *accum)
> > break;
> > }
> >
> > - /* Tracepoint: add to already-open file. */
> > + /* Args info or tracepoint: add to already-open file. */
> > + case DIT_ARGS_NATIVE:
> > + case DIT_ARGS_XLAT:
> > + case DIT_ARGS_MAP:
> > case DIT_TRACEPOINT:
> > - assert(state == DIT_PROBE || state == DIT_TRACEPOINT);
> > + assert(state == DIT_PROBE || state == DIT_ARGS_NATIVE ||
> > + state == DIT_ARGS_XLAT || state == DIT_ARGS_MAP ||
> > + state == DIT_TRACEPOINT);
> > state = accump->parsed->type;
> >
> > if (write_chunk(parsed_fd, accump->parsed, accump->parsed->size) < 0)
> > diff --git a/dtprobed/dtprobed.c b/dtprobed/dtprobed.c
> > index f4c6be1e045d..2ca39b26f88a 100644
> > --- a/dtprobed/dtprobed.c
> > +++ b/dtprobed/dtprobed.c
> > @@ -791,16 +791,20 @@ process_dof(pid_t pid, int out, int in, dev_t dev, ino_t inum, dev_t exec_dev,
> > if (dof_stash_push_parsed(&accum, probe) < 0)
> > goto oom;
> >
> > - for (j = 0; j < probe->probe.ntp; j++) {
> > + j = 0;
> > + do {
> > dof_parsed_t *tp = dof_read(pid, in);
> >
> > errmsg = "no tracepoints in a probe, or parse state corrupt";
> > - if (!tp || tp->type != DIT_TRACEPOINT)
> > + if (!tp || tp->type == DIT_PROVIDER || tp->type == DIT_PROBE)
> > goto err;
> >
> > if (dof_stash_push_parsed(&accum, tp) < 0)
> > goto oom;
> > - }
> > +
> > + if (tp->type == DIT_TRACEPOINT)
> > + j++;
> > + } while (j < probe->probe.ntp);
> > }
> >
> > if (!reparsing)
> > diff --git a/include/dtrace/pid.h b/include/dtrace/pid.h
> > index 0129674adf0c..25bfacfdbfe2 100644
> > --- a/include/dtrace/pid.h
> > +++ b/include/dtrace/pid.h
> > @@ -35,6 +35,13 @@ typedef struct pid_probespec {
> > ino_t pps_inum; /* object inode */
> > char *pps_fn; /* object full filename */
> > uint64_t pps_off; /* probe offset (in object) */
> > + int pps_nargc; /* number of native args */
> > + int pps_xargc; /* number of xlated and mapped args */
> > + char *pps_nargv; /* array of native args */
> > + size_t pps_nargvlen; /* (high estimate of) length of array */
> > + char *pps_xargv; /* array of xlated args */
> > + size_t pps_xargvlen; /* (high estimate of) length of array */
> > + int8_t *pps_argmap; /* mapped arg indexes */
> >
> > /*
> > * Fields below this point do not apply to underlying probes.
> > diff --git a/libcommon/dof_parser.c b/libcommon/dof_parser.c
> > index d6631a1afdcb..dd3a6cd2b887 100644
> > --- a/libcommon/dof_parser.c
> > +++ b/libcommon/dof_parser.c
> > @@ -807,10 +807,11 @@ validate_provider(int out, dof_hdr_t *dof, dof_sec_t *sec)
> > }
> >
> > dt_dbg_dof(" Probe %d %s:%s:%s:%s with %d offsets, "
> > - "%d is-enabled offsets\n", j,
> > + "%d is-enabled offsets, %i args, %i nargs, argidx %i\n", j,
> > strtab + prov->dofpv_name, "",
> > strtab + prb->dofpr_func, strtab + prb->dofpr_name,
> > - prb->dofpr_noffs, prb->dofpr_nenoffs);
> > + prb->dofpr_noffs, prb->dofpr_nenoffs,
> > + prb->dofpr_xargc, prb->dofpr_nargc, prb->dofpr_argidx);
> > }
> >
> > return 0;
> > @@ -879,12 +880,26 @@ validate_probe(int out, dtrace_helper_probedesc_t *dhpb)
> > return 0;
> > }
> >
> > +static size_t
> > +strings_len(const char *strtab, size_t count)
> > +{
> > + size_t len = 0;
> > +
> > + for (; count > 0; count--) {
> > + size_t this_len = strlen(strtab) + 1;
> > +
> > + len += this_len;
> > + strtab += this_len;
> > + }
> > + return len;
> > +}
What if the strtab blob does not contain enough strings? There is no check
here to ensure that you do not start reading past the end of data?
> > +
> > static void
> > emit_probe(int out, dtrace_helper_probedesc_t *dhpb)
> > {
> > int i;
> > - dof_parsed_t *probe_msg;
> > - size_t probe_msg_size;
> > + dof_parsed_t *msg;
> > + size_t msg_size;
> > char *ptr;
> >
> > dt_dbg_dof(" Creating probe %s:%s:%s:%s\n", dhpb->dthpb_prov,
> > @@ -893,35 +908,106 @@ emit_probe(int out, dtrace_helper_probedesc_t *dhpb)
> > if (validate_probe(out, dhpb) != 0)
> > return;
> >
> > - probe_msg_size = offsetof(dof_parsed_t, probe.name) +
> > - strlen(dhpb->dthpb_mod) + 1 + strlen(dhpb->dthpb_func) + 1 +
> > - strlen(dhpb->dthpb_name) + 1;
> > + /*
> > + * Compute the size of all the optional elements first, to fill out the
> > + * flags.
> > + */
> >
> > - probe_msg = malloc(probe_msg_size);
> > - if (!probe_msg) {
> > - dof_error(out, ENOMEM, "Out of memory allocating probe");
> > - return;
> > - }
> > + msg_size = offsetof(dof_parsed_t, probe.name) +
> > + strlen(dhpb->dthpb_mod) + 1 +
> > + strlen(dhpb->dthpb_func) + 1 +
> > + strlen(dhpb->dthpb_name) + 1;
> >
> > - memset(probe_msg, 0, probe_msg_size);
> > + msg = malloc(msg_size);
> > + if (!msg)
> > + goto oom;
> >
> > - probe_msg->size = probe_msg_size;
> > - probe_msg->type = DIT_PROBE;
> > - probe_msg->probe.ntp = dhpb->dthpb_noffs + dhpb->dthpb_nenoffs;
> > - ptr = stpcpy(probe_msg->probe.name, dhpb->dthpb_mod);
> > + memset(msg, 0, msg_size);
> > +
> > + msg->size = msg_size;
> > + msg->type = DIT_PROBE;
> > + msg->probe.ntp = dhpb->dthpb_noffs + dhpb->dthpb_nenoffs;
> > + msg->probe.nargc = dhpb->dthpb_nargc;
> > + msg->probe.xargc = dhpb->dthpb_xargc;
> > +
> > + ptr = stpcpy(msg->probe.name, dhpb->dthpb_mod);
> > ptr++;
> > ptr = stpcpy(ptr, dhpb->dthpb_func);
> > ptr++;
> > strcpy(ptr, dhpb->dthpb_name);
> > - dof_parser_write_one(out, probe_msg, probe_msg_size);
> > + dof_parser_write_one(out, msg, msg_size);
> >
> > - free(probe_msg);
> > + free(msg);
> >
> > - /* XXX TODO translated args
> > - pp->ftp_nargs = dhpb->dthpb_xargc;
> > - pp->ftp_xtypes = dhpb->dthpb_xtypes;
> > - pp->ftp_ntypes = dhpb->dthpb_ntypes;
> > - */
> > + /*
> > + * Emit info on all native and translated args in turn.
> > + *
> > + * FIXME: this code is a bit repetitious.
> > + *
> > + * First native args (if any).
> > + */
> > +
> > + if (dhpb->dthpb_nargc > 0) {
> > + size_t nargs_size;
> > +
> > + nargs_size = strings_len(dhpb->dthpb_ntypes, dhpb->dthpb_nargc);
No validation that there are nargc type strings.
> > + msg_size = offsetof(dof_parsed_t, nargs.args) + nargs_size;
> > +
> > + msg = malloc(msg_size);
> > + if (!msg)
> > + goto oom;
> > +
> > + memset(msg, 0, msg_size);
> > +
> > + msg->size = msg_size;
> > + msg->type = DIT_ARGS_NATIVE;
> > + memcpy(msg->nargs.args, dhpb->dthpb_ntypes, nargs_size);
> > + dof_parser_write_one(out, msg, msg_size);
> > +
> > + free(msg);
> > +
> > + /* Then translated args. */
> > +
> > + if (dhpb->dthpb_xargc > 0) {
> > + size_t xargs_size, map_size;
> > +
> > + xargs_size = strings_len(dhpb->dthpb_xtypes,
> > + dhpb->dthpb_xargc);
No validation that there are nargc type strings.
> > + msg_size = offsetof(dof_parsed_t, xargs.args) +
> > + xargs_size;
> > +
> > + msg = malloc(msg_size);
> > + if (!msg)
> > + goto oom;
> > +
> > + memset(msg, 0, msg_size);
> > +
> > + msg->size = msg_size;
> > + msg->type = DIT_ARGS_XLAT;
> > + memcpy(msg->xargs.args, dhpb->dthpb_xtypes, xargs_size);
> > + dof_parser_write_one(out, msg, msg_size);
> > +
> > + free(msg);
> > +
> > + /* Then the remapping table. */
>
> remapping -> mapping
>
> > +
> > + map_size = dhpb->dthpb_xargc * sizeof(int8_t);
> > + msg_size = offsetof(dof_parsed_t, argmap.argmap) +
> > + map_size;
> > +
> > + msg = malloc(msg_size);
> > + if (!msg)
> > + goto oom;
> > +
> > + memset(msg, 0, msg_size);
> > +
> > + msg->size = msg_size;
> > + msg->type = DIT_ARGS_MAP;
> > + memcpy(msg->argmap.argmap, dhpb->dthpb_args, map_size);
Is there any validation anywhere that there are map_size bytes to read from
dhpb->dthpb_args? Is there any validation anywhere here (or in a later patch)
that the entries are valid (between 0 and nargc)?
> > + dof_parser_write_one(out, msg, msg_size);
> > + free(msg);
> > + }
> > + }
> >
> > /*
> > * Emit info on each tracepoint in turn.
> > @@ -938,18 +1024,10 @@ emit_probe(int out, dtrace_helper_probedesc_t *dhpb)
> > for (i = 0; i < dhpb->dthpb_nenoffs; i++)
> > emit_tp(out, dhpb->dthpb_base, dhpb->dthpb_enoffs[i], 1);
> >
> > - /*
> > - * XXX later:
> > - * If the arguments are shuffled around we set the argument remapping
> > - * table. Later, when the probe fires, we only remap the arguments
> > - * if the table is non-NULL.
> > - *
> > - for (i = 0; i < dhpb->dthpb_xargc; i++) {
> > - if (dhpb->dthpb_args[i] != i) {
> > - pp->ftp_argmap = dhpb->dthpb_args;
> > - break;
> > - }
> > - } */
> > + return;
> > + oom:
> > + dof_error(out, ENOMEM, "Out of memory allocating %zi bytes for probe",
> > + msg_size);
> > }
> >
> > static void
> > diff --git a/libcommon/dof_parser.h b/libcommon/dof_parser.h
> > index d3a6836f15fd..8f42d00551e3 100644
> > --- a/libcommon/dof_parser.h
> > +++ b/libcommon/dof_parser.h
> > @@ -15,10 +15,19 @@
> > #include <dtrace/helpers.h>
> >
> > /*
> > - * Result of DOF probe parsing. We receive a provider info structure, followed
> > - * by N probe info structures each of which is followed by possibly many
> > - * tracepoint info structures, all tagged. Things not yet used for probe
> > - * creation (like args, translated types, etc) are not returned.
> > + * Result of DOF probe parsing. The order of elements in the parsed stream
> > + * is:
> > + *
> > + * DIT_PROVIDER (at least 1, which contains...)
> > + * DIT_PROBE (at least 1, each of which has...)
> > + * DIT_ARGS_NATIVE (1, optional)
> > + * DIT_ARGS_XLAT (1, optional)
> > + * DIT_ARGS_MAP (1, optional)
> > + * DIT_TRACEPOINT (any number >= 1)
> > + *
> > + * The dof_parsed.provider.flags word indicates the presence of the
> > + * various optional args records in the following stream (you can rely on
> > + * them if it simplifies things, but you don't have to).
> > *
> > * On error, a DIT_ERR structure is returned with an error message.
> > */
> > @@ -27,7 +36,10 @@ typedef enum dof_parsed_info {
> > DIT_PROVIDER = 0,
> > DIT_PROBE = 1,
> > DIT_TRACEPOINT = 2,
> > - DIT_ERR = 3
> > + DIT_ERR = 3,
> > + DIT_ARGS_NATIVE = 4,
> > + DIT_ARGS_XLAT = 5,
> > + DIT_ARGS_MAP = 6,
> > } dof_parsed_info_t;
> >
> > /*
> > @@ -37,7 +49,7 @@ typedef enum dof_parsed_info {
> > * start which is the version of the dof_parseds within it. The data flowing
> > * over the stream from the seccomped parser has no such prefix.
> > */
> > -#define DOF_PARSED_VERSION 1
> > +#define DOF_PARSED_VERSION 2
> >
> > typedef struct dof_parsed {
> > /*
> > @@ -59,18 +71,58 @@ typedef struct dof_parsed {
> > */
> > char name[1];
> > } provider;
> > +
> > struct dpi_probe_info {
> > /*
> > * Number of tracepoints that follow.
> > */
> > size_t ntp;
> >
> > + /*
> > + * Number of native arguments that follow (if > 0, a
> > + * DIT_ARGS_NATIVE will be received).
> > + */
> > + size_t nargc;
> > +
> > + /*
> > + * Number of xlated arguments that follow (if > 0, a
> > + * DIT_ARGS_XLAT and DIT_ARGS_MAP will be received).
> > + */
> > + size_t xargc;
> > +
> > /*
> > * Probe module, function, and name (\0-separated).
> > */
> > char name[1];
> > } probe;
> >
> > + /* V2+ only. */
> > + struct dpi_probe_args_native_info {
> > + /*
> > + * Array of native args. nargc in length.
> > + */
> > + char args[1];
> > + } nargs;
> > +
> > + /* V2+ only. */
> > + struct dpi_probe_args_xlat_info {
> > + /*
> > + * Array of translated args. xargc in length.
> > + */
> > + char args[1];
> > + } xargs;
> > +
> > + /*
> > + * V2+ only.
> > + */
> > + struct dpi_probe_args_map_info {
> > + /*
> > + * Mapping from native arg index to xlated arg index.
> > + * xargc in length.
>
> This comment is confusing... I think it might be better served by really
> spelling out which is which. I.e. each of the xargc elements specifies the
> native arg index that the translated arg gets its value from.
>
> > + */
> > + int8_t argmap[1];
> > + } argmap;
> > +
> > struct dpi_tracepoint_info {
> > /*
> > * Offset of this tracepoint.
> > diff --git a/libdtrace/dt_pid.c b/libdtrace/dt_pid.c
> > index 41a87955ec1b..71ac1754d343 100644
> > --- a/libdtrace/dt_pid.c
> > +++ b/libdtrace/dt_pid.c
> > @@ -866,6 +866,9 @@ dt_pid_create_usdt_probes_proc(dtrace_hdl_t *dtp, dt_proc_t *dpr,
> > uint64_t *dof_version;
> > char *prv, *mod, *fun, *prb;
> > dof_parsed_t *provider, *probe;
> > + ssize_t nargvlen = 0, xargvlen = 0;
> > + char *nargv = NULL, *xargv = NULL;
> > + int8_t *argmap = NULL;
> >
> > /*
> > * Regular files only: in particular, skip . and ..,
> > @@ -917,6 +920,47 @@ dt_pid_create_usdt_probes_proc(dtrace_hdl_t *dtp, dt_proc_t *dpr,
> > p += probe->size;
> > seen_size += probe->size;
> >
> > + /*
> > + * Assume the order given in dof_parser.h, for simplicity.
> > + */
> > + if (probe->probe.nargc > 0) {
> > + dof_parsed_t *args = (dof_parsed_t *) p;
> > +
> > + if (!validate_dof_record(path, args, DIT_ARGS_NATIVE,
> > + dof_buf_size, seen_size))
> > + goto parse_err;
> > +
> > + nargv = args->nargs.args;
> > + nargvlen = args->size - offsetof(dof_parsed_t, nargs.args);
> > + assert(nargvlen >= 0);
> > +
> > + p += args->size;
> > + seen_size += args->size;
> > + }
> > + if (probe->probe.xargc > 0) {
> > + dof_parsed_t *args = (dof_parsed_t *) p;
> > +
> > + if (!validate_dof_record(path, args, DIT_ARGS_XLAT,
> > + dof_buf_size, seen_size))
> > + goto parse_err;
> > +
> > + xargv = args->xargs.args;
> > + xargvlen = args->size - offsetof(dof_parsed_t, xargs.args);
> > + assert(xargvlen >= 0);
> > +
> > + p += args->size;
> > + seen_size += args->size;
> > +
> > + if (!validate_dof_record(path, args, DIT_ARGS_MAP,
> > + dof_buf_size, seen_size))
> > + goto parse_err;
> > +
> > + argmap = args->argmap.argmap;
> > +
> > + p += args->size;
> > + seen_size += args->size;
> > + }
> > +
> > /*
> > * Now the parsed DOF for this probe's tracepoints.
> > */
> > @@ -967,6 +1011,21 @@ dt_pid_create_usdt_probes_proc(dtrace_hdl_t *dtp, dt_proc_t *dpr,
> > psp.pps_off = tp->tracepoint.addr - pmp->pr_file->first_segment->pr_vaddr;
> > psp.pps_nameoff = 0;
> >
> > + if (nargv) {
> > + psp.pps_nargc = probe->probe.nargc;
> > + psp.pps_nargvlen = nargvlen;
> > + psp.pps_nargv = nargv;
> > + }
> > +
> > + if (xargv) {
> > + psp.pps_xargc = probe->probe.xargc;
> > + psp.pps_xargvlen = xargvlen;
> > + psp.pps_xargv = xargv;
> > + }
> > +
> > + if (argmap)
> > + psp.pps_argmap = argmap;
> > +
> > dt_dprintf("providing %s:%s:%s:%s for pid %d\n", psp.pps_prv,
> > psp.pps_mod, psp.pps_fun, psp.pps_prb, psp.pps_pid);
> > if (pvp->impl->provide_probe(dtp, &psp) < 0) {
> > --
> > 2.46.0.278.g36e3a12567
> >
> >
> > _______________________________________________
> > DTrace-devel mailing list
> > DTrace-devel at oss.oracle.com
> > https://oss.oracle.com/mailman/listinfo/dtrace-devel
>
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel
More information about the DTrace-devel
mailing list