[DTrace-devel] [PATCH v5 1/6] usdt: get arg types and xlations into DTrace from the DOF
Kris Van Hees
kris.van.hees at oracle.com
Tue Nov 5 16:56:33 UTC 2024
On Tue, Nov 05, 2024 at 12:06:03AM +0000, Nick Alcock wrote:
> This change propagates native and xlated arg types and mapping info all
> the way from the DOF, through the parser and dtprobed and the DOF stash,
> into DTrace, where they end up in the pid_probespec_t for USDT probes.
> We don't do anything with them once they get there in this commit: no
> user-visible impacts expected.
>
> We bump the DOF_PARSED_VERSION since we add three new types of record to the
> dof_parser_t, all optional, covering native and xlated args and arg mapping
> tables; to make life easier for consumers we emit them in a defined order
> (documented in dof_parser.h), and add arg counts to the DIT_PROBE record
> that precedes them indicating which will be present and how many args are in
> them. This means we retain the property that you can always tell which
> records within a provider are coming next purely from records you already
> have: there's no need to make decisions once the records turn up. The DOF
> stash hardly changes: all that happens is that the parsed data written to
> each per-probe file gains some extra types of record (it can have
> DIT_ARGS_NATIVE, DIT_ARGS_XLAT and DIT_ARGS_MAP entries following the
> DIT_PROBE record now).
>
> As usual with DOF_PARSED_VERSION bumps, DTraces that cross this change will
> refuse to read probes added by dtprobeds that are on the other side of it.
> (Restarting dtprobed will reparse all the probes in the stash to match the
> new layout, and newly-started DTraces will pick that up -- so this only
> affects running DTraces picking up new probes, which DTrace cannot do yet:
> so no visible effects are expected.)
>
> The code is a bit repetitive, with nargs, xargs and arg mapping all handled
> separately even though the code is very similar. In future maybe we could
> unify them (but my attempts led to code that was much harder to read than
> the original).
>
> Signed-off-by: Nick Alcock <nick.alcock at oracle.com>
Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>
> ---
> dtprobed/dof_stash.c | 15 +++--
> dtprobed/dtprobed.c | 10 ++-
> include/dtrace/pid.h | 7 ++
> libcommon/dof_parser.c | 150 +++++++++++++++++++++++++++++++----------
> libcommon/dof_parser.h | 64 ++++++++++++++++--
> libdtrace/dt_pid.c | 59 ++++++++++++++++
> 6 files changed, 256 insertions(+), 49 deletions(-)
>
> diff --git a/dtprobed/dof_stash.c b/dtprobed/dof_stash.c
> index 0e639076f655..8bf465678217 100644
> --- a/dtprobed/dof_stash.c
> +++ b/dtprobed/dof_stash.c
> @@ -59,8 +59,10 @@
> * startup if the dof_parsed_t structure changed. The file consists of a
> * uint64_t version number (DOF_PARSED_VERSION), then a stream of
> * dof_parsed_t records, the first of type DIT_PROVIDER, the second
> - * DIT_PROBE, then as many struct dof_parsed_t's of type DIT_TRACEPOINT as
> - * the DIT_PROBE record specifies.
> + * DIT_PROBE, then optionally some DIT_*ARGS records (if the count of native
> + * args in the probe is >0, you get a DIT_NATIVE_ARGS: if the count of xlated
> + * args is >0, you get DIT_XLAT_ARGS and DIT_MAP_ARGS), then as many struct
> + * dof_parsed_t's of type DIT_TRACEPOINT as the DIT_PROBE record specifies.
> *
> * /run/dtrace/probes/: Per-probe info, written by dtprobed, read by DTrace.
> *
> @@ -640,9 +642,14 @@ dof_stash_write_parsed(pid_t pid, dev_t dev, ino_t ino, dt_list_t *accum)
> break;
> }
>
> - /* Tracepoint: add to already-open file. */
> + /* Args info or tracepoint: add to already-open file. */
> + case DIT_ARGS_NATIVE:
> + case DIT_ARGS_XLAT:
> + case DIT_ARGS_MAP:
> case DIT_TRACEPOINT:
> - assert(state == DIT_PROBE || state == DIT_TRACEPOINT);
> + assert(state == DIT_PROBE || state == DIT_ARGS_NATIVE ||
> + state == DIT_ARGS_XLAT || state == DIT_ARGS_MAP ||
> + state == DIT_TRACEPOINT);
> state = accump->parsed->type;
>
> if (write_chunk(parsed_fd, accump->parsed, accump->parsed->size) < 0)
> diff --git a/dtprobed/dtprobed.c b/dtprobed/dtprobed.c
> index f4c6be1e045d..2ca39b26f88a 100644
> --- a/dtprobed/dtprobed.c
> +++ b/dtprobed/dtprobed.c
> @@ -791,16 +791,20 @@ process_dof(pid_t pid, int out, int in, dev_t dev, ino_t inum, dev_t exec_dev,
> if (dof_stash_push_parsed(&accum, probe) < 0)
> goto oom;
>
> - for (j = 0; j < probe->probe.ntp; j++) {
> + j = 0;
> + do {
> dof_parsed_t *tp = dof_read(pid, in);
>
> errmsg = "no tracepoints in a probe, or parse state corrupt";
> - if (!tp || tp->type != DIT_TRACEPOINT)
> + if (!tp || tp->type == DIT_PROVIDER || tp->type == DIT_PROBE)
> goto err;
>
> if (dof_stash_push_parsed(&accum, tp) < 0)
> goto oom;
> - }
> +
> + if (tp->type == DIT_TRACEPOINT)
> + j++;
> + } while (j < probe->probe.ntp);
> }
>
> if (!reparsing)
> diff --git a/include/dtrace/pid.h b/include/dtrace/pid.h
> index 0129674adf0c..25bfacfdbfe2 100644
> --- a/include/dtrace/pid.h
> +++ b/include/dtrace/pid.h
> @@ -35,6 +35,13 @@ typedef struct pid_probespec {
> ino_t pps_inum; /* object inode */
> char *pps_fn; /* object full filename */
> uint64_t pps_off; /* probe offset (in object) */
> + int pps_nargc; /* number of native args */
> + int pps_xargc; /* number of xlated and mapped args */
> + char *pps_nargv; /* array of native args */
> + size_t pps_nargvlen; /* (high estimate of) length of array */
> + char *pps_xargv; /* array of xlated args */
> + size_t pps_xargvlen; /* (high estimate of) length of array */
> + int8_t *pps_argmap; /* mapped arg indexes */
>
> /*
> * Fields below this point do not apply to underlying probes.
> diff --git a/libcommon/dof_parser.c b/libcommon/dof_parser.c
> index d6631a1afdcb..1792a8bfcc84 100644
> --- a/libcommon/dof_parser.c
> +++ b/libcommon/dof_parser.c
> @@ -807,10 +807,11 @@ validate_provider(int out, dof_hdr_t *dof, dof_sec_t *sec)
> }
>
> dt_dbg_dof(" Probe %d %s:%s:%s:%s with %d offsets, "
> - "%d is-enabled offsets\n", j,
> + "%d is-enabled offsets, %i args, %i nargs, argidx %i\n", j,
> strtab + prov->dofpv_name, "",
> strtab + prb->dofpr_func, strtab + prb->dofpr_name,
> - prb->dofpr_noffs, prb->dofpr_nenoffs);
> + prb->dofpr_noffs, prb->dofpr_nenoffs,
> + prb->dofpr_xargc, prb->dofpr_nargc, prb->dofpr_argidx);
> }
>
> return 0;
> @@ -879,12 +880,26 @@ validate_probe(int out, dtrace_helper_probedesc_t *dhpb)
> return 0;
> }
>
> +static size_t
> +strings_len(const char *strtab, size_t count)
> +{
> + size_t len = 0;
> +
> + for (; count > 0; count--) {
> + size_t this_len = strlen(strtab) + 1;
> +
> + len += this_len;
> + strtab += this_len;
> + }
> + return len;
> +}
> +
> static void
> emit_probe(int out, dtrace_helper_probedesc_t *dhpb)
> {
> int i;
> - dof_parsed_t *probe_msg;
> - size_t probe_msg_size;
> + dof_parsed_t *msg;
> + size_t msg_size;
> char *ptr;
>
> dt_dbg_dof(" Creating probe %s:%s:%s:%s\n", dhpb->dthpb_prov,
> @@ -893,35 +908,106 @@ emit_probe(int out, dtrace_helper_probedesc_t *dhpb)
> if (validate_probe(out, dhpb) != 0)
> return;
>
> - probe_msg_size = offsetof(dof_parsed_t, probe.name) +
> - strlen(dhpb->dthpb_mod) + 1 + strlen(dhpb->dthpb_func) + 1 +
> - strlen(dhpb->dthpb_name) + 1;
> + /*
> + * Compute the size of all the optional elements first, to fill out the
> + * flags.
> + */
>
> - probe_msg = malloc(probe_msg_size);
> - if (!probe_msg) {
> - dof_error(out, ENOMEM, "Out of memory allocating probe");
> - return;
> - }
> + msg_size = offsetof(dof_parsed_t, probe.name) +
> + strlen(dhpb->dthpb_mod) + 1 +
> + strlen(dhpb->dthpb_func) + 1 +
> + strlen(dhpb->dthpb_name) + 1;
>
> - memset(probe_msg, 0, probe_msg_size);
> + msg = malloc(msg_size);
> + if (!msg)
> + goto oom;
>
> - probe_msg->size = probe_msg_size;
> - probe_msg->type = DIT_PROBE;
> - probe_msg->probe.ntp = dhpb->dthpb_noffs + dhpb->dthpb_nenoffs;
> - ptr = stpcpy(probe_msg->probe.name, dhpb->dthpb_mod);
> + memset(msg, 0, msg_size);
> +
> + msg->size = msg_size;
> + msg->type = DIT_PROBE;
> + msg->probe.ntp = dhpb->dthpb_noffs + dhpb->dthpb_nenoffs;
> + msg->probe.nargc = dhpb->dthpb_nargc;
> + msg->probe.xargc = dhpb->dthpb_xargc;
> +
> + ptr = stpcpy(msg->probe.name, dhpb->dthpb_mod);
> ptr++;
> ptr = stpcpy(ptr, dhpb->dthpb_func);
> ptr++;
> strcpy(ptr, dhpb->dthpb_name);
> - dof_parser_write_one(out, probe_msg, probe_msg_size);
> + dof_parser_write_one(out, msg, msg_size);
>
> - free(probe_msg);
> + free(msg);
>
> - /* XXX TODO translated args
> - pp->ftp_nargs = dhpb->dthpb_xargc;
> - pp->ftp_xtypes = dhpb->dthpb_xtypes;
> - pp->ftp_ntypes = dhpb->dthpb_ntypes;
> - */
> + /*
> + * Emit info on all native and translated args in turn.
> + *
> + * FIXME: this code is a bit repetitious.
> + *
> + * First native args (if any).
> + */
> +
> + if (dhpb->dthpb_nargc > 0) {
> + size_t nargs_size;
> +
> + nargs_size = strings_len(dhpb->dthpb_ntypes, dhpb->dthpb_nargc);
> + msg_size = offsetof(dof_parsed_t, nargs.args) + nargs_size;
> +
> + msg = malloc(msg_size);
> + if (!msg)
> + goto oom;
> +
> + memset(msg, 0, msg_size);
> +
> + msg->size = msg_size;
> + msg->type = DIT_ARGS_NATIVE;
> + memcpy(msg->nargs.args, dhpb->dthpb_ntypes, nargs_size);
> + dof_parser_write_one(out, msg, msg_size);
> +
> + free(msg);
> +
> + /* Then translated args. */
> +
> + if (dhpb->dthpb_xargc > 0) {
> + size_t xargs_size, map_size;
> +
> + xargs_size = strings_len(dhpb->dthpb_xtypes,
> + dhpb->dthpb_xargc);
> + msg_size = offsetof(dof_parsed_t, xargs.args) +
> + xargs_size;
> +
> + msg = malloc(msg_size);
> + if (!msg)
> + goto oom;
> +
> + memset(msg, 0, msg_size);
> +
> + msg->size = msg_size;
> + msg->type = DIT_ARGS_XLAT;
> + memcpy(msg->xargs.args, dhpb->dthpb_xtypes, xargs_size);
> + dof_parser_write_one(out, msg, msg_size);
> +
> + free(msg);
> +
> + /* Then the mapping table. */
> +
> + map_size = dhpb->dthpb_xargc * sizeof(int8_t);
> + msg_size = offsetof(dof_parsed_t, argmap.argmap) +
> + map_size;
> +
> + msg = malloc(msg_size);
> + if (!msg)
> + goto oom;
> +
> + memset(msg, 0, msg_size);
> +
> + msg->size = msg_size;
> + msg->type = DIT_ARGS_MAP;
> + memcpy(msg->argmap.argmap, dhpb->dthpb_args, map_size);
> + dof_parser_write_one(out, msg, msg_size);
> + free(msg);
> + }
> + }
>
> /*
> * Emit info on each tracepoint in turn.
> @@ -938,18 +1024,10 @@ emit_probe(int out, dtrace_helper_probedesc_t *dhpb)
> for (i = 0; i < dhpb->dthpb_nenoffs; i++)
> emit_tp(out, dhpb->dthpb_base, dhpb->dthpb_enoffs[i], 1);
>
> - /*
> - * XXX later:
> - * If the arguments are shuffled around we set the argument remapping
> - * table. Later, when the probe fires, we only remap the arguments
> - * if the table is non-NULL.
> - *
> - for (i = 0; i < dhpb->dthpb_xargc; i++) {
> - if (dhpb->dthpb_args[i] != i) {
> - pp->ftp_argmap = dhpb->dthpb_args;
> - break;
> - }
> - } */
> + return;
> + oom:
> + dof_error(out, ENOMEM, "Out of memory allocating %zi bytes for probe",
> + msg_size);
> }
>
> static void
> diff --git a/libcommon/dof_parser.h b/libcommon/dof_parser.h
> index d3a6836f15fd..8f42d00551e3 100644
> --- a/libcommon/dof_parser.h
> +++ b/libcommon/dof_parser.h
> @@ -15,10 +15,19 @@
> #include <dtrace/helpers.h>
>
> /*
> - * Result of DOF probe parsing. We receive a provider info structure, followed
> - * by N probe info structures each of which is followed by possibly many
> - * tracepoint info structures, all tagged. Things not yet used for probe
> - * creation (like args, translated types, etc) are not returned.
> + * Result of DOF probe parsing. The order of elements in the parsed stream
> + * is:
> + *
> + * DIT_PROVIDER (at least 1, which contains...)
> + * DIT_PROBE (at least 1, each of which has...)
> + * DIT_ARGS_NATIVE (1, optional)
> + * DIT_ARGS_XLAT (1, optional)
> + * DIT_ARGS_MAP (1, optional)
> + * DIT_TRACEPOINT (any number >= 1)
> + *
> + * The dof_parsed.provider.flags word indicates the presence of the
> + * various optional args records in the following stream (you can rely on
> + * them if it simplifies things, but you don't have to).
> *
> * On error, a DIT_ERR structure is returned with an error message.
> */
> @@ -27,7 +36,10 @@ typedef enum dof_parsed_info {
> DIT_PROVIDER = 0,
> DIT_PROBE = 1,
> DIT_TRACEPOINT = 2,
> - DIT_ERR = 3
> + DIT_ERR = 3,
> + DIT_ARGS_NATIVE = 4,
> + DIT_ARGS_XLAT = 5,
> + DIT_ARGS_MAP = 6,
> } dof_parsed_info_t;
>
> /*
> @@ -37,7 +49,7 @@ typedef enum dof_parsed_info {
> * start which is the version of the dof_parseds within it. The data flowing
> * over the stream from the seccomped parser has no such prefix.
> */
> -#define DOF_PARSED_VERSION 1
> +#define DOF_PARSED_VERSION 2
>
> typedef struct dof_parsed {
> /*
> @@ -59,18 +71,58 @@ typedef struct dof_parsed {
> */
> char name[1];
> } provider;
> +
> struct dpi_probe_info {
> /*
> * Number of tracepoints that follow.
> */
> size_t ntp;
>
> + /*
> + * Number of native arguments that follow (if > 0, a
> + * DIT_ARGS_NATIVE will be received).
> + */
> + size_t nargc;
> +
> + /*
> + * Number of xlated arguments that follow (if > 0, a
> + * DIT_ARGS_XLAT and DIT_ARGS_MAP will be received).
> + */
> + size_t xargc;
> +
> /*
> * Probe module, function, and name (\0-separated).
> */
> char name[1];
> } probe;
>
> + /* V2+ only. */
> + struct dpi_probe_args_native_info {
> + /*
> + * Array of native args. nargc in length.
> + */
> + char args[1];
> + } nargs;
> +
> + /* V2+ only. */
> + struct dpi_probe_args_xlat_info {
> + /*
> + * Array of translated args. xargc in length.
> + */
> + char args[1];
> + } xargs;
> +
> + /*
> + * V2+ only.
> + */
> + struct dpi_probe_args_map_info {
> + /*
> + * Mapping from native arg index to xlated arg index.
> + * xargc in length.
> + */
> + int8_t argmap[1];
> + } argmap;
> +
> struct dpi_tracepoint_info {
> /*
> * Offset of this tracepoint.
> diff --git a/libdtrace/dt_pid.c b/libdtrace/dt_pid.c
> index 41a87955ec1b..71ac1754d343 100644
> --- a/libdtrace/dt_pid.c
> +++ b/libdtrace/dt_pid.c
> @@ -866,6 +866,9 @@ dt_pid_create_usdt_probes_proc(dtrace_hdl_t *dtp, dt_proc_t *dpr,
> uint64_t *dof_version;
> char *prv, *mod, *fun, *prb;
> dof_parsed_t *provider, *probe;
> + ssize_t nargvlen = 0, xargvlen = 0;
> + char *nargv = NULL, *xargv = NULL;
> + int8_t *argmap = NULL;
>
> /*
> * Regular files only: in particular, skip . and ..,
> @@ -917,6 +920,47 @@ dt_pid_create_usdt_probes_proc(dtrace_hdl_t *dtp, dt_proc_t *dpr,
> p += probe->size;
> seen_size += probe->size;
>
> + /*
> + * Assume the order given in dof_parser.h, for simplicity.
> + */
> + if (probe->probe.nargc > 0) {
> + dof_parsed_t *args = (dof_parsed_t *) p;
> +
> + if (!validate_dof_record(path, args, DIT_ARGS_NATIVE,
> + dof_buf_size, seen_size))
> + goto parse_err;
> +
> + nargv = args->nargs.args;
> + nargvlen = args->size - offsetof(dof_parsed_t, nargs.args);
> + assert(nargvlen >= 0);
> +
> + p += args->size;
> + seen_size += args->size;
> + }
> + if (probe->probe.xargc > 0) {
> + dof_parsed_t *args = (dof_parsed_t *) p;
> +
> + if (!validate_dof_record(path, args, DIT_ARGS_XLAT,
> + dof_buf_size, seen_size))
> + goto parse_err;
> +
> + xargv = args->xargs.args;
> + xargvlen = args->size - offsetof(dof_parsed_t, xargs.args);
> + assert(xargvlen >= 0);
> +
> + p += args->size;
> + seen_size += args->size;
> +
> + if (!validate_dof_record(path, args, DIT_ARGS_MAP,
> + dof_buf_size, seen_size))
> + goto parse_err;
> +
> + argmap = args->argmap.argmap;
> +
> + p += args->size;
> + seen_size += args->size;
> + }
> +
> /*
> * Now the parsed DOF for this probe's tracepoints.
> */
> @@ -967,6 +1011,21 @@ dt_pid_create_usdt_probes_proc(dtrace_hdl_t *dtp, dt_proc_t *dpr,
> psp.pps_off = tp->tracepoint.addr - pmp->pr_file->first_segment->pr_vaddr;
> psp.pps_nameoff = 0;
>
> + if (nargv) {
> + psp.pps_nargc = probe->probe.nargc;
> + psp.pps_nargvlen = nargvlen;
> + psp.pps_nargv = nargv;
> + }
> +
> + if (xargv) {
> + psp.pps_xargc = probe->probe.xargc;
> + psp.pps_xargvlen = xargvlen;
> + psp.pps_xargv = xargv;
> + }
> +
> + if (argmap)
> + psp.pps_argmap = argmap;
> +
> dt_dprintf("providing %s:%s:%s:%s for pid %d\n", psp.pps_prv,
> psp.pps_mod, psp.pps_fun, psp.pps_prb, psp.pps_pid);
> if (pvp->impl->provide_probe(dtp, &psp) < 0) {
> --
> 2.46.0.278.g36e3a12567
>
More information about the DTrace-devel
mailing list