[DTrace-devel] [PATCH v2 5/6] pid, uprobes: improve horrendously inefficient uprobe scanning
Kris Van Hees
kris.van.hees at oracle.com
Thu May 18 14:06:26 UTC 2023
On Wed, May 17, 2023 at 08:53:02PM +0100, Nick Alcock via DTrace-devel wrote:
> The uprobe scanning loop was meant to be an efficient loop that scanned
> the set of uprobes only once. Unfortunately, because it is invoked
> repeatedly to create each pid's uprobes in turn, it ends up parsing
> all the probes N times if you invoke dtrace with N distinct processes.
> Since each such parse involves multiple rounds of memory allocation
> and string manipulation to decode encoded probe names etc, this is
> a definite waste of time. Worse yet, since the uprobe list can change
> at any time, there is not even any guarantee that we're parsing the
> same thing each time!
>
> Fix this by having dt_pid_create_usdt_probes() call another function,
> dt_pid_scan_uprobes(), to pull in the list of known DTrace uprobes; it
> does this only once and stores the parsed probespecs in a new list of
> dt_pid_probespecs in the dtp. The probespecs still change a bit on each
> call to dt_pid_create_usdt_probes, because it stuffs a pid in there, but
> everything derived from uprobe_events is parsed only once and (in this
> implementation, currently) never changes thereafter, no matter how many
> probes are created.
>
> Signed-off-by: Nick Alcock <nick.alcock at oracle.com>
Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>
> ---
> include/dtrace/pid.h | 6 +-
> libdtrace/dt_impl.h | 11 +-
> libdtrace/dt_open.c | 3 +
> libdtrace/dt_pid.c | 286 +++++++++++++++++++++++++++++--------------
> libdtrace/dt_pid.h | 3 +-
> 5 files changed, 213 insertions(+), 96 deletions(-)
>
> diff --git a/include/dtrace/pid.h b/include/dtrace/pid.h
> index d200b7fe504b8..5e9668251dfd2 100644
> --- a/include/dtrace/pid.h
> +++ b/include/dtrace/pid.h
> @@ -2,7 +2,7 @@
> * Licensed under the Universal Permissive License v 1.0 as shown at
> * http://oss.oracle.com/licenses/upl.
> *
> - * Copyright (c) 2009, 2022, Oracle and/or its affiliates. All rights reserved.
> + * Copyright (c) 2009, 2023, Oracle and/or its affiliates. All rights reserved.
> */
>
> /*
> @@ -28,9 +28,9 @@ typedef enum pid_probetype {
>
> typedef struct pid_probespec {
> pid_probetype_t pps_type; /* probe type */
> - const char *pps_prv; /* provider (without pid) */
> + char *pps_prv; /* provider (without pid) */
> char *pps_mod; /* probe module (object) */
> - char pps_fun[DTRACE_FUNCNAMELEN]; /* probe function */
> + char *pps_fun; /* probe function */
> char *pps_prb; /* probe name (if provided) */
> dev_t pps_dev; /* object device node */
> ino_t pps_inum; /* object inode */
> diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
> index e240cef074162..581bb8a644e7c 100644
> --- a/libdtrace/dt_impl.h
> +++ b/libdtrace/dt_impl.h
> @@ -63,7 +63,7 @@ struct dt_pfdict; /* see <dt_printf.h> */
> struct dt_arg; /* see below */
> struct dt_provider; /* see <dt_provider.h> */
> struct dt_probe; /* see <dt_probe.h> */
> -struct dt_probe; /* see <dt_probe.h> */
> +struct pid_probespec; /* see <pid.h> */
> struct dt_pebset; /* see <dt_peb.h> */
> struct dt_xlator; /* see <dt_xlator.h> */
>
> @@ -312,9 +312,16 @@ struct dtrace_hdl {
> * Array of all known probes, to facilitate probe lookup by probe id.
> */
> struct dt_probe **dt_probes; /* array of probes */
> - uint32_t dt_probes_sz; /* size of array of probes */
> + size_t dt_probes_sz; /* size of array of probes */
> uint32_t dt_probe_id; /* next available probe id */
>
> + /*
> + * uprobes potentially of interest: some may be instantiated as
> + * dtrace probes.
> + */
> + struct pid_probespec *dt_uprobespecs;
> + size_t dt_uprobespecs_sz; /* size of array of uprobes */
> +
> struct dt_probe *dt_error; /* ERROR probe */
>
> dt_htab_t *dt_provs; /* hash table of dt_provider_t's */
> diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
> index 4818f61df2a69..f0dc2575377f2 100644
> --- a/libdtrace/dt_open.c
> +++ b/libdtrace/dt_open.c
> @@ -39,6 +39,7 @@
> #include <dt_probe.h>
> #include <dt_dis.h>
> #include <dt_peb.h>
> +#include <dt_pid.h>
>
> const dt_version_t _dtrace_versions[] = {
> DT_VERS_1_0, /* D API 1.0.0 (PSARC 2001/466) Solaris 10 FCS */
> @@ -1270,6 +1271,8 @@ dtrace_close(dtrace_hdl_t *dtp)
> dt_pfdict_destroy(dtp);
> dt_dof_fini(dtp);
> dt_probe_fini(dtp);
> + dt_pid_free_uprobespecs(dtp);
> +
> /*
> * FIXME:
> * add some dt_prov_fini() to iterate over providers and call provider-specific fini()'s
> diff --git a/libdtrace/dt_pid.c b/libdtrace/dt_pid.c
> index 4be230ace1bf8..627587b6bb51b 100644
> --- a/libdtrace/dt_pid.c
> +++ b/libdtrace/dt_pid.c
> @@ -27,6 +27,9 @@
> #include <dt_pid.h>
> #include <dt_string.h>
>
> +/*
> + * Information on a PID or USDT probe.
> + */
> typedef struct dt_pid_probe {
> dtrace_hdl_t *dpp_dtp;
> dt_pcb_t *dpp_pcb;
> @@ -95,6 +98,25 @@ dt_pid_error(dtrace_hdl_t *dtp, dt_pcb_t *pcb, dt_proc_t *dpr,
> return 1;
> }
>
> +void
> +dt_pid_free_uprobespecs(dtrace_hdl_t *dtp)
> +{
> + size_t i;
> +
> + if (!dtp->dt_uprobespecs)
> + return;
> +
> + for (i = 0; i < dtp->dt_uprobespecs_sz; i++) {
> + free(dtp->dt_uprobespecs[i].pps_prv);
> + free(dtp->dt_uprobespecs[i].pps_mod);
> + free(dtp->dt_uprobespecs[i].pps_fun);
> + free(dtp->dt_uprobespecs[i].pps_prb);
> + }
> +
> + free(dtp->dt_uprobespecs);
> + dtp->dt_uprobespecs = NULL;
> +}
> +
> static int
> dt_pid_create_fbt_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp,
> pid_probespec_t *psp, const GElf_Sym *symp, pid_probetype_t type)
> @@ -176,7 +198,7 @@ dt_pid_per_sym(dt_pid_probe_t *pp, const GElf_Sym *symp, const char *func)
> psp->pps_inum = pp->dpp_inum;
> psp->pps_fn = strdup(pp->dpp_fname);
> psp->pps_vaddr = pp->dpp_vaddr;
> - strcpy_safe(psp->pps_fun, sizeof(psp->pps_fun), func);
> + psp->pps_fun = (char *) func;
>
> if (!isdash && gmatch("return", pp->dpp_name)) {
> if (dt_pid_create_fbt_probe(pp->dpp_pr, dtp, psp, symp,
> @@ -571,35 +593,28 @@ dt_pid_create_pid_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp,
> }
>
> /*
> - * Rescan the PID uprobe list and create suitable underlying probes.
> - *
> - * If dpr is set, just set up probes relating to mappings found in that one
> - * process. (dpr must in this case be locked.)
> + * Scan the uprobe list and remember its contents.
> *
> - * If pdp is set, create overlying USDT probes for the specified probe
> - * description.
> - *
> - * Return 0 on success or -1 on error. (Failure to create specific underlying
> - * probes is not an error.)
> + * This avoids us having to rescan the whole thing every time we create every
> + * single probe in turn.
> */
> static int
> -dt_pid_create_usdt_probes(dtrace_hdl_t *dtp, dt_proc_t *dpr,
> - dtrace_probedesc_t *pdp, dt_pcb_t *pcb)
> +dt_pid_scan_uprobes(dtrace_hdl_t *dtp, dt_pcb_t *pcb)
> {
> - const dt_provider_t *pvp;
> + typedef struct uprobe_line
> + {
> + dt_list_t list;
> + char *line;
> + int is_enabled;
> + } uprobe_line_t;
> + dt_list_t lines = {0};
> + uprobe_line_t *linep, *old_linep;
> + size_t i = 0;
> + int ret = 0;
> +
> FILE *f;
> char *buf = NULL;
> size_t sz;
> - int ret = 0;
> -
> - pvp = dtp->dt_prov_usdt;
> - if (!pvp) {
> - pvp = dt_provider_lookup(dtp, "usdt");
> - assert(pvp != NULL);
> - dtp->dt_prov_usdt = pvp;
> - }
> -
> - assert(pvp->impl != NULL && pvp->impl->provide_probe != NULL);
>
> f = fopen(TRACEFS "uprobe_events", "r");
> if (!f) {
> @@ -609,76 +624,95 @@ dt_pid_create_usdt_probes(dtrace_hdl_t *dtp, dt_proc_t *dpr,
> }
>
> /*
> - * Systemwide probing: not yet implemented.
> + * We are only interested in pid uprobes, not any other uprobes that may
> + * exist. Some of these may be for pid probes, some for usdt: we keep
> + * track of all of them regardless.
> + *
> + * Suck in the list of uprobes in one go, since we need to run over it
> + * twice (once to count pids and allocate space, once to populate them)
> + * and it might change between reads.
> */
> - assert(dpr != NULL);
>
> - dt_dprintf("Scanning for usdt probes matching %i\n", dpr->dpr_pid);
> +#define UPROBE_PREFIX "p:dt_pid/p_"
> +#define UPROBE_IS_ENABLED_PREFIX "p:dt_pid_is_enabled/p_"
> +
> + while (getline(&buf, &sz, f) >= 0) {
> + uprobe_line_t *line;
> + int is_enabled;
> +
> + if (strncmp(buf, UPROBE_PREFIX,
> + strlen(UPROBE_PREFIX)) == 0)
> + is_enabled = 0;
> + else if (strncmp(buf, UPROBE_IS_ENABLED_PREFIX,
> + strlen(UPROBE_IS_ENABLED_PREFIX)) == 0)
> + is_enabled = 1;
> + else
> + continue;
> +
> + line = dt_zalloc(dtp, sizeof (struct uprobe_line));
> + if (!line) {
> + fclose(f);
> + goto err; /* errno is set for us. */
> + }
> +
> + line->line = buf;
> + line->is_enabled = is_enabled;
> + dt_list_append(&lines, line);
> + sz = 0;
> + dtp->dt_uprobespecs_sz++;
> + buf = NULL;
> + }
> + fclose(f);
> +
> + dtp->dt_uprobespecs = dt_calloc(dtp, dtp->dt_uprobespecs_sz,
> + sizeof(pid_probespec_t));
> + if (!dtp->dt_uprobespecs)
> + goto err; /* errno is set for us. */
>
> /*
> - * We are only interested in pid uprobes, not any other uprobes that may
> - * exist. Some of these may be for pid probes, some for usdt: we create
> - * underlying probes for all of them, except that we may only be
> - * interested in probes belonging to mappings in a particular process,
> - * in which case we create probes for that process only.
> + * Now we know how many specs exist, parse and create them.
> */
> - while (getline(&buf, &sz, f) >= 0) {
> - dev_t dev;
> - ino_t inum;
> + for (linep = dt_list_next(&lines); linep != NULL;
> + linep = dt_list_next(linep)) {
> uint64_t off;
> - int is_enabled;
> const char *fmt;
> - unsigned long long dev_ll, inum_ll;
> - char *inum_str = NULL;
> + unsigned long long dev, inum;
> char *spec = NULL;
> char *eprv = NULL, *emod = NULL, *efun = NULL, *eprb = NULL;
> char *prv = NULL, *mod = NULL, *fun = NULL, *prb = NULL;
> - pid_probespec_t psp;
> + pid_probespec_t *psp;
>
> -#define UPROBE_PREFIX "p:dt_pid/p_"
> -#define UPROBE_IS_ENABLED_PREFIX "p:dt_pid_is_enabled/p_"
> #define UPROBE_PROBE_FMT "%llx_%llx_%lx %ms P%m[^= ]=\\1 M%m[^= ]=\\2 F%m[^= ]=\\3 N%m[^= ]=\\4"
> #define UPROBE_FMT UPROBE_PREFIX UPROBE_PROBE_FMT
> #define UPROBE_IS_ENABLED_FMT UPROBE_IS_ENABLED_PREFIX UPROBE_PROBE_FMT
>
> - if (strncmp(buf, UPROBE_PREFIX, strlen(UPROBE_PREFIX)) == 0) {
> - is_enabled = 0;
> + if (!linep->is_enabled)
> fmt = UPROBE_FMT;
> - } else if (strncmp(buf, UPROBE_IS_ENABLED_PREFIX,
> - strlen(UPROBE_IS_ENABLED_PREFIX)) == 0) {
> - is_enabled = 1;
> + else
> fmt = UPROBE_IS_ENABLED_FMT;
> - }
> - else /* Not a DTrace uprobe. */
The above two lines should be one:
} else /* Not a DTrace uprobe. */
> - continue;
>
> - switch (sscanf(buf, fmt, &dev_ll, &inum_ll,
> + switch (sscanf(linep->line, fmt, &dev, &inum,
> &off, &spec, &eprv, &emod, &efun, &eprb)) {
> case 8: /* Includes dtrace probe names: decode them. */
> prv = uprobe_decode_name(eprv);
> mod = uprobe_decode_name(emod);
> fun = uprobe_decode_name(efun);
> prb = uprobe_decode_name(eprb);
> - dev = dev_ll;
> - inum = inum_ll;
> break;
> case 4: /* No dtrace probe name - not a USDT probe. */
> goto next;
> default:
> - if ((strlen(buf) > 0) && (buf[strlen(buf)-1] == '\n'))
> - buf[strlen(buf)-1] = 0;
> - dt_dprintf("Cannot parse %s as a DTrace uprobe name\n", buf);
> + if ((strlen(linep->line) > 0) &&
> + (linep->line[strlen(linep->line)-1] == '\n'))
> + linep->line[strlen(linep->line)-1] = 0;
> + dt_dprintf("Cannot parse %s as a DTrace uprobe name\n",
> + linep->line);
> + dtp->dt_uprobespecs_sz--;
> goto next;
> }
>
> - if (asprintf(&inum_str, "%llx", inum_ll) < 0)
> - goto next;
> -
> - /*
> - * Make the underlying probe, if not already present.
> - */
> - memset(&psp, 0, sizeof(pid_probespec_t));
> - psp.pps_type = is_enabled ? DTPPT_IS_ENABLED : DTPPT_OFFSETS;
> + psp = &dtp->dt_uprobespecs[i++];
> + psp->pps_type = linep->is_enabled ? DTPPT_IS_ENABLED : DTPPT_OFFSETS;
>
> /*
> * These components are only used for creation of an underlying
> @@ -686,26 +720,106 @@ dt_pid_create_usdt_probes(dtrace_hdl_t *dtp, dt_proc_t *dpr,
> * not explicitly listed in the D program, which will never be
> * enabled. In future this may change.
> */
> - psp.pps_prv = prv;
> - psp.pps_mod = mod;
> - strcpy_safe(psp.pps_fun, sizeof(psp.pps_fun), fun);
> - psp.pps_prb = prb;
> + psp->pps_prv = prv;
> + psp->pps_mod = mod;
> + psp->pps_fun = fun;
> + psp->pps_prb = prb;
>
> /*
> * Always used.
> */
> - psp.pps_dev = dev;
> - psp.pps_inum = inum;
> - psp.pps_off = off;
> + psp->pps_dev = dev;
> + psp->pps_inum = inum;
> + psp->pps_off = off;
> + next:
> + free(eprv); free(emod); free(efun); free(eprb);
> + free(spec);
> + }
> +
> + goto out;
> +
> +err:
> + ret = -1;
> +
> +out:
> + old_linep = NULL;
> +
> + for (linep = dt_list_next(&lines); linep != NULL;
> + linep = dt_list_next(linep)) {
> + free(linep->line);
> + free(old_linep);
> + old_linep = linep;
> + }
> + free(old_linep);
> +
> + return ret;
> +}
> +
> +/*
> + * Rescan the PID uprobe list and create suitable underlying probes.
> + *
> + * If dpr is set, just set up probes relating to mappings found in that one
> + * process. (dpr must in this case be locked.)
> + *
> + * If pdp is set, create overlying USDT probes for the specified probe
> + * description.
> + *
> + * Return 0 on success or -1 on error. (Failure to create specific underlying
> + * probes is not an error.)
> + */
> +static int
> +dt_pid_create_usdt_probes(dtrace_hdl_t *dtp, dt_proc_t *dpr,
> + dtrace_probedesc_t *pdp, dt_pcb_t *pcb)
> +{
> + const dt_provider_t *pvp;
> + size_t i;
> + int ret = 0;
> +
> + /*
> + * Systemwide probing: not yet implemented.
> + */
> + assert(dpr != NULL);
> +
> + dt_dprintf("Scanning for usdt probes matching %i\n", dpr->dpr_pid);
> +
> + /*
> + * For now, we only read the list of probes once. In time we will
> + * reread it whenever necessary.
> + */
> + pvp = dtp->dt_prov_usdt;
> + if (!pvp) {
> + pvp = dt_provider_lookup(dtp, "usdt");
> + assert(pvp != NULL);
> + dtp->dt_prov_usdt = pvp;
> + if (dt_pid_scan_uprobes(dtp, pcb) < 0)
> + return -1; /* errno is set for us. */
> + }
> +
> + assert(pvp->impl != NULL && pvp->impl->provide_probe != NULL);
> +
> + /*
> + * We are only interested in pid uprobes, not any other uprobes that may
> + * exist. Some of these may be for pid probes, some for usdt: we create
> + * underlying probes for all of them, if we are interested in creating
> + * mappings for that process at all.
> + */
> + for (i = 0; i < dtp->dt_uprobespecs_sz; i++) {
> + pid_probespec_t *psp = &dtp->dt_uprobespecs[i];
>
> /*
> * Filter out probes not related to the process of interest.
> */
> if (dpr && dpr->dpr_proc) {
> assert(MUTEX_HELD(&dpr->dpr_lock));
> - if (Pinode_to_file_map(dpr->dpr_proc, dev, inum) == NULL)
> - goto next;
> - psp.pps_pid = Pgetpid(dpr->dpr_proc);
> + if (Pinode_to_file_map(dpr->dpr_proc, psp->pps_dev,
> + psp->pps_inum) == NULL)
> + continue;
> +
> + /*
> + * This is overwritten repeatedly with each relevant PID
> + * in turn.
> + */
> + psp->pps_pid = Pgetpid(dpr->dpr_proc);
> }
>
> /*
> @@ -722,15 +836,15 @@ dt_pid_create_usdt_probes(dtrace_hdl_t *dtp, dt_proc_t *dpr,
>
> if ((pdp->prv[0] != 0 &&
> strcmp(dt_pid_de_pid(de_pid_prov, pdp->prv),
> - psp.pps_prv) != 0) ||
> - (pdp->mod[0] != 0 && strcmp(pdp->mod, psp.pps_mod) != 0) ||
> - (pdp->fun[0] != 0 && strcmp(pdp->fun, psp.pps_fun) != 0) ||
> - (pdp->prb[0] != 0 && strcmp(pdp->prb, psp.pps_prb) != 0)) {
> + psp->pps_prv) != 0) ||
> + (pdp->mod[0] != 0 && strcmp(pdp->mod, psp->pps_mod) != 0) ||
> + (pdp->fun[0] != 0 && strcmp(pdp->fun, psp->pps_fun) != 0) ||
> + (pdp->prb[0] != 0 && strcmp(pdp->prb, psp->pps_prb) != 0)) {
> dt_dprintf("%s:%s:%s:%s -> %s:%s:%s:%s: match failure\n",
> pdp->prv, pdp->mod, pdp->fun, pdp->prb,
> - psp.pps_prv, psp.pps_mod, psp.pps_fun,
> - psp.pps_prb);
> - goto next;
> + psp->pps_prv, psp->pps_mod, psp->pps_fun,
> + psp->pps_prb);
> + continue;
> }
> }
>
> @@ -744,11 +858,11 @@ dt_pid_create_usdt_probes(dtrace_hdl_t *dtp, dt_proc_t *dpr,
>
> dt_dprintf("providing %s:%s:%s:%s\n", pdp->prv, pdp->mod,
> pdp->fun, pdp->prb);
> - if (pvp->impl->provide_probe(dtp, &psp) < 0 && pdp) {
> + if (pvp->impl->provide_probe(dtp, psp) < 0 && pdp) {
> dt_pid_error(dtp, pcb, dpr, D_PROC_USDT,
> "failed to instantiate %sprobe %s for pid %d: %s",
> - is_enabled ? "is-enabled ": "", pdp->prb,
> - dpr->dpr_pid,
> + psp->pps_type == DTPPT_IS_ENABLED ?
> + "is-enabled ": "", pdp->prb, dpr->dpr_pid,
> dtrace_errmsg(dtp, dtrace_errno(dtp)));
> ret = -1;
> }
> @@ -759,15 +873,7 @@ dt_pid_create_usdt_probes(dtrace_hdl_t *dtp, dt_proc_t *dpr,
> */
> dt_pid_fix_mod(NULL, pdp, dtp, dpr->dpr_pid);
> }
> -
> - next:
> - free(eprv); free(emod); free(efun); free(eprb);
> - free(prv); free(mod); free(fun); free(prb);
> - free(inum_str);
> - free(spec);
> - continue;
> }
> - free(buf);
>
> return ret;
> }
> diff --git a/libdtrace/dt_pid.h b/libdtrace/dt_pid.h
> index 0576848128159..e06186eb458b3 100644
> --- a/libdtrace/dt_pid.h
> +++ b/libdtrace/dt_pid.h
> @@ -1,6 +1,6 @@
> /*
> * Oracle Linux DTrace.
> - * Copyright (c) 2006, 2022, Oracle and/or its affiliates. All rights reserved.
> + * Copyright (c) 2006, 2023, Oracle and/or its affiliates. All rights reserved.
> * Licensed under the Universal Permissive License v 1.0 as shown at
> * http://oss.oracle.com/licenses/upl.
> */
> @@ -21,6 +21,7 @@ extern int dt_pid_create_probes(dtrace_probedesc_t *, dtrace_hdl_t *,
> extern int dt_pid_create_probes_module(dtrace_hdl_t *, dt_proc_t *);
> extern pid_t dt_pid_get_pid(const dtrace_probedesc_t *, dtrace_hdl_t *, dt_pcb_t *,
> dt_proc_t *);
> +extern void dt_pid_free_uprobespecs(dtrace_hdl_t *);
>
> #ifdef __cplusplus
> }
> --
> 2.39.1.268.g9de2f9a303
>
>
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel
More information about the DTrace-devel
mailing list