[DTrace-devel] [PATCH 5/5] pid, uprobes: improve horrendously inefficient uprobe scanning

Nick Alcock nick.alcock at oracle.com
Mon Apr 24 19:08:16 UTC 2023


The uprobe scanning loop was meant to be an efficient loop that scanned
the set of uprobes only once.  Unfortunately, because it is invoked
repeatedly to create each pid's uprobes in turn, it ends up parsing
all the probes N times if you invoke dtrace with N distinct processes.
Since each such parse involves multiple rounds of memory allocation
and string manipulation to decode encoded probe names etc, this is
a definite waste of time. Worse yet, since the uprobe list can change
at any time, there is not even any guarantee that we're parsing the
same thing each time!

Fix this by having dt_pid_create_usdt_probes() call another function,
dt_pid_rescan_uprobes(), to pull in the list of known DTrace uprobes; it
does this only once and stores the parsed probespecs in a new list of
dt_pid_probespecs in the dtp.  The probespecs still change a bit on
each call to dt_pid_create_usdt_probes, because it stuffs a pid in
there, but everything derived from uprobe_events is parsed only
once and (in this implementation, currently) never changes thereafter,
no matter how many probes are created.

Signed-off-by: Nick Alcock <nick.alcock at oracle.com>
---
 include/dtrace/pid.h |   6 +-
 libdtrace/dt_impl.h  |  11 +-
 libdtrace/dt_open.c  |   3 +
 libdtrace/dt_pid.c   | 286 +++++++++++++++++++++++++++++--------------
 libdtrace/dt_pid.h   |   3 +-
 5 files changed, 213 insertions(+), 96 deletions(-)

diff --git a/include/dtrace/pid.h b/include/dtrace/pid.h
index d200b7fe504b8..5e9668251dfd2 100644
--- a/include/dtrace/pid.h
+++ b/include/dtrace/pid.h
@@ -2,7 +2,7 @@
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  *
- * Copyright (c) 2009, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2009, 2023, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -28,9 +28,9 @@ typedef enum pid_probetype {
 
 typedef struct pid_probespec {
 	pid_probetype_t pps_type;		/* probe type */
-	const char *pps_prv;			/* provider (without pid) */
+	char *pps_prv;				/* provider (without pid) */
 	char *pps_mod;				/* probe module (object) */
-	char pps_fun[DTRACE_FUNCNAMELEN];	/* probe function */
+	char *pps_fun;				/* probe function */
 	char *pps_prb;				/* probe name (if provided) */
 	dev_t pps_dev;				/* object device node */
 	ino_t pps_inum;				/* object inode */
diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
index e240cef074162..581bb8a644e7c 100644
--- a/libdtrace/dt_impl.h
+++ b/libdtrace/dt_impl.h
@@ -63,7 +63,7 @@ struct dt_pfdict;		/* see <dt_printf.h> */
 struct dt_arg;			/* see below */
 struct dt_provider;		/* see <dt_provider.h> */
 struct dt_probe;		/* see <dt_probe.h> */
-struct dt_probe;		/* see <dt_probe.h> */
+struct pid_probespec;		/* see <pid.h> */
 struct dt_pebset;		/* see <dt_peb.h> */
 struct dt_xlator;		/* see <dt_xlator.h> */
 
@@ -312,9 +312,16 @@ struct dtrace_hdl {
 	 * Array of all known probes, to facilitate probe lookup by probe id.
 	 */
 	struct dt_probe **dt_probes; /* array of probes */
-	uint32_t dt_probes_sz;	/* size of array of probes */
+	size_t dt_probes_sz;	/* size of array of probes */
 	uint32_t dt_probe_id;	/* next available probe id */
 
+	/*
+	 * uprobes potentially of interest: some may be instantiated as
+	 * dtrace probes.
+	 */
+	struct pid_probespec *dt_uprobespecs;
+	size_t dt_uprobespecs_sz; /* size of array of uprobes */
+
 	struct dt_probe *dt_error; /* ERROR probe */
 
 	dt_htab_t *dt_provs;	/* hash table of dt_provider_t's */
diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
index 4818f61df2a69..f0dc2575377f2 100644
--- a/libdtrace/dt_open.c
+++ b/libdtrace/dt_open.c
@@ -39,6 +39,7 @@
 #include <dt_probe.h>
 #include <dt_dis.h>
 #include <dt_peb.h>
+#include <dt_pid.h>
 
 const dt_version_t _dtrace_versions[] = {
 	DT_VERS_1_0,	/* D API 1.0.0 (PSARC 2001/466) Solaris 10 FCS */
@@ -1270,6 +1271,8 @@ dtrace_close(dtrace_hdl_t *dtp)
 	dt_pfdict_destroy(dtp);
 	dt_dof_fini(dtp);
 	dt_probe_fini(dtp);
+	dt_pid_free_uprobespecs(dtp);
+
 	/*
 	 * FIXME:
 	 * add some dt_prov_fini() to iterate over providers and call provider-specific fini()'s
diff --git a/libdtrace/dt_pid.c b/libdtrace/dt_pid.c
index 4be230ace1bf8..319441c0bff69 100644
--- a/libdtrace/dt_pid.c
+++ b/libdtrace/dt_pid.c
@@ -27,6 +27,9 @@
 #include <dt_pid.h>
 #include <dt_string.h>
 
+/*
+ * Information on a PID or USDT probe.
+ */
 typedef struct dt_pid_probe {
 	dtrace_hdl_t *dpp_dtp;
 	dt_pcb_t *dpp_pcb;
@@ -95,6 +98,25 @@ dt_pid_error(dtrace_hdl_t *dtp, dt_pcb_t *pcb, dt_proc_t *dpr,
 	return 1;
 }
 
+void
+dt_pid_free_uprobespecs(dtrace_hdl_t *dtp)
+{
+	size_t i;
+
+	if (!dtp->dt_uprobespecs)
+		return;
+
+	for (i = 0; i < dtp->dt_uprobespecs_sz; i++) {
+		free(dtp->dt_uprobespecs[i].pps_prv);
+		free(dtp->dt_uprobespecs[i].pps_mod);
+		free(dtp->dt_uprobespecs[i].pps_fun);
+		free(dtp->dt_uprobespecs[i].pps_prb);
+	}
+
+	free(dtp->dt_uprobespecs);
+	dtp->dt_uprobespecs = NULL;
+}
+
 static int
 dt_pid_create_fbt_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp,
     pid_probespec_t *psp, const GElf_Sym *symp, pid_probetype_t type)
@@ -176,7 +198,7 @@ dt_pid_per_sym(dt_pid_probe_t *pp, const GElf_Sym *symp, const char *func)
 	psp->pps_inum = pp->dpp_inum;
 	psp->pps_fn = strdup(pp->dpp_fname);
 	psp->pps_vaddr = pp->dpp_vaddr;
-	strcpy_safe(psp->pps_fun, sizeof(psp->pps_fun), func);
+	psp->pps_fun = (char *) func;
 
 	if (!isdash && gmatch("return", pp->dpp_name)) {
 		if (dt_pid_create_fbt_probe(pp->dpp_pr, dtp, psp, symp,
@@ -571,35 +593,28 @@ dt_pid_create_pid_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp,
 }
 
 /*
- * Rescan the PID uprobe list and create suitable underlying probes.
- *
- * If dpr is set, just set up probes relating to mappings found in that one
- * process.  (dpr must in this case be locked.)
+ * Rescan the uprobe list and remember its contents.
  *
- * If pdp is set, create overlying USDT probes for the specified probe
- * description.
- *
- * Return 0 on success or -1 on error.  (Failure to create specific underlying
- * probes is not an error.)
+ * This avoids us having to rescan the whole thing every time we create every
+ * single probe in turn.
  */
 static int
-dt_pid_create_usdt_probes(dtrace_hdl_t *dtp, dt_proc_t *dpr,
-			  dtrace_probedesc_t *pdp, dt_pcb_t *pcb)
+dt_pid_rescan_uprobes(dtrace_hdl_t *dtp, dt_pcb_t *pcb)
 {
-	const dt_provider_t *pvp;
+	typedef struct uprobe_line
+	{
+		dt_list_t list;
+		char *line;
+		int is_enabled;
+	} uprobe_line_t;
+	dt_list_t lines = {0};
+	uprobe_line_t *linep, *old_linep;
+	size_t i = 0;
+	int ret = 0;
+
 	FILE *f;
 	char *buf = NULL;
 	size_t sz;
-	int ret = 0;
-
-	pvp = dtp->dt_prov_usdt;
-	if (!pvp) {
-		pvp = dt_provider_lookup(dtp, "usdt");
-		assert(pvp != NULL);
-		dtp->dt_prov_usdt = pvp;
-	}
-
-	assert(pvp->impl != NULL && pvp->impl->provide_probe != NULL);
 
 	f = fopen(TRACEFS "uprobe_events", "r");
 	if (!f) {
@@ -609,76 +624,95 @@ dt_pid_create_usdt_probes(dtrace_hdl_t *dtp, dt_proc_t *dpr,
 	}
 
 	/*
-	 * Systemwide probing: not yet implemented.
+	 * We are only interested in pid uprobes, not any other uprobes that may
+	 * exist.  Some of these may be for pid probes, some for usdt: we keep
+	 * track of all of them regardless.
+	 *
+	 * Suck in the list of uprobes in one go, since we need to run over it
+	 * twice (once to count pids and allocate space, once to populate them)
+	 * and it might change between reads.
 	 */
-	assert(dpr != NULL);
 
-	dt_dprintf("Scanning for usdt probes matching %i\n", dpr->dpr_pid);
+#define UPROBE_PREFIX "p:dt_pid/p_"
+#define UPROBE_IS_ENABLED_PREFIX "p:dt_pid_is_enabled/p_"
+
+	while (getline(&buf, &sz, f) >= 0) {
+		uprobe_line_t *line;
+		int is_enabled;
+
+		if (strncmp(buf, UPROBE_PREFIX,
+			strlen(UPROBE_PREFIX)) == 0)
+			is_enabled = 0;
+		else if (strncmp(buf, UPROBE_IS_ENABLED_PREFIX,
+			strlen(UPROBE_IS_ENABLED_PREFIX)) == 0)
+			is_enabled = 1;
+		else
+			continue;
+
+		line = dt_zalloc(dtp, sizeof (struct uprobe_line));
+		if (!line) {
+			fclose(f);
+			goto err; 		/* errno is set for us. */
+		}
+
+		line->line = buf;
+		line->is_enabled = is_enabled;
+		dt_list_append(&lines, line);
+		sz = 0;
+		dtp->dt_uprobespecs_sz++;
+		buf = NULL;
+	}
+	fclose(f);
+
+	dtp->dt_uprobespecs = dt_calloc(dtp, dtp->dt_uprobespecs_sz,
+	    sizeof(pid_probespec_t));
+	if (!dtp->dt_uprobespecs)
+		goto err;			/* errno is set for us.  */
 
 	/*
-	 * We are only interested in pid uprobes, not any other uprobes that may
-	 * exist.  Some of these may be for pid probes, some for usdt: we create
-	 * underlying probes for all of them, except that we may only be
-	 * interested in probes belonging to mappings in a particular process,
-	 * in which case we create probes for that process only.
+	 * Now we know how many specs exist, parse and create them.
 	 */
-	while (getline(&buf, &sz, f) >= 0) {
-		dev_t dev;
-		ino_t inum;
+	for (linep = dt_list_next(&lines); linep != NULL;
+	     linep = dt_list_next(linep)) {
 		uint64_t off;
-		int is_enabled;
 		const char *fmt;
-		unsigned long long dev_ll, inum_ll;
-		char *inum_str = NULL;
+		unsigned long long dev, inum;
 		char *spec = NULL;
 		char *eprv = NULL, *emod = NULL, *efun = NULL, *eprb = NULL;
 		char *prv = NULL, *mod = NULL, *fun = NULL, *prb = NULL;
-		pid_probespec_t psp;
+		pid_probespec_t *psp;
 
-#define UPROBE_PREFIX "p:dt_pid/p_"
-#define UPROBE_IS_ENABLED_PREFIX "p:dt_pid_is_enabled/p_"
 #define UPROBE_PROBE_FMT "%llx_%llx_%lx %ms P%m[^= ]=\\1 M%m[^= ]=\\2 F%m[^= ]=\\3 N%m[^= ]=\\4"
 #define UPROBE_FMT UPROBE_PREFIX UPROBE_PROBE_FMT
 #define UPROBE_IS_ENABLED_FMT UPROBE_IS_ENABLED_PREFIX UPROBE_PROBE_FMT
 
-		if (strncmp(buf, UPROBE_PREFIX, strlen(UPROBE_PREFIX)) == 0) {
-			is_enabled = 0;
+		if (!linep->is_enabled)
 			fmt = UPROBE_FMT;
-		} else if (strncmp(buf, UPROBE_IS_ENABLED_PREFIX,
-			strlen(UPROBE_IS_ENABLED_PREFIX)) == 0) {
-			is_enabled = 1;
+		else
 			fmt = UPROBE_IS_ENABLED_FMT;
-		}
-		else /* Not a DTrace uprobe. */
-			continue;
 
-		switch (sscanf(buf, fmt, &dev_ll, &inum_ll,
+		switch (sscanf(linep->line, fmt, &dev, &inum,
 			       &off, &spec, &eprv, &emod, &efun, &eprb)) {
 		case 8: /* Includes dtrace probe names: decode them. */
 			prv = uprobe_decode_name(eprv);
 			mod = uprobe_decode_name(emod);
 			fun = uprobe_decode_name(efun);
 			prb = uprobe_decode_name(eprb);
-			dev = dev_ll;
-			inum = inum_ll;
 			break;
 		case 4: /* No dtrace probe name - not a USDT probe. */
 			goto next;
 		default:
-			if ((strlen(buf) > 0) && (buf[strlen(buf)-1] == '\n'))
-				buf[strlen(buf)-1] = 0;
-			dt_dprintf("Cannot parse %s as a DTrace uprobe name\n", buf);
+			if ((strlen(linep->line) > 0) &&
+			    (linep->line[strlen(linep->line)-1] == '\n'))
+				linep->line[strlen(linep->line)-1] = 0;
+			dt_dprintf("Cannot parse %s as a DTrace uprobe name\n",
+			    linep->line);
+			dtp->dt_uprobespecs_sz--;
 			goto next;
 		}
 
-		if (asprintf(&inum_str, "%llx", inum_ll) < 0)
-			goto next;
-
-		/*
-		 * Make the underlying probe, if not already present.
-		 */
-		memset(&psp, 0, sizeof(pid_probespec_t));
-		psp.pps_type = is_enabled ? DTPPT_IS_ENABLED : DTPPT_OFFSETS;
+		psp = &dtp->dt_uprobespecs[i++];
+		psp->pps_type = linep->is_enabled ? DTPPT_IS_ENABLED : DTPPT_OFFSETS;
 
 		/*
 		 * These components are only used for creation of an underlying
@@ -686,26 +720,106 @@ dt_pid_create_usdt_probes(dtrace_hdl_t *dtp, dt_proc_t *dpr,
 		 * not explicitly listed in the D program, which will never be
 		 * enabled.  In future this may change.
 		 */
-		psp.pps_prv = prv;
-		psp.pps_mod = mod;
-		strcpy_safe(psp.pps_fun, sizeof(psp.pps_fun), fun);
-		psp.pps_prb = prb;
+		psp->pps_prv = prv;
+		psp->pps_mod = mod;
+		psp->pps_fun = fun;
+		psp->pps_prb = prb;
 
 		/*
 		 * Always used.
 		 */
-		psp.pps_dev = dev;
-		psp.pps_inum = inum;
-		psp.pps_off = off;
+		psp->pps_dev = dev;
+		psp->pps_inum = inum;
+		psp->pps_off = off;
+	next:
+		free(eprv); free(emod); free(efun); free(eprb);
+		free(spec);
+	}
+
+	goto out;
+
+err:
+	ret = -1;
+
+out:
+	old_linep = NULL;
+
+	for (linep = dt_list_next(&lines); linep != NULL;
+	     linep = dt_list_next(linep)) {
+		free(linep->line);
+		free(old_linep);
+		old_linep = linep;
+	}
+	free(old_linep);
+
+	return ret;
+}
+
+/*
+ * Rescan the PID uprobe list and create suitable underlying probes.
+ *
+ * If dpr is set, just set up probes relating to mappings found in that one
+ * process.  (dpr must in this case be locked.)
+ *
+ * If pdp is set, create overlying USDT probes for the specified probe
+ * description.
+ *
+ * Return 0 on success or -1 on error.  (Failure to create specific underlying
+ * probes is not an error.)
+ */
+static int
+dt_pid_create_usdt_probes(dtrace_hdl_t *dtp, dt_proc_t *dpr,
+			  dtrace_probedesc_t *pdp, dt_pcb_t *pcb)
+{
+	const dt_provider_t *pvp;
+	size_t i;
+	int ret = 0;
+
+	/*
+	 * Systemwide probing: not yet implemented.
+	 */
+	assert(dpr != NULL);
+
+	dt_dprintf("Scanning for usdt probes matching %i\n", dpr->dpr_pid);
+
+	/*
+	 * For now, we only read the list of probes once.  In time we will
+	 * reread it whenever necessary.
+	 */
+	pvp = dtp->dt_prov_usdt;
+	if (!pvp) {
+		pvp = dt_provider_lookup(dtp, "usdt");
+		assert(pvp != NULL);
+		dtp->dt_prov_usdt = pvp;
+		if (dt_pid_rescan_uprobes(dtp, pcb) < 0)
+			return -1;		/* errno is set for us.  */
+	}
+
+	assert(pvp->impl != NULL && pvp->impl->provide_probe != NULL);
+
+	/*
+	 * We are only interested in pid uprobes, not any other uprobes that may
+	 * exist.  Some of these may be for pid probes, some for usdt: we create
+	 * underlying probes for all of them, if we are interested in creating
+	 * mappings for that process at all.
+	 */
+	for (i = 0; i < dtp->dt_uprobespecs_sz; i++) {
+		pid_probespec_t *psp = &dtp->dt_uprobespecs[i];
 
 		/*
 		 * Filter out probes not related to the process of interest.
 		 */
 		if (dpr && dpr->dpr_proc) {
 			assert(MUTEX_HELD(&dpr->dpr_lock));
-			if (Pinode_to_file_map(dpr->dpr_proc, dev, inum) == NULL)
-				goto next;
-			psp.pps_pid = Pgetpid(dpr->dpr_proc);
+			if (Pinode_to_file_map(dpr->dpr_proc, psp->pps_dev,
+				psp->pps_inum) == NULL)
+				continue;
+
+			/*
+			 * This is overwritten repeatedly with each relevant PID
+			 * in turn.
+			 */
+			psp->pps_pid = Pgetpid(dpr->dpr_proc);
 		}
 
 		/*
@@ -722,15 +836,15 @@ dt_pid_create_usdt_probes(dtrace_hdl_t *dtp, dt_proc_t *dpr,
 
 			if ((pdp->prv[0] != 0 &&
 			     strcmp(dt_pid_de_pid(de_pid_prov, pdp->prv),
-				    psp.pps_prv) != 0) ||
-			    (pdp->mod[0] != 0 && strcmp(pdp->mod, psp.pps_mod) != 0) ||
-			    (pdp->fun[0] != 0 && strcmp(pdp->fun, psp.pps_fun) != 0) ||
-			    (pdp->prb[0] != 0 && strcmp(pdp->prb, psp.pps_prb) != 0)) {
+				    psp->pps_prv) != 0) ||
+			    (pdp->mod[0] != 0 && strcmp(pdp->mod, psp->pps_mod) != 0) ||
+			    (pdp->fun[0] != 0 && strcmp(pdp->fun, psp->pps_fun) != 0) ||
+			    (pdp->prb[0] != 0 && strcmp(pdp->prb, psp->pps_prb) != 0)) {
 				dt_dprintf("%s:%s:%s:%s -> %s:%s:%s:%s: match failure\n",
 					   pdp->prv, pdp->mod, pdp->fun, pdp->prb,
-					   psp.pps_prv, psp.pps_mod, psp.pps_fun,
-					   psp.pps_prb);
-				goto next;
+					   psp->pps_prv, psp->pps_mod, psp->pps_fun,
+					   psp->pps_prb);
+				continue;
 			}
 		}
 
@@ -744,11 +858,11 @@ dt_pid_create_usdt_probes(dtrace_hdl_t *dtp, dt_proc_t *dpr,
 
 		dt_dprintf("providing %s:%s:%s:%s\n", pdp->prv, pdp->mod,
 			   pdp->fun, pdp->prb);
-		if (pvp->impl->provide_probe(dtp, &psp) < 0 && pdp) {
+		if (pvp->impl->provide_probe(dtp, psp) < 0 && pdp) {
 			dt_pid_error(dtp, pcb, dpr, D_PROC_USDT,
 				     "failed to instantiate %sprobe %s for pid %d: %s",
-				     is_enabled ? "is-enabled ": "", pdp->prb,
-				     dpr->dpr_pid,
+				     psp->pps_type == DTPPT_IS_ENABLED ?
+			    	     "is-enabled ": "", pdp->prb, dpr->dpr_pid,
 				     dtrace_errmsg(dtp, dtrace_errno(dtp)));
 			ret = -1;
 		}
@@ -759,15 +873,7 @@ dt_pid_create_usdt_probes(dtrace_hdl_t *dtp, dt_proc_t *dpr,
 			 */
 			dt_pid_fix_mod(NULL, pdp, dtp, dpr->dpr_pid);
 		}
-
-	next:
-		free(eprv); free(emod); free(efun); free(eprb);
-		free(prv);  free(mod);  free(fun);  free(prb);
-		free(inum_str);
-		free(spec);
-		continue;
 	}
-	free(buf);
 
 	return ret;
 }
diff --git a/libdtrace/dt_pid.h b/libdtrace/dt_pid.h
index 0576848128159..e06186eb458b3 100644
--- a/libdtrace/dt_pid.h
+++ b/libdtrace/dt_pid.h
@@ -1,6 +1,6 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2006, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2023, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -21,6 +21,7 @@ extern int dt_pid_create_probes(dtrace_probedesc_t *, dtrace_hdl_t *,
 extern int dt_pid_create_probes_module(dtrace_hdl_t *, dt_proc_t *);
 extern pid_t dt_pid_get_pid(const dtrace_probedesc_t *, dtrace_hdl_t *, dt_pcb_t *,
 			    dt_proc_t *);
+extern void dt_pid_free_uprobespecs(dtrace_hdl_t *);
 
 #ifdef	__cplusplus
 }
-- 
2.39.1.268.g9de2f9a303




More information about the DTrace-devel mailing list