[DTrace-devel] [PATCH 44/47] Fix creation of FBT return probes

Kris Van Hees kris.van.hees at oracle.com
Sun May 3 20:18:22 PDT 2020


The FBT provider was looking for a EVENTSFS/kprobes/<fun>/format to
determine the event id for the probe.  If this file did not exist, a
new kprobe or kretprobe (entry vs return) would be created and we
would try to read that file again.  Eugene Loh flagged the problem
in the implementation: both entry and return probe for a given
function were retrieving the id for the same kprobe event, as given
by the file pattern shown above.

Since a given function can (and often will) have an entry probe and
a return probe associated with it, they must be created as distinct
events in the kernel tracing infrastructure.

Dynamically created probes like kprobes, kretprobes, and uprobes
(used for the BEGIN and END probes) are grouped based on the dtrace
instance that creates them (pid), provider name, and an optional
extra distinction.  The BEGIN and END probes are created under the
dt_<pid>_dtrace group, whereas FBT entry probes are created under
the dt_<pid>_fbt_entry group, etc.

The dtrace provider probe creation and removal have been updated in
favour of a more generic implementation that is shared with the FBT
provider.

The SDT provider has been updated to filter out any probes from our
groups.

Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
---
 libdtrace/dt_prov_dtrace.c |  14 ++---
 libdtrace/dt_prov_fbt.c    | 103 +++++++++++++++++++++++--------------
 libdtrace/dt_prov_sdt.c    |  40 ++++++++------
 libdtrace/dt_provider.h    |  10 +++-
 4 files changed, 103 insertions(+), 64 deletions(-)

diff --git a/libdtrace/dt_prov_dtrace.c b/libdtrace/dt_prov_dtrace.c
index ad4c7fac..b7fb4ded 100644
--- a/libdtrace/dt_prov_dtrace.c
+++ b/libdtrace/dt_prov_dtrace.c
@@ -290,20 +290,20 @@ static int probe_info(dtrace_hdl_t *dtp, const dt_probe_t *prp,
 	if (fd == -1)
 		goto out;
 
-	rc = dprintf(fd, "p:" DTRACE_PAT "/%s %s\n",
-		     getpid(), prp->desc->prb, spec);
+	rc = dprintf(fd, "p:" GROUP_FMT "/%s %s\n",
+		     GROUP_DATA, prp->desc->prb, spec);
 	close(fd);
 	if (rc == -1)
 		goto out;
 
-	len = snprintf(NULL, 0, "%s" DTRACE_PAT "/%s/format",
-		       EVENTSFS, getpid(), prp->desc->prb) + 1;
+	len = snprintf(NULL, 0, "%s" GROUP_FMT "/%s/format",
+		       EVENTSFS, GROUP_DATA, prp->desc->prb) + 1;
 	fn = dt_alloc(dtp, len);
 	if (fn == NULL)
 		goto out;
 
-	snprintf(fn, len, "%s" DTRACE_PAT "/%s/format",
-		 EVENTSFS, getpid(), prp->desc->prb);
+	snprintf(fn, len, "%s" GROUP_FMT "/%s/format",
+		 EVENTSFS, GROUP_DATA, prp->desc->prb);
 	f = fopen(fn, "r");
 	if (f == NULL)
 		goto out;
@@ -342,7 +342,7 @@ static int probe_fini(dtrace_hdl_t *dtp, dt_probe_t *prp)
 	if (fd == -1)
 		return -1;
 
-	dprintf(fd, "-:" DTRACE_PAT "/%s\n", getpid(), prp->desc->prb);
+	dprintf(fd, "-:" GROUP_FMT "/%s\n", GROUP_DATA, prp->desc->prb);
 	close(fd);
 
 	return 0;
diff --git a/libdtrace/dt_prov_fbt.c b/libdtrace/dt_prov_fbt.c
index fa7e28bf..2cec5146 100644
--- a/libdtrace/dt_prov_fbt.c
+++ b/libdtrace/dt_prov_fbt.c
@@ -43,7 +43,8 @@ static const char		modname[] = "vmlinux";
 #define KPROBE_EVENTS		TRACEFS "kprobe_events"
 #define PROBE_LIST		TRACEFS "available_filter_functions"
 
-#define KPROBESFS		EVENTSFS "kprobes/"
+#define FBT_GROUP_FMT		GROUP_FMT "_%s"
+#define FBT_GROUP_DATA		GROUP_DATA, prp->desc->prb
 
 static const dtrace_pattr_t	pattr = {
 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
@@ -310,60 +311,81 @@ static void trampoline(dt_pcb_t *pcb, int haspred)
 static int probe_info(dtrace_hdl_t *dtp, const dt_probe_t *prp,
 		      int *idp, int *argcp, dt_argdesc_t **argvp)
 {
+	int	fd;
+	char	*fn;
+	size_t	len;
 	FILE	*f;
-	char	fn[256];
-	int	rc = 0;
+	int	rc = -1;
 
 	*idp = -1;
-
-	strcpy(fn, KPROBESFS);
-	strcat(fn, prp->desc->fun);
-	strcat(fn, "/format");
+	*argcp = 0;			/* no arguments by default */
+	*argvp = NULL;
 
 	/*
-	 * We check to see if the kprobe event already exists in the tracing
-	 * sub-system.  If not, we try to register the probe with the tracing
-	 * sub-system, and try accessing it again.
+	 * Register the kprobe with the tracing subsystem.  This will
+	 * create a tracepoint event.
 	 */
-again:
+	fd = open(KPROBE_EVENTS, O_WRONLY | O_APPEND);
+	if (fd == -1)
+		return -1;
+
+	rc = dprintf(fd, "%c:" FBT_GROUP_FMT "/%s %s\n",
+		     prp->desc->prb[0] == 'e' ? 'p' : 'r', FBT_GROUP_DATA,
+		     prp->desc->fun, prp->desc->fun);
+	close(fd);
+	if (rc == -1)
+		return -1;
+
+	len = snprintf(NULL, 0, "%s" FBT_GROUP_FMT "/%s/format", EVENTSFS,
+		       FBT_GROUP_DATA, prp->desc->fun) + 1;
+	fn = dt_alloc(dtp, len);
+	if (fn == NULL)
+		goto out;
+
+	snprintf(fn, len, "%s" FBT_GROUP_FMT "/%s/format", EVENTSFS,
+		 FBT_GROUP_DATA, prp->desc->fun);
 	f = fopen(fn, "r");
-	if (f == NULL) {
-		int	fd;
-		char	c = 'p';
-
-		if (rc)
-			goto out;
+	if (f == NULL)
+		goto out;
 
-		rc = -ENOENT;
+	rc = tp_event_info(dtp, f, 0, idp, NULL, NULL);
+	fclose(f);
 
-		/*
-		 * The probe name component is either "entry" or "return" for
-		 * FBT probes.
-		 */
-		if (prp->desc->prb[0] == 'r')
-			c = 'r';
+out:
+	dt_free(dtp, fn);
 
-		/*
-		 * Register the kprobe with the tracing subsystem.  This will
-		 * create a tracepoint event.
-		 */
-		fd = open(KPROBE_EVENTS, O_WRONLY | O_APPEND);
-		if (fd == -1)
-			goto out;
+	return rc;
+}
 
-		dprintf(fd, "%c:%s %s\n", c, prp->desc->fun, prp->desc->fun);
-		close(fd);
+/*
+ * Try to clean up system resources that may have been allocated for this
+ * probe.
+ *
+ * If there is an event FD, we close it.
+ *
+ * We also try to remove any uprobe that may have been created for the probe.
+ * This is harmless for probes that didn't get created.  If the removal fails
+ * for some reason we are out of luck - fortunately it is not harmful to the
+ * system as a whole.
+ */
+static int probe_fini(dtrace_hdl_t *dtp, dt_probe_t *prp)
+{
+	int	fd;
 
-		goto again;
+	if (prp->event_fd != -1) {
+		close(prp->event_fd);
+		prp->event_fd = -1;
 	}
 
-	*argcp = 0;
-	*argvp = NULL;
-	rc = tp_event_info(dtp, f, 0, idp, NULL, NULL);
-	fclose(f);
+	fd = open(KPROBE_EVENTS, O_WRONLY | O_APPEND);
+	if (fd == -1)
+		return -1;
 
-out:
-	return rc;
+	dprintf(fd, "-:" FBT_GROUP_FMT "/%s\n", FBT_GROUP_DATA,
+		prp->desc->fun);
+	close(fd);
+
+	return 0;
 }
 
 dt_provimpl_t	dt_fbt = {
@@ -372,4 +394,5 @@ dt_provimpl_t	dt_fbt = {
 	.populate	= &populate,
 	.trampoline	= &trampoline,
 	.probe_info	= &probe_info,
+	.probe_fini	= &probe_fini,
 };
diff --git a/libdtrace/dt_prov_sdt.c b/libdtrace/dt_prov_sdt.c
index 6ea905ad..bc9642f8 100644
--- a/libdtrace/dt_prov_sdt.c
+++ b/libdtrace/dt_prov_sdt.c
@@ -36,9 +36,9 @@ static const char		modname[] = "vmlinux";
 
 #define PROBE_LIST		TRACEFS "available_events"
 
-#define KPROBES			"kprobes:"
-#define SYSCALLS		"syscalls:"
-#define UPROBES			"uprobes:"
+#define KPROBES			"kprobes"
+#define SYSCALLS		"syscalls"
+#define UPROBES			"uprobes"
 
 /*
  * All tracing events (tracepoints) include a number of fields that we need to
@@ -247,29 +247,37 @@ static int populate(dtrace_hdl_t *dtp)
 
 	while (fgets(buf, sizeof(buf), f)) {
 		int	dummy;
+		char	str[sizeof(buf)];
 
 		p = strchr(buf, '\n');
 		if (p)
 			*p = '\0';
 
 		p = strchr(buf, ':');
-		if (p == NULL) {
-			if (dt_probe_insert(dtp, prv, prvname, modname, "",
-					    buf))
-				n++;
-		} else if (sscanf(buf, DTRACE_PAT ":", &dummy) == 1) {
-			continue;
-		} else if (memcmp(buf, KPROBES, sizeof(KPROBES) - 1) == 0) {
-			continue;
-		} else if (memcmp(buf, SYSCALLS, sizeof(SYSCALLS) - 1) == 0) {
-			continue;
-		} else if (memcmp(buf, UPROBES, sizeof(UPROBES) - 1) == 0) {
-			continue;
-		} else {
+		if (p != NULL) {
+			size_t	len;
+
 			*p++ = '\0';
+			len = strlen(buf);
+
+			if (sscanf(buf, GROUP_FMT, &dummy, str) == 2)
+				continue;
+			else if (len == strlen(KPROBES) &&
+				 strcmp(buf, KPROBES) == 0)
+				continue;
+			else if (len == strlen(SYSCALLS) &&
+				 strcmp(buf, SYSCALLS) == 0)
+				continue;
+			else if (len == strlen(UPROBES) &&
+				 strcmp(buf, UPROBES) == 0)
+				continue;
 
 			if (dt_probe_insert(dtp, prv, prvname, buf, "", p))
 				n++;
+		} else {
+			if (dt_probe_insert(dtp, prv, prvname, modname, "",
+					    buf))
+				n++;
 		}
 	}
 
diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
index 07ac174a..95069a13 100644
--- a/libdtrace/dt_provider.h
+++ b/libdtrace/dt_provider.h
@@ -19,7 +19,15 @@ extern "C" {
 #define TRACEFS		"/sys/kernel/debug/tracing/"
 #define EVENTSFS	TRACEFS "events/"
 
-#define DTRACE_PAT	"dtrace_%d"
+/*
+ * Tracepoint group naming format for DTrace providers.  Providers may append
+ * to this format string as needed.
+ *
+ * GROUP_DATA provides the necessary data items to populate the format string
+ * (PID of the dtrace process and the provider name).
+ */
+#define GROUP_FMT	"dt_%d_%s"
+#define GROUP_DATA	getpid(), prvname
 
 struct dt_probe;
 
-- 
2.26.0




More information about the DTrace-devel mailing list