[DTrace-devel] [PATCH 16/26] usdt: daemon

Kris Van Hees kris.van.hees at oracle.com
Mon Oct 24 04:36:53 UTC 2022


This commit adds a daemon, "dtprobed", which usually runs at boot
(monitored by systemd, if possible), providing /dev/dtrace/helper using
CUSE, accepting DOF from processes doing the usual DTrace ioctl()s to
that device, and using infrastructure in prior commits in this series to
pass the DOF to a child jailed with seccomp() for parsing, and accepting
structures containing parsed results back, then emitting uprobes from
these results before allowing the ioctl() to return.  The uprobes created have
stereotyped names and argument lists that include an encoded
representation of the name of the corresponding DTrace USDT probe.  (The
name also contains the address and a number of other things, so that
probes that appear in multiple places in a process still work.)

(The CUSE device is an "unrestricted ioctl" device, which restricts
dtprobed to running only as root, because the ioctl has to pull data --
the DOF -- out of arbitrary places in the client memory according to the
passed-in structure. Since you need to be root to create uprobes at all
this is not any kind of restriction.)

Extra complexity is provided by changes to libfuse.  Recent (> 2018)
libfuse has a nice logging interface, which if available means that
libfuse will log FUSE-side problems into syslog or anywhere else of your
choosing: we emit into syslog if neither -d nor -F (debug, foreground) is
specified and systemd is not in use (if systemd is in use, we never
daemonize at all).  But older libfuse does not provide this, and
unfortunately OL8 (but not OL7!) has such an older libfuse.  So we add a
compatibility wrapper providing a minimal reimplementation of the
logging interface if built against such an old libfuse.  The wrapper is
named like the header and source file implementing the real thing in
FUSE itself, but prepended with rpl_ to avoid the possibility of
dependencies on header file ordering when the new FUSE code is present.

Testing-wise, in installed mode the running dtprobed daemon is used, but
in in-source-tree mode runtest.sh kicks off a new dtprobed emitting to a
local device file named /dev/dtrace/test-$pid, and uses a new
testing-only environment variable to force drti.c to emit its ioctl()s
to that temporary device.  So in-tree testing always tests the actual
dtprobed in the tree (with any bugfixes), not the system copy.

Signed-off-by: Nick Alcock <nick.alcock at oracle.com>
Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
---
 GNUmakefile                 |   6 +-
 Makeconfig                  |   2 +
 dtprobed/60-dtprobed.rules  |   4 +
 dtprobed/Build              |  41 +++
 dtprobed/GNUmakefile        |   5 +
 dtprobed/dtprobed.c         | 647 ++++++++++++++++++++++++++++++++++++
 dtprobed/dtprobed.service   |  18 +
 dtprobed/dtrace-usdt.target |   8 +
 dtprobed/rpl_fuse_log.c     |  33 ++
 dtprobed/rpl_fuse_log.h     |  43 +++
 dtrace.spec                 |   6 +-
 runtest.sh                  |   6 +
 12 files changed, 815 insertions(+), 4 deletions(-)
 create mode 100644 dtprobed/60-dtprobed.rules
 create mode 100644 dtprobed/Build
 create mode 100644 dtprobed/GNUmakefile
 create mode 100644 dtprobed/dtprobed.c
 create mode 100644 dtprobed/dtprobed.service
 create mode 100644 dtprobed/dtrace-usdt.target
 create mode 100644 dtprobed/rpl_fuse_log.c
 create mode 100644 dtprobed/rpl_fuse_log.h

diff --git a/GNUmakefile b/GNUmakefile
index 805cf29a..fb587beb 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -3,7 +3,7 @@
 # Build files in subdirectories are included by this file.
 #
 # Oracle Linux DTrace.
-# Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved.
 # Licensed under the Universal Permissive License v 1.0 as shown at
 # http://oss.oracle.com/licenses/upl.
 
@@ -85,6 +85,10 @@ INCLUDEDIR := $(prefix)/include
 INSTINCLUDEDIR := $(DESTDIR)$(INCLUDEDIR)
 SBINDIR := $(prefix)/sbin
 INSTSBINDIR := $(DESTDIR)$(SBINDIR)
+UDEVDIR := $(prefix)/lib/udev/rules.d
+INSTUDEVDIR := $(DESTDIR)$(UDEVDIR)
+SYSTEMDUNITDIR := $(prefix)/lib/systemd/system
+INSTSYSTEMDUNITDIR := $(DESTDIR)$(SYSTEMDUNITDIR)
 DOCDIR := $(prefix)/share/doc/dtrace-$(VERSION)
 INSTDOCDIR := $(DESTDIR)$(DOCDIR)
 MANDIR := $(prefix)/share/man/man1
diff --git a/Makeconfig b/Makeconfig
index 52d72661..cc20ef4c 100644
--- a/Makeconfig
+++ b/Makeconfig
@@ -72,4 +72,6 @@ $(eval $(call check-symbol-rule,ELF_GETSHDRSTRNDX,elf_getshdrstrndx,elf))
 $(eval $(call check-symbol-rule,LIBCTF,ctf_open,ctf))
 $(eval $(call check-symbol-rule,STRRSTR,strrstr,c))
 $(eval $(call check-symbol-rule,WAITFD,waitfd,c))
+$(eval $(call check-symbol-rule,LIBSYSTEMD,sd_notify,systemd))
+$(eval $(call check-symbol-rule,FUSE_LOG,fuse_set_log_func,fuse3))
 $(eval $(call check-header-symbol-rule,CLOSE_RANGE,close_range(3,~0U,0),c,unistd))
diff --git a/dtprobed/60-dtprobed.rules b/dtprobed/60-dtprobed.rules
new file mode 100644
index 00000000..e0ec7a7c
--- /dev/null
+++ b/dtprobed/60-dtprobed.rules
@@ -0,0 +1,4 @@
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+
+KERNEL=="dtrace/helper", MODE="0666"
diff --git a/dtprobed/Build b/dtprobed/Build
new file mode 100644
index 00000000..b5254417
--- /dev/null
+++ b/dtprobed/Build
@@ -0,0 +1,41 @@
+# Oracle Linux DTrace.
+# Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+
+CMDS += dtprobed
+
+dtprobed_DIR := $(current-dir)
+dtprobed_TARGET = dtprobed
+dtprobed_CPPFLAGS := -I. -Idtprobed -Ilibproc -Ilibcommon -Ilibport
+dtprobed_CFLAGS := $(shell pkg-config --cflags fuse3)
+dtprobed_LIBS := -lcommon -lproc -lcommon -lport -lelf $(shell pkg-config --libs fuse3)
+dtprobed_DEPS := libproc.a libcommon.a libport.a
+dtprobed_SOURCES := dtprobed.c
+dtprobed_LIBSOURCES := libproc libcommon
+
+ifdef HAVE_LIBSYSTEMD
+dtprobed_CFLAGS += $(shell pkg-config --cflags libsystemd)
+dtprobed_LIBS += $(shell pkg-config --libs libsystemd)
+endif
+
+ifndef HAVE_FUSE_LOG
+dtprobed_SOURCES += rpl_fuse_log.c
+endif
+
+dtprobed.c_CFLAGS := -Wno-pedantic
+
+install::
+	mkdir -p $(INSTSBINDIR)
+	$(call describe-install-target,$(INSTSBINDIR),dtprobed)
+	install -m 755 $(objdir)/dtprobed $(INSTSBINDIR)
+	mkdir -p $(INSTUDEVDIR)
+	$(call describe-install-target,$(INSTUDEVDIR),60-dtprobed.rules)
+	install -m 644 $(dtprobed_DIR)60-dtprobed.rules $(INSTUDEVDIR)
+ifdef HAVE_LIBSYSTEMD
+	mkdir -p $(INSTSYSTEMDUNITDIR)
+	$(call describe-install-target,$(INSTSYSTEMDUNITDIR),dtprobed.service)
+	install -m 644 $(dtprobed_DIR)dtprobed.service $(INSTSYSTEMDUNITDIR)
+	$(call describe-install-target,$(INSTSYSTEMDUNITDIR),dtrace-usdt.target)
+	install -m 644 $(dtprobed_DIR)dtrace-usdt.target $(INSTSYSTEMDUNITDIR)
+endif
diff --git a/dtprobed/GNUmakefile b/dtprobed/GNUmakefile
new file mode 100644
index 00000000..4a27e08b
--- /dev/null
+++ b/dtprobed/GNUmakefile
@@ -0,0 +1,5 @@
+%:
+	$(MAKE) -C .. $@
+
+all::
+	$(MAKE) -C .. all
diff --git a/dtprobed/dtprobed.c b/dtprobed/dtprobed.c
new file mode 100644
index 00000000..45556b77
--- /dev/null
+++ b/dtprobed/dtprobed.c
@@ -0,0 +1,647 @@
+/*
+ * Oracle Linux DTrace; DOF-consumption and USDT-probe-creation daemon.
+ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ */
+
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+#include <unistd.h>
+#include <config.h>
+
+#include <linux/seccomp.h>
+#include <sys/syscall.h>
+
+#define FUSE_USE_VERSION 31
+
+#include <cuse_lowlevel.h>
+#include <fuse_lowlevel.h>
+#ifdef HAVE_FUSE_LOG
+#include <fuse_log.h>
+#else
+#include "rpl_fuse_log.h"
+#endif
+#include <port.h>
+
+#include <dtrace/ioctl.h>
+
+#ifdef HAVE_LIBSYSTEMD
+#include <systemd/sd-daemon.h>
+#endif
+
+#include "dof_parser.h"
+#include "uprobes.h"
+
+#define DOF_MAXSZ (512 * 1024 * 1024)		/* Sanity limit on DOF size.  */
+
+static struct fuse_session *cuse_session;	/* The CUSE helper device.  */
+
+int _dtrace_debug = 0;				/* Set by -d.  */
+static int foreground;				/* Set by -F: log to stderr.  */
+void dt_debug_dump(int unused) {} 		/* For libproc.  */
+
+static pid_t parser_pid;			/* The DOF parser child.  */
+static int parser_in_pipe[2];			/* DOF to the parser.  */
+static int parser_out_pipe[2];			/* Parsed results back.  */
+static int timeout = 5000; 			/* -t; unit as taken by dof_parser_host_read() (ms?) -- TODO confirm.  */
+
+static void helper_ioctl(fuse_req_t req, int cmd, void *arg,
+			 struct fuse_file_info *fi, unsigned int flags,
+			 const void *in_buf, size_t in_bufsz, size_t out_bufsz);
+
+static int process_dof(fuse_req_t req, int out, int in, pid_t pid,
+		       dof_helper_t *dh, const void *in_buf);
+
+static const struct cuse_lowlevel_ops dtprobed_clop = {
+	.ioctl = helper_ioctl,
+};
+
+static void
+log_msg(enum fuse_log_level level, const char *fmt, va_list ap)
+{
+	if (!_dtrace_debug && level > FUSE_LOG_INFO)
+		return;
+
+	if (foreground)
+		vfprintf(stderr, fmt, ap);
+	else
+		vsyslog(level, fmt, ap);
+}
+
+/* For libproc */
+void
+dt_debug_printf(const char *subsys, const char *fmt, va_list ap)
+{
+	if (!_dtrace_debug)
+		return;
+
+	if (foreground) {
+		fprintf(stderr, "%s DEBUG: ", subsys);
+		vfprintf(stderr, fmt, ap);
+	} else {
+		/* Subsystem discarded (it's always 'libproc' anyway).  */
+		vsyslog(LOG_DEBUG, fmt, ap);
+	}
+}
+
+/*
+ * States for the ioctl processing loop, which gets repeatedly called due to the
+ * request/reply nature of unrestricted FUSE ioctls.
+ */
+typedef enum dtprobed_fuse_state {
+	DTP_IOCTL_START = 0,
+	DTP_IOCTL_HDR = 1,
+	DTP_IOCTL_DOFHDR = 2,
+	DTP_IOCTL_DOF = 3
+} dtprobed_fuse_state_t;
+
+/*
+ * State crossing calls to CUSE request functions.
+ */
+typedef struct dtprobed_userdata {
+	dtprobed_fuse_state_t state;
+	dof_helper_t dh;
+	dof_hdr_t dof_hdr;
+} dtprobed_userdata_t;
+
+/*
+ * Create the CUSE device named DEVNAME, passing ARGC/ARGV to FUSE and USERDATA
+ * to the session.  Returns NULL on failure; exits (2) if allocation fails.
+ */
+struct fuse_session *
+setup_helper_device(int argc, char **argv, char *devname, dtprobed_userdata_t *userdata)
+{
+	struct cuse_info ci;
+	struct fuse_session *cs;
+	char *args;
+	int multithreaded;
+
+	memset(&ci, 0, sizeof(struct cuse_info));
+	ci.flags = CUSE_UNRESTRICTED_IOCTL;
+	ci.dev_info_argc = 1;
+	if (asprintf(&args,"DEVNAME=%s", devname) < 0) {
+		perror("allocating helper device");
+		exit(2);			/* Allow restarting.  */
+	}
+	const char *dev_info_argv[] = { args };
+	ci.dev_info_argv = dev_info_argv;
+	cs = cuse_lowlevel_setup(argc, argv, &ci, &dtprobed_clop,
+				 &multithreaded, userdata);
+	if (cs == NULL) {
+		perror("allocating helper device");
+		free(args);			/* Don't leak on failure.  */
+		return NULL;
+	}
+	if (multithreaded) {
+		fprintf(stderr, "CUSE thinks dtprobed is multithreaded!\n");
+		fprintf(stderr, "This should never happen.\n");
+		free(args);
+		errno = EINVAL;			/* Set last: free() may clobber it.  */
+		return NULL;
+	}
+	free(args);
+	return cs;
+}
+
+void
+teardown_device(void)
+{
+	/* This is automatically called on SIGTERM.  */
+	cuse_lowlevel_teardown(cuse_session);
+}
+
+/*
+ * Parse a piece of DOF.  Return 0 iff the pipe has closed and no more parsing
+ * is possible.
+ */
+static int
+parse_dof(int in, int out)
+{
+	int ok;
+	dof_helper_t *dh;
+	dof_hdr_t *dof;
+
+	dh = dof_copyin_helper(in, out, &ok);
+	if (!dh)
+		return ok;
+
+	dof = dof_copyin_dof(in, out, &ok);
+	if (!dof)
+		return ok;
+
+	dof_parse_providers(out, dh, dof);
+
+	return ok;
+}
+
+/*
+ * Kick off the sandboxed DOF parser.  This is run in a seccomp()ed subprocess,
+ * and sends a stream of dof_parsed_t back to this process.
+ */
+static void
+dof_parser_start(int sync_fd)
+{
+	if ((pipe(parser_in_pipe) < 0) ||
+	    (pipe(parser_out_pipe) < 0))
+		daemon_perr(sync_fd, "cannot create DOF parser pipes", errno);
+
+	switch (parser_pid = fork()) {
+	case -1:
+		daemon_perr(sync_fd, "cannot fork DOF parser", errno);
+	case 0: {
+		/*
+		 * Sandboxed parser child.  Close unwanted fds and nail into
+		 * seccomp jail.
+		 */
+		close(fuse_session_fd(cuse_session));
+		close(parser_in_pipe[1]);
+		close(parser_out_pipe[0]);
+		if (!foreground)
+			close(sync_fd);
+
+		/*
+		 * Reporting errors at this point is difficult: we have already
+		 * closed all pipes that we might use to report it.  Just exit 1
+		 * and rely on the admin using strace :(
+		 *
+		 * Don't do any of this if debugging (but still run in a child
+		 * process).
+		 */
+		if (!_dtrace_debug)
+			if (syscall(SYS_seccomp, SECCOMP_SET_MODE_STRICT, 0, NULL) < 0)
+				_exit(1);
+
+		while (parse_dof(parser_in_pipe[0], parser_out_pipe[1]))
+			;
+		_exit(0);
+	}
+	}
+
+	close(parser_in_pipe[0]);
+	close(parser_out_pipe[1]);
+}
+
+/*
+ * Clean up wreckage if the DOF parser dies: optionally restart it.
+ */
+static void
+dof_parser_tidy(int restart)
+{
+	int status = 0;
+
+	if (parser_pid <= 0)		/* Never kill(0, ...) or kill(-1, ...).  */
+		return;
+
+	/* Reap unless already gone: errno is only meaningful if kill() failed.  */
+	if (kill(parser_pid, SIGKILL) == 0 || errno != ESRCH)
+		while (waitpid(parser_pid, &status, 0) < 0 && errno == EINTR);
+
+	close(parser_in_pipe[1]);
+	close(parser_out_pipe[0]);
+
+	if (restart)
+		dof_parser_start(-1);
+}
+
+static dof_parsed_t *
+dof_read(fuse_req_t req, int in)
+{
+	dof_parsed_t *reply = dof_parser_host_read(in, timeout);
+
+	if (!reply)
+		return NULL;
+
+	/*
+	 * Log errors.
+	 */
+	if (reply->type == PIT_ERR) {
+		errno = reply->err.err_no;
+		fuse_log(FUSE_LOG_WARNING, "%i: dtprobed: DOF parsing error: "
+			 "%s\n", fuse_req_ctx(req)->pid,
+			 reply->err.err);
+		free(reply);
+		reply = NULL;
+	}
+
+	return reply;
+}
+
+/*
+ * Create probes as requested by the dof_parsed_t parsed from the DOF.
+ * The DOF parser has already applied the l_addr offset derived from the client
+ * process's dynamic linker.
+ */
+static void
+create_probe(pid_t pid, const char *prv, dof_parsed_t *probe, dof_parsed_t *tp)
+{
+	const char *mod, *fun, *prb;
+
+	if (tp->tracepoint.is_enabled)
+		return;				/* Not yet implemented.  */
+
+	mod = probe->probe.desc;
+	fun = mod + strlen(mod) + 1;
+	prb = fun + strlen(fun) + 1;
+
+	free(uprobe_create_from_addr(pid, tp->tracepoint.addr,
+		prv, mod, fun, prb));
+}
+
+/*
+ * Core ioctl() helper.  Reinvoked after each fuse_reply_ioctl_retry (once per
+ * dereference): userdata->state records how far the request has got.
+ */
+static void
+helper_ioctl(fuse_req_t req, int cmd, void *arg,
+	     struct fuse_file_info *fi, unsigned int flags,
+	     const void *in_buf, size_t in_bufsz, size_t out_bufsz)
+{
+	dtprobed_userdata_t *userdata = fuse_req_userdata(req);
+	struct iovec in;
+	pid_t pid = fuse_req_ctx(req)->pid;
+	const char *errmsg;
+
+	/*
+	 * We can just ignore FUSE_IOCTL_COMPAT: the 32-bit and 64-bit versions
+	 * of the DOF structures are intentionally identical.
+	 */
+
+	switch (cmd) {
+	case DTRACEHIOC_ADDDOF:
+		break;
+	case DTRACEHIOC_REMOVE: /* TODO */
+		fuse_reply_ioctl(req, 0, NULL, 0);
+		return;
+	default: errmsg = "invalid ioctl";
+		fuse_log(FUSE_LOG_WARNING, "%i: dtprobed: %s %x\n",
+			 pid, errmsg, cmd);
+		goto fuse_err;
+	}
+
+	/*
+	 * First call: get the ioctl arg content, a dof_helper_t.
+	 */
+	if (userdata->state == DTP_IOCTL_START) {
+		in.iov_base = arg;
+		in.iov_len = sizeof(dof_helper_t);
+
+		errmsg = "error reading ioctl size";
+		if (fuse_reply_ioctl_retry(req, &in, 1, NULL, 0) < 0)
+			goto fuse_errmsg;
+		userdata->state = DTP_IOCTL_HDR;
+		return;
+	}
+
+	/*
+	 * Second call: validate the dof_helper_t size, get the DOF header.
+	 */
+	if (userdata->state == DTP_IOCTL_HDR) {
+		if (in_bufsz != sizeof(dof_helper_t)) {
+			errmsg = "helper size incorrect";
+			fuse_log(FUSE_LOG_ERR, "%i: dtprobed: %s: "
+				 "expected %zu, not %zu\n", pid,
+				 errmsg, sizeof(dof_helper_t), in_bufsz);
+			goto fuse_err;
+		}
+		memcpy(&userdata->dh, in_buf, sizeof(dof_helper_t));
+
+		in.iov_base = (void *) userdata->dh.dofhp_dof;
+		in.iov_len = sizeof(dof_hdr_t);
+
+		errmsg = "cannot read DOF header";
+		if (fuse_reply_ioctl_retry(req, &in, 1, NULL, 0) < 0)
+			goto fuse_errmsg;
+
+		userdata->state = DTP_IOCTL_DOFHDR;
+		return;
+	}
+
+	/*
+	 * From here on we are always fetching DOF: the inbound buffer must be
+	 * at least as big as the DOF header.
+	 */
+	if (in_bufsz < sizeof(dof_hdr_t)) {
+		errmsg = "DOF too small";
+		fuse_log(FUSE_LOG_ERR, "%i: dtprobed: %s: expected at least %zu, "
+			 "not %zu\n", pid, errmsg, sizeof(dof_hdr_t), in_bufsz);
+		goto fuse_err;
+	}
+
+	/*
+	 * Third call: validate the DOF length and get the DOF itself.
+	 */
+	if (userdata->state == DTP_IOCTL_DOFHDR) {
+		/*
+		 * Too much data is as bad as too little.
+		 */
+		if (in_bufsz > sizeof(dof_hdr_t)) {
+			errmsg = "DOF header size incorrect";
+			fuse_log(FUSE_LOG_ERR, "%i: dtprobed: %s: %zu, not %zu\n",
+				 pid, errmsg, in_bufsz, sizeof(dof_hdr_t));
+			goto fuse_err;
+		}
+		memcpy(&userdata->dof_hdr, in_buf, sizeof(dof_hdr_t));
+
+		if (userdata->dof_hdr.dofh_loadsz > DOF_MAXSZ)
+			fuse_log(FUSE_LOG_WARNING, "%i: dtprobed: DOF size of %zi longer than is sane\n",
+				 pid, userdata->dof_hdr.dofh_loadsz);
+
+		in.iov_base = (void *) userdata->dh.dofhp_dof;
+		in.iov_len = userdata->dof_hdr.dofh_loadsz;
+
+		errmsg = "cannot read DOF";
+		if (fuse_reply_ioctl_retry(req, &in, 1, NULL, 0) < 0)
+			goto fuse_errmsg;
+		userdata->state = DTP_IOCTL_DOF;
+		return;
+	}
+
+	if (userdata->state != DTP_IOCTL_DOF) {
+		errmsg = "FUSE internal state incorrect";
+		goto fuse_errmsg;
+	}
+
+	/*
+	 * Final call: DOF acquired.  Pass to parser for processing, then reply
+	 * to unblock the ioctl() caller and return to start state.
+	 */
+
+	if (process_dof(req, parser_in_pipe[1], parser_out_pipe[0], pid,
+			&userdata->dh, in_buf) < 0)
+		goto process_err;
+
+	if (fuse_reply_ioctl(req, 0, NULL, 0) < 0)
+		goto process_err;
+
+	userdata->state = DTP_IOCTL_START;
+
+	return;
+
+  fuse_errmsg:
+	fuse_log(FUSE_LOG_ERR, "%i: dtprobed: %s\n", pid, errmsg);
+
+  fuse_err:
+	if (fuse_reply_err(req, EINVAL) < 0)
+		fuse_log(FUSE_LOG_ERR, "%i: dtprobed: "
+			 "cannot send error to ioctl caller: %s\n",
+			 pid, errmsg);
+	userdata->state = DTP_IOCTL_START;
+	return;
+
+  process_err:
+	if (fuse_reply_err(req, EINVAL) < 0)
+		fuse_log(FUSE_LOG_ERR, "%i: cannot unblock caller\n",
+			 pid);
+	userdata->state = DTP_IOCTL_START;
+	return;
+}
+
+/*
+ * Process some DOF: write it to the parser on OUT, read parsed results back
+ * from IN, create probes.  On error, restart the parser and return -1; else 0.
+ */
+static int
+process_dof(fuse_req_t req, int out, int in, pid_t pid,
+	    dof_helper_t *dh, const void *in_buf)
+{
+	dof_parsed_t *provider = NULL, *probe = NULL, *tp = NULL;
+	const char *errmsg, *prv;
+	size_t i, j;
+
+	errmsg = "DOF parser write failed";
+	while ((errno = dof_parser_host_write(out, dh,
+					      (dof_hdr_t *) in_buf)) == EAGAIN);
+	if (errno != 0)
+		goto err;
+
+	/* Wait for parsed reply.  */
+	errmsg = "parsed DOF read failed";
+	provider = dof_read(req, in);
+	if (!provider || provider->type != PIT_PROVIDER)
+		goto err;
+
+	prv = provider->provider.name;
+	for (i = 0; i < provider->provider.nprobes; i++) {
+		probe = dof_read(req, in);
+		errmsg = "no probes, or parse state corrupt";
+		if (!probe || probe->type != PIT_PROBE)
+			goto err;
+
+		for (j = 0; j < probe->probe.ntp; j++) {
+			tp = dof_read(req, in);
+			errmsg = "no tracepoints in a probe, or parse state corrupt";
+			if (!tp || tp->type != PIT_TRACEPOINT)
+				goto err;
+
+			/*
+			 * Ignore errors here: we want to create as many probes
+			 * as we can, even if creation of some of them fails.
+			 */
+			create_probe(pid, prv, probe, tp);
+			free(tp);
+			tp = NULL;		/* Don't free it again at err.  */
+		}
+		free(probe);
+		probe = NULL;
+	}
+	free(provider);
+
+	return 0;
+
+err:
+	/* Free anything half-read; dof_parser_tidy() itself kills the parser.  */
+	fuse_log(FUSE_LOG_ERR, "%i: dtprobed: parser error: %s\n", pid, errmsg);
+	free(tp);
+	free(probe);
+	free(provider);
+	dof_parser_tidy(1);
+	return -1;
+}
+
+/* Dispatch CUSE requests until the session exits.  Returns 0, or -1 on error.  */
+static int
+loop(void)
+{
+	struct fuse_buf fbuf = { .mem = NULL };
+	struct pollfd fds[1];
+	int ret = 0;
+
+	fds[0].fd = fuse_session_fd(cuse_session);
+	fds[0].events = POLLIN;
+	while (!fuse_session_exited(cuse_session)) {
+		if ((ret = poll(fds, 1, -1)) < 0)
+			ret = -errno;
+		else if (fds[0].revents != 0)
+			ret = fuse_session_receive_buf(cuse_session, &fbuf);
+		else
+			continue;
+
+		if (ret == -EINTR) {	/* A signal: go recheck the exit flag.  */
+			ret = 0;
+			continue;
+		}
+		if (ret <= 0)
+			break;
+		fuse_session_process_buf(cuse_session, &fbuf);
+	}
+
+	free(fbuf.mem);
+	fuse_session_reset(cuse_session);
+	return ret < 0 ? -1 : 0;
+}
+
+static void
+usage(void)
+{
+	fprintf(stderr, "Syntax: dtprobed [-F] [-d] [-n devname] [-t timeout]\n");
+	exit(1);
+}
+
+int
+main(int argc, char *argv[])
+{
+	int opt;
+	char *devname = "dtrace/helper";
+	int sync_fd = -1;
+	int ret;
+	struct sigaction sa = {0};
+	dtprobed_userdata_t userdata = {0};
+
+	/*
+	 * These are "command-line" arguments to FUSE itself: our args are
+	 * different.  The double-NULL allows us to add an arg.
+	 */
+	char *fuse_argv[] = { argv[0], "-f", "-s", "-o", "allow_other", NULL, NULL };
+	int fuse_argc = 5;
+
+	while ((opt = getopt(argc, argv, "Fdn:t:")) != -1) {
+		switch (opt) {
+		case 'F':
+			foreground = 1;
+			break;
+		case 'n':
+			devname = strdup(optarg);
+			break;
+		case 'd':
+			if (!_dtrace_debug) {
+				_dtrace_debug = 1;
+				fuse_argv[fuse_argc++] = "-d";
+			}
+			break;
+		case 't':
+			timeout = atoi(optarg);
+			if (timeout <= 0) {
+				fprintf(stderr, "Error: timeout must be a "
+					"positive integer, not %s\n", optarg);
+				exit(1);
+			}
+			break;
+		default:
+			usage();
+		}
+	}
+
+	if (optind < argc)
+		usage();
+
+	/*
+	 * Close all fds before doing anything else: we cannot close them during
+	 * daemonization because CUSE opens fds of its own that we want to keep
+	 * around.
+	 */
+	close_range(3, ~0U, 0);
+
+	if ((cuse_session = setup_helper_device(fuse_argc, fuse_argv,
+						devname, &userdata)) == NULL)
+		exit(1);
+
+	if (!foreground) {
+		if ((sync_fd = daemonize(0)) < 0) {
+			teardown_device();
+			exit(2);
+		}
+	}
+
+	/*
+	 * We are now daemonized, if we need to be.  Arrange to log to syslog,
+	 * fire off the jailed parser subprocess, then report successful startup
+	 * down our synchronization pipe (by closing it) and tell systemd (if
+	 * present) that we have started.
+	 */
+	fuse_set_log_func(log_msg);
+
+	/*
+	 * Ignore SIGPIPE to allow for a non-hideous way to detect parser
+	 * process death.
+	 */
+	sa.sa_handler = SIG_IGN;
+	(void) sigaction(SIGPIPE, &sa, NULL);
+
+	dof_parser_start(sync_fd);
+
+	if (!foreground)
+		close(sync_fd);
+
+#ifdef HAVE_LIBSYSTEMD
+	sd_notify(1, "READY=1");
+#endif
+
+	ret = loop();
+
+	dof_parser_tidy(0);
+	teardown_device();
+
+	if (ret == 0)
+		exit(0);
+	else
+		exit(2);			/* Allow restarting.  */
+}
diff --git a/dtprobed/dtprobed.service b/dtprobed/dtprobed.service
new file mode 100644
index 00000000..f5980734
--- /dev/null
+++ b/dtprobed/dtprobed.service
@@ -0,0 +1,18 @@
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+
+[Unit]
+Description=DTrace USDT probe creation daemon
+Documentation=man:dtprobed(8)
+
+[Service]
+Type=notify
+ExecStart=/usr/sbin/dtprobed -F
+Restart=on-failure
+RestartPreventExitStatus=1
+ProtectSystem=strict
+ProtectHome=true
+PrivateDevices=false
+PrivateNetwork=true
+ProtectControlGroups=true
+RestrictSUIDSGID=true
diff --git a/dtprobed/dtrace-usdt.target b/dtprobed/dtrace-usdt.target
new file mode 100644
index 00000000..f9d40e22
--- /dev/null
+++ b/dtprobed/dtrace-usdt.target
@@ -0,0 +1,8 @@
+[Unit]
+Description=DTrace USDT operating normally
+Documentation=man:dtprobed(8)
+Requires=dtprobed.service
+BindsTo=dtprobed.service
+After=dtprobed.service
+RefuseManualStart=true
+RefuseManualStop=true
diff --git a/dtprobed/rpl_fuse_log.c b/dtprobed/rpl_fuse_log.c
new file mode 100644
index 00000000..801b1fb8
--- /dev/null
+++ b/dtprobed/rpl_fuse_log.c
@@ -0,0 +1,33 @@
+/*
+ * Oracle Linux DTrace; FUSE logging reimplementation.
+ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ */
+
+#include <sys/compiler.h>
+#include "rpl_fuse_log.h"
+#include <stdarg.h>
+#include <stdio.h>
+
+static void default_log_func(enum fuse_log_level level _dt_unused_,
+			     const char *fmt, va_list ap)
+{
+	vfprintf(stderr, fmt, ap);
+}
+
+static rpl_log_func_t log_func = default_log_func;
+
+void fuse_set_log_func(rpl_log_func_t func)
+{
+	log_func = func ? func : default_log_func; /* NULL: default, as libfuse.  */
+}
+
+void fuse_log(enum fuse_log_level level, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	log_func(level, fmt, ap);
+	va_end(ap);
+}
diff --git a/dtprobed/rpl_fuse_log.h b/dtprobed/rpl_fuse_log.h
new file mode 100644
index 00000000..5baf65a2
--- /dev/null
+++ b/dtprobed/rpl_fuse_log.h
@@ -0,0 +1,43 @@
+/*
+ * Oracle Linux DTrace; FUSE logging reimplementation.
+ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ */
+
+#ifndef	_RPL_FUSE_LOG_H
+#define	_RPL_FUSE_LOG_H
+
+#include <stdarg.h>
+
+/*
+ * Reimplementation of fuse_log API in FUSE 3.7.0+.  Not used when FUSE is
+ * sufficiently new.
+ *
+ * We want to use this API if available so that the daemon will log
+ * FUSE-level errors to syslog when not running under systemd.  When
+ * using older FUSE, this combination will throw away such errors,
+ * but that's no excuse for throwing away our own errors too.
+ */
+
+enum fuse_log_level
+{
+	FUSE_LOG_EMERG,
+	FUSE_LOG_ALERT,
+	FUSE_LOG_CRIT,
+	FUSE_LOG_ERR,
+	FUSE_LOG_WARNING,
+	FUSE_LOG_NOTICE,
+	FUSE_LOG_INFO,
+	FUSE_LOG_DEBUG
+};
+
+typedef void (*rpl_log_func_t)(enum fuse_log_level level, const char *fmt,
+			       va_list ap);
+
+void fuse_set_log_func(rpl_log_func_t func);
+
+void fuse_log(enum fuse_log_level level, const char *fmt, ...);
+
+#endif
+
diff --git a/dtrace.spec b/dtrace.spec
index 8bb24688..11cb669a 100644
--- a/dtrace.spec
+++ b/dtrace.spec
@@ -1,7 +1,7 @@
 # spec file for package dtrace
 #
 # Oracle Linux DTrace.
-# Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved.
 # Licensed under the Universal Permissive License v 1.0 as shown at
 # http://oss.oracle.com/licenses/upl.
 
@@ -55,8 +55,8 @@ BuildRequires: rpm
 Name:         dtrace
 License:      Universal Permissive License (UPL), Version 1.0
 Group:        Development/Tools
-Requires:     cpp elfutils-libelf zlib libpcap
-BuildRequires: glibc-headers bison flex zlib-devel elfutils-libelf-devel
+Requires:     cpp elfutils-libelf zlib libpcap fuse3 >= 3.2.0
+BuildRequires: glibc-headers bison flex zlib-devel elfutils-libelf-devel fuse3-devel >= 3.2.0 systemd-devel
 BuildRequires: glibc-static %{glibc32} wireshark libpcap-devel valgrind-devel
 BuildRequires: kernel%{variant}-devel = %{build_kernel}
 %if "%{?dist}" == ".el8"
diff --git a/runtest.sh b/runtest.sh
index 6030bd58..84d04ffb 100755
--- a/runtest.sh
+++ b/runtest.sh
@@ -531,11 +531,17 @@ if [[ -z $USE_INSTALLED ]]; then
     test_libdir="$(pwd)/build/dlibs"
     test_ldflags="-L$(pwd)/build"
     test_incflags="-Iinclude -Iuts/common -Ibuild -Ilibdtrace -DARCH_$arch"
+    helper_device="dtrace/test-$$"
+    dtprobed_flags="-n $helper_device -F"
+    export DTRACE_DOF_INIT_DEVNAME="/dev/$helper_device"
 
     if [[ -z $(eval echo $dtrace) ]]; then
     	echo "No dtraces available." >&2
     	exit 1
     fi
+    build/dtprobed $dtprobed_flags &
+    dtprobed_pid=$!
+    ZAPTHESE+=($dtprobed_pid)
 else
     dtrace="/usr/sbin/dtrace"
     test_libdir="installed"
-- 
2.31.1




More information about the DTrace-devel mailing list