[DTrace-devel] [PATCH 6/7] consume: consume only one gulp of buffer data

Nick Alcock nick.alcock at oracle.com
Wed Mar 20 14:15:36 UTC 2024


The per-CPU consume loop in dt_consume_cpu() works over the ring buffer and
sucks in all of its contents.  But for very high-volume tracing runs (like
test/unittest/proc/tst.slow-self-grab.sh) with records that are expensive to
process (like ustack()), new data can easily flow in fast enough that the
loop never manages to drain the buffer.

This means it never returns to its caller, which has all sorts of unpleasant
consequences: from starving other CPUs, through problems with BEGIN/END
processing, to the simpler problem that, because it never returns to its
caller, dtrace_work() never terminates, so any causes of termination other
than an exit() are never processed (cmd/dtrace.c can also terminate on
SIGINT, dtrace -c processes exiting, etc).

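The fix is trivial: read the head pointer only once per call, consume just
the data that was in the buffer at that point, and don't advance the tail
pointer until all of that data is consumed, rather than looping around again
to pick up whatever has arrived in the meantime.  It might take a while (the
user can reduce the bufsize to help with that if needed), but it will make
forward progress, eventually getting through every CPU and then returning to
its caller.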

Add a couple of new tests to make sure that BEGIN is still processed first
even if the buffer is flooded with noise from other probes, and to verify
that dtrace -c-induced process termination and self-grabs both work and
don't disrupt things even when under high consume load (in this case, due to
I/O from DTRACE_DEBUG= output, which still triggers the relevant syscall
probe even though the shell promptly throws it all at /dev/null).

Thanks-to: Kris Van Hees <kris.van.hees at oracle.com>
Signed-off-by: Nick Alcock <nick.alcock at oracle.com>
---
 libdtrace/dt_consume.c                        | 88 +++++++++----------
 test/unittest/begin/tst.begin-starvation.sh   | 33 +++++++
 test/unittest/proc/tst.self-grab.sh           |  3 +-
 ...tst.self-grab.sh => tst.slow-self-grab.sh} |  6 +-
 4 files changed, 80 insertions(+), 50 deletions(-)
 create mode 100755 test/unittest/begin/tst.begin-starvation.sh
 copy test/unittest/proc/{tst.self-grab.sh => tst.slow-self-grab.sh} (69%)

diff --git a/libdtrace/dt_consume.c b/libdtrace/dt_consume.c
index dec2314b5ad9a..e9b33cfc57cb5 100644
--- a/libdtrace/dt_consume.c
+++ b/libdtrace/dt_consume.c
@@ -2748,63 +2748,57 @@ dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, dt_peb_t *peb,
 	 */
 	base = peb->base + pebset->page_size;
 
-	do {
-		if (peekflags == CONSUME_PEEK || peekflags == CONSUME_PEEK_FINISH)
-			head = peb->last_head;
-		else {
-			head = ring_buffer_read_head(rb_page);
-			peb->last_head = head;
-		}
-		tail = rb_page->data_tail;
+	if (peekflags == CONSUME_PEEK || peekflags == CONSUME_PEEK_FINISH)
+		head = peb->last_head;
+	else {
+		head = ring_buffer_read_head(rb_page);
+		peb->last_head = head;
+	}
+	tail = rb_page->data_tail;
 
-		if (head == tail)
-			break;
+	while (tail != head) {
+		dtrace_workstatus_t rval = DTRACE_WORKSTATUS_OKAY;
 
-		do {
-			dtrace_workstatus_t rval = DTRACE_WORKSTATUS_OKAY;
+		event = base + tail % data_size;
+		hdr = (struct perf_event_header *)event;
+		len = hdr->size;
 
-			event = base + tail % data_size;
-			hdr = (struct perf_event_header *)event;
-			len = hdr->size;
+		/*
+		 * If the perf event data wraps around the boundary of
+		 * the buffer, we make a copy in contiguous memory.
+		 */
+                if (event + len > peb->endp) {
+                  char *dst;
+                  uint32_t num;
 
-			/*
-			 * If the perf event data wraps around the boundary of
-			 * the buffer, we make a copy in contiguous memory.
-			 */
-			if (event + len > peb->endp) {
-				char		*dst;
-				uint32_t	num;
+                  /* Increase the buffer as needed. */
+                  if (pebset->tmp_len < len) {
+                    pebset->tmp = realloc(pebset->tmp, len);
+                    pebset->tmp_len = len;
+                  }
 
-				/* Increase the buffer as needed. */
-				if (pebset->tmp_len < len) {
-					pebset->tmp = realloc(pebset->tmp, len);
-					pebset->tmp_len = len;
-				}
+                  dst = pebset->tmp;
+                  num = peb->endp - event + 1;
+                  memcpy(dst, event, num);
+                  memcpy(dst + num, base, len - num);
 
-				dst = pebset->tmp;
-				num = peb->endp - event + 1;
-				memcpy(dst, event, num);
-				memcpy(dst + num, base, len - num);
+                  event = dst;
+                }
 
-				event = dst;
-			}
+                rval = dt_consume_one(dtp, fp, event, &pdat, efunc, rfunc, flow,
+                                      quiet, peekflags, &last, arg);
+                if (rval == DTRACE_WORKSTATUS_DONE)
+                  return DTRACE_WORKSTATUS_OKAY;
+                if (rval != DTRACE_WORKSTATUS_OKAY)
+                  return rval;
 
-			rval = dt_consume_one(dtp, fp, event, &pdat, efunc,
-					      rfunc, flow, quiet, peekflags,
-					      &last, arg);
-			if (rval == DTRACE_WORKSTATUS_DONE)
-				return DTRACE_WORKSTATUS_OKAY;
-			if (rval != DTRACE_WORKSTATUS_OKAY)
-				return rval;
+                tail += hdr->size;
+	}
 
-			tail += hdr->size;
-		} while (tail != head);
+	if (peekflags == 0 || peekflags == CONSUME_PEEK_FINISH)
+		ring_buffer_write_tail(rb_page, tail);
 
-		if (peekflags == 0 || peekflags == CONSUME_PEEK_FINISH)
-			ring_buffer_write_tail(rb_page, tail);
-	} while (peekflags == 0);
-
-	return DTRACE_WORKSTATUS_OKAY;
+        return DTRACE_WORKSTATUS_OKAY;
 }
 
 typedef struct dt_begin {
diff --git a/test/unittest/begin/tst.begin-starvation.sh b/test/unittest/begin/tst.begin-starvation.sh
new file mode 100755
index 0000000000000..f8e473bcddd74
--- /dev/null
+++ b/test/unittest/begin/tst.begin-starvation.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+#
+# @@ xfail: BEGIN not yet forced to be processed first
+#
+
+#
+# This script tests that BEGIN in conjunction with a high-volume probe
+# processes (at least one) BEGIN first.
+#
+if [ $# != 1 ]; then
+	echo expected one argument: '<'dtrace-path'>'
+	exit 2
+fi
+
+dtrace=$1
+# Use DTRACE_DEBUG to make dtrace itself do a lot of write output and
+# slow it down a lot.
+DTRACE_DEBUG=t exec $dtrace $dt_flags -s /dev/stdin >/dev/null 2>&1 <<EOF
+#pragma D option bufsize=32k
+syscall::write:entry
+{
+	exit(1);
+}
+BEGIN
+{
+	exit(0);
+}
+EOF
diff --git a/test/unittest/proc/tst.self-grab.sh b/test/unittest/proc/tst.self-grab.sh
index 5aab30c6bfbd0..c56501fb3f529 100755
--- a/test/unittest/proc/tst.self-grab.sh
+++ b/test/unittest/proc/tst.self-grab.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 #
 # Oracle Linux DTrace.
-# Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
 # Licensed under the Universal Permissive License v 1.0 as shown at
 # http://oss.oracle.com/licenses/upl.
 #
@@ -18,6 +18,7 @@ fi
 
 dtrace=$1
 exec $dtrace $dt_flags -c 'sleep 2' -s /dev/stdin >/dev/null <<EOF
+#pragma D option bufsize=32k
 syscall::write:entry
 {
 	ustack(1);
diff --git a/test/unittest/proc/tst.self-grab.sh b/test/unittest/proc/tst.slow-self-grab.sh
similarity index 69%
copy from test/unittest/proc/tst.self-grab.sh
copy to test/unittest/proc/tst.slow-self-grab.sh
index 5aab30c6bfbd0..0a6aafb6b6f91 100755
--- a/test/unittest/proc/tst.self-grab.sh
+++ b/test/unittest/proc/tst.slow-self-grab.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 #
 # Oracle Linux DTrace.
-# Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
 # Licensed under the Universal Permissive License v 1.0 as shown at
 # http://oss.oracle.com/licenses/upl.
 #
@@ -9,7 +9,8 @@
 #
 # This script tests that self-grabs work and do not disturb the death-counter
 # that causes dtrace -c to exit when its children die (nor fail in any other
-# way).
+# way), even when DTrace is artificially forced to process huge amounts of
+# I/O coming from itself (thus grabbing itself a great many times).
 #
 if [ $# != 1 ]; then
 	echo expected one argument: '<'dtrace-path'>'
@@ -18,6 +19,7 @@ fi
 
 dtrace=$1
 exec $dtrace $dt_flags -c 'sleep 2' -s /dev/stdin >/dev/null <<EOF
+#pragma D option bufsize=32k
 syscall::write:entry
 {
 	ustack(1);
-- 
2.44.0.273.ge0bd14271f




More information about the DTrace-devel mailing list