[DTrace-devel] [PATCH v2 02/23] lexer, parser: remove manual implementation of parsing from a string

Nick Alcock nick.alcock at oracle.com
Mon Nov 27 16:47:08 UTC 2023


dt_lex.l has had code (in one form or another) to parse input from a
string since the beginning.  I rewrote it in 2013 to use a dt_input
function rather than overriding YY_INPUT or simply overriding input()
(which flex has never allowed), but the hand-written string-parsing
code was still there.

This is ridiculous.  flex has had code to parse from strings since 1994,
so long ago that it predates any extant flex tarballs that I've been
able to find, and long before DTrace's lexer was written (but of course
that lexer was written for an AT&T lex which had hardly been touched
since the 1980s).  The flex interface is a bit different from what
DTrace is doing now -- you have to call yy_scan_string(), which gives
you back a yy_buffer_state handle that you free with yy_delete_buffer()
-- but it works, and it means we can rip out half the code in dt_input,
leaving only the code to do buffered reads from actual files.

Signed-off-by: Nick Alcock <nick.alcock at oracle.com>
---
 libdtrace/dt_cc.c     | 16 +++++++++++-----
 libdtrace/dt_lex.l    | 16 +++++-----------
 libdtrace/dt_parser.h |  5 ++++-
 libdtrace/dt_pcb.h    |  2 --
 4 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/libdtrace/dt_cc.c b/libdtrace/dt_cc.c
index a42109f14f02..4c00c4b34311 100644
--- a/libdtrace/dt_cc.c
+++ b/libdtrace/dt_cc.c
@@ -609,6 +609,7 @@ dt_compile(dtrace_hdl_t *dtp, int context, dtrace_probespec_t pspec, void *arg,
 	dt_node_t *dnp;
 	dt_decl_t *ddp;
 	dt_pcb_t pcb;
+	struct yy_buffer_state *strbuf;
 	void *rv = NULL;
 	int err;
 
@@ -633,8 +634,6 @@ dt_compile(dtrace_hdl_t *dtp, int context, dtrace_probespec_t pspec, void *arg,
 
 	pcb.pcb_fileptr = fp;
 	pcb.pcb_string = s;
-	pcb.pcb_strptr = s;
-	pcb.pcb_strlen = s ? strlen(s) : 0;
 	pcb.pcb_sargc = argc;
 	pcb.pcb_sargv = argv;
 	pcb.pcb_sflagv = argc ? calloc(argc, sizeof(ushort_t)) : NULL;
@@ -670,11 +669,20 @@ dt_compile(dtrace_hdl_t *dtp, int context, dtrace_probespec_t pspec, void *arg,
 	 * will longjmp back to pcb_jmpbuf to abort.  If parsing succeeds,
 	 * we optionally display the parse tree if debugging is enabled.
 	 */
-	if (yyparse() != 0 || yypcb->pcb_root == NULL)
+	if (yypcb->pcb_string)
+		strbuf = yy_scan_string(yypcb->pcb_string);
+	if (yyparse() != 0 || yypcb->pcb_root == NULL) {
+		if (yypcb->pcb_string)
+			yy_delete_buffer(strbuf);
+
 		xyerror(D_EMPTY, "empty D program translation unit\n");
+	}
 
 	yybegin(YYS_DONE);
 
+	if (yypcb->pcb_string)
+		yy_delete_buffer(strbuf);
+
 	if (cflags & DTRACE_C_CTL)
 		goto out;
 
@@ -801,8 +809,6 @@ dt_construct(dtrace_hdl_t *dtp, dt_probe_t *prp, uint_t cflags, dt_ident_t *idp)
 
 	pcb.pcb_fileptr = NULL;
 	pcb.pcb_string = NULL;
-	pcb.pcb_strptr = NULL;
-	pcb.pcb_strlen = 0;
 	pcb.pcb_sargc = 0;
 	pcb.pcb_sargv = NULL;
 	pcb.pcb_sflagv = NULL;
diff --git a/libdtrace/dt_lex.l b/libdtrace/dt_lex.l
index 2f80035ac073..e132035aaa15 100644
--- a/libdtrace/dt_lex.l
+++ b/libdtrace/dt_lex.l
@@ -912,23 +912,17 @@ id_or_type(const char *s)
 }
 
 /*
- * Do I/O efficiently, including from the pcb_string.
+ * Do I/O efficiently; handle errors properly.
  */
 static size_t
 dt_input(char *buf, size_t max_size)
 {
         size_t result;
 
-	if (yypcb->pcb_fileptr != NULL) {
-		result = fread(buf, 1, max_size, yypcb->pcb_fileptr);
-		if (result == 0 && ferror(yypcb->pcb_fileptr))
-			longjmp(yypcb->pcb_jmpbuf, EDT_FIO);
-        } else {
-                size_t len_left = (yypcb->pcb_strlen - (yypcb->pcb_strptr - yypcb->pcb_string));
-                result = len_left > max_size ? max_size : len_left;
-                memcpy(buf, yypcb->pcb_strptr, result);
-                yypcb->pcb_strptr += result;
-        }
+	assert(yypcb->pcb_string == NULL && yypcb->pcb_fileptr != NULL);
+	result = fread(buf, 1, max_size, yypcb->pcb_fileptr);
+	if (result == 0 && ferror(yypcb->pcb_fileptr))
+		longjmp(yypcb->pcb_jmpbuf, EDT_FIO);
 
         return result;
 }
diff --git a/libdtrace/dt_parser.h b/libdtrace/dt_parser.h
index b4e2f8aba604..9143b30c0301 100644
--- a/libdtrace/dt_parser.h
+++ b/libdtrace/dt_parser.h
@@ -277,8 +277,11 @@ extern void yylabel(const char *);
 extern void yybegin(yystate_t);
 extern void yyinit(struct dt_pcb *);
 
+struct yy_buffer_state;
+
+struct yy_buffer_state *yy_scan_string(const char *);
+void yy_delete_buffer(struct yy_buffer_state *);
 extern int yyparse(void);
-extern int yyinput(void);
 
 #ifdef	__cplusplus
 }
diff --git a/libdtrace/dt_pcb.h b/libdtrace/dt_pcb.h
index 017fc654a80d..f53706e2ebd9 100644
--- a/libdtrace/dt_pcb.h
+++ b/libdtrace/dt_pcb.h
@@ -27,8 +27,6 @@ typedef struct dt_pcb {
 	FILE *pcb_fileptr;	/* pointer to input file (or NULL) */
 	char *pcb_filetag;	/* optional file name string (or NULL) */
 	const char *pcb_string;	/* pointer to input string (or NULL) */
-	const char *pcb_strptr;	/* pointer to input position */
-	size_t pcb_strlen;	/* length of pcb_string */
 	int pcb_sargc;		/* number of script arguments (if any) */
 	char *const *pcb_sargv;	/* script argument strings (if any) */
 	ushort_t *pcb_sflagv;	/* script argument flags (DT_IDFLG_* bits) */
-- 
2.42.0.271.g85384428f1




More information about the DTrace-devel mailing list