[DTrace-devel] [PATCH v2 02/23] lexer, parser: remove manual implementation of parsing from a string

Kris Van Hees kris.van.hees at oracle.com
Wed Dec 6 19:36:48 UTC 2023


On Mon, Nov 27, 2023 at 04:47:08PM +0000, Nick Alcock wrote:
> dt_lex.l has had code (in one form or another) to parse input from a
> string since the beginning.  I rewrote it in 2013 to use a dt_input
> function rather than overriding YY_INPUT or simply overriding input()
> (which flex has never allowed), but the string-parsing code was still there.
> 
> This is ridiculous.  flex has had code to parse from strings since 1994,
> so long ago that it predates any extant flex tarballs that I've been
> able to find, and long before DTrace's lexer was written (but of course
> that lexer was written for an AT&T lex which had hardly been touched since
> the 1980s).  It's a bit different from what DTrace is doing now -- you have
> to call yy_scan_string(), which gives you back a yy_buffer_state handle
> that you free with yy_delete_buffer() -- but it works, and it means we
> can rip out half the code in dt_input, leaving only the code to do
> buffered reads from actual files.
> 
> Signed-off-by: Nick Alcock <nick.alcock at oracle.com>

Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>

> ---
>  libdtrace/dt_cc.c     | 16 +++++++++++-----
>  libdtrace/dt_lex.l    | 16 +++++-----------
>  libdtrace/dt_parser.h |  5 ++++-
>  libdtrace/dt_pcb.h    |  2 --
>  4 files changed, 20 insertions(+), 19 deletions(-)
> 
> diff --git a/libdtrace/dt_cc.c b/libdtrace/dt_cc.c
> index a42109f14f02..4c00c4b34311 100644
> --- a/libdtrace/dt_cc.c
> +++ b/libdtrace/dt_cc.c
> @@ -609,6 +609,7 @@ dt_compile(dtrace_hdl_t *dtp, int context, dtrace_probespec_t pspec, void *arg,
>  	dt_node_t *dnp;
>  	dt_decl_t *ddp;
>  	dt_pcb_t pcb;
> +	struct yy_buffer_state *strbuf;
>  	void *rv = NULL;
>  	int err;
>  
> @@ -633,8 +634,6 @@ dt_compile(dtrace_hdl_t *dtp, int context, dtrace_probespec_t pspec, void *arg,
>  
>  	pcb.pcb_fileptr = fp;
>  	pcb.pcb_string = s;
> -	pcb.pcb_strptr = s;
> -	pcb.pcb_strlen = s ? strlen(s) : 0;
>  	pcb.pcb_sargc = argc;
>  	pcb.pcb_sargv = argv;
>  	pcb.pcb_sflagv = argc ? calloc(argc, sizeof(ushort_t)) : NULL;
> @@ -670,11 +669,20 @@ dt_compile(dtrace_hdl_t *dtp, int context, dtrace_probespec_t pspec, void *arg,
>  	 * will longjmp back to pcb_jmpbuf to abort.  If parsing succeeds,
>  	 * we optionally display the parse tree if debugging is enabled.
>  	 */
> -	if (yyparse() != 0 || yypcb->pcb_root == NULL)
> +	if (yypcb->pcb_string)
> +		strbuf = yy_scan_string(yypcb->pcb_string);
> +	if (yyparse() != 0 || yypcb->pcb_root == NULL) {
> +		if (yypcb->pcb_string)
> +			yy_delete_buffer(strbuf);
> +
>  		xyerror(D_EMPTY, "empty D program translation unit\n");
> +	}
>  
>  	yybegin(YYS_DONE);
>  
> +	if (yypcb->pcb_string)
> +		yy_delete_buffer(strbuf);
> +
>  	if (cflags & DTRACE_C_CTL)
>  		goto out;
>  
> @@ -801,8 +809,6 @@ dt_construct(dtrace_hdl_t *dtp, dt_probe_t *prp, uint_t cflags, dt_ident_t *idp)
>  
>  	pcb.pcb_fileptr = NULL;
>  	pcb.pcb_string = NULL;
> -	pcb.pcb_strptr = NULL;
> -	pcb.pcb_strlen = 0;
>  	pcb.pcb_sargc = 0;
>  	pcb.pcb_sargv = NULL;
>  	pcb.pcb_sflagv = NULL;
> diff --git a/libdtrace/dt_lex.l b/libdtrace/dt_lex.l
> index 2f80035ac073..e132035aaa15 100644
> --- a/libdtrace/dt_lex.l
> +++ b/libdtrace/dt_lex.l
> @@ -912,23 +912,17 @@ id_or_type(const char *s)
>  }
>  
>  /*
> - * Do I/O efficiently, including from the pcb_string.
> + * Do I/O efficiently; handle errors properly.
>   */
>  static size_t
>  dt_input(char *buf, size_t max_size)
>  {
>          size_t result;
>  
> -	if (yypcb->pcb_fileptr != NULL) {
> -		result = fread(buf, 1, max_size, yypcb->pcb_fileptr);
> -		if (result == 0 && ferror(yypcb->pcb_fileptr))
> -			longjmp(yypcb->pcb_jmpbuf, EDT_FIO);
> -        } else {
> -                size_t len_left = (yypcb->pcb_strlen - (yypcb->pcb_strptr - yypcb->pcb_string));
> -                result = len_left > max_size ? max_size : len_left;
> -                memcpy(buf, yypcb->pcb_strptr, result);
> -                yypcb->pcb_strptr += result;
> -        }
> +	assert(yypcb->pcb_string == NULL && yypcb->pcb_fileptr != NULL);
> +	result = fread(buf, 1, max_size, yypcb->pcb_fileptr);
> +	if (result == 0 && ferror(yypcb->pcb_fileptr))
> +		longjmp(yypcb->pcb_jmpbuf, EDT_FIO);
>  
>          return result;
>  }
> diff --git a/libdtrace/dt_parser.h b/libdtrace/dt_parser.h
> index b4e2f8aba604..9143b30c0301 100644
> --- a/libdtrace/dt_parser.h
> +++ b/libdtrace/dt_parser.h
> @@ -277,8 +277,11 @@ extern void yylabel(const char *);
>  extern void yybegin(yystate_t);
>  extern void yyinit(struct dt_pcb *);
>  
> +struct yy_buffer_state;
> +
> +struct yy_buffer_state *yy_scan_string(const char *);
> +void yy_delete_buffer(struct yy_buffer_state *);
>  extern int yyparse(void);
> -extern int yyinput(void);
>  
>  #ifdef	__cplusplus
>  }
> diff --git a/libdtrace/dt_pcb.h b/libdtrace/dt_pcb.h
> index 017fc654a80d..f53706e2ebd9 100644
> --- a/libdtrace/dt_pcb.h
> +++ b/libdtrace/dt_pcb.h
> @@ -27,8 +27,6 @@ typedef struct dt_pcb {
>  	FILE *pcb_fileptr;	/* pointer to input file (or NULL) */
>  	char *pcb_filetag;	/* optional file name string (or NULL) */
>  	const char *pcb_string;	/* pointer to input string (or NULL) */
> -	const char *pcb_strptr;	/* pointer to input position */
> -	size_t pcb_strlen;	/* length of pcb_string */
>  	int pcb_sargc;		/* number of script arguments (if any) */
>  	char *const *pcb_sargv;	/* script argument strings (if any) */
>  	ushort_t *pcb_sflagv;	/* script argument flags (DT_IDFLG_* bits) */
> -- 
> 2.42.0.271.g85384428f1
> 
> 



More information about the DTrace-devel mailing list