[DTrace-devel] [PATCH v2 02/23] lexer, parser: remove manual implementation of parsing from a string
Kris Van Hees
kris.van.hees at oracle.com
Wed Dec 6 19:36:48 UTC 2023
On Mon, Nov 27, 2023 at 04:47:08PM +0000, Nick Alcock wrote:
> dt_lex.l has had code (in one form or another) to parse input from a
> string since the beginning. I rewrote it in 2013 to use a dt_input
> function rather than overriding YY_INPUT or simply overriding input()
> (which flex has never allowed), but the string-parsing code was still
> there.
>
> This is ridiculous. flex has had code to parse from strings since 1994,
> so long ago that it predates any extant flex tarballs that I've been
> able to find, and long before DTrace's lexer was written (though of
> course that lexer was written for an AT&T lex which had hardly been
> touched since the 1980s). The flex interface is a bit different from
> what DTrace is doing now -- you have to call yy_scan_string(), which
> gives you back a yy_buffer_state handle that you free with
> yy_delete_buffer() -- but it works, and it means we can rip out half
> the code in dt_input, leaving only the code to do buffered reads from
> actual files.
>
> Signed-off-by: Nick Alcock <nick.alcock at oracle.com>
Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>
> ---
> libdtrace/dt_cc.c | 16 +++++++++++-----
> libdtrace/dt_lex.l | 16 +++++-----------
> libdtrace/dt_parser.h | 5 ++++-
> libdtrace/dt_pcb.h | 2 --
> 4 files changed, 20 insertions(+), 19 deletions(-)
>
> diff --git a/libdtrace/dt_cc.c b/libdtrace/dt_cc.c
> index a42109f14f02..4c00c4b34311 100644
> --- a/libdtrace/dt_cc.c
> +++ b/libdtrace/dt_cc.c
> @@ -609,6 +609,7 @@ dt_compile(dtrace_hdl_t *dtp, int context, dtrace_probespec_t pspec, void *arg,
> dt_node_t *dnp;
> dt_decl_t *ddp;
> dt_pcb_t pcb;
> + struct yy_buffer_state *strbuf;
> void *rv = NULL;
> int err;
>
> @@ -633,8 +634,6 @@ dt_compile(dtrace_hdl_t *dtp, int context, dtrace_probespec_t pspec, void *arg,
>
> pcb.pcb_fileptr = fp;
> pcb.pcb_string = s;
> - pcb.pcb_strptr = s;
> - pcb.pcb_strlen = s ? strlen(s) : 0;
> pcb.pcb_sargc = argc;
> pcb.pcb_sargv = argv;
> pcb.pcb_sflagv = argc ? calloc(argc, sizeof(ushort_t)) : NULL;
> @@ -670,11 +669,20 @@ dt_compile(dtrace_hdl_t *dtp, int context, dtrace_probespec_t pspec, void *arg,
> * will longjmp back to pcb_jmpbuf to abort. If parsing succeeds,
> * we optionally display the parse tree if debugging is enabled.
> */
> - if (yyparse() != 0 || yypcb->pcb_root == NULL)
> + if (yypcb->pcb_string)
> + strbuf = yy_scan_string(yypcb->pcb_string);
> + if (yyparse() != 0 || yypcb->pcb_root == NULL) {
> + if (yypcb->pcb_string)
> + yy_delete_buffer(strbuf);
> +
> xyerror(D_EMPTY, "empty D program translation unit\n");
> + }
>
> yybegin(YYS_DONE);
>
> + if (yypcb->pcb_string)
> + yy_delete_buffer(strbuf);
> +
> if (cflags & DTRACE_C_CTL)
> goto out;
>
> @@ -801,8 +809,6 @@ dt_construct(dtrace_hdl_t *dtp, dt_probe_t *prp, uint_t cflags, dt_ident_t *idp)
>
> pcb.pcb_fileptr = NULL;
> pcb.pcb_string = NULL;
> - pcb.pcb_strptr = NULL;
> - pcb.pcb_strlen = 0;
> pcb.pcb_sargc = 0;
> pcb.pcb_sargv = NULL;
> pcb.pcb_sflagv = NULL;
> diff --git a/libdtrace/dt_lex.l b/libdtrace/dt_lex.l
> index 2f80035ac073..e132035aaa15 100644
> --- a/libdtrace/dt_lex.l
> +++ b/libdtrace/dt_lex.l
> @@ -912,23 +912,17 @@ id_or_type(const char *s)
> }
>
> /*
> - * Do I/O efficiently, including from the pcb_string.
> + * Do I/O efficiently; handle errors properly.
> */
> static size_t
> dt_input(char *buf, size_t max_size)
> {
> size_t result;
>
> - if (yypcb->pcb_fileptr != NULL) {
> - result = fread(buf, 1, max_size, yypcb->pcb_fileptr);
> - if (result == 0 && ferror(yypcb->pcb_fileptr))
> - longjmp(yypcb->pcb_jmpbuf, EDT_FIO);
> - } else {
> - size_t len_left = (yypcb->pcb_strlen - (yypcb->pcb_strptr - yypcb->pcb_string));
> - result = len_left > max_size ? max_size : len_left;
> - memcpy(buf, yypcb->pcb_strptr, result);
> - yypcb->pcb_strptr += result;
> - }
> + assert(yypcb->pcb_string == NULL && yypcb->pcb_fileptr != NULL);
> + result = fread(buf, 1, max_size, yypcb->pcb_fileptr);
> + if (result == 0 && ferror(yypcb->pcb_fileptr))
> + longjmp(yypcb->pcb_jmpbuf, EDT_FIO);
>
> return result;
> }
> diff --git a/libdtrace/dt_parser.h b/libdtrace/dt_parser.h
> index b4e2f8aba604..9143b30c0301 100644
> --- a/libdtrace/dt_parser.h
> +++ b/libdtrace/dt_parser.h
> @@ -277,8 +277,11 @@ extern void yylabel(const char *);
> extern void yybegin(yystate_t);
> extern void yyinit(struct dt_pcb *);
>
> +struct yy_buffer_state;
> +
> +struct yy_buffer_state *yy_scan_string(const char *);
> +void yy_delete_buffer(struct yy_buffer_state *);
> extern int yyparse(void);
> -extern int yyinput(void);
>
> #ifdef __cplusplus
> }
> diff --git a/libdtrace/dt_pcb.h b/libdtrace/dt_pcb.h
> index 017fc654a80d..f53706e2ebd9 100644
> --- a/libdtrace/dt_pcb.h
> +++ b/libdtrace/dt_pcb.h
> @@ -27,8 +27,6 @@ typedef struct dt_pcb {
> FILE *pcb_fileptr; /* pointer to input file (or NULL) */
> char *pcb_filetag; /* optional file name string (or NULL) */
> const char *pcb_string; /* pointer to input string (or NULL) */
> - const char *pcb_strptr; /* pointer to input position */
> - size_t pcb_strlen; /* length of pcb_string */
> int pcb_sargc; /* number of script arguments (if any) */
> char *const *pcb_sargv; /* script argument strings (if any) */
> ushort_t *pcb_sflagv; /* script argument flags (DT_IDFLG_* bits) */
> --
> 2.42.0.271.g85384428f1
>
>
More information about the DTrace-devel mailing list