[DTrace-devel] [PATCH 2/4] lexer: the things inside an enum { ... } declaration are identifiers
Kris Van Hees
kris.van.hees at oracle.com
Fri Oct 3 00:00:38 UTC 2025
On Thu, Sep 18, 2025 at 07:03:36PM +0100, Nick Alcock via DTrace-devel wrote:
> The code in dt_lex.c:id_or_type() tries to figure out if something is an
> identifier or not: if it cannot prove it is an identifier, it concludes
> it must be a type name, which later triggers a search for said type (or
> identifier) which eventually reaches (expensively) across the entire
> kernel CTF. Usually this is what we want, since we use type names that
> actually reside in the kernel extensively and do not expect to have to
> decorate all of them with `. But if this misfires bad things can happen.
>
> In the case of enums, existing code in dt_decl.c checks for duplicate
> identifiers, and carefully avoids considering code outside the C and D
> dicts to be duplicates: but if id_or_type() concludes this enumerator is
> probably a type name, we'll import the thing we find even if it's an
> identifier, and then conflict. Enumerators cannot be type names, so this
> must always be wrong (if we actually do put a type name in there,
> dt_parser.c will correctly reject it no matter what the lexer says).
>
> So add yet another piece of parser context identifying when we are
> inside the { } in an enum (we set it to 1 when the enum is seen, then
> bump it when the braces are seen, so if it's 2 we are in the relevant
> context; it is reset to 0 on every ';', '(' and '}'), then use that to
> forcibly declare everything seen inside enums an identifier without
> trying to chase it down.
>
> Signed-off-by: Nick Alcock <nick.alcock at oracle.com>
Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>
> ---
> libdtrace/dt_lex.l | 17 +++++++++++++++--
> libdtrace/dt_pcb.h | 1 +
> 2 files changed, 16 insertions(+), 2 deletions(-)
>
> diff --git a/libdtrace/dt_lex.l b/libdtrace/dt_lex.l
> index 9f12f5c7ca289..fd70aa0aa5803 100644
> --- a/libdtrace/dt_lex.l
> +++ b/libdtrace/dt_lex.l
> @@ -88,7 +88,7 @@ if (yypcb->pcb_token != 0) {
> <S0>do return DT_KEY_DO;
> <S0>double return DT_KEY_DOUBLE;
> <S0>else return DT_KEY_ELSE;
> -<S0>enum { yypcb->pcb_sou_type = 1; return DT_KEY_ENUM; }
> +<S0>enum { yypcb->pcb_sou_type = 1; yypcb->pcb_enum_decl = 1; return DT_KEY_ENUM; }
> <S0>extern return DT_KEY_EXTERN;
> <S0>float return DT_KEY_FLOAT;
> <S0>for return DT_KEY_FOR;
> @@ -128,6 +128,7 @@ if (yypcb->pcb_token != 0) {
> <S2>counter { yybegin(YYS_DEFINE); return DT_KEY_COUNTER; }
> <S2>double { yybegin(YYS_EXPR); return DT_KEY_DOUBLE; }
> <S2>enum { yybegin(YYS_EXPR); yypcb->pcb_sou_type = 1;
> + yypcb->pcb_enum_decl = 1;
> return DT_KEY_ENUM; }
> <S2>extern { yybegin(YYS_EXPR); return DT_KEY_EXTERN; }
> <S2>float { yybegin(YYS_EXPR); return DT_KEY_FLOAT; }
> @@ -463,6 +464,7 @@ if (yypcb->pcb_token != 0) {
> <S0>"(" {
> yypcb->pcb_parens++;
> yypcb->pcb_sou_type = 0;
> + yypcb->pcb_enum_decl = 0;
> return DT_TOK_LPAR;
> }
>
> @@ -488,10 +490,13 @@ if (yypcb->pcb_token != 0) {
> <S2>"{" {
> yypcb->pcb_braces++;
> yypcb->pcb_sou_type = 0;
> + if (yypcb->pcb_enum_decl)
> + yypcb->pcb_enum_decl++;
> return '{';
> }
>
> <S0>"}" {
> + yypcb->pcb_enum_decl = 0;
> if (--yypcb->pcb_braces < 0)
> yyerror("extra } in input stream\n");
> return '}';
> @@ -536,7 +541,7 @@ if (yypcb->pcb_token != 0) {
> <S0>"--" return DT_TOK_SUBSUB;
> <S0>"..." return DT_TOK_ELLIPSIS;
> <S0>"," return DT_TOK_COMMA;
> -<S0>";" return ';';
> +<S0>";" yypcb->pcb_enum_decl = 0; return ';';
> <S0>{RGX_WS} ; /* discard */
> <S0>"\\"\n ; /* discard */
> <S0>. {
> @@ -769,6 +774,14 @@ id_or_type(const char *s)
> return DT_TOK_IDENT;
> }
>
> + /*
> + * Inside an enumeration declaration's { }'s region: must be an ident.
> + * Checking for conflicts is handled by dt_decl_enumerator(). No
> + * need to look anything up here.
> + */
> + if (yypcb->pcb_enum_decl == 2)
> + return DT_TOK_IDENT;
> +
> /*
> * If the lexeme is a global variable or likely identifier, then it is
> * an identifier token.
> diff --git a/libdtrace/dt_pcb.h b/libdtrace/dt_pcb.h
> index 7c57f83220b28..b6a7620f4920c 100644
> --- a/libdtrace/dt_pcb.h
> +++ b/libdtrace/dt_pcb.h
> @@ -71,6 +71,7 @@ typedef struct dt_pcb {
> int pcb_parens; /* number of open parentheses in lexer */
> int pcb_sou_type; /* lexer in struct/union type name */
> int pcb_sou_deref; /* lexer in struct/union dereference */
> + int pcb_enum_decl; /* lexer in enum declaration: 2 for inside { }. */
> int pcb_xlator_input; /* in translator input type */
> int pcb_array_dimens; /* in array dimensions */
> int pcb_alloca_taints; /* number of alloca taint changes */
> --
> 2.48.1.283.g18c60a128c
>
>
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel
More information about the DTrace-devel
mailing list