[DTrace-devel] [PATCH 2/4] lexer: the things inside an enum { ... } declaration are identifiers

Kris Van Hees kris.van.hees at oracle.com
Fri Oct 3 00:00:38 UTC 2025


On Thu, Sep 18, 2025 at 07:03:36PM +0100, Nick Alcock via DTrace-devel wrote:
> The code in dt_lex.c:id_or_type() tries to figure out if something is an
> identifier or not: if it cannot prove it is an identifier, it concludes
> it must be a type name, which later triggers a search for said type (or
> identifier) which eventually reaches (expensively) across the entire
> kernel CTF.  Usually this is what we want, since we use type names that
> actually reside in the kernel extensively and do not expect to have to
> decorate all of them with `. But if this misfires, bad things can happen.
> 
> In the case of enums, existing code in dt_decl.c checks for duplicate
> identifiers, and carefully avoids considering code outside the C and D
> dicts to be duplicates: but if id_or_type() concludes this enumerator is
> probably a type name, we'll import the thing we find even if it's an
> identifier, and then conflict. Enumerators cannot be type names, so this
> must always be wrong (if we actually do put a type name in there,
> dt_parser.c will correctly reject it no matter what the lexer says).
> 
> So add yet another piece of parser context identifying when we are
> inside the { } in an enum (we set it to 1 when the enum is seen, then
> bump it when the braces are seen, so if it's 2 we are in the relevant
> context; it is reset to 0 on every ';', '}' or '('), then use that to
> forcibly declare everything seen inside enums an identifier without
> trying to chase it down.
> 
> Signed-off-by: Nick Alcock <nick.alcock at oracle.com>

Reviewed-by: Kris Van Hees <kris.van.hees at oracle.com>

> ---
>  libdtrace/dt_lex.l | 17 +++++++++++++++--
>  libdtrace/dt_pcb.h |  1 +
>  2 files changed, 16 insertions(+), 2 deletions(-)
> 
> diff --git a/libdtrace/dt_lex.l b/libdtrace/dt_lex.l
> index 9f12f5c7ca289..fd70aa0aa5803 100644
> --- a/libdtrace/dt_lex.l
> +++ b/libdtrace/dt_lex.l
> @@ -88,7 +88,7 @@ if (yypcb->pcb_token != 0) {
>  <S0>do		return DT_KEY_DO;
>  <S0>double	return DT_KEY_DOUBLE;
>  <S0>else	return DT_KEY_ELSE;
> -<S0>enum	{ yypcb->pcb_sou_type = 1; return DT_KEY_ENUM; }
> +<S0>enum	{ yypcb->pcb_sou_type = 1; yypcb->pcb_enum_decl = 1; return DT_KEY_ENUM; }
>  <S0>extern	return DT_KEY_EXTERN;
>  <S0>float	return DT_KEY_FLOAT;
>  <S0>for		return DT_KEY_FOR;
> @@ -128,6 +128,7 @@ if (yypcb->pcb_token != 0) {
>  <S2>counter	{ yybegin(YYS_DEFINE);	return DT_KEY_COUNTER; }
>  <S2>double	{ yybegin(YYS_EXPR);	return DT_KEY_DOUBLE; }
>  <S2>enum	{ yybegin(YYS_EXPR);	yypcb->pcb_sou_type = 1;
> +					yypcb->pcb_enum_decl = 1;
>  					return DT_KEY_ENUM; }
>  <S2>extern	{ yybegin(YYS_EXPR);	return DT_KEY_EXTERN; }
>  <S2>float	{ yybegin(YYS_EXPR);	return DT_KEY_FLOAT; }
> @@ -463,6 +464,7 @@ if (yypcb->pcb_token != 0) {
>  <S0>"("		{
>  			yypcb->pcb_parens++;
>  			yypcb->pcb_sou_type = 0;
> +			yypcb->pcb_enum_decl = 0;
>  			return DT_TOK_LPAR;
>  		}
>  
> @@ -488,10 +490,13 @@ if (yypcb->pcb_token != 0) {
>  <S2>"{"		{
>  			yypcb->pcb_braces++;
>  			yypcb->pcb_sou_type = 0;
> +			if (yypcb->pcb_enum_decl)
> +				yypcb->pcb_enum_decl++;
>  			return '{';
>  		}
>  
>  <S0>"}"		{
> +			yypcb->pcb_enum_decl = 0;
>  			if (--yypcb->pcb_braces < 0)
>  				yyerror("extra } in input stream\n");
>  			return '}';
> @@ -536,7 +541,7 @@ if (yypcb->pcb_token != 0) {
>  <S0>"--"	return DT_TOK_SUBSUB;
>  <S0>"..."	return DT_TOK_ELLIPSIS;
>  <S0>","		return DT_TOK_COMMA;
> -<S0>";"		return ';';
> +<S0>";"		yypcb->pcb_enum_decl = 0; return ';';
>  <S0>{RGX_WS}	; /* discard */
>  <S0>"\\"\n	; /* discard */
>  <S0>.		{
> @@ -769,6 +774,14 @@ id_or_type(const char *s)
>  		return DT_TOK_IDENT;
>  	}
>  
> +	/*
> +	 * Inside an enumeration declaration's { }'s region: must be an ident.
> +	 * Checking for conflicts is handled by dt_decl_enumerator().  No
> +	 * need to look anything up here.
> +	 */
> +	if (yypcb->pcb_enum_decl == 2)
> +	    return DT_TOK_IDENT;
> +
>  	/*
>  	 * If the lexeme is a global variable or likely identifier, then it is
>  	 * an identifier token.
> diff --git a/libdtrace/dt_pcb.h b/libdtrace/dt_pcb.h
> index 7c57f83220b28..b6a7620f4920c 100644
> --- a/libdtrace/dt_pcb.h
> +++ b/libdtrace/dt_pcb.h
> @@ -71,6 +71,7 @@ typedef struct dt_pcb {
>  	int pcb_parens;		/* number of open parentheses in lexer */
>  	int pcb_sou_type;	/* lexer in struct/union type name */
>  	int pcb_sou_deref;	/* lexer in struct/union dereference */
> +	int pcb_enum_decl;	/* lexer in enum declaration: 2 for inside { }. */
>  	int pcb_xlator_input;	/* in translator input type */
>  	int pcb_array_dimens;	/* in array dimensions */
>  	int pcb_alloca_taints;	/* number of alloca taint changes */
> -- 
> 2.48.1.283.g18c60a128c
> 
> 
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel



More information about the DTrace-devel mailing list