[DTrace-devel] [PATCH 2/4] lexer: the things inside an enum { ... } declaration are identifiers

Nick Alcock nick.alcock at oracle.com
Thu Sep 18 18:03:36 UTC 2025


The code in dt_lex.l:id_or_type() tries to figure out if something is an
identifier or not: if it cannot prove it is an identifier, it concludes
it must be a type name, which later triggers a search for said type (or
identifier) which eventually reaches (expensively) across the entire
kernel CTF.  Usually this is what we want, since we use type names that
actually reside in the kernel extensively and do not expect to have to
decorate all of them with a backtick (`).  But if this misfires, bad
things can happen.

In the case of enums, existing code in dt_decl.c checks for duplicate
identifiers, and carefully avoids considering identifiers defined outside
the C and D dicts to be duplicates: but if id_or_type() concludes an
enumerator is
probably a type name, we'll import the thing we find even if it's an
identifier, and then conflict. Enumerators cannot be type names, so this
must always be wrong (if we actually do put a type name in there,
dt_parser.c will correctly reject it no matter what the lexer says).

So add yet another piece of parser context identifying when we are
inside the { } in an enum (we set it to 1 when the enum keyword is seen,
then bump it when the opening brace is seen, so if it's 2 we are in the
relevant context; it is reset to 0 on every ;, ( and }), then use that
to forcibly declare everything seen inside enums an identifier without
trying to chase it down.

Signed-off-by: Nick Alcock <nick.alcock at oracle.com>
---
 libdtrace/dt_lex.l | 17 +++++++++++++++--
 libdtrace/dt_pcb.h |  1 +
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/libdtrace/dt_lex.l b/libdtrace/dt_lex.l
index 9f12f5c7ca289..fd70aa0aa5803 100644
--- a/libdtrace/dt_lex.l
+++ b/libdtrace/dt_lex.l
@@ -88,7 +88,7 @@ if (yypcb->pcb_token != 0) {
 <S0>do		return DT_KEY_DO;
 <S0>double	return DT_KEY_DOUBLE;
 <S0>else	return DT_KEY_ELSE;
-<S0>enum	{ yypcb->pcb_sou_type = 1; return DT_KEY_ENUM; }
+<S0>enum	{ yypcb->pcb_sou_type = 1; yypcb->pcb_enum_decl = 1; return DT_KEY_ENUM; }
 <S0>extern	return DT_KEY_EXTERN;
 <S0>float	return DT_KEY_FLOAT;
 <S0>for		return DT_KEY_FOR;
@@ -128,6 +128,7 @@ if (yypcb->pcb_token != 0) {
 <S2>counter	{ yybegin(YYS_DEFINE);	return DT_KEY_COUNTER; }
 <S2>double	{ yybegin(YYS_EXPR);	return DT_KEY_DOUBLE; }
 <S2>enum	{ yybegin(YYS_EXPR);	yypcb->pcb_sou_type = 1;
+					yypcb->pcb_enum_decl = 1;
 					return DT_KEY_ENUM; }
 <S2>extern	{ yybegin(YYS_EXPR);	return DT_KEY_EXTERN; }
 <S2>float	{ yybegin(YYS_EXPR);	return DT_KEY_FLOAT; }
@@ -463,6 +464,7 @@ if (yypcb->pcb_token != 0) {
 <S0>"("		{
 			yypcb->pcb_parens++;
 			yypcb->pcb_sou_type = 0;
+			yypcb->pcb_enum_decl = 0;
 			return DT_TOK_LPAR;
 		}
 
@@ -488,10 +490,13 @@ if (yypcb->pcb_token != 0) {
 <S2>"{"		{
 			yypcb->pcb_braces++;
 			yypcb->pcb_sou_type = 0;
+			if (yypcb->pcb_enum_decl)
+				yypcb->pcb_enum_decl++;
 			return '{';
 		}
 
 <S0>"}"		{
+			yypcb->pcb_enum_decl = 0;
 			if (--yypcb->pcb_braces < 0)
 				yyerror("extra } in input stream\n");
 			return '}';
@@ -536,7 +541,7 @@ if (yypcb->pcb_token != 0) {
 <S0>"--"	return DT_TOK_SUBSUB;
 <S0>"..."	return DT_TOK_ELLIPSIS;
 <S0>","		return DT_TOK_COMMA;
-<S0>";"		return ';';
+<S0>";"		yypcb->pcb_enum_decl = 0; return ';';
 <S0>{RGX_WS}	; /* discard */
 <S0>"\\"\n	; /* discard */
 <S0>.		{
@@ -769,6 +774,14 @@ id_or_type(const char *s)
 		return DT_TOK_IDENT;
 	}
 
+	/*
+	 * Inside an enumeration declaration's { }'s region: must be an ident.
+	 * Checking for conflicts is handled by dt_decl_enumerator().  No
+	 * need to look anything up here.
+	 */
+	if (yypcb->pcb_enum_decl == 2)
+	    return DT_TOK_IDENT;
+
 	/*
 	 * If the lexeme is a global variable or likely identifier, then it is
 	 * an identifier token.
diff --git a/libdtrace/dt_pcb.h b/libdtrace/dt_pcb.h
index 7c57f83220b28..b6a7620f4920c 100644
--- a/libdtrace/dt_pcb.h
+++ b/libdtrace/dt_pcb.h
@@ -71,6 +71,7 @@ typedef struct dt_pcb {
 	int pcb_parens;		/* number of open parentheses in lexer */
 	int pcb_sou_type;	/* lexer in struct/union type name */
 	int pcb_sou_deref;	/* lexer in struct/union dereference */
+	int pcb_enum_decl;	/* lexer in enum declaration: 2 for inside { }. */
 	int pcb_xlator_input;	/* in translator input type */
 	int pcb_array_dimens;	/* in array dimensions */
 	int pcb_alloca_taints;	/* number of alloca taint changes */
-- 
2.48.1.283.g18c60a128c




More information about the DTrace-devel mailing list