I want to write a lexer that has multiple modes. But the modes are mostly similar. The only difference is that I reference the same character by a different name. I have it working, the problem is I have to copy the entire lexer change all the names, add types, and add one line for each mode.
Here is the general problem I want to solve. I want a comma to have high precedence outside a '[' ']'. I want it to have a low precedence inside the '['
, ']'
. To do that I push and pop modes with the '['
and ']'
. But since the only thing I am changing is the precedence I have to copy all the rules into each mode and given them different names.
One additional thing, once inside a '['
there would be no way to get back to the high precedence comma. So when the grammar moves into a '{'
, '}'
section the comma reverts back to high precedence.
In order to accomplish this I have the initial default mode plus CONJUNCTION
(high precedence) and NON_CONJUNCTION
(low precedence). I copy all the rules from the default mode and rename them to C_
or NC_
. Then I assign their type back to the type of the default mode.
I would rather accomplish this without coping, renaming, and assigning types to all the rules from the default mode.
Here is my lexer:
lexer grammar DabarLexer;
OPEN_PAREN : '(' -> pushMode(NON_CONJUNCTION) ;
CLOSE_PAREN : ')' -> popMode;
OPEN_BRACKET : '[' -> pushMode(NON_CONJUNCTION) ;
CLOSE_BRACKET : ']' -> popMode ;
OPEN_CURLY : '{' -> pushMode(CONJUNCTION) ;
CLOSE_CURLY : '}' -> popMode ;
SPACE : ' ' ;
HEAVY_COMMA : ',';
ENDLINE : '\n' ;
PERIOD : '.' ;
SINGLE_QUOTE : '\'' ;
DOUBLE_QUOTE : '"' ;
INDENTION : '\t' -> skip;
fragment SYMBOL : HEAVY_COMMA | OPEN_BRACKET | CLOSE_BRACKET | OPEN_PAREN | CLOSE_PAREN | OPEN_CURLY | CLOSE_CURLY | SPACE | ENDLINE | PERIOD | SINGLE_QUOTE | DOUBLE_QUOTE | INDENTION ;
ESCAPE : '\\' SYMBOL ;
fragment NON_SYMBOL : ~[(){}',; \n.\t"\[\]] ;
IDENTIFIER : (NON_SYMBOL | ESCAPE)+ ;
LITERAL : (SINGLE_QUOTE (NON_SYMBOL | ESCAPE)+ SINGLE_QUOTE) | DOUBLE_QUOTE (NON_SYMBOL | ESCAPE)+ DOUBLE_QUOTE ;
mode CONJUNCTION ;
C_HEAVY_COMMA : ',' -> type(HEAVY_COMMA);
C_OPEN_PAREN : '(' -> type(OPEN_PAREN), pushMode(NON_CONJUNCTION) ;
C_CLOSE_PAREN : ')' -> type(CLOSE_PAREN), popMode;
C_OPEN_BRACKET : '[' -> type(OPEN_BRACKET), pushMode(NON_CONJUNCTION) ;
C_CLOSE_BRACKET : ']' -> type(CLOSE_BRACKET), popMode ;
C_OPEN_CURLY : '{' -> type(OPEN_CURLY), pushMode(CONJUNCTION) ;
C_CLOSE_CURLY : '}' -> type(CLOSE_CURLY), popMode ;
C_SPACE : ' ' -> type(SPACE);
C_ENDLINE : '\n' -> type(ENDLINE);
C_PERIOD : '.' -> type(PERIOD);
C_SINGLE_QUOTE : '\'' -> type(SINGLE_QUOTE);
C_DOUBLE_QUOTE : '"' -> type(DOUBLE_QUOTE);
C_INDENTION : '\t' -> type(INDENTION),skip;
fragment C_SYMBOL : ( HEAVY_COMMA | C_OPEN_BRACKET | C_CLOSE_BRACKET | C_OPEN_PAREN | C_CLOSE_PAREN | C_OPEN_CURLY | C_CLOSE_CURLY | C_SPACE | C_ENDLINE | C_PERIOD | C_SINGLE_QUOTE | C_DOUBLE_QUOTE | C_INDENTION ) ;
C_ESCAPE : '\\' C_SYMBOL -> type(ESCAPE);
fragment C_NON_SYMBOL : ~[(){}',; \n.\t"\[\]] ;
C_IDENTIFIER : (C_NON_SYMBOL | C_ESCAPE)+ -> type(IDENTIFIER);
C_LITERAL : ((C_SINGLE_QUOTE (C_NON_SYMBOL | C_ESCAPE)+ C_SINGLE_QUOTE) | C_DOUBLE_QUOTE (C_NON_SYMBOL | C_ESCAPE)+ C_DOUBLE_QUOTE) -> type(LITERAL);
mode NON_CONJUNCTION ;
LIGHT_COMMA : ',' ;
NC_OPEN_PAREN : '(' -> type(OPEN_PAREN), pushMode(NON_CONJUNCTION) ;
NC_CLOSE_PAREN : ')' -> type(CLOSE_PAREN), popMode;
NC_OPEN_BRACKET : '[' -> type(OPEN_BRACKET), pushMode(NON_CONJUNCTION) ;
NC_CLOSE_BRACKET : ']' -> type(CLOSE_BRACKET), popMode ;
NC_OPEN_CURLY : '{' -> type(OPEN_CURLY), pushMode(CONJUNCTION) ;
NC_CLOSE_CURLY : '}' -> type(CLOSE_CURLY), popMode ;
NC_SPACE : ' ' -> type(SPACE);
NC_ENDLINE : '\n' -> type(ENDLINE);
NC_PERIOD : '.' -> type(PERIOD);
NC_SINGLE_QUOTE : '\'' -> type(SINGLE_QUOTE);
NC_DOUBLE_QUOTE : '"' -> type(DOUBLE_QUOTE);
NC_INDENTION : '\t' -> type(INDENTION),skip;
fragment NC_SYMBOL : ( LIGHT_COMMA | NC_OPEN_BRACKET | NC_CLOSE_BRACKET | NC_OPEN_PAREN | NC_CLOSE_PAREN | NC_OPEN_CURLY | NC_CLOSE_CURLY | NC_SPACE | NC_ENDLINE | NC_PERIOD | NC_SINGLE_QUOTE | NC_DOUBLE_QUOTE | NC_INDENTION ) ;
NC_ESCAPE : '\\' NC_SYMBOL -> type(ESCAPE);
fragment NC_NON_SYMBOL : ~[(){}',; \n.\t"\[\]] ;
NC_IDENTIFIER : (NC_NON_SYMBOL | NC_ESCAPE)+ -> type(IDENTIFIER);
NC_LITERAL : ((NC_SINGLE_QUOTE (NC_NON_SYMBOL | NC_ESCAPE)+ NC_SINGLE_QUOTE) | NC_DOUBLE_QUOTE (NC_NON_SYMBOL | NC_ESCAPE)+ NC_DOUBLE_QUOTE) -> type(LITERAL);