1 /* This is automatically generated code. Do not modify it. */
2 package org.intellij.lang.regexp;
4 import com.intellij.lexer.FlexLexer;
5 import com.intellij.psi.tree.IElementType;
6 import java.util.LinkedList;
7 import com.intellij.psi.StringEscapesTokenTypes;
// Inspection suppressions; presumably attached to the lexer class declared after this point (not visible here).
10 @SuppressWarnings({ "ALL", "SameParameterValue", "WeakerAccess", "SameReturnValue", "RedundantThrows", "UnusedDeclaration" })
22 // Stack of saved lexer states; yypushstate() adds the current state at the front and yypopstate() removes it, which gives the lexer nested states.
23 private final LinkedList<Integer> states = new LinkedList<Integer>();
25 // The idea was to reuse this regex implementation for XML Schema regexes (which use a slightly different syntax)
26 // as well, but that is currently unfinished because it requires changes in more places than just the lexer.
27 private boolean xmlSchemaMode;
// Constructor taking the XML-schema flag: delegates to the Reader-based constructor with a null
// reader, then stores the requested mode in the xmlSchemaMode field.
29 _RegExLexer(boolean xmlSchemaMode) {
30 this((java.io.Reader)null);
31 this.xmlSchemaMode = xmlSchemaMode;
// Saves the current lexer state (yystate()) at the front of the states stack.
// NOTE(review): the state parameter is not used in the lines visible here; the rest of the
// method body is outside this view - presumably it switches the lexer to that state.
34 private void yypushstate(int state) {
35 states.addFirst(yystate());
// Removes the most recently saved state from the front of the states stack (unboxed into a
// local int). LinkedList.removeFirst() throws NoSuchElementException when the stack is empty.
// NOTE(review): what is done with the popped value is outside this view.
38 private void yypopstate() {
39 final int state = states.removeFirst();
// Inspects the text matched for an inline options group. When that text contains the letter x,
// commentMode is turned on, unless the text starts with a minus sign, in which case it is turned off.
// Text without an x leaves commentMode unchanged.
43 private void handleOptions() {
44 final String o = yytext().toString();
45 if (o.contains("x")) {
46 commentMode = !o.startsWith("-");
50 // Tracks whether the lexer is in comment mode, i.e. whether whitespace is insignificant and whether
51 // text after '#' is ignored until the end of the line.
52 boolean commentMode = false;
// META: the escape character, the dot, and the operator and bracket characters listed below.
// An escaped META character is lexed as ESC_CHARACTER by the escape rules.
75 META={ESCAPE} | {DOT} |
76 "^" | "$" | "?" | "*" | "+" | "|" |
77 {LBRACKET} | {LBRACE} | {LPAREN} | {RPAREN}
// CONTROL: letters that, when escaped, are lexed as ESC_CTRL_CHARACTER.
79 CONTROL="t" | "n" | "r" | "f" | "a" | "e"
// BOUNDARY: letters that, when escaped, are lexed as BOUNDARY (ESC_CHARACTER in state CLASS2).
80 BOUNDARY="b" | "B" | "A" | "z" | "Z" | "G"
// CLASS: letters that, when escaped, are lexed as CHAR_CLASS.
82 CLASS="w" | "W" | "s" | "S" | "d" | "D" | "X" | "C"
// XML_CLASS: letters that, when escaped, are CHAR_CLASS only if xmlSchemaMode is set.
// NOTE(review): the letter C is listed both here and in CLASS, and the XML_CLASS rule comes first
// in the rules below - confirm which token an escaped C is meant to produce outside XML schema mode.
83 XML_CLASS="c" | "C" | "i" | "I"
// Quoting. NOTE(review): the state block headers that scope these rules are not visible in this view.
// Backslash-Q saves the current state, enters QUOTED and emits QUOTE_BEGIN.
90 "\\Q" { yypushstate(QUOTED); return RegExpTT.QUOTE_BEGIN; }
// Backslash-E calls yypopstate() and emits QUOTE_END.
93 "\\E" { yypopstate(); return RegExpTT.QUOTE_END; }
// Any other single character matched by the dot pattern is a plain CHARACTER.
94 . { return RegExpTT.CHARACTER; }
// Two consecutive {ESCAPE} characters form a literal ESC_CHARACTER.
98 {ESCAPE} {ESCAPE} { return RegExpTT.ESC_CHARACTER; }
// Hex escapes: x followed by exactly two {HEX_CHAR} is HEX_CHAR; an x followed by zero to two
// arbitrary characters is BAD_HEX_VALUE (on an equal-length match the earlier rule is chosen).
101 {ESCAPE} "x" {HEX_CHAR}{2} { return RegExpTT.HEX_CHAR; }
102 {ESCAPE} "x" {ANY}{0,2} { return RegExpTT.BAD_HEX_VALUE; }
104 /* unicode escapes */
// u followed by exactly four {HEX_CHAR} is UNICODE_CHAR; a u followed by zero to four arbitrary
// characters is reported as an invalid unicode escape.
105 {ESCAPE} "u" {HEX_CHAR}{4} { return RegExpTT.UNICODE_CHAR; }
106 {ESCAPE} "u" {ANY}{0,4} { return StringEscapesTokenTypes.INVALID_UNICODE_ESCAPE_TOKEN; }
// Octal escapes: 0 followed by one to three digits in the range 0-7 is OCT_CHAR; a 0 with no
// octal digit after it is BAD_OCT_VALUE.
109 {ESCAPE} "0" [0-7]{1,3} { return RegExpTT.OCT_CHAR; }
110 {ESCAPE} "0" { return RegExpTT.BAD_OCT_VALUE; }
112 /* single character after "\c" */
// In xmlSchemaMode the trailing character is pushed back, so only the two-character escape is
// returned as CHAR_CLASS; otherwise all three characters form one CTRL token.
113 {ESCAPE} "c" {ANY} { if (xmlSchemaMode) { yypushback(1); return RegExpTT.CHAR_CLASS; } else return RegExpTT.CTRL; }
// An escaped XML_CLASS letter: CHAR_CLASS in xmlSchemaMode, an invalid character escape otherwise.
115 {ESCAPE} {XML_CLASS} { if (xmlSchemaMode) return RegExpTT.CHAR_CLASS; else return StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN; }
118 /* java.util.regex.Pattern says about backrefs:
119 "In this class, \1 through \9 are always interpreted as back references,
120 and a larger number is accepted as a back reference if at least that many
121 subexpressions exist at that point in the regular expression, otherwise the
122 parser will drop digits until the number is smaller or equal to the existing
123 number of groups or it is one digit."
125 So, for 100% compatibility, backrefs > 9 should be resolved by the parser, but
126 I'm not sure if it's worth the effort - at least not atm.
// Escaped {DIGITS}: a BACKREF, except in state CLASS2 where it is an ESC_CHARACTER.
129 {ESCAPE} {DIGITS} { return yystate() != CLASS2 ? RegExpTT.BACKREF : RegExpTT.ESC_CHARACTER; }
// An escaped minus sign and escaped META characters are literal ESC_CHARACTER tokens.
131 {ESCAPE} "-" { return RegExpTT.ESC_CHARACTER; }
132 {ESCAPE} {META} { return RegExpTT.ESC_CHARACTER; }
// An escaped CLASS letter is a CHAR_CLASS; an escaped {PROP} is a PROPERTY.
133 {ESCAPE} {CLASS} { return RegExpTT.CHAR_CLASS; }
134 {ESCAPE} {PROP} { return RegExpTT.PROPERTY; }
// Escaped BOUNDARY letter: a BOUNDARY, except in state CLASS2 where it is an ESC_CHARACTER.
136 {ESCAPE} {BOUNDARY} { return yystate() != CLASS2 ? RegExpTT.BOUNDARY : RegExpTT.ESC_CHARACTER; }
// Escaped CONTROL letter: ESC_CTRL_CHARACTER.
137 {ESCAPE} {CONTROL} { return RegExpTT.ESC_CTRL_CHARACTER; }
// Fallbacks, placed after the specific rules so those win on equal-length matches: an escaped
// letter is an invalid character escape, and any other escaped character is a REDUNDANT_ESCAPE.
139 {ESCAPE} [:letter:] { return StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN; }
140 {ESCAPE} {ANY} { return RegExpTT.REDUNDANT_ESCAPE; }
142 /* "{" \d+(,\d*)? "}" */
143 /* "}" outside counted closure is treated as a regular character */
// Opening brace: save the current state, enter EMBRACED and emit LBRACE.
144 {LBRACE} { yypushstate(EMBRACED); return RegExpTT.LBRACE; }
// Presumably the EMBRACED rules (the state header is not visible in this view): runs of letters
// are NAME, runs of digits are NUMBER, and a comma is COMMA.
147 [:letter:]+ { return RegExpTT.NAME; }
148 [:digit:]+ { return RegExpTT.NUMBER; }
149 "," { return RegExpTT.COMMA; }
// Closing brace calls yypopstate() and emits RBRACE; any other character is a BAD_CHARACTER.
151 {RBRACE} { yypopstate(); return RegExpTT.RBRACE; }
152 {ANY} { return RegExpTT.BAD_CHARACTER; }
// Minus and caret tokens. NOTE(review): the state headers that scope these rules are not visible here.
155 "-" { return RegExpTT.MINUS; }
156 "^" { return RegExpTT.CARET; }
// An opening bracket immediately followed by a closing bracket (trailing-context match, the
// closing bracket is not consumed) enters CLASS1; any other opening bracket enters CLASS2.
// Both save the current state first and emit CLASS_BEGIN.
158 {LBRACKET} / {RBRACKET} { yypushstate(CLASS1); return RegExpTT.CLASS_BEGIN; }
159 {LBRACKET} { yypushstate(CLASS2); return RegExpTT.CLASS_BEGIN; }
161 /* []abc] is legal. The first ] is treated as a literal character */
// That first closing bracket is returned as CHARACTER and the lexer switches to CLASS2.
163 {RBRACKET} { yybegin(CLASS2); return RegExpTT.CHARACTER; }
// NOTE(review): this action returns no token; with assertions disabled it does nothing at all.
// Presumably unreachable because CLASS1 is only entered when a closing bracket follows - confirm.
164 . { assert false : yytext(); }
// Closing bracket: yypopstate() and CLASS_END.
168 {RBRACKET} { yypopstate(); return RegExpTT.CLASS_END; }
// A double ampersand is ANDAND; any other character is a plain CHARACTER.
170 "&&" { return RegExpTT.ANDAND; }
171 {ANY} { return RegExpTT.CHARACTER; }
// Group delimiters.
175 {LPAREN} { return RegExpTT.GROUP_BEGIN; }
176 {RPAREN} { return RegExpTT.GROUP_END; }
// Single-character operators.
178 "|" { return RegExpTT.UNION; }
179 "?" { return RegExpTT.QUEST; }
180 "*" { return RegExpTT.STAR; }
181 "+" { return RegExpTT.PLUS; }
182 "$" { return RegExpTT.DOLLAR; }
183 {DOT} { return RegExpTT.DOT; }
// Special group openers: non-capturing group, lookahead and lookbehind (positive and negative).
185 "(?:" { return RegExpTT.NON_CAPT_GROUP; }
186 "(?=" { return RegExpTT.POS_LOOKAHEAD; }
187 "(?!" { return RegExpTT.NEG_LOOKAHEAD; }
188 "(?<=" { return RegExpTT.POS_LOOKBEHIND; }
189 "(?<!" { return RegExpTT.NEG_LOOKBEHIND; }
// A paren-question sequence not matched by a longer rule above starts an inline options group:
// switch to OPTIONS (without saving the current state) and emit SET_OPTIONS.
191 "(?" { yybegin(OPTIONS); return RegExpTT.SET_OPTIONS; }
// Presumably the OPTIONS rules (the state header is not visible in this view): letters are the
// flags to turn on, a minus followed by letters are the flags to turn off. Both call
// handleOptions(), which updates commentMode when the matched text contains an x.
195 [:letter:]* { handleOptions(); return RegExpTT.OPTIONS_ON; }
196 ("-" [:letter:]*) { handleOptions(); return RegExpTT.OPTIONS_OFF; }
// A colon, a closing paren, or any unexpected character sends the lexer back to YYINITIAL.
198 ":" { yybegin(YYINITIAL); return RegExpTT.COLON; }
199 ")" { yybegin(YYINITIAL); return RegExpTT.GROUP_END; }
201 {ANY} { yybegin(YYINITIAL); return RegExpTT.BAD_CHARACTER; }
// A closing bracket in YYINITIAL is an ordinary CHARACTER.
205 <YYINITIAL> {RBRACKET} { return RegExpTT.CHARACTER; }
// A hash starts a comment only while commentMode is on (state saved, COMMENT entered); otherwise it is a CHARACTER.
208 "#" { if (commentMode) { yypushstate(COMMENT); return RegExpTT.COMMENT; } else return RegExpTT.CHARACTER; }
// Comment text: everything up to and including the first CR or LF (or up to the end of input), then yypopstate().
210 [^\r\n]*[\r\n]? { yypopstate(); return RegExpTT.COMMENT; }
// Whitespace is WHITE_SPACE while commentMode is on. Otherwise a space is a CHARACTER,
// backspace/tab/CR/FF are CTRL_CHARACTER, and LF is ESC_CHARACTER.
// NOTE(review): LF maps to ESC_CHARACTER while the other control characters map to
// CTRL_CHARACTER - confirm this asymmetry is intended.
213 " " { return commentMode ? com.intellij.psi.TokenType.WHITE_SPACE : RegExpTT.CHARACTER; }
214 [\b\t\r\f] { return commentMode ? com.intellij.psi.TokenType.WHITE_SPACE : RegExpTT.CTRL_CHARACTER; }
215 \n { return commentMode ? com.intellij.psi.TokenType.WHITE_SPACE : RegExpTT.ESC_CHARACTER; }
// Catch-all: anything not matched above is a plain CHARACTER.
217 {ANY} { return RegExpTT.CHARACTER; }