flex source code of lexers is now available as part of CE source
[fedora-idea.git] / xml / impl / src / com / intellij / lexer / _HtmlLexer.flex
blob8def2be62dfb6ac9412e601a85f9a8a7229532c4
1 /* This is automatically generated code. Do not modify it. */
2 package com.intellij.lexer;
4 import com.intellij.psi.tree.IElementType;
5 import com.intellij.psi.*;
6 import com.intellij.psi.xml.*;
/* Section separator: the user code above (package and imports) ends here and
   the options-and-declarations section begins. */
8 %%
/* Have the generated scanner work on the Unicode character set. */
10 %unicode
/* Class code copied into the generated _HtmlLexer class. JFlex requires it to
   be enclosed in the class-code delimiters restored on the lines numbered 12
   and 24; this listing had lost both of them. */
12 %{
/* Token type returned when a "${...}" embedment is matched in plain content
   (state YYINITIAL). Starts out as XML_DATA_CHARACTERS. */
13   private IElementType elTokenType = XmlTokenType.XML_DATA_CHARACTERS;
/* Token type returned when a "${...}" embedment is matched in an attribute
   value (unquoted, double-quoted or single-quoted). Starts out as
   XML_ATTRIBUTE_VALUE_TOKEN. */
14   private IElementType elTokenType2 = XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN;
/**
 * Replaces the token types reported for "${...}" embedments.
 *
 * @param _elTokenType  type to return for an embedment found in content
 * @param _elTokenType2 type to return for an embedment found in an attribute value
 */
16   public void setElTypes(IElementType _elTokenType,IElementType _elTokenType2) {
17     elTokenType = _elTokenType;
18     elTokenType2 = _elTokenType2;
19   }
/**
 * No-argument constructor: delegates to the java.io.Reader constructor with a
 * null reader. That constructor is not declared in this specification; JFlex
 * emits it into the generated class.
 */
21   public _HtmlLexer() {
22     this((java.io.Reader)null);
23   }
24 %}
/* Shape of the generated class: a public class named _HtmlLexer that
   implements FlexLexer and ELHostLexer. */
26 %class _HtmlLexer
27 %public
28 %implements FlexLexer,ELHostLexer
/* The scanning method is called advance() and returns an IElementType. */
29 %function advance
30 %type IElementType
/* Code run when the end of input is reached: it does nothing but return. */
31 %eof{ return;
32 %eof}
/* Lexical states. All of them are declared with %state, i.e. as inclusive
   states, so rules written without a state list are active in every one of
   them. Fourteen states are declared here; with the built-in YYINITIAL that
   makes fifteen. */
34 %state DOC_TYPE
35 %state COMMENT
36 %state START_TAG_NAME
37 %state END_TAG_NAME
38 %state TAG_ATTRIBUTES
39 %state ATTRIBUTE_VALUE_START
40 %state ATTRIBUTE_VALUE_DQ
41 %state ATTRIBUTE_VALUE_SQ
42 %state PROCESSING_INSTRUCTION
43 %state START_TAG_NAME2
44 %state END_TAG_NAME2
45 %state TAG_CHARACTERS
46 %state C_COMMENT_START
47 %state C_COMMENT_END
48 /* IMPORTANT! The number of states should not exceed 16. See JspHighlightingLexer. */
/* Macro definitions used by the rules below. */
/* Character classes: any letter, an ASCII digit, and a non-empty run of
   space, newline, carriage return, tab or form feed. */
50 ALPHA=[:letter:]
51 DIGIT=[0-9]
52 WHITE_SPACE_CHARS=[ \n\r\t\f]+
/* Tag name: starts with a letter, "_" or ":", continues with letters, digits,
   "_", ":", "." or "-". */
54 TAG_NAME=({ALPHA}|"_"|":")({ALPHA}|{DIGIT}|"_"|":"|"."|"-")*
/* Variant tag name that starts with "#" followed by the same continuation
   characters (possibly none). */
55 TAG_NAME_FWT=("#")({ALPHA}|{DIGIT}|"_"|":"|"."|"-")*
/* Attribute name: same pattern as TAG_NAME. */
56 ATTRIBUTE_NAME=({ALPHA}|"_"|":")({ALPHA}|{DIGIT}|"_"|":"|"."|"-")*
/* A double-quoted or single-quoted string, quotes included. */
58 DTD_REF= "\"" [^\"]* "\"" | "'" [^']* "'"
/* "<!" followed by the word DOCTYPE in any letter case. */
59 DOCTYPE= "<!" (D|d)(O|o)(C|c)(T|t)(Y|y)(P|p)(E|e)
/* The words HTML and PUBLIC in any letter case. */
60 HTML= (H|h)(T|t)(M|m)(L|l)
61 PUBLIC= (P|p)(U|u)(B|b)(L|l)(I|i)(C|c)
/* "${", then any characters other than a closing brace, then the closing
   brace. The macro name is spelled EMBEDDMENT and the rules refer to it by
   that spelling. */
62 EL_EMBEDDMENT="${" [^\}]* "}"
/* Comment terminator: "--", optional whitespace, then ">". */
64 END_COMMENT="--"[ \n\r\t\f]*">"
/* Condition text of a conditional comment: a letter followed by letters,
   whitespace, digits or any of the characters . ( ) | ! and the ampersand. */
66 CONDITIONAL_COMMENT_CONDITION=({ALPHA})({ALPHA}|{WHITE_SPACE_CHARS}|{DIGIT}|"."|"("|")"|"|"|"!"|"&")*
/* Section separator between the declarations above and the lexical rules
   below. JFlex requires it; this listing had lost the line. */
68 %%
/* Processing instructions. "<?" in content opens one. */
69 <YYINITIAL> "<?" { yybegin(PROCESSING_INSTRUCTION); return XmlTokenType.XML_PI_START; }
/* ">" on its own or preceded by "?" closes it and returns to content. */
70 <PROCESSING_INSTRUCTION> "?"? ">" { yybegin(YYINITIAL); return XmlTokenType.XML_PI_END; }
/* Everything up to the terminator: characters other than "?" and ">", or a
   "?" that is followed by something other than ">". */
71 <PROCESSING_INSTRUCTION> ([^\?\>] | (\?[^\>]))* { return XmlTokenType.XML_PI_TARGET; }
/* DOCTYPE declaration: "<!DOCTYPE" (any letter case) in content switches to
   DOC_TYPE. */
73 <YYINITIAL> {DOCTYPE} { yybegin(DOC_TYPE); return XmlTokenType.XML_DOCTYPE_START; }
/* Inside the declaration: the HTML and PUBLIC keywords and quoted strings get
   their own token types; ">" ends the declaration and returns to content. */
74 <DOC_TYPE> {HTML} { return XmlTokenType.XML_NAME; }
75 <DOC_TYPE> {PUBLIC} { return XmlTokenType.XML_DOCTYPE_PUBLIC; }
76 <DOC_TYPE> {DTD_REF} { return XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN;}
77 <DOC_TYPE> ">" { yybegin(YYINITIAL); return XmlTokenType.XML_DOCTYPE_END; }
/* Whitespace: in content it is XML_REAL_WHITE_SPACE, in the markup states
   listed on the second rule it is XML_WHITE_SPACE. */
78 <YYINITIAL> {WHITE_SPACE_CHARS} { return XmlTokenType.XML_REAL_WHITE_SPACE; }
79 <DOC_TYPE,TAG_ATTRIBUTES,ATTRIBUTE_VALUE_START,PROCESSING_INSTRUCTION, START_TAG_NAME, END_TAG_NAME, END_TAG_NAME2, TAG_CHARACTERS> {WHITE_SPACE_CHARS} { return XmlTokenType.XML_WHITE_SPACE; }
/* Start tags. When "<" is followed by a tag name, the rule only selects the
   next state: the whole match is pushed back and no token is returned, so the
   "<" is scanned again in the new state. A regular name leads to
   START_TAG_NAME, a name starting with "#" to START_TAG_NAME2. */
80 <YYINITIAL> "<" {TAG_NAME} { yybegin(START_TAG_NAME); yypushback(yylength()); }
81 <YYINITIAL> "<" {TAG_NAME_FWT} { yybegin(START_TAG_NAME2); yypushback(yylength()); }
/* The re-scanned "<" is returned as the start-tag opener. */
82 <START_TAG_NAME, START_TAG_NAME2, TAG_CHARACTERS> "<" { return XmlTokenType.XML_START_TAG_START; }
/* End tags use the same push-back scheme with END_TAG_NAME / END_TAG_NAME2. */
84 <YYINITIAL> "</" {TAG_NAME} { yybegin(END_TAG_NAME); yypushback(yylength()); }
85 <YYINITIAL> "</" {TAG_NAME_FWT} { yybegin(END_TAG_NAME2); yypushback(yylength()); }
/* The "</" itself. In YYINITIAL this rule is chosen only when no name follows,
   because the two rules above match longer text; the state is left unchanged. */
86 <YYINITIAL, END_TAG_NAME, END_TAG_NAME2> "</" { return XmlTokenType.XML_END_TAG_START; }
/* Comments. "<!--" in content opens one. */
88 <YYINITIAL> "<!--" { yybegin(COMMENT); return XmlTokenType.XML_COMMENT_START; }
/* Inside a comment, "[" starts a conditional-comment opening part and "<!["
   starts a conditional-comment closing part. Any "[" takes the first rule: it
   is listed before the catch-all rule, which matches the same single
   character. */
89 <COMMENT> "[" { yybegin(C_COMMENT_START); return XmlTokenType.XML_CONDITIONAL_COMMENT_START; }
90 <COMMENT> "<![" { yybegin(C_COMMENT_END); return XmlTokenType.XML_CONDITIONAL_COMMENT_END_START; }
/* "--", optional whitespace and ">" end the comment. */
91 <COMMENT> {END_COMMENT} { yybegin(YYINITIAL); return XmlTokenType.XML_COMMENT_END; }
/* Any other single character is comment text. */
92 <COMMENT> [^] { return XmlTokenType.XML_COMMENT_CHARACTERS; }
/* Condition text inside either conditional-comment part; the state is kept. */
94 <C_COMMENT_START,C_COMMENT_END> {CONDITIONAL_COMMENT_CONDITION} { return XmlTokenType.XML_COMMENT_CHARACTERS; }
/* In the opening part any other single character falls back to COMMENT. The
   two-character "]>" rule below still wins over this one because it matches
   more text. */
95 <C_COMMENT_START> [^] { yybegin(COMMENT); return XmlTokenType.XML_COMMENT_CHARACTERS; }
96 <C_COMMENT_START> "]>" { yybegin(COMMENT); return XmlTokenType.XML_CONDITIONAL_COMMENT_START_END; }
/* The comment terminator is also accepted inside either part. */
97 <C_COMMENT_START,C_COMMENT_END> {END_COMMENT} { yybegin(YYINITIAL); return XmlTokenType.XML_COMMENT_END; }
/* In the closing part "]" finishes it; any other single character falls back
   to COMMENT as comment text. */
98 <C_COMMENT_END> "]" { yybegin(COMMENT); return XmlTokenType.XML_CONDITIONAL_COMMENT_END; }
99 <C_COMMENT_END> [^] { yybegin(COMMENT); return XmlTokenType.XML_COMMENT_CHARACTERS; }
/* A backslash followed by "$" in content is ordinary data, not the start of an
   embedment. The closing brace of this action (line numbered 103) was missing
   from the listing and is restored. */
101 <YYINITIAL> \\\$ {
102   return XmlTokenType.XML_DATA_CHARACTERS;
103 }
/* A "${...}" embedment in content is returned with the configurable
   elTokenType. The closing brace of this action (line numbered 107) was
   missing from the listing and is restored. */
105 <YYINITIAL> {EL_EMBEDDMENT} {
106   return elTokenType;
107 }
/* Tag names. A regular name (start or end tag) is followed by the attribute
   state. */
109 <START_TAG_NAME, END_TAG_NAME> {TAG_NAME} { yybegin(TAG_ATTRIBUTES); return XmlTokenType.XML_NAME; }
/* A "#" name in an end tag keeps the state; in a start tag it switches to
   TAG_CHARACTERS. */
110 <END_TAG_NAME2> {TAG_NAME_FWT} { return XmlTokenType.XML_NAME; }
111 <START_TAG_NAME2> {TAG_NAME_FWT} { yybegin(TAG_CHARACTERS); return XmlTokenType.XML_NAME; }
/* ">" and "/>" close the tag and return to content. */
113 <TAG_ATTRIBUTES, END_TAG_NAME2, TAG_CHARACTERS> ">" { yybegin(YYINITIAL); return XmlTokenType.XML_TAG_END; }
114 <TAG_ATTRIBUTES, TAG_CHARACTERS> "/>" { yybegin(YYINITIAL); return XmlTokenType.XML_EMPTY_ELEMENT_END; }
/* Attribute name, and "=" which leads to the attribute value. */
115 <TAG_ATTRIBUTES> {ATTRIBUTE_NAME} { return XmlTokenType.XML_NAME; }
116 <TAG_ATTRIBUTES> "=" { yybegin(ATTRIBUTE_VALUE_START); return XmlTokenType.XML_EQ; }
/* Recovery: any other single character in these states switches back to
   YYINITIAL and is pushed back, so it is scanned again as content; no token is
   returned by this action. */
117 <TAG_ATTRIBUTES,START_TAG_NAME, END_TAG_NAME, END_TAG_NAME2> [^] { yybegin(YYINITIAL); yypushback(1); break; }
/* In TAG_CHARACTERS any other single character is returned as a tag
   character. */
119 <TAG_CHARACTERS> [^] { return XmlTokenType.XML_TAG_CHARACTERS; }
/* After "=": the tag may end right away, without any value. */
121 <ATTRIBUTE_VALUE_START> ">" { yybegin(YYINITIAL); return XmlTokenType.XML_TAG_END; }
122 <ATTRIBUTE_VALUE_START> "/>" { yybegin(YYINITIAL); return XmlTokenType.XML_EMPTY_ELEMENT_END; }
/* An unquoted "${...}" embedment is returned with the configurable
   elTokenType2; the state is kept. The closing brace of this action (line
   numbered 125) was missing from the listing and is restored. */
123  <ATTRIBUTE_VALUE_START> {EL_EMBEDDMENT} {
124   return elTokenType2;
125 }
/* Unquoted value: a run of characters other than whitespace, quotes and ">",
   where a "/" may also be followed by any character except ">". Afterwards
   scanning continues with further attributes. */
127 <ATTRIBUTE_VALUE_START> ([^ \n\r\t\f'\"\>]|(\/[^\>]))* { yybegin(TAG_ATTRIBUTES); return XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN; }
/* An opening quote selects the matching quoted-value state. */
128 <ATTRIBUTE_VALUE_START> "\"" { yybegin(ATTRIBUTE_VALUE_DQ); return XmlTokenType.XML_ATTRIBUTE_VALUE_START_DELIMITER; }
129 <ATTRIBUTE_VALUE_START> "'" { yybegin(ATTRIBUTE_VALUE_SQ); return XmlTokenType.XML_ATTRIBUTE_VALUE_START_DELIMITER; }
/* Rules for the inside of a double-quoted attribute value. The closing brace
   of this state group (line numbered 136) was missing from the listing and is
   restored. */
131 <ATTRIBUTE_VALUE_DQ> {
/* The closing quote ends the value; further attributes may follow. */
132   "\"" { yybegin(TAG_ATTRIBUTES); return XmlTokenType.XML_ATTRIBUTE_VALUE_END_DELIMITER; }
/* A backslash followed by "$" is plain value text. */
133   \\\$ { return XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN; }
/* A "${...}" embedment containing neither a closing brace nor a double quote
   is returned with the configurable elTokenType2. */
134   "${" [^\}\"]* "}" { return elTokenType2; }
/* Any other single character is value text. */
135   [^] { return XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN;}
136 }
/* Rules for the inside of a single-quoted attribute value. The closing brace
   of this state group (line numbered 143) was missing from the listing and is
   restored. */
138 <ATTRIBUTE_VALUE_SQ> {
/* The closing quote ends the value; further attributes may follow. */
139   "'" { yybegin(TAG_ATTRIBUTES); return XmlTokenType.XML_ATTRIBUTE_VALUE_END_DELIMITER; }
/* A backslash followed by "$" is plain value text. */
140   \\\$ { return XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN; }
/* A "${...}" embedment containing neither a closing brace nor a single quote
   is returned with the configurable elTokenType2. */
141   "${" [^\}\']* "}" { return elTokenType2; }
/* Any other single character is value text. */
142   [^] { return XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN;}
143 }
/* Entity references. These rules have no state list, so they are active in
   every state. The six named entities listed here, decimal references and hex
   references (written with a lowercase "x") share one action and are returned
   as XML_CHAR_ENTITY_REF. */
145 "&lt;" |
146 "&gt;" |
147 "&apos;" |
148 "&quot;" |
149 "&nbsp;" |
150 "&amp;" |
151 "&#"{DIGIT}+";" |
152 "&#x"({DIGIT}|[a-fA-F])+";" { return XmlTokenType.XML_CHAR_ENTITY_REF; }
/* Any other "&name;" is a general entity reference. The six names above also
   fit this pattern at the same length; the earlier rule takes them because it
   is listed first. */
153 "&"{TAG_NAME}";" { return XmlTokenType.XML_ENTITY_REF_TOKEN; }
/* Plain content: a run of characters other than "<", the ampersand, "$", "#"
   and whitespace, in which a backslash may be followed by "$" or "#". */
155 <YYINITIAL> ([^<&\$# \n\r\t\f]|(\\\$)|(\\#))* { return XmlTokenType.XML_DATA_CHARACTERS; }
/* Any single character in content that no other rule takes is also data. */
156 <YYINITIAL> [^] { return XmlTokenType.XML_DATA_CHARACTERS; }
/* Last resort for every state: a character that no other rule matched. */
157 [^] { return XmlTokenType.XML_BAD_CHARACTER; }