update copyright
[fedora-idea.git] / xml / impl / src / com / intellij / lexer / BaseHtmlLexer.java
blob5212dd049cc79a05e5cef81db01947469a194dc6
/*
 * Copyright 2000-2009 JetBrains s.r.o.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.intellij.lexer;

import com.intellij.codeInsight.completion.CompletionUtil;
import com.intellij.psi.impl.source.tree.TreeUtil;
import com.intellij.psi.tree.IElementType;
import com.intellij.psi.tree.TokenSet;
import com.intellij.psi.xml.XmlTokenType;
import com.intellij.util.text.CharArrayUtil;
import org.jetbrains.annotations.NonNls;

import java.util.HashMap;
import java.util.Locale;
/**
 * Created by IntelliJ IDEA.
 * User: Maxim.Mossienko
 * Date: Oct 7, 2004
 * Time: 2:29:06 PM
 * To change this template use File | Settings | File Templates.
 */
35 abstract class BaseHtmlLexer extends DelegateLexer {
36 protected static final int BASE_STATE_MASK = 0x3F;
37 private static final int SEEN_STYLE = 0x40;
38 private static final int SEEN_TAG = 0x80;
39 private static final int SEEN_SCRIPT = 0x100;
40 private static final int SEEN_ATTRIBUTE = 0x200;
41 private static final int SEEN_CONTENT_TYPE = 0x400;
42 protected static final int BASE_STATE_SHIFT = 11;
44 private boolean seenTag;
45 private boolean seenAttribute;
46 private boolean seenStyle;
47 private boolean seenScript;
48 private final boolean caseInsensitive;
49 private boolean seenContentType;
51 static final TokenSet TOKENS_TO_MERGE = TokenSet.create(XmlTokenType.XML_COMMENT_CHARACTERS, XmlTokenType.XML_WHITE_SPACE, XmlTokenType.XML_REAL_WHITE_SPACE,
52 XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN, XmlTokenType.XML_DATA_CHARACTERS,
53 XmlTokenType.XML_TAG_CHARACTERS);
55 public interface TokenHandler {
56 void handleElement(Lexer lexer);
59 public class XmlNameHandler implements TokenHandler {
60 @NonNls private static final String TOKEN_SCRIPT = "script";
61 @NonNls private static final String TOKEN_STYLE = "style";
62 @NonNls private static final String TOKEN_ON = "on";
64 public void handleElement(Lexer lexer) {
65 final CharSequence buffer = lexer.getBufferSequence();
66 final char firstCh = buffer.charAt(lexer.getTokenStart());
67 // support for style in any attribute that ends with style
68 //final int i = lexer.getTokenEnd() - "style".length();
69 //final char ch = i > lexer.getTokenStart() ? buffer[i]:firstCh;
71 if (seenScript && !seenTag) {
72 seenContentType = false;
74 if (((firstCh == 'l' || firstCh == 't') || (caseInsensitive && (firstCh == 'L' || firstCh == 'T')))) {
75 @NonNls String name = TreeUtil.getTokenText(lexer);
76 if (caseInsensitive) name = name.toLowerCase();
78 if ("language".equals(name) || "type".equals(name)) {
79 seenContentType = true;
83 return;
86 if ( /*ch !='s' &&*/
87 firstCh !='o' && firstCh !='s' &&
88 (!caseInsensitive || (/*ch !='S' &&*/ firstCh !='S' && firstCh !='O') )
89 ) {
90 return; // optimization
93 String name = TreeUtil.getTokenText(lexer);
94 if (caseInsensitive) name = name.toLowerCase();
96 final boolean style = name.equals(TOKEN_STYLE); //name.endsWith("style");
97 final int state = getState() & BASE_STATE_MASK;
98 final boolean script = name.equals(TOKEN_SCRIPT) ||
99 ((name.startsWith(TOKEN_ON) && name.indexOf(':') == -1 && !isHtmlTagState(state)));
101 if (style || script) {
102 // encountered tag name in end of tag
103 if (seenTag) {
104 seenTag = false;
105 return;
108 seenStyle = style;
109 seenScript = script;
111 if (!isHtmlTagState(state)) {
112 seenAttribute=true;
118 class XmlAttributeValueEndHandler implements TokenHandler {
119 public void handleElement(Lexer lexer) {
120 if (seenAttribute) {
121 seenStyle = false;
122 seenScript = false;
123 seenAttribute = false;
125 seenContentType = false;
129 class XmlAttributeValueHandler implements TokenHandler {
130 public void handleElement(Lexer lexer) {
131 if (seenContentType) {
132 if(!seenScript || seenAttribute) {
133 return; // something invalid
136 @NonNls String name = TreeUtil.getTokenText(lexer);
137 if (caseInsensitive) name = name.toLowerCase();
138 if (name.indexOf("javascript") == -1 && name.indexOf("jscript") == -1) {
139 seenScript = false;
140 seenTag = true; // will be switched of on tag name in end
146 class XmlTagClosedHandler implements TokenHandler {
147 public void handleElement(Lexer lexer) {
148 if (seenAttribute) {
149 seenScript=false;
150 seenStyle=false;
152 seenAttribute=false;
153 } else {
154 if (seenStyle || seenScript) {
155 seenTag=true;
161 class XmlTagEndHandler implements TokenHandler {
162 public void handleElement(Lexer lexer) {
163 seenStyle=false;
164 seenScript=false;
165 seenAttribute=false;
166 seenContentType=false;
170 private final HashMap<IElementType,TokenHandler> tokenHandlers = new HashMap<IElementType, TokenHandler>();
172 protected BaseHtmlLexer(Lexer _baseLexer, boolean _caseInsensitive) {
173 super(_baseLexer);
174 caseInsensitive = _caseInsensitive;
176 XmlNameHandler value = new XmlNameHandler();
177 tokenHandlers.put(XmlTokenType.XML_NAME,value);
178 tokenHandlers.put(XmlTokenType.XML_TAG_NAME,value);
179 tokenHandlers.put(XmlTokenType.XML_TAG_END,new XmlTagClosedHandler());
180 tokenHandlers.put(XmlTokenType.XML_END_TAG_START,new XmlTagEndHandler());
181 tokenHandlers.put(XmlTokenType.XML_EMPTY_ELEMENT_END,new XmlTagEndHandler());
182 tokenHandlers.put(XmlTokenType.XML_ATTRIBUTE_VALUE_END_DELIMITER,new XmlAttributeValueEndHandler());
183 tokenHandlers.put(XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN,new XmlAttributeValueHandler());
186 protected void registerHandler(IElementType elementType, TokenHandler value) {
187 final TokenHandler tokenHandler = tokenHandlers.get(elementType);
189 if (tokenHandler != null) {
190 final TokenHandler newHandler = value;
191 value = new TokenHandler() {
192 public void handleElement(final Lexer lexer) {
193 tokenHandler.handleElement(lexer);
194 newHandler.handleElement(lexer);
199 tokenHandlers.put(elementType,value);
202 public void start(final CharSequence buffer, final int startOffset, final int endOffset, final int initialState) {
203 initState(initialState);
204 super.start(buffer, startOffset, endOffset, initialState & BASE_STATE_MASK);
207 private void initState(final int initialState) {
208 seenScript = (initialState & SEEN_SCRIPT)!=0;
209 seenStyle = (initialState & SEEN_STYLE)!=0;
210 seenTag = (initialState & SEEN_TAG)!=0;
211 seenAttribute = (initialState & SEEN_ATTRIBUTE)!=0;
212 seenContentType = (initialState & SEEN_CONTENT_TYPE) != 0;
215 protected int skipToTheEndOfTheEmbeddment() {
216 Lexer base = getDelegate();
217 int tokenEnd = base.getTokenEnd();
218 int lastState = 0;
219 int lastStart = 0;
221 final CharSequence buf = base.getBufferSequence();
222 final char[] bufArray = CharArrayUtil.fromSequenceWithoutCopying(buf);
224 if (seenTag) {
225 FoundEnd:
226 while(true) {
227 FoundEndOfTag:
228 while(base.getTokenType() != XmlTokenType.XML_END_TAG_START) {
229 if (base.getTokenType() == XmlTokenType.XML_COMMENT_CHARACTERS) {
230 // we should terminate on first occurence of </
231 final int end = base.getTokenEnd();
233 for(int i = base.getTokenStart(); i < end; ++i) {
234 if ((bufArray != null ? bufArray[i ]:buf.charAt(i)) == '<' &&
235 i + 1 < end &&
236 (bufArray != null ? bufArray[i+1]:buf.charAt(i+1)) == '/') {
237 tokenEnd = i;
238 lastStart = i - 1;
239 lastState = 0;
241 break FoundEndOfTag;
246 lastState = base.getState();
247 tokenEnd = base.getTokenEnd();
248 lastStart = base.getTokenStart();
249 if (tokenEnd == getBufferEnd()) break FoundEnd;
250 base.advance();
253 // check if next is script
254 if (base.getTokenType() != XmlTokenType.XML_END_TAG_START) { // we are inside comment
255 base.start(buf,lastStart+1,getBufferEnd(),lastState);
256 base.getTokenType();
257 base.advance();
258 } else {
259 base.advance();
262 while(XmlTokenType.WHITESPACES.contains(base.getTokenType())) {
263 base.advance();
266 if (base.getTokenType() == XmlTokenType.XML_NAME) {
267 String name = TreeUtil.getTokenText(base);
268 if (caseInsensitive) name = name.toLowerCase();
270 if((hasSeenScript() && XmlNameHandler.TOKEN_SCRIPT.equals(name)) ||
271 (hasSeenStyle() && XmlNameHandler.TOKEN_STYLE.equals(name)) ||
272 CompletionUtil.DUMMY_IDENTIFIER_TRIMMED.equalsIgnoreCase(name)) {
273 break; // really found end
278 base.start(buf,lastStart,getBufferEnd(),lastState);
279 base.getTokenType();
280 } else if (seenAttribute) {
281 while(true) {
282 if (!isValidAttributeValueTokenType(base.getTokenType())) break;
284 tokenEnd = base.getTokenEnd();
285 lastState = base.getState();
286 lastStart = base.getTokenStart();
288 if (tokenEnd == getBufferEnd()) break;
289 base.advance();
292 base.start(buf,lastStart,getBufferEnd(),lastState);
293 base.getTokenType();
295 return tokenEnd;
298 protected boolean isValidAttributeValueTokenType(final IElementType tokenType) {
299 return tokenType == XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN ||
300 tokenType == XmlTokenType.XML_ENTITY_REF_TOKEN ||
301 tokenType == XmlTokenType.XML_CHAR_ENTITY_REF;
304 public void advance() {
305 super.advance();
306 IElementType type = getDelegate().getTokenType();
307 TokenHandler tokenHandler = tokenHandlers.get(type);
308 if (tokenHandler!=null) tokenHandler.handleElement(this);
312 public int getState() {
313 int state = super.getState();
315 state |= ((seenScript)?SEEN_SCRIPT:0);
316 state |= ((seenTag)?SEEN_TAG:0);
317 state |= ((seenStyle)?SEEN_STYLE:0);
318 state |= ((seenAttribute)?SEEN_ATTRIBUTE:0);
319 state |= ((seenContentType)?SEEN_CONTENT_TYPE:0);
321 return state;
324 protected final boolean hasSeenStyle() {
325 return seenStyle;
328 protected final boolean hasSeenAttribute() {
329 return seenAttribute;
332 protected final boolean hasSeenTag() {
333 return seenTag;
336 protected boolean hasSeenScript() {
337 return seenScript;
340 protected abstract boolean isHtmlTagState(int state);