2 * Copyright 2000-2009 JetBrains s.r.o.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
16 package com
.intellij
.lexer
;
18 import com
.intellij
.codeInsight
.completion
.CompletionUtil
;
19 import com
.intellij
.psi
.impl
.source
.tree
.TreeUtil
;
20 import com
.intellij
.psi
.tree
.IElementType
;
21 import com
.intellij
.psi
.tree
.TokenSet
;
22 import com
.intellij
.psi
.xml
.XmlTokenType
;
23 import com
.intellij
.util
.text
.CharArrayUtil
;
24 import org
.jetbrains
.annotations
.NonNls
;
26 import java
.util
.HashMap
;
29 * @author Maxim.Mossienko
// Base for HTML lexers layered on a delegate lexer. advance() dispatches the current token to the
// TokenHandler registered for its type; the handlers maintain the seen* flags declared below, which
// getState() ORs into the lexer state and initState() decodes again.
31 abstract class BaseHtmlLexer
extends DelegateLexer
{
// Mask (low six bits) applied to the state that start() forwards to the superclass.
32 protected static final int BASE_STATE_MASK
= 0x3F;
// Single-bit flags, one per seen* field; OR-ed in by getState() and tested by initState().
33 private static final int SEEN_STYLE
= 0x40;
34 private static final int SEEN_TAG
= 0x80;
35 private static final int SEEN_SCRIPT
= 0x100;
36 private static final int SEEN_ATTRIBUTE
= 0x200;
37 private static final int SEEN_CONTENT_TYPE
= 0x400;
// Not referenced in the visible code; 11 is the first bit position above SEEN_CONTENT_TYPE (0x400).
// NOTE(review): presumably the shift subclasses apply to their own state bits - confirm.
38 protected static final int BASE_STATE_SHIFT
= 11;
// Mutable lexing flags; each one is persisted through the matching SEEN_* bit.
40 private boolean seenTag
;
41 private boolean seenAttribute
;
42 private boolean seenStyle
;
43 private boolean seenScript
;
// Assigned once in the constructor; when true, names are lower-cased before being compared.
44 private final boolean caseInsensitive
;
// Set by XmlNameHandler when a "language" or "type" name is met while seenScript && !seenTag.
45 private boolean seenContentType
;
// Package-private set of token types; not referenced in the visible part of this class.
// NOTE(review): by its name, intended for lexers that merge adjacent tokens - confirm at use sites.
47 static final TokenSet TOKENS_TO_MERGE
= TokenSet
.create(XmlTokenType
.XML_COMMENT_CHARACTERS
, XmlTokenType
.XML_WHITE_SPACE
, XmlTokenType
.XML_REAL_WHITE_SPACE
,
48 XmlTokenType
.XML_ATTRIBUTE_VALUE_TOKEN
, XmlTokenType
.XML_DATA_CHARACTERS
,
49 XmlTokenType
.XML_TAG_CHARACTERS
);
51 public interface TokenHandler
{
52 void handleElement(Lexer lexer
);
// Handler the constructor registers for XML_NAME and XML_TAG_NAME tokens. It inspects the name text
// to detect "script"/"style" names, "on*" names and the "language"/"type" names inside a script tag.
55 public class XmlNameHandler
implements TokenHandler
{
// Lower-case names compared against the (optionally lower-cased) token text.
56 @NonNls private static final String TOKEN_SCRIPT
= "script";
57 @NonNls private static final String TOKEN_STYLE
= "style";
// Prefix tested with startsWith(); NOTE(review): presumably event-handler attributes such as onclick - confirm.
58 @NonNls private static final String TOKEN_ON
= "on";
// Examines the current token of the given lexer and updates the enclosing lexer's flags.
60 public void handleElement(Lexer lexer
) {
// Only the first character is read up front so that most names can be rejected without building a String.
61 final CharSequence buffer
= lexer
.getBufferSequence();
62 final char firstCh
= buffer
.charAt(lexer
.getTokenStart());
63 // support for style in any attribute that ends with style
64 //final int i = lexer.getTokenEnd() - "style".length();
65 //final char ch = i > lexer.getTokenStart() ? buffer[i]:firstCh;
// While seenScript is set and seenTag is not: reset seenContentType, then set it again only if
// this name is "language" or "type".
67 if (seenScript
&& !seenTag
) {
68 seenContentType
= false;
// Cheap pre-filter: only names starting with l/t (also L/T when case-insensitive) can match.
70 if (((firstCh
== 'l' || firstCh
== 't') || (caseInsensitive
&& (firstCh
== 'L' || firstCh
== 'T')))) {
71 @NonNls String name
= TreeUtil
.getTokenText(lexer
);
// NOTE(review): toLowerCase() without a Locale uses the default locale; consider an explicit Locale.
72 if (caseInsensitive
) name
= name
.toLowerCase();
74 if ("language".equals(name
) || "type".equals(name
)) {
75 seenContentType
= true;
// Fast exit: a name that does not start with o/s (also O/S when case-insensitive) cannot equal
// "script" or "style" nor start with "on", so the text comparison below is skipped.
83 firstCh
!='o' && firstCh
!='s' &&
84 (!caseInsensitive
|| (/*ch !='S' &&*/ firstCh
!='S' && firstCh
!='O') )
86 return; // optimization
// Full comparison on the (optionally lower-cased) token text.
89 String name
= TreeUtil
.getTokenText(lexer
);
90 if (caseInsensitive
) name
= name
.toLowerCase();
92 final boolean style
= name
.equals(TOKEN_STYLE
); //name.endsWith("style");
// Only the base-lexer portion of the state is handed to isHtmlTagState().
93 final int state
= getState() & BASE_STATE_MASK
;
// "script" itself, or an "on"-prefixed name without ':' when isHtmlTagState(state) is false.
94 final boolean script
= name
.equals(TOKEN_SCRIPT
) ||
95 ((name
.startsWith(TOKEN_ON
) && name
.indexOf(':') == -1 && !isHtmlTagState(state
)));
// NOTE(review): the flag updates performed inside the following branches are not visible here.
97 if (style
|| script
) {
98 // encountered tag name in end of tag
107 if (!isHtmlTagState(state
)) {
// Handler the constructor registers for XML_ATTRIBUTE_VALUE_END_DELIMITER tokens.
114 class XmlAttributeValueEndHandler
implements TokenHandler
{
// Clears seenAttribute and seenContentType when an attribute value ends.
// NOTE(review): the condition guarding the first assignment is not visible here - confirm.
115 public void handleElement(Lexer lexer
) {
119 seenAttribute
= false;
121 seenContentType
= false;
// Handler the constructor registers for XML_ATTRIBUTE_VALUE_TOKEN tokens. When the preceding
// attribute name set seenContentType, it checks whether the value mentions javascript/jscript.
125 class XmlAttributeValueHandler
implements TokenHandler
{
126 public void handleElement(Lexer lexer
) {
// Only values that follow a "language"/"type" name are examined.
127 if (seenContentType
) {
// A content-type value is only expected while seenScript is set and seenAttribute is not.
128 if(!seenScript
|| seenAttribute
) {
129 return; // something invalid
132 @NonNls String name
= TreeUtil
.getTokenText(lexer
);
// NOTE(review): toLowerCase() without a Locale uses the default locale; consider an explicit Locale.
133 if (caseInsensitive
) name
= name
.toLowerCase();
// Value names neither "javascript" nor "jscript".
134 if (name
.indexOf("javascript") == -1 && name
.indexOf("jscript") == -1) {
136 seenTag
= true; // will be switched off on tag name in end
// Handler the constructor registers for XML_TAG_END tokens.
142 class XmlTagClosedHandler
implements TokenHandler
{
143 public void handleElement(Lexer lexer
) {
// Acts only while a style or script context is active.
// NOTE(review): the statements inside this branch are not visible here; presumably seenTag is updated - confirm.
150 if (seenStyle
|| seenScript
) {
// Handler the constructor registers for both XML_END_TAG_START and XML_EMPTY_ELEMENT_END tokens.
157 class XmlTagEndHandler
implements TokenHandler
{
158 public void handleElement(Lexer lexer
) {
// Resets seenContentType. NOTE(review): other flag resets may sit on lines not visible here - confirm.
162 seenContentType
=false;
166 private final HashMap
<IElementType
,TokenHandler
> tokenHandlers
= new HashMap
<IElementType
, TokenHandler
>();
// Stores the case-sensitivity mode and installs the built-in token handlers.
// _baseLexer: NOTE(review): its use is not visible here; presumably handed to the DelegateLexer superclass - confirm.
// _caseInsensitive: when true, names are lower-cased before being compared.
168 protected BaseHtmlLexer(Lexer _baseLexer
, boolean _caseInsensitive
) {
170 caseInsensitive
= _caseInsensitive
;
// One XmlNameHandler instance is shared by XML_NAME and XML_TAG_NAME.
172 XmlNameHandler value
= new XmlNameHandler();
173 tokenHandlers
.put(XmlTokenType
.XML_NAME
,value
);
174 tokenHandlers
.put(XmlTokenType
.XML_TAG_NAME
,value
);
175 tokenHandlers
.put(XmlTokenType
.XML_TAG_END
,new XmlTagClosedHandler());
// XML_END_TAG_START and XML_EMPTY_ELEMENT_END each get their own XmlTagEndHandler instance.
176 tokenHandlers
.put(XmlTokenType
.XML_END_TAG_START
,new XmlTagEndHandler());
177 tokenHandlers
.put(XmlTokenType
.XML_EMPTY_ELEMENT_END
,new XmlTagEndHandler());
178 tokenHandlers
.put(XmlTokenType
.XML_ATTRIBUTE_VALUE_END_DELIMITER
,new XmlAttributeValueEndHandler());
179 tokenHandlers
.put(XmlTokenType
.XML_ATTRIBUTE_VALUE_TOKEN
,new XmlAttributeValueHandler());
182 protected void registerHandler(IElementType elementType
, TokenHandler value
) {
183 final TokenHandler tokenHandler
= tokenHandlers
.get(elementType
);
185 if (tokenHandler
!= null) {
186 final TokenHandler newHandler
= value
;
187 value
= new TokenHandler() {
188 public void handleElement(final Lexer lexer
) {
189 tokenHandler
.handleElement(lexer
);
190 newHandler
.handleElement(lexer
);
195 tokenHandlers
.put(elementType
,value
);
198 public void start(final CharSequence buffer
, final int startOffset
, final int endOffset
, final int initialState
) {
199 initState(initialState
);
200 super.start(buffer
, startOffset
, endOffset
, initialState
& BASE_STATE_MASK
);
203 private void initState(final int initialState
) {
204 seenScript
= (initialState
& SEEN_SCRIPT
)!=0;
205 seenStyle
= (initialState
& SEEN_STYLE
)!=0;
206 seenTag
= (initialState
& SEEN_TAG
)!=0;
207 seenAttribute
= (initialState
& SEEN_ATTRIBUTE
)!=0;
208 seenContentType
= (initialState
& SEEN_CONTENT_TYPE
) != 0;
// Drives the delegate lexer forward over embedded content and tracks in tokenEnd the end offset of
// the last token accepted as part of it. Two visible modes: scanning up to an end-tag start whose
// name closes the current script/style context, or (seenAttribute) consuming attribute-value tokens.
// NOTE(review): the return statement is not visible here; presumably tokenEnd is returned - confirm.
211 protected int skipToTheEndOfTheEmbeddment() {
212 Lexer base
= getDelegate();
213 int tokenEnd
= base
.getTokenEnd();
217 final CharSequence buf
= base
.getBufferSequence();
// May be null (every use below falls back to buf.charAt when it is).
218 final char[] bufArray
= CharArrayUtil
.fromSequenceWithoutCopying(buf
);
// Consume tokens until the delegate reports the start of an end tag.
224 while(base
.getTokenType() != XmlTokenType
.XML_END_TAG_START
) {
// Comment text can hide a closing tag, so it is scanned character by character for "</".
225 if (base
.getTokenType() == XmlTokenType
.XML_COMMENT_CHARACTERS
) {
226 // we should terminate on first occurrence of </
227 final int end
= base
.getTokenEnd();
// NOTE(review): i + 1 may equal end below; confirm a bounds guard exists (none is visible here).
229 for(int i
= base
.getTokenStart(); i
< end
; ++i
) {
230 if ((bufArray
!= null ? bufArray
[i
]:buf
.charAt(i
)) == '<' &&
232 (bufArray
!= null ? bufArray
[i
+1]:buf
.charAt(i
+1)) == '/') {
// Remember where the current token sits so the delegate can be restarted from it later.
242 lastState
= base
.getState();
243 tokenEnd
= base
.getTokenEnd();
244 lastStart
= base
.getTokenStart();
// Reached the end of the buffer: leave the labelled outer loop.
245 if (tokenEnd
== getBufferEnd()) break FoundEnd
;
249 // check if next is script
// Loop left without an end-tag start: restart the delegate one character past the remembered start.
250 if (base
.getTokenType() != XmlTokenType
.XML_END_TAG_START
) { // we are inside comment
251 base
.start(buf
,lastStart
+1,getBufferEnd(),lastState
);
// Skip whitespace tokens before the tag name.
258 while(XmlTokenType
.WHITESPACES
.contains(base
.getTokenType())) {
262 if (base
.getTokenType() == XmlTokenType
.XML_NAME
) {
263 String name
= TreeUtil
.getTokenText(base
);
// NOTE(review): toLowerCase() without a Locale uses the default locale; consider an explicit Locale.
264 if (caseInsensitive
) name
= name
.toLowerCase();
// The end is genuine when the name closes the active script/style context, or is the completion dummy identifier.
266 if((hasSeenScript() && XmlNameHandler
.TOKEN_SCRIPT
.equals(name
)) ||
267 (hasSeenStyle() && XmlNameHandler
.TOKEN_STYLE
.equals(name
)) ||
268 CompletionUtil
.DUMMY_IDENTIFIER_TRIMMED
.equalsIgnoreCase(name
)) {
269 break; // really found end
// Rewind the delegate to the last remembered token.
274 base
.start(buf
,lastStart
,getBufferEnd(),lastState
);
// Attribute mode: accept tokens while they are valid attribute-value token types.
276 } else if (seenAttribute
) {
278 if (!isValidAttributeValueTokenType(base
.getTokenType())) break;
280 tokenEnd
= base
.getTokenEnd();
281 lastState
= base
.getState();
282 lastStart
= base
.getTokenStart();
284 if (tokenEnd
== getBufferEnd()) break;
// Rewind the delegate to the last accepted token.
288 base
.start(buf
,lastStart
,getBufferEnd(),lastState
);
294 protected boolean isValidAttributeValueTokenType(final IElementType tokenType
) {
295 return tokenType
== XmlTokenType
.XML_ATTRIBUTE_VALUE_TOKEN
||
296 tokenType
== XmlTokenType
.XML_ENTITY_REF_TOKEN
||
297 tokenType
== XmlTokenType
.XML_CHAR_ENTITY_REF
;
// Looks up the handler registered for the delegate's current token type and, if there is one,
// invokes it with this lexer.
// NOTE(review): the statement preceding the lookup is not visible here; presumably super.advance() - confirm.
300 public void advance() {
302 IElementType type
= getDelegate().getTokenType();
303 TokenHandler tokenHandler
= tokenHandlers
.get(type
);
// Token types without a registered handler are left alone.
304 if (tokenHandler
!=null) tokenHandler
.handleElement(this);
// Builds the combined lexer state: the superclass state with one SEEN_* bit OR-ed in for every
// seen* flag that is currently set. initState() performs the reverse decoding.
// NOTE(review): the return statement is not visible here; presumably the combined value is returned - confirm.
308 public int getState() {
309 int state
= super.getState();
311 state
|= ((seenScript
)?SEEN_SCRIPT
:0);
312 state
|= ((seenTag
)?SEEN_TAG
:0);
313 state
|= ((seenStyle
)?SEEN_STYLE
:0);
314 state
|= ((seenAttribute
)?SEEN_ATTRIBUTE
:0);
315 state
|= ((seenContentType
)?SEEN_CONTENT_TYPE
:0);
// Used by skipToTheEndOfTheEmbeddment() to decide whether a closing "style" name ends the embedded content.
// NOTE(review): body not visible here; presumably returns seenStyle - confirm.
320 protected final boolean hasSeenStyle() {
324 protected final boolean hasSeenAttribute() {
325 return seenAttribute
;
// NOTE(review): body not visible here; presumably returns seenTag - confirm.
328 protected final boolean hasSeenTag() {
// Used by skipToTheEndOfTheEmbeddment() to decide whether a closing "script" name ends the embedded content.
// Unlike the other hasSeen* accessors this one is not final, so subclasses may override it.
// NOTE(review): body not visible here; presumably returns seenScript - confirm.
332 protected boolean hasSeenScript() {
336 protected abstract boolean isHtmlTagState(int state
);