xml/impl/src/com/intellij/lexer/BaseHtmlLexer.java

   1 /*
   2  * Copyright 2000-2009 JetBrains s.r.o.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  * http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16 package com.intellij.lexer;
  17
import com.intellij.codeInsight.completion.CompletionUtil;
import com.intellij.psi.impl.source.tree.TreeUtil;
import com.intellij.psi.tree.IElementType;
import com.intellij.psi.tree.TokenSet;
import com.intellij.psi.xml.XmlTokenType;
import com.intellij.util.text.CharArrayUtil;
import org.jetbrains.annotations.NonNls;

import java.util.HashMap;
import java.util.Locale;
  27
  28 /**
  29  * Created by IntelliJ IDEA.
  30  * User: Maxim.Mossienko
  31  * Date: Oct 7, 2004
  32  * Time: 2:29:06 PM
  33  * To change this template use File | Settings | File Templates.
  34  */
  35 abstract class BaseHtmlLexer extends DelegateLexer {
  36   protected static final int BASE_STATE_MASK = 0x3F;
  37   private static final int SEEN_STYLE = 0x40;
  38   private static final int SEEN_TAG = 0x80;
  39   private static final int SEEN_SCRIPT = 0x100;
  40   private static final int SEEN_ATTRIBUTE = 0x200;
  41   private static final int SEEN_CONTENT_TYPE = 0x400;
  42   protected static final int BASE_STATE_SHIFT = 11;
  43
  44   private boolean seenTag;
  45   private boolean seenAttribute;
  46   private boolean seenStyle;
  47   private boolean seenScript;
  48   private final boolean caseInsensitive;
  49   private boolean seenContentType;
  50
  51   static final TokenSet TOKENS_TO_MERGE = TokenSet.create(XmlTokenType.XML_COMMENT_CHARACTERS, XmlTokenType.XML_WHITE_SPACE, XmlTokenType.XML_REAL_WHITE_SPACE,
  52                                                           XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN, XmlTokenType.XML_DATA_CHARACTERS,
  53                                                           XmlTokenType.XML_TAG_CHARACTERS);
  54
  55   public interface TokenHandler {
  56     void handleElement(Lexer lexer);
  57   }
  58
  59   public class XmlNameHandler implements TokenHandler {
  60     @NonNls private static final String TOKEN_SCRIPT = "script";
  61     @NonNls private static final String TOKEN_STYLE = "style";
  62     @NonNls private static final String TOKEN_ON = "on";
  63
  64     public void handleElement(Lexer lexer) {
  65       final CharSequence buffer = lexer.getBufferSequence();
  66       final char firstCh = buffer.charAt(lexer.getTokenStart());
  67       // support for style in any attribute that ends with style
  68       //final int i = lexer.getTokenEnd() - "style".length();
  69       //final char ch = i > lexer.getTokenStart() ? buffer[i]:firstCh;
  70
  71       if (seenScript && !seenTag) {
  72         seenContentType = false;
  73
  74         if (((firstCh == 'l' || firstCh == 't') || (caseInsensitive && (firstCh == 'L' || firstCh == 'T')))) {
  75           @NonNls String name = TreeUtil.getTokenText(lexer);
  76           if (caseInsensitive) name = name.toLowerCase();
  77
  78           if ("language".equals(name) || "type".equals(name)) {
  79             seenContentType = true;
  80           }
  81         }
  82
  83         return;
  84       }
  85
  86       if ( /*ch !='s' &&*/
  87           firstCh !='o' && firstCh !='s' &&
  88           (!caseInsensitive || (/*ch !='S' &&*/ firstCh !='S' && firstCh !='O') )
  89           ) {
  90         return; // optimization
  91       }
  92
  93       String name = TreeUtil.getTokenText(lexer);
  94       if (caseInsensitive) name = name.toLowerCase();
  95
  96       final boolean style = name.equals(TOKEN_STYLE); //name.endsWith("style");
  97       final int state = getState() & BASE_STATE_MASK;
  98       final boolean script = name.equals(TOKEN_SCRIPT) ||
  99                        ((name.startsWith(TOKEN_ON) && name.indexOf(':') == -1 && !isHtmlTagState(state)));
 100
 101       if (style || script) {
 102         // encountered tag name in end of tag
 103         if (seenTag) {
 104           seenTag = false;
 105           return;
 106         }
 107
 108         seenStyle = style;
 109         seenScript = script;
 110
 111         if (!isHtmlTagState(state)) {
 112           seenAttribute=true;
 113         }
 114       }
 115     }
 116   }
 117
 118   class XmlAttributeValueEndHandler implements TokenHandler {
 119     public void handleElement(Lexer lexer) {
 120       if (seenAttribute) {
 121         seenStyle = false;
 122         seenScript = false;
 123         seenAttribute = false;
 124       }
 125       seenContentType = false;
 126     }
 127   }
 128
 129   class XmlAttributeValueHandler implements TokenHandler {
 130     public void handleElement(Lexer lexer) {
 131       if (seenContentType) {
 132         if(!seenScript || seenAttribute) {
 133           return; // something invalid
 134         }
 135
 136         @NonNls String name = TreeUtil.getTokenText(lexer);
 137         if (caseInsensitive) name = name.toLowerCase();
 138         if (name.indexOf("javascript") == -1 && name.indexOf("jscript") == -1) {
 139           seenScript = false;
 140           seenTag = true;    // will be switched of on tag name in end
 141         }
 142       }
 143     }
 144   }
 145
 146   class XmlTagClosedHandler implements TokenHandler {
 147     public void handleElement(Lexer lexer) {
 148       if (seenAttribute) {
 149         seenScript=false;
 150         seenStyle=false;
 151
 152         seenAttribute=false;
 153       } else {
 154         if (seenStyle || seenScript) {
 155           seenTag=true;
 156         }
 157       }
 158     }
 159   }
 160
 161   class XmlTagEndHandler implements TokenHandler {
 162     public void handleElement(Lexer lexer) {
 163       seenStyle=false;
 164       seenScript=false;
 165       seenAttribute=false;
 166       seenContentType=false;
 167     }
 168   }
 169
 /** Handlers looked up by the delegate's token type after each advance(). */
 private final HashMap<IElementType,TokenHandler> tokenHandlers = new HashMap<IElementType, TokenHandler>();

 /**
  * Wraps the given lexer and installs the built-in handlers that maintain the seen-flags.
  *
  * @param _baseLexer       lexer to delegate to
  * @param _caseInsensitive whether names are lower-cased before they are compared
  */
 protected BaseHtmlLexer(Lexer _baseLexer, boolean _caseInsensitive)  {
   super(_baseLexer);
   this.caseInsensitive = _caseInsensitive;

   // One name handler instance serves both kinds of name tokens.
   final XmlNameHandler nameHandler = new XmlNameHandler();
   tokenHandlers.put(XmlTokenType.XML_NAME, nameHandler);
   tokenHandlers.put(XmlTokenType.XML_TAG_NAME, nameHandler);

   tokenHandlers.put(XmlTokenType.XML_TAG_END, new XmlTagClosedHandler());
   tokenHandlers.put(XmlTokenType.XML_END_TAG_START, new XmlTagEndHandler());
   tokenHandlers.put(XmlTokenType.XML_EMPTY_ELEMENT_END, new XmlTagEndHandler());

   tokenHandlers.put(XmlTokenType.XML_ATTRIBUTE_VALUE_END_DELIMITER, new XmlAttributeValueEndHandler());
   tokenHandlers.put(XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN, new XmlAttributeValueHandler());
 }
 185
 186   protected void registerHandler(IElementType elementType, TokenHandler value) {
 187     final TokenHandler tokenHandler = tokenHandlers.get(elementType);
 188
 189     if (tokenHandler != null) {
 190       final TokenHandler newHandler = value;
 191       value = new TokenHandler() {
 192         public void handleElement(final Lexer lexer) {
 193           tokenHandler.handleElement(lexer);
 194           newHandler.handleElement(lexer);
 195         }
 196       };
 197     }
 198
 199     tokenHandlers.put(elementType,value);
 200   }
 201
 202   public void start(final CharSequence buffer, final int startOffset, final int endOffset, final int initialState) {
 203     initState(initialState);
 204     super.start(buffer, startOffset, endOffset, initialState & BASE_STATE_MASK);
 205   }
 206
 207   private void initState(final int initialState) {
 208     seenScript = (initialState & SEEN_SCRIPT)!=0;
 209     seenStyle = (initialState & SEEN_STYLE)!=0;
 210     seenTag = (initialState & SEEN_TAG)!=0;
 211     seenAttribute = (initialState & SEEN_ATTRIBUTE)!=0;
 212     seenContentType = (initialState & SEEN_CONTENT_TYPE) != 0;
 213   }
 214
 215   protected int skipToTheEndOfTheEmbeddment() {
 216     Lexer base = getDelegate();
 217     int tokenEnd = base.getTokenEnd();
 218     int lastState = 0;
 219     int lastStart = 0;
 220
 221     final CharSequence buf = base.getBufferSequence();
 222     final char[] bufArray = CharArrayUtil.fromSequenceWithoutCopying(buf);
 223
 224     if (seenTag) {
 225       FoundEnd:
 226       while(true) {
 227         FoundEndOfTag:
 228         while(base.getTokenType() != XmlTokenType.XML_END_TAG_START) {
 229           if (base.getTokenType() == XmlTokenType.XML_COMMENT_CHARACTERS) {
 230             // we should terminate on first occurence of </
 231             final int end = base.getTokenEnd();
 232
 233             for(int i = base.getTokenStart(); i < end; ++i) {
 234               if ((bufArray != null ? bufArray[i ]:buf.charAt(i)) == '<' &&
 235                   i + 1 < end &&
 236                   (bufArray != null ? bufArray[i+1]:buf.charAt(i+1)) == '/') {
 237                 tokenEnd = i;
 238                 lastStart = i - 1;
 239                 lastState = 0;
 240
 241                 break FoundEndOfTag;
 242               }
 243             }
 244           }
 245
 246           lastState = base.getState();
 247           tokenEnd = base.getTokenEnd();
 248           lastStart = base.getTokenStart();
 249           if (tokenEnd == getBufferEnd()) break FoundEnd;
 250           base.advance();
 251         }
 252
 253         // check if next is script
 254         if (base.getTokenType() != XmlTokenType.XML_END_TAG_START) { // we are inside comment
 255           base.start(buf,lastStart+1,getBufferEnd(),lastState);
 256           base.getTokenType();
 257           base.advance();
 258         } else {
 259           base.advance();
 260         }
 261
 262         while(XmlTokenType.WHITESPACES.contains(base.getTokenType())) {
 263           base.advance();
 264         }
 265
 266         if (base.getTokenType() == XmlTokenType.XML_NAME) {
 267           String name = TreeUtil.getTokenText(base);
 268           if (caseInsensitive) name = name.toLowerCase();
 269
 270           if((hasSeenScript() && XmlNameHandler.TOKEN_SCRIPT.equals(name)) ||
 271              (hasSeenStyle() && XmlNameHandler.TOKEN_STYLE.equals(name)) ||
 272              CompletionUtil.DUMMY_IDENTIFIER_TRIMMED.equalsIgnoreCase(name)) {
 273             break; // really found end
 274           }
 275         }
 276       }
 277
 278       base.start(buf,lastStart,getBufferEnd(),lastState);
 279       base.getTokenType();
 280     } else if (seenAttribute) {
 281       while(true) {
 282         if (!isValidAttributeValueTokenType(base.getTokenType())) break;
 283
 284         tokenEnd = base.getTokenEnd();
 285         lastState = base.getState();
 286         lastStart = base.getTokenStart();
 287
 288         if (tokenEnd == getBufferEnd()) break;
 289         base.advance();
 290       }
 291
 292       base.start(buf,lastStart,getBufferEnd(),lastState);
 293       base.getTokenType();
 294     }
 295     return tokenEnd;
 296   }
 297
 298   protected boolean isValidAttributeValueTokenType(final IElementType tokenType) {
 299     return tokenType == XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN ||
 300            tokenType == XmlTokenType.XML_ENTITY_REF_TOKEN ||
 301             tokenType == XmlTokenType.XML_CHAR_ENTITY_REF;
 302   }
 303
 304   public void advance() {
 305     super.advance();
 306     IElementType type = getDelegate().getTokenType();
 307     TokenHandler tokenHandler = tokenHandlers.get(type);
 308     if (tokenHandler!=null) tokenHandler.handleElement(this);
 309   }
 310
 311
 312   public int getState() {
 313     int state = super.getState();
 314
 315     state |= ((seenScript)?SEEN_SCRIPT:0);
 316     state |= ((seenTag)?SEEN_TAG:0);
 317     state |= ((seenStyle)?SEEN_STYLE:0);
 318     state |= ((seenAttribute)?SEEN_ATTRIBUTE:0);
 319     state |= ((seenContentType)?SEEN_CONTENT_TYPE:0);
 320
 321     return state;
 322   }
 323
 324   protected final boolean hasSeenStyle() {
 325     return seenStyle;
 326   }
 327
 328   protected final boolean hasSeenAttribute() {
 329     return seenAttribute;
 330   }
 331
 332   protected final boolean hasSeenTag() {
 333     return seenTag;
 334   }
 335
 336   protected boolean hasSeenScript() {
 337     return seenScript;
 338   }
 339
 340   protected abstract boolean isHtmlTagState(int state);
 341 }