xml/impl/src/com/intellij/lexer/BaseHtmlLexer.java

   1 /*
   2  * Copyright 2000-2009 JetBrains s.r.o.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  * http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16 package com.intellij.lexer;
  17
import com.intellij.codeInsight.completion.CompletionUtil;
import com.intellij.psi.impl.source.tree.TreeUtil;
import com.intellij.psi.tree.IElementType;
import com.intellij.psi.tree.TokenSet;
import com.intellij.psi.xml.XmlTokenType;
import com.intellij.util.text.CharArrayUtil;
import org.jetbrains.annotations.NonNls;

import java.util.HashMap;
import java.util.Locale;
  27
  28 /**
  29  * @author Maxim.Mossienko
  30  */
  31 abstract class BaseHtmlLexer extends DelegateLexer {
  // Layout of the int returned by getState(): the low bits come from the delegate lexer,
  // the SEEN_* bits below mirror the boolean fields of this class.

  // Bits of the incoming state that start() forwards to the delegate lexer.
  protected static final int BASE_STATE_MASK = 0x3F;
  // One bit per boolean flag; encoded in getState() and decoded in initState().
  private static final int SEEN_STYLE = 0x40;
  private static final int SEEN_TAG = 0x80;
  private static final int SEEN_SCRIPT = 0x100;
  private static final int SEEN_ATTRIBUTE = 0x200;
  private static final int SEEN_CONTENT_TYPE = 0x400;
  // Index of the first bit above the SEEN_* flags (0x400 is bit 10).
  // Not used in this class; presumably reserved for subclasses' own state bits - TODO confirm.
  protected static final int BASE_STATE_SHIFT = 11;

  // Set when a style/script start tag has been closed with '>' (see XmlTagClosedHandler);
  // cleared when the style/script name is met again (see XmlNameHandler).
  private boolean seenTag;
  // Set when the style/script name was met outside of a tag-name state, i.e. as an attribute name.
  private boolean seenAttribute;
  // The last relevant name was "style".
  private boolean seenStyle;
  // The last relevant name was "script" or an "on..." attribute.
  private boolean seenScript;
  // When true, names and values are lower-cased before they are compared.
  private final boolean caseInsensitive;
  // Set when a "language" or "type" attribute name was met after a script name.
  private boolean seenContentType;

  // Not referenced in this class; presumably the token types that wrapping lexers merge - TODO confirm.
  static final TokenSet TOKENS_TO_MERGE = TokenSet.create(XmlTokenType.XML_COMMENT_CHARACTERS, XmlTokenType.XML_WHITE_SPACE, XmlTokenType.XML_REAL_WHITE_SPACE,
                                                          XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN, XmlTokenType.XML_DATA_CHARACTERS,
                                                          XmlTokenType.XML_TAG_CHARACTERS);
  50
  /**
   * Callback invoked from {@link #advance()} when the delegate lexer is positioned on a token
   * whose type has a handler registered for it.
   */
  public interface TokenHandler {
    /**
     * @param lexer the lexer positioned on the token to handle; {@link #advance()} passes this lexer.
     */
    void handleElement(Lexer lexer);
  }
  54
  55   public class XmlNameHandler implements TokenHandler {
  56     @NonNls private static final String TOKEN_SCRIPT = "script";
  57     @NonNls private static final String TOKEN_STYLE = "style";
  58     @NonNls private static final String TOKEN_ON = "on";
  59
  60     public void handleElement(Lexer lexer) {
  61       final CharSequence buffer = lexer.getBufferSequence();
  62       final char firstCh = buffer.charAt(lexer.getTokenStart());
  63       // support for style in any attribute that ends with style
  64       //final int i = lexer.getTokenEnd() - "style".length();
  65       //final char ch = i > lexer.getTokenStart() ? buffer[i]:firstCh;
  66
  67       if (seenScript && !seenTag) {
  68         seenContentType = false;
  69
  70         if (((firstCh == 'l' || firstCh == 't') || (caseInsensitive && (firstCh == 'L' || firstCh == 'T')))) {
  71           @NonNls String name = TreeUtil.getTokenText(lexer);
  72           if (caseInsensitive) name = name.toLowerCase();
  73
  74           if ("language".equals(name) || "type".equals(name)) {
  75             seenContentType = true;
  76           }
  77         }
  78
  79         return;
  80       }
  81
  82       if ( /*ch !='s' &&*/
  83           firstCh !='o' && firstCh !='s' &&
  84           (!caseInsensitive || (/*ch !='S' &&*/ firstCh !='S' && firstCh !='O') )
  85           ) {
  86         return; // optimization
  87       }
  88
  89       String name = TreeUtil.getTokenText(lexer);
  90       if (caseInsensitive) name = name.toLowerCase();
  91
  92       final boolean style = name.equals(TOKEN_STYLE); //name.endsWith("style");
  93       final int state = getState() & BASE_STATE_MASK;
  94       final boolean script = name.equals(TOKEN_SCRIPT) ||
  95                        ((name.startsWith(TOKEN_ON) && name.indexOf(':') == -1 && !isHtmlTagState(state)));
  96
  97       if (style || script) {
  98         // encountered tag name in end of tag
  99         if (seenTag) {
 100           seenTag = false;
 101           return;
 102         }
 103
 104         seenStyle = style;
 105         seenScript = script;
 106
 107         if (!isHtmlTagState(state)) {
 108           seenAttribute=true;
 109         }
 110       }
 111     }
 112   }
 113
 114   class XmlAttributeValueEndHandler implements TokenHandler {
 115     public void handleElement(Lexer lexer) {
 116       if (seenAttribute) {
 117         seenStyle = false;
 118         seenScript = false;
 119         seenAttribute = false;
 120       }
 121       seenContentType = false;
 122     }
 123   }
 124
 125   class XmlAttributeValueHandler implements TokenHandler {
 126     public void handleElement(Lexer lexer) {
 127       if (seenContentType) {
 128         if(!seenScript || seenAttribute) {
 129           return; // something invalid
 130         }
 131
 132         @NonNls String name = TreeUtil.getTokenText(lexer);
 133         if (caseInsensitive) name = name.toLowerCase();
 134         if (name.indexOf("javascript") == -1 && name.indexOf("jscript") == -1) {
 135           seenScript = false;
 136           seenTag = true;    // will be switched of on tag name in end
 137         }
 138       }
 139     }
 140   }
 141
 142   class XmlTagClosedHandler implements TokenHandler {
 143     public void handleElement(Lexer lexer) {
 144       if (seenAttribute) {
 145         seenScript=false;
 146         seenStyle=false;
 147
 148         seenAttribute=false;
 149       } else {
 150         if (seenStyle || seenScript) {
 151           seenTag=true;
 152         }
 153       }
 154     }
 155   }
 156
 157   class XmlTagEndHandler implements TokenHandler {
 158     public void handleElement(Lexer lexer) {
 159       seenStyle=false;
 160       seenScript=false;
 161       seenAttribute=false;
 162       seenContentType=false;
 163     }
 164   }
 165
  // Handlers keyed by token type; filled in the constructor, extended by registerHandler(), consulted in advance().
  private final HashMap<IElementType,TokenHandler> tokenHandlers = new HashMap<IElementType, TokenHandler>();
 167
 168   protected BaseHtmlLexer(Lexer _baseLexer, boolean _caseInsensitive)  {
 169     super(_baseLexer);
 170     caseInsensitive = _caseInsensitive;
 171
 172     XmlNameHandler value = new XmlNameHandler();
 173     tokenHandlers.put(XmlTokenType.XML_NAME,value);
 174     tokenHandlers.put(XmlTokenType.XML_TAG_NAME,value);
 175     tokenHandlers.put(XmlTokenType.XML_TAG_END,new XmlTagClosedHandler());
 176     tokenHandlers.put(XmlTokenType.XML_END_TAG_START,new XmlTagEndHandler());
 177     tokenHandlers.put(XmlTokenType.XML_EMPTY_ELEMENT_END,new XmlTagEndHandler());
 178     tokenHandlers.put(XmlTokenType.XML_ATTRIBUTE_VALUE_END_DELIMITER,new XmlAttributeValueEndHandler());
 179     tokenHandlers.put(XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN,new XmlAttributeValueHandler());
 180   }
 181
 182   protected void registerHandler(IElementType elementType, TokenHandler value) {
 183     final TokenHandler tokenHandler = tokenHandlers.get(elementType);
 184
 185     if (tokenHandler != null) {
 186       final TokenHandler newHandler = value;
 187       value = new TokenHandler() {
 188         public void handleElement(final Lexer lexer) {
 189           tokenHandler.handleElement(lexer);
 190           newHandler.handleElement(lexer);
 191         }
 192       };
 193     }
 194
 195     tokenHandlers.put(elementType,value);
 196   }
 197
 198   public void start(final CharSequence buffer, final int startOffset, final int endOffset, final int initialState) {
 199     initState(initialState);
 200     super.start(buffer, startOffset, endOffset, initialState & BASE_STATE_MASK);
 201   }
 202
 203   private void initState(final int initialState) {
 204     seenScript = (initialState & SEEN_SCRIPT)!=0;
 205     seenStyle = (initialState & SEEN_STYLE)!=0;
 206     seenTag = (initialState & SEEN_TAG)!=0;
 207     seenAttribute = (initialState & SEEN_ATTRIBUTE)!=0;
 208     seenContentType = (initialState & SEEN_CONTENT_TYPE) != 0;
 209   }
 210
 211   protected int skipToTheEndOfTheEmbeddment() {
 212     Lexer base = getDelegate();
 213     int tokenEnd = base.getTokenEnd();
 214     int lastState = 0;
 215     int lastStart = 0;
 216
 217     final CharSequence buf = base.getBufferSequence();
 218     final char[] bufArray = CharArrayUtil.fromSequenceWithoutCopying(buf);
 219
 220     if (seenTag) {
 221       FoundEnd:
 222       while(true) {
 223         FoundEndOfTag:
 224         while(base.getTokenType() != XmlTokenType.XML_END_TAG_START) {
 225           if (base.getTokenType() == XmlTokenType.XML_COMMENT_CHARACTERS) {
 226             // we should terminate on first occurence of </
 227             final int end = base.getTokenEnd();
 228
 229             for(int i = base.getTokenStart(); i < end; ++i) {
 230               if ((bufArray != null ? bufArray[i ]:buf.charAt(i)) == '<' &&
 231                   i + 1 < end &&
 232                   (bufArray != null ? bufArray[i+1]:buf.charAt(i+1)) == '/') {
 233                 tokenEnd = i;
 234                 lastStart = i - 1;
 235                 lastState = 0;
 236
 237                 break FoundEndOfTag;
 238               }
 239             }
 240           }
 241
 242           lastState = base.getState();
 243           tokenEnd = base.getTokenEnd();
 244           lastStart = base.getTokenStart();
 245           if (tokenEnd == getBufferEnd()) break FoundEnd;
 246           base.advance();
 247         }
 248
 249         // check if next is script
 250         if (base.getTokenType() != XmlTokenType.XML_END_TAG_START) { // we are inside comment
 251           base.start(buf,lastStart+1,getBufferEnd(),lastState);
 252           base.getTokenType();
 253           base.advance();
 254         } else {
 255           base.advance();
 256         }
 257
 258         while(XmlTokenType.WHITESPACES.contains(base.getTokenType())) {
 259           base.advance();
 260         }
 261
 262         if (base.getTokenType() == XmlTokenType.XML_NAME) {
 263           String name = TreeUtil.getTokenText(base);
 264           if (caseInsensitive) name = name.toLowerCase();
 265
 266           if((hasSeenScript() && XmlNameHandler.TOKEN_SCRIPT.equals(name)) ||
 267              (hasSeenStyle() && XmlNameHandler.TOKEN_STYLE.equals(name)) ||
 268              CompletionUtil.DUMMY_IDENTIFIER_TRIMMED.equalsIgnoreCase(name)) {
 269             break; // really found end
 270           }
 271         }
 272       }
 273
 274       base.start(buf,lastStart,getBufferEnd(),lastState);
 275       base.getTokenType();
 276     } else if (seenAttribute) {
 277       while(true) {
 278         if (!isValidAttributeValueTokenType(base.getTokenType())) break;
 279
 280         tokenEnd = base.getTokenEnd();
 281         lastState = base.getState();
 282         lastStart = base.getTokenStart();
 283
 284         if (tokenEnd == getBufferEnd()) break;
 285         base.advance();
 286       }
 287
 288       base.start(buf,lastStart,getBufferEnd(),lastState);
 289       base.getTokenType();
 290     }
 291     return tokenEnd;
 292   }
 293
 294   protected boolean isValidAttributeValueTokenType(final IElementType tokenType) {
 295     return tokenType == XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN ||
 296            tokenType == XmlTokenType.XML_ENTITY_REF_TOKEN ||
 297             tokenType == XmlTokenType.XML_CHAR_ENTITY_REF;
 298   }
 299
 300   public void advance() {
 301     super.advance();
 302     IElementType type = getDelegate().getTokenType();
 303     TokenHandler tokenHandler = tokenHandlers.get(type);
 304     if (tokenHandler!=null) tokenHandler.handleElement(this);
 305   }
 306
 307
 308   public int getState() {
 309     int state = super.getState();
 310
 311     state |= ((seenScript)?SEEN_SCRIPT:0);
 312     state |= ((seenTag)?SEEN_TAG:0);
 313     state |= ((seenStyle)?SEEN_STYLE:0);
 314     state |= ((seenAttribute)?SEEN_ATTRIBUTE:0);
 315     state |= ((seenContentType)?SEEN_CONTENT_TYPE:0);
 316
 317     return state;
 318   }
 319
  /**
   * @return the current value of the {@code seenStyle} flag
   */
  protected final boolean hasSeenStyle() {
    return seenStyle;
  }
 323
  /**
   * @return the current value of the {@code seenAttribute} flag
   */
  protected final boolean hasSeenAttribute() {
    return seenAttribute;
  }
 327
  /**
   * @return the current value of the {@code seenTag} flag
   */
  protected final boolean hasSeenTag() {
    return seenTag;
  }
 331
  /**
   * Unlike the other flag accessors this one is not final, so a subclass can override it.
   *
   * @return the current value of the {@code seenScript} flag
   */
  protected boolean hasSeenScript() {
    return seenScript;
  }
 335
  /**
   * Called by the name handler with the current state masked by {@link #BASE_STATE_MASK}.
   * When this returns {@code false}, a style/script name is treated as an attribute name, and
   * {@code on...} names are recognized as script attributes.
   * NOTE(review): presumably returns true when the delegate is lexing a tag name - confirm in subclasses.
   *
   * @param state the masked lexer state
   */
  protected abstract boolean isHtmlTagState(int state);
 337 }