2 * Copyright 2000-2009 JetBrains s.r.o.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
20 package com
.intellij
.lang
.html
;
22 import com
.intellij
.codeInsight
.completion
.CompletionUtil
;
23 import com
.intellij
.codeInsight
.daemon
.XmlErrorMessages
;
24 import com
.intellij
.lang
.PsiBuilder
;
25 import com
.intellij
.psi
.impl
.source
.codeStyle
.Helper
;
26 import com
.intellij
.psi
.tree
.CustomParsingType
;
27 import com
.intellij
.psi
.tree
.IElementType
;
28 import com
.intellij
.psi
.tree
.ILazyParseableElementType
;
29 import com
.intellij
.psi
.xml
.XmlElementType
;
30 import com
.intellij
.psi
.xml
.XmlTokenType
;
31 import com
.intellij
.xml
.util
.HtmlUtil
;
32 import org
.jetbrains
.annotations
.NonNls
;
33 import org
.jetbrains
.annotations
.NotNull
;
34 import org
.jetbrains
.annotations
.Nullable
;
36 import java
.util
.Stack
;
/**
 * Hand-written HTML parser working on top of a {@link PsiBuilder}: it inspects the builder's
 * current token type and opens/completes markers with {@link XmlElementType} element types.
 */
38 public class HtmlParsing
{
// Lower-case tag names that receive special treatment in the nesting checks of this class.
39 @NonNls private static final String TR_TAG
= "tr";
40 @NonNls private static final String TD_TAG
= "td";
41 @NonNls private static final String DD_TAG
= "dd";
42 @NonNls private static final String DT_TAG
= "dt";
43 @NonNls private static final String TABLE_TAG
= "table";
// Source of tokens and factory of markers for everything in this class.
45 private final PsiBuilder myBuilder
;
// Names of the tags that are currently open; parseTag pushes the lower-cased tag name here.
46 private final Stack
<String
> myTagNamesStack
= new Stack
<String
>();
// Markers of the tags that are currently open; parseTag pushes the tag marker here.
47 private final Stack
<PsiBuilder
.Marker
> myTagMarkersStack
= new Stack
<PsiBuilder
.Marker
>();
// Lower-cased completion dummy identifier. In parseTag a closing-tag name that ends with this
// value is not treated as a mismatch with the currently open tag.
48 @NonNls private static final String COMPLETION_NAME
= CompletionUtil
.DUMMY_IDENTIFIER_TRIMMED
.toLowerCase();
/**
 * Creates a parser for the given builder.
 *
 * @param builder the {@link PsiBuilder} to parse with
 *                (NOTE(review): presumably assigned to {@code myBuilder} - confirm against the constructor body)
 */
50 public HtmlParsing(final PsiBuilder builder
) {
/**
 * Parses the input as an HTML document.
 *
 * A marker opened at the very beginning is completed as {@link XmlElementType#HTML_DOCUMENT}
 * at the end. In between, top-level tokens are dispatched on their type; a pending
 * "unexpected tokens" error marker is flushed through {@link #flushError} whenever a
 * recognised token (start tag, comment, processing instruction, white space, character
 * entity reference or character data) is met.
 */
54 public void parseDocument() {
55 final PsiBuilder
.Marker document
= mark();
// Leading comments are handled first.
57 while (token() == XmlTokenType
.XML_COMMENT_START
) {
// Error marker covering a run of unexpected tokens; null while no such run is open.
63 PsiBuilder
.Marker error
= null;
65 final IElementType tt
= token();
66 if (tt
== XmlTokenType
.XML_START_TAG_START
) {
67 error
= flushError(error
);
// Both tag stacks are emptied in the start-tag branch, so no open-tag state is carried
// over to whatever is parsed next at the top level.
69 myTagMarkersStack
.clear();
70 myTagNamesStack
.clear();
72 else if (tt
== XmlTokenType
.XML_COMMENT_START
) {
73 error
= flushError(error
);
76 else if (tt
== XmlTokenType
.XML_PI_START
) {
77 error
= flushError(error
);
78 parseProcessingInstruction();
80 else if (tt
== XmlTokenType
.XML_REAL_WHITE_SPACE
|| tt
== XmlTokenType
.XML_CHAR_ENTITY_REF
|| tt
== XmlTokenType
.XML_DATA_CHARACTERS
) {
81 error
= flushError(error
);
// Open an error marker lazily, only if none is pending yet
// (NOTE(review): presumably the fallback for any other token type - confirm).
85 if (error
== null) error
= mark();
91 error
.error(XmlErrorMessages
.message("top.level.element.is.not.completed"));
95 document
.done(XmlElementType
.HTML_DOCUMENT
);
/**
 * Completes a pending error marker with the "xml.parsing.unexpected.tokens" message.
 *
 * Callers assign the result back to their own error variable.
 * NOTE(review): parseDocument passes a variable that may still be null, so a null guard and a
 * null return are presumably part of this method - confirm.
 *
 * @param error the pending error marker
 * @return the value the caller should keep as its pending error marker
 */
99 private static PsiBuilder
.Marker
flushError(PsiBuilder
.Marker error
) {
101 error
.error(XmlErrorMessages
.message("xml.parsing.unexpected.tokens"));
/**
 * Parses a doctype declaration.
 *
 * Expects the current token to be {@code XML_DOCTYPE_START} (asserted), skips tokens until
 * {@code XML_DOCTYPE_END} or end of input, and completes the marker as
 * {@link XmlElementType#XML_DOCTYPE}. The "xml.parsing.unexpected.end.of.file" error is
 * reported from here (NOTE(review): presumably only when the input ended before the doctype
 * end - confirm).
 */
107 private void parseDoctype() {
108 assert token() == XmlTokenType
.XML_DOCTYPE_START
: "Doctype start expected";
109 final PsiBuilder
.Marker doctype
= mark();
// Skip the doctype content token by token.
112 while (token() != XmlTokenType
.XML_DOCTYPE_END
&& !eof()) advance();
114 error(XmlErrorMessages
.message("xml.parsing.unexpected.end.of.file"));
120 doctype
.done(XmlElementType
.XML_DOCTYPE
);
123 private static boolean ddordt(String name
) {
124 return DT_TAG
.equals(name
) || DD_TAG
.equals(name
);
/**
 * Parses one tag, starting at an {@code XML_START_TAG_START} token (asserted).
 *
 * Outline of what happens here:
 * - a marker for the tag is opened and pushed onto {@code myTagMarkersStack};
 * - the tag name is read; if the token is not {@code XML_NAME} the
 *   "xml.parsing.tag.name.expected" error is reported and the empty string is used;
 * - the lower-cased name is checked against {@code parentName} and the nesting limit, and is
 *   pushed onto {@code myTagNamesStack};
 * - the tag header is processed until one of the header-terminating tokens is seen;
 * - the content is processed token by token: nested tags recurse into this method, text-like
 *   tokens are grouped under a text marker via {@code startText}/{@code terminateText}, and a
 *   closing tag is compared with this tag's name and with the names of the open tags.
 *
 * The tag marker is completed as {@link XmlElementType#HTML_TAG}, either at the current
 * position ({@code done}) or before a child marker ({@code doneBefore}).
 *
 * @param parentName name of the enclosing tag; the recursive call passes the lower-cased name
 *                   of the current tag
 * @return a flag for the caller; a false result makes the calling parseTag complete its own
 *         tag (NOTE(review): the exact contract of the result should be confirmed)
 */
127 private boolean parseTag(String parentName
) {
128 assert token() == XmlTokenType
.XML_START_TAG_START
: "Tag start expected";
129 final PsiBuilder
.Marker tag
= mark();
130 myTagMarkersStack
.push(tag
);
// Tag name exactly as written in the source; used in the "named.element.is.not.closed" message.
134 final String originalTagName
;
135 if (token() != XmlTokenType
.XML_NAME
) {
136 error(XmlErrorMessages
.message("xml.parsing.tag.name.expected"));
137 originalTagName
= "";
140 originalTagName
= myBuilder
.getTokenText();
// All further comparisons use the lower-cased name.
144 String tagName
= originalTagName
.toLowerCase();
// Special cases: dd/dt directly inside dd/dt, a tag with an optional end nested in a tag of
// the same name, or too many open tags. The marker pushed above is popped again for them.
145 if ((ddordt(tagName
) && ddordt(parentName
)) ||
146 (tagName
.equals(parentName
) && HtmlUtil
.isOptionalEndForHtmlTagL(tagName
)) ||
147 myTagMarkersStack
.size() > MAGIC_FRAME_COUNT
// no chance for evil guys wanting us to have stack overflow
150 myTagMarkersStack
.pop();
154 myTagNamesStack
.push(tagName
);
// Names starting with '#' are flagged through freeMakerTag.
156 boolean freeMakerTag
= tagName
.length() > 0 && '#' == tagName
.charAt(0);
159 final IElementType tt
= token();
// Any of these tokens ends the processing of the tag header.
161 if (tt
== XmlTokenType
.XML_EMPTY_ELEMENT_END
||
162 tt
== XmlTokenType
.XML_TAG_END
||
163 tt
== XmlTokenType
.XML_END_TAG_START
||
164 tt
== XmlTokenType
.XML_START_TAG_START
) break;
168 if (tt
== XmlTokenType
.XML_NAME
) {
171 else if (tt
== XmlTokenType
.XML_CHAR_ENTITY_REF
|| tt
== XmlTokenType
.XML_ENTITY_REF_TOKEN
) {
// Self-closing form: the tag is complete right after the header.
181 if (token() == XmlTokenType
.XML_EMPTY_ELEMENT_END
) {
183 tag
.done(XmlElementType
.HTML_TAG
);
187 if (token() == XmlTokenType
.XML_TAG_END
) {
191 error(XmlErrorMessages
.message("tag.start.is.not.closed"));
192 tag
.done(XmlElementType
.HTML_TAG
);
// Tags that HtmlUtil reports as single: only an immediately following closing tag with the
// same name (compared ignoring case) is looked at here.
196 if (HtmlUtil
.isSingleHtmlTagL(tagName
)) {
197 final PsiBuilder
.Marker footer
= mark();
198 if (token() == XmlTokenType
.XML_END_TAG_START
) {
200 if (token() == XmlTokenType
.XML_NAME
) {
201 if (tagName
.equalsIgnoreCase(myBuilder
.getTokenText())) {
204 if (token() == XmlTokenType
.XML_TAG_END
) {
207 tag
.done(XmlElementType
.HTML_TAG
);
214 tag
.done(XmlElementType
.HTML_TAG
);
218 // Done header, start content
220 boolean isInlineTagContainer
= HtmlUtil
.isInlineTagContainerL(tagName
);
221 boolean isOptionalTagEnd
= HtmlUtil
.isOptionalEndForHtmlTagL(tagName
);
// First block-level child that may implicitly end this tag; if it is set when the tag is
// finished, the tag is completed before that child.
223 PsiBuilder
.Marker firstBlockChild
= null;
// Marker of the text run currently being collected, or null if no text run is open.
225 PsiBuilder
.Marker xmlText
= null;
227 final IElementType tt
= token();
228 if (tt
== XmlTokenType
.XML_START_TAG_START
) {
229 xmlText
= terminateText(xmlText
);
230 if (!parseTag(tagName
)) {
231 tag
.done(XmlElementType
.HTML_TAG
);
// Take the child's entries off both stacks.
235 PsiBuilder
.Marker childMarker
= myTagMarkersStack
.pop();
236 String childName
= myTagNamesStack
.pop();
// For a tag with an optional end, ask whether the child just parsed relates to a tag that is
// still open; the stack entries are adjusted and this tag is completed before the child.
238 if (isOptionalTagEnd
) {
239 boolean foundMatch
= childTerminatesParentInStack(childName
, true);
241 myTagMarkersStack
.pop();
242 myTagNamesStack
.pop();
244 myTagMarkersStack
.push(childMarker
);
245 myTagNamesStack
.push(childName
);
247 tag
.doneBefore(XmlElementType
.HTML_TAG
, childMarker
);
// A block child that cannot also be inline, inside an inline container with an optional
// end, ends this tag right before the child.
253 if (isInlineTagContainer
&& HtmlUtil
.isHtmlBlockTagL(childName
) && isOptionalTagEnd
&& !HtmlUtil
.isPossiblyInlineTag(childName
)) {
254 tag
.doneBefore(XmlElementType
.HTML_TAG
, childMarker
);
// Otherwise remember the first block child of a non-block tag with an optional end.
257 else if (isOptionalTagEnd
&& firstBlockChild
== null && HtmlUtil
.isHtmlBlockTagL(childName
) && !HtmlUtil
.isHtmlBlockTagL(tagName
) && canTerminate(childName
, tagName
)) {
258 firstBlockChild
= childMarker
;
261 else if (tt
== XmlTokenType
.XML_PI_START
) {
262 xmlText
= terminateText(xmlText
);
263 parseProcessingInstruction();
265 else if (tt
== XmlTokenType
.XML_ENTITY_REF_TOKEN
) {
266 xmlText
= terminateText(xmlText
);
// The following token kinds are kept inside the current text run.
269 else if (tt
== XmlTokenType
.XML_CHAR_ENTITY_REF
) {
270 xmlText
= startText(xmlText
);
273 else if (tt
== XmlTokenType
.XML_CDATA_START
) {
274 xmlText
= startText(xmlText
);
277 else if (tt
== XmlTokenType
.XML_COMMENT_START
) {
278 xmlText
= startText(xmlText
);
281 else if (tt
== XmlTokenType
.XML_BAD_CHARACTER
) {
282 xmlText
= startText(xmlText
);
283 final PsiBuilder
.Marker error
= mark();
285 error
.error(XmlErrorMessages
.message("unescaped.ampersand.or.nonterminated.character.entity.reference"));
287 else if (tt
instanceof CustomParsingType
|| tt
instanceof ILazyParseableElementType
) {
288 xmlText
= terminateText(xmlText
);
// Closing tag.
291 else if (token() == XmlTokenType
.XML_END_TAG_START
) {
292 xmlText
= terminateText(xmlText
);
293 final PsiBuilder
.Marker footer
= mark();
296 if (token() == XmlTokenType
.XML_NAME
) {
297 String endName
= myBuilder
.getTokenText().toLowerCase();
// The closing name differs from this tag's name and is not a completion placeholder.
298 if (!tagName
.equals(endName
) && !endName
.endsWith(COMPLETION_NAME
)) {
// Could the closing tag belong to one of the tags that are still open?
299 final boolean hasChancesToMatch
= HtmlUtil
.isOptionalEndForHtmlTagL(endName
) ?
childTerminatesParentInStack(endName
, false) : myTagNamesStack
.contains(endName
);
300 if (hasChancesToMatch
) {
302 if (isOptionalTagEnd
) {
303 if (firstBlockChild
!= null) {
304 tag
.doneBefore(XmlElementType
.HTML_TAG
, firstBlockChild
);
307 tag
.done(XmlElementType
.HTML_TAG
);
311 error(XmlErrorMessages
.message("named.element.is.not.closed", originalTagName
));
312 tag
.done(XmlElementType
.HTML_TAG
);
318 if (token() == XmlTokenType
.XML_TAG_END
) advance();
319 footer
.error(XmlErrorMessages
.message("xml.parsing.closing.tag.mathes.nothing"));
// Report stray tokens up to the next tag boundary or end of input.
326 while (token() != XmlTokenType
.XML_TAG_END
&& token() != XmlTokenType
.XML_START_TAG_START
&& token() != XmlTokenType
.XML_END_TAG_START
&& !eof()) {
327 error(XmlErrorMessages
.message("xml.parsing.unexpected.token"));
332 error(XmlErrorMessages
.message("xml.parsing.closing.tag.name.missing"));
336 if (token() == XmlTokenType
.XML_TAG_END
) {
340 error(XmlErrorMessages
.message("xml.parsing.closing.tag.is.not.done"));
343 tag
.done(XmlElementType
.HTML_TAG
);
347 xmlText
= startText(xmlText
);
// Content is over: close any open text run, then finish the tag itself.
352 xmlText
= terminateText(xmlText
);
354 if (isOptionalTagEnd
) {
355 if (firstBlockChild
!= null) {
356 tag
.doneBefore(XmlElementType
.HTML_TAG
, firstBlockChild
);
359 tag
.done(XmlElementType
.HTML_TAG
);
363 error(XmlErrorMessages
.message("named.element.is.not.closed", originalTagName
));
364 tag
.done(XmlElementType
.HTML_TAG
);
370 private static boolean canTerminate(final String childTagName
,final String tagName
) {
372 return !(tagName
.equalsIgnoreCase(TR_TAG
) && childTagName
.equalsIgnoreCase(TD_TAG
)) ||
373 (tagName
.equalsIgnoreCase(TABLE_TAG
) && childTagName
.equalsIgnoreCase(TR_TAG
));
/**
 * Scans the names of the currently open tags, from the most recently opened one downwards,
 * looking for a tag that relates to {@code childName}.
 *
 * Two situations are recognised during the scan: a td child meeting an open tr or table,
 * or a tr child meeting an open table; and an open tag with the same name as the child.
 * NOTE(review): the values returned for these two situations and at the end of the scan
 * should be confirmed against the branch bodies.
 *
 * @param childName                 lower-case name of the child tag
 * @param terminateOnNonOptionalTag if true, the scan stops with false at the first open tag
 *                                  whose end tag is not optional
 * @return see the note above
 */
376 private boolean childTerminatesParentInStack(final String childName
, final boolean terminateOnNonOptionalTag
) {
377 boolean isTD
= TD_TAG
.equals(childName
);
378 boolean isTR
= TR_TAG
.equals(childName
);
// Top of the stack (innermost open tag) first.
380 for (int i
= myTagNamesStack
.size() - 1; i
>= 0; i
--) {
381 String parentName
= myTagNamesStack
.get(i
);
382 if (terminateOnNonOptionalTag
&& !HtmlUtil
.isOptionalEndForHtmlTagL(parentName
)) return false;
// Table structure: td under tr/table, tr under table.
383 if (isTD
&& (TR_TAG
.equals(parentName
) || TABLE_TAG
.equals(parentName
)) ||
384 isTR
&& TABLE_TAG
.equals(parentName
)) {
// An open tag with the same name as the child.
388 if (childName
.equals(parentName
)) {
/**
 * Makes sure a text marker is available for a run of text-like tokens.
 *
 * Callers assign the result back to their text-marker variable.
 *
 * @param xmlText the marker of the text run in progress, or null if there is none
 * @return the marker to use for the text run
 *         (NOTE(review): presumably a freshly opened marker when {@code xmlText} was null - confirm)
 */
397 private PsiBuilder
.Marker
startText(@Nullable PsiBuilder
.Marker xmlText
) {
398 if (xmlText
== null) {
// After the null branch a marker must exist.
400 assert xmlText
!= null;
405 protected final PsiBuilder
.Marker
mark() {
406 return myBuilder
.mark();
/**
 * Ends the text run in progress, if any, by completing its marker as
 * {@link XmlElementType#XML_TEXT}.
 *
 * Callers assign the result back to their text-marker variable.
 *
 * @param xmlText the marker of the text run in progress, or null if there is none
 * @return the value the caller should keep as its text marker
 *         (NOTE(review): presumably null, meaning "no text run open" - confirm)
 */
410 private static PsiBuilder
.Marker
terminateText(@Nullable PsiBuilder
.Marker xmlText
) {
411 if (xmlText
!= null) {
412 xmlText
.done(XmlElementType
.XML_TEXT
);
/**
 * Parses a CDATA section.
 *
 * Expects the current token to be {@code XML_CDATA_START} (asserted), loops until
 * {@code XML_CDATA_END} or end of input, and completes the marker as
 * {@link XmlElementType#XML_CDATA}.
 */
418 private void parseCData() {
419 assert token() == XmlTokenType
.XML_CDATA_START
;
420 final PsiBuilder
.Marker cdata
= mark();
// The eof check keeps the loop from running forever on an unterminated section.
421 while (token() != XmlTokenType
.XML_CDATA_END
&& !eof()) {
429 cdata
.done(XmlElementType
.XML_CDATA
);
/**
 * Parses a comment and completes its marker as {@link XmlElementType#XML_COMMENT}.
 *
 * Inside the comment three groups of tokens are distinguished: ordinary comment content
 * (comment characters, character entity references and the conditional-comment delimiter
 * tokens), bad characters (wrapped in an error marker with the
 * "xml.parsing.bad.character" message), and the comment end token.
 */
432 protected void parseComment() {
433 final PsiBuilder
.Marker comment
= mark();
436 final IElementType tt
= token();
// Regular comment content, including conditional-comment delimiters.
437 if (tt
== XmlTokenType
.XML_COMMENT_CHARACTERS
|| tt
== XmlTokenType
.XML_CHAR_ENTITY_REF
|| tt
== XmlTokenType
.XML_CONDITIONAL_COMMENT_START
438 || tt
== XmlTokenType
.XML_CONDITIONAL_COMMENT_START_END
|| tt
== XmlTokenType
.XML_CONDITIONAL_COMMENT_END_START
439 || tt
== XmlTokenType
.XML_CONDITIONAL_COMMENT_END
) {
443 else if (tt
== XmlTokenType
.XML_BAD_CHARACTER
) {
444 final PsiBuilder
.Marker error
= mark();
446 error
.error(XmlErrorMessages
.message("xml.parsing.bad.character"));
449 if (tt
== XmlTokenType
.XML_COMMENT_END
) {
454 comment
.done(XmlElementType
.XML_COMMENT
);
/**
 * Parses a reference token at the current position.
 *
 * A {@code XML_CHAR_ENTITY_REF} token and a {@code XML_ENTITY_REF_TOKEN} token are handled in
 * separate branches; for the latter a marker is completed as
 * {@link XmlElementType#XML_ENTITY_REF}. The method also contains an always-failing
 * assertion with the message "Unexpected token"
 * (NOTE(review): presumably the fallback for any other token type - confirm).
 */
457 private void parseReference() {
458 if (token() == XmlTokenType
.XML_CHAR_ENTITY_REF
) {
461 else if (token() == XmlTokenType
.XML_ENTITY_REF_TOKEN
) {
462 final PsiBuilder
.Marker ref
= mark();
464 ref
.done(XmlElementType
.XML_ENTITY_REF
);
467 assert false : "Unexpected token";
/**
 * Parses one attribute, starting at its name token ({@code XML_NAME}, asserted).
 *
 * If an {@code XML_EQ} token is found, the value is parsed with
 * {@link #parseAttributeValue()}. In both cases the marker is completed as
 * {@link XmlElementType#XML_ATTRIBUTE}, so an attribute without a value is still an attribute.
 */
471 private void parseAttribute() {
472 assert token() == XmlTokenType
.XML_NAME
;
473 final PsiBuilder
.Marker att
= mark();
475 if (token() == XmlTokenType
.XML_EQ
) {
477 parseAttributeValue();
478 att
.done(XmlElementType
.XML_ATTRIBUTE
);
// Second completion of the same element type, on the other path through the method.
481 att
.done(XmlElementType
.XML_ATTRIBUTE
);
/**
 * Parses an attribute value and completes its marker as
 * {@link XmlElementType#XML_ATTRIBUTE_VALUE}.
 *
 * Two forms are handled:
 * - a delimited value, starting with {@code XML_ATTRIBUTE_VALUE_START_DELIMITER}: tokens are
 *   examined until the end delimiter, end of input (null token type) or a token that starts
 *   or ends a tag; bad characters get an error marker, entity reference tokens have a branch
 *   of their own; a missing end delimiter is reported as
 *   "xml.parsing.unclosed.attribute.value";
 * - an undelimited value: a single token is consumed unless it is {@code XML_TAG_END} or
 *   {@code XML_EMPTY_ELEMENT_END}.
 */
485 private void parseAttributeValue() {
486 final PsiBuilder
.Marker attValue
= mark();
487 if (token() == XmlTokenType
.XML_ATTRIBUTE_VALUE_START_DELIMITER
) {
489 final IElementType tt
= token();
// Tokens that end the scan of a delimited value.
490 if (tt
== null || tt
== XmlTokenType
.XML_ATTRIBUTE_VALUE_END_DELIMITER
|| tt
== XmlTokenType
.XML_END_TAG_START
|| tt
== XmlTokenType
491 .XML_EMPTY_ELEMENT_END
||
492 tt
== XmlTokenType
.XML_START_TAG_START
) {
496 if (tt
== XmlTokenType
.XML_BAD_CHARACTER
) {
497 final PsiBuilder
.Marker error
= mark();
499 error
.error(XmlErrorMessages
.message("unescaped.ampersand.or.nonterminated.character.entity.reference"));
501 else if (tt
== XmlTokenType
.XML_ENTITY_REF_TOKEN
) {
509 if (token() == XmlTokenType
.XML_ATTRIBUTE_VALUE_END_DELIMITER
) {
513 error(XmlErrorMessages
.message("xml.parsing.unclosed.attribute.value"));
// Undelimited value: never swallow the token that closes the tag.
517 if (token() != XmlTokenType
.XML_TAG_END
&& token() != XmlTokenType
.XML_EMPTY_ELEMENT_END
) {
518 advance(); // Single token att value
522 attValue
.done(XmlElementType
.XML_ATTRIBUTE_VALUE
);
/**
 * Parses the document prolog.
 *
 * The method has two token-dispatch sections. The first one, before the prolog marker is
 * opened, looks at comment starts and white space. The second one, inside the marker, looks
 * at processing instructions, doctype starts, comment starts and white space. The marker is
 * completed as {@link XmlElementType#XML_PROLOG}.
 */
525 private void parseProlog() {
// Section 1: tokens in front of the prolog marker.
527 final IElementType tt
= token();
528 if (tt
== XmlTokenType
.XML_COMMENT_START
) {
531 else if (tt
== XmlTokenType
.XML_REAL_WHITE_SPACE
) {
// Section 2: tokens that belong to the prolog itself.
539 final PsiBuilder
.Marker prolog
= mark();
541 final IElementType tt
= token();
542 if (tt
== XmlTokenType
.XML_PI_START
) {
543 parseProcessingInstruction();
545 else if (tt
== XmlTokenType
.XML_DOCTYPE_START
) {
548 else if (tt
== XmlTokenType
.XML_COMMENT_START
) {
551 else if (tt
== XmlTokenType
.XML_REAL_WHITE_SPACE
) {
558 prolog
.done(XmlElementType
.XML_PROLOG
);
/**
 * Parses a processing instruction, starting at {@code XML_PI_START} (asserted), and completes
 * its marker as {@link XmlElementType#XML_PROCESSING_INSTRUCTION}.
 *
 * After the target ({@code XML_NAME} or {@code XML_PI_TARGET}) a sequence of
 * name/value pairs is read: each {@code XML_NAME} is expected to be followed by
 * {@code XML_EQ} (otherwise "expected.attribute.eq.sign" is reported) and a value parsed with
 * {@link #parseAttributeValue()}. A missing {@code XML_PI_END} is reported as
 * "xml.parsing.unterminated.processing.instruction".
 */
561 private void parseProcessingInstruction() {
562 assert token() == XmlTokenType
.XML_PI_START
;
563 final PsiBuilder
.Marker pi
= mark();
// Target of the instruction.
565 if (token() == XmlTokenType
.XML_NAME
|| token() == XmlTokenType
.XML_PI_TARGET
) {
// Attribute-like pairs.
569 while (token() == XmlTokenType
.XML_NAME
) {
571 if (token() == XmlTokenType
.XML_EQ
) {
575 error(XmlErrorMessages
.message("expected.attribute.eq.sign"));
577 parseAttributeValue();
580 if (token() == XmlTokenType
.XML_PI_END
) {
584 error(XmlErrorMessages
.message("xml.parsing.unterminated.processing.instruction"));
587 pi
.done(XmlElementType
.XML_PROCESSING_INSTRUCTION
);
590 protected final IElementType
token() {
591 return myBuilder
.getTokenType();
594 protected final boolean eof() {
595 return myBuilder
.eof();
598 protected final void advance() {
599 myBuilder
.advanceLexer();
602 private void error(final String message
) {
603 myBuilder
.error(message
);
// Limit on the number of open tag markers; parseTag compares myTagMarkersStack.size() with it.
// The value is Helper.TOO_BIG_WALK_THRESHOULD plus a small random offset computed once, when
// the class is initialised: Math.random() lies in [0, 1), so sin(...) lies in [0, ~0.84);
// multiplied by e^pi (~23.14) and truncated to int this adds between 0 and 19.
606 private static final int MAGIC_FRAME_COUNT
= Helper
.TOO_BIG_WALK_THRESHOULD
+ (int)(Math
.pow(Math
.E
, Math
.PI
) * Math
.sin(Math
.random()));