refactoring
[fedora-idea.git] / xml / impl / src / com / intellij / lang / html / HtmlParsing.java
blob7c2ca9d74ead74dad54685dff3aa2a03ebbbfc7e
1 /*
2 * Copyright 2000-2009 JetBrains s.r.o.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * @author max
20 package com.intellij.lang.html;
22 import com.intellij.codeInsight.completion.CompletionUtil;
23 import com.intellij.codeInsight.daemon.XmlErrorMessages;
24 import com.intellij.lang.PsiBuilder;
25 import com.intellij.psi.impl.source.codeStyle.Helper;
26 import com.intellij.psi.tree.CustomParsingType;
27 import com.intellij.psi.tree.IElementType;
28 import com.intellij.psi.tree.ILazyParseableElementType;
29 import com.intellij.psi.xml.XmlElementType;
30 import com.intellij.psi.xml.XmlTokenType;
31 import com.intellij.xml.util.HtmlUtil;
32 import org.jetbrains.annotations.NonNls;
33 import org.jetbrains.annotations.NotNull;
34 import org.jetbrains.annotations.Nullable;
36 import java.util.Stack;
38 public class HtmlParsing {
39 @NonNls private static final String TR_TAG = "tr";
40 @NonNls private static final String TD_TAG = "td";
41 @NonNls private static final String DD_TAG = "dd";
42 @NonNls private static final String DT_TAG = "dt";
43 @NonNls private static final String TABLE_TAG = "table";
45 private final PsiBuilder myBuilder;
46 private final Stack<String> myTagNamesStack = new Stack<String>();
47 private final Stack<PsiBuilder.Marker> myTagMarkersStack = new Stack<PsiBuilder.Marker>();
48 @NonNls private static final String COMPLETION_NAME = CompletionUtil.DUMMY_IDENTIFIER_TRIMMED.toLowerCase();
50 public HtmlParsing(final PsiBuilder builder) {
51 myBuilder = builder;
54 public void parseDocument() {
55 final PsiBuilder.Marker document = mark();
57 while (token() == XmlTokenType.XML_COMMENT_START) {
58 parseComment();
61 parseProlog();
63 PsiBuilder.Marker error = null;
64 while (!eof()) {
65 final IElementType tt = token();
66 if (tt == XmlTokenType.XML_START_TAG_START) {
67 error = flushError(error);
68 parseTag("");
69 myTagMarkersStack.clear();
70 myTagNamesStack.clear();
72 else if (tt == XmlTokenType.XML_COMMENT_START) {
73 error = flushError(error);
74 parseComment();
76 else if (tt == XmlTokenType.XML_PI_START) {
77 error = flushError(error);
78 parseProcessingInstruction();
80 else if (tt == XmlTokenType.XML_REAL_WHITE_SPACE || tt == XmlTokenType.XML_CHAR_ENTITY_REF || tt == XmlTokenType.XML_DATA_CHARACTERS) {
81 error = flushError(error);
82 advance();
84 else {
85 if (error == null) error = mark();
86 advance();
90 if (error != null) {
91 error.error(XmlErrorMessages.message("top.level.element.is.not.completed"));
92 error = null;
95 document.done(XmlElementType.HTML_DOCUMENT);
98 @Nullable
99 private static PsiBuilder.Marker flushError(PsiBuilder.Marker error) {
100 if (error != null) {
101 error.error(XmlErrorMessages.message("xml.parsing.unexpected.tokens"));
102 error = null;
104 return error;
107 private void parseDoctype() {
108 assert token() == XmlTokenType.XML_DOCTYPE_START : "Doctype start expected";
109 final PsiBuilder.Marker doctype = mark();
110 advance();
112 while (token() != XmlTokenType.XML_DOCTYPE_END && !eof()) advance();
113 if (eof()) {
114 error(XmlErrorMessages.message("xml.parsing.unexpected.end.of.file"));
116 else {
117 advance();
120 doctype.done(XmlElementType.XML_DOCTYPE);
123 private static boolean ddordt(String name) {
124 return DT_TAG.equals(name) || DD_TAG.equals(name);
127 private boolean parseTag(String parentName) {
128 assert token() == XmlTokenType.XML_START_TAG_START : "Tag start expected";
129 final PsiBuilder.Marker tag = mark();
130 myTagMarkersStack.push(tag);
132 // Start tag header
133 advance();
134 final String originalTagName;
135 if (token() != XmlTokenType.XML_NAME) {
136 error(XmlErrorMessages.message("xml.parsing.tag.name.expected"));
137 originalTagName = "";
139 else {
140 originalTagName = myBuilder.getTokenText();
141 advance();
144 String tagName = originalTagName.toLowerCase();
145 if ((ddordt(tagName) && ddordt(parentName)) ||
146 (tagName.equals(parentName) && HtmlUtil.isOptionalEndForHtmlTagL(tagName)) ||
147 myTagMarkersStack.size() > MAGIC_FRAME_COUNT // no chance for evil guys wanting us to have stack overflow
149 tag.rollbackTo();
150 myTagMarkersStack.pop();
151 return false;
154 myTagNamesStack.push(tagName);
156 boolean freeMakerTag = tagName.length() > 0 && '#' == tagName.charAt(0);
158 do {
159 final IElementType tt = token();
160 if (freeMakerTag) {
161 if (tt == XmlTokenType.XML_EMPTY_ELEMENT_END ||
162 tt == XmlTokenType.XML_TAG_END ||
163 tt == XmlTokenType.XML_END_TAG_START ||
164 tt == XmlTokenType.XML_START_TAG_START) break;
165 advance();
167 else {
168 if (tt == XmlTokenType.XML_NAME) {
169 parseAttribute();
171 else if (tt == XmlTokenType.XML_CHAR_ENTITY_REF || tt == XmlTokenType.XML_ENTITY_REF_TOKEN) {
172 parseReference();
174 else {
175 break;
179 while (!eof());
181 if (token() == XmlTokenType.XML_EMPTY_ELEMENT_END) {
182 advance();
183 tag.done(XmlElementType.HTML_TAG);
184 return true;
187 if (token() == XmlTokenType.XML_TAG_END) {
188 advance();
190 else {
191 error(XmlErrorMessages.message("tag.start.is.not.closed"));
192 tag.done(XmlElementType.HTML_TAG);
193 return true;
196 if (HtmlUtil.isSingleHtmlTagL(tagName)) {
197 final PsiBuilder.Marker footer = mark();
198 if (token() == XmlTokenType.XML_END_TAG_START) {
199 advance();
200 if (token() == XmlTokenType.XML_NAME) {
201 if (tagName.equalsIgnoreCase(myBuilder.getTokenText())) {
202 advance();
203 footer.drop();
204 if (token() == XmlTokenType.XML_TAG_END) {
205 advance();
207 tag.done(XmlElementType.HTML_TAG);
208 return true;
213 footer.rollbackTo();
214 tag.done(XmlElementType.HTML_TAG);
215 return true;
218 // Done header, start content
220 boolean isInlineTagContainer = HtmlUtil.isInlineTagContainerL(tagName);
221 boolean isOptionalTagEnd = HtmlUtil.isOptionalEndForHtmlTagL(tagName);
223 PsiBuilder.Marker firstBlockChild = null;
225 PsiBuilder.Marker xmlText = null;
226 while (!eof()) {
227 final IElementType tt = token();
228 if (tt == XmlTokenType.XML_START_TAG_START) {
229 xmlText = terminateText(xmlText);
230 if (!parseTag(tagName)) {
231 tag.done(XmlElementType.HTML_TAG);
232 return true;
235 PsiBuilder.Marker childMarker = myTagMarkersStack.pop();
236 String childName = myTagNamesStack.pop();
238 if (isOptionalTagEnd) {
239 boolean foundMatch = childTerminatesParentInStack(childName, true);
240 if (foundMatch) {
241 myTagMarkersStack.pop();
242 myTagNamesStack.pop();
244 myTagMarkersStack.push(childMarker);
245 myTagNamesStack.push(childName);
247 tag.doneBefore(XmlElementType.HTML_TAG, childMarker);
248 return true;
253 if (isInlineTagContainer && HtmlUtil.isHtmlBlockTagL(childName) && isOptionalTagEnd && !HtmlUtil.isPossiblyInlineTag(childName)) {
254 tag.doneBefore(XmlElementType.HTML_TAG, childMarker);
255 return true;
257 else if (isOptionalTagEnd && firstBlockChild == null && HtmlUtil.isHtmlBlockTagL(childName) && !HtmlUtil.isHtmlBlockTagL(tagName) && canTerminate(childName, tagName)) {
258 firstBlockChild = childMarker;
261 else if (tt == XmlTokenType.XML_PI_START) {
262 xmlText = terminateText(xmlText);
263 parseProcessingInstruction();
265 else if (tt == XmlTokenType.XML_ENTITY_REF_TOKEN) {
266 xmlText = terminateText(xmlText);
267 parseReference();
269 else if (tt == XmlTokenType.XML_CHAR_ENTITY_REF) {
270 xmlText = startText(xmlText);
271 parseReference();
273 else if (tt == XmlTokenType.XML_CDATA_START) {
274 xmlText = startText(xmlText);
275 parseCData();
277 else if (tt == XmlTokenType.XML_COMMENT_START) {
278 xmlText = startText(xmlText);
279 parseComment();
281 else if (tt == XmlTokenType.XML_BAD_CHARACTER) {
282 xmlText = startText(xmlText);
283 final PsiBuilder.Marker error = mark();
284 advance();
285 error.error(XmlErrorMessages.message("unescaped.ampersand.or.nonterminated.character.entity.reference"));
287 else if (tt instanceof CustomParsingType || tt instanceof ILazyParseableElementType) {
288 xmlText = terminateText(xmlText);
289 advance();
291 else if (token() == XmlTokenType.XML_END_TAG_START) {
292 xmlText = terminateText(xmlText);
293 final PsiBuilder.Marker footer = mark();
294 advance();
296 if (token() == XmlTokenType.XML_NAME) {
297 String endName = myBuilder.getTokenText().toLowerCase();
298 if (!tagName.equals(endName) && !endName.endsWith(COMPLETION_NAME)) {
299 final boolean hasChancesToMatch = HtmlUtil.isOptionalEndForHtmlTagL(endName) ? childTerminatesParentInStack(endName, false) : myTagNamesStack.contains(endName);
300 if (hasChancesToMatch) {
301 footer.rollbackTo();
302 if (isOptionalTagEnd) {
303 if (firstBlockChild != null) {
304 tag.doneBefore(XmlElementType.HTML_TAG, firstBlockChild);
306 else {
307 tag.done(XmlElementType.HTML_TAG);
310 else {
311 error(XmlErrorMessages.message("named.element.is.not.closed", originalTagName));
312 tag.done(XmlElementType.HTML_TAG);
314 return true;
316 else {
317 advance();
318 if (token() == XmlTokenType.XML_TAG_END) advance();
319 footer.error(XmlErrorMessages.message("xml.parsing.closing.tag.mathes.nothing"));
320 continue;
324 advance();
326 while (token() != XmlTokenType.XML_TAG_END && token() != XmlTokenType.XML_START_TAG_START && token() != XmlTokenType.XML_END_TAG_START && !eof()) {
327 error(XmlErrorMessages.message("xml.parsing.unexpected.token"));
328 advance();
331 else {
332 error(XmlErrorMessages.message("xml.parsing.closing.tag.name.missing"));
334 footer.drop();
336 if (token() == XmlTokenType.XML_TAG_END) {
337 advance();
339 else {
340 error(XmlErrorMessages.message("xml.parsing.closing.tag.is.not.done"));
343 tag.done(XmlElementType.HTML_TAG);
344 return true;
346 else {
347 xmlText = startText(xmlText);
348 advance();
352 xmlText = terminateText(xmlText);
354 if (isOptionalTagEnd) {
355 if (firstBlockChild != null) {
356 tag.doneBefore(XmlElementType.HTML_TAG, firstBlockChild);
358 else {
359 tag.done(XmlElementType.HTML_TAG);
362 else {
363 error(XmlErrorMessages.message("named.element.is.not.closed", originalTagName));
364 tag.done(XmlElementType.HTML_TAG);
367 return true;
370 private static boolean canTerminate(final String childTagName,final String tagName) {
371 // TODO: make hash
372 return !(tagName.equalsIgnoreCase(TR_TAG) && childTagName.equalsIgnoreCase(TD_TAG)) ||
373 (tagName.equalsIgnoreCase(TABLE_TAG) && childTagName.equalsIgnoreCase(TR_TAG));
376 private boolean childTerminatesParentInStack(final String childName, final boolean terminateOnNonOptionalTag) {
377 boolean isTD = TD_TAG.equals(childName);
378 boolean isTR = TR_TAG.equals(childName);
380 for (int i = myTagNamesStack.size() - 1; i >= 0; i--) {
381 String parentName = myTagNamesStack.get(i);
382 if (terminateOnNonOptionalTag && !HtmlUtil.isOptionalEndForHtmlTagL(parentName)) return false;
383 if (isTD && (TR_TAG.equals(parentName) || TABLE_TAG.equals(parentName)) ||
384 isTR && TABLE_TAG.equals(parentName)) {
385 return false;
388 if (childName.equals(parentName)) {
389 return true;
392 return false;
396 @NotNull
397 private PsiBuilder.Marker startText(@Nullable PsiBuilder.Marker xmlText) {
398 if (xmlText == null) {
399 xmlText = mark();
400 assert xmlText != null;
402 return xmlText;
405 protected final PsiBuilder.Marker mark() {
406 return myBuilder.mark();
409 @Nullable
410 private static PsiBuilder.Marker terminateText(@Nullable PsiBuilder.Marker xmlText) {
411 if (xmlText != null) {
412 xmlText.done(XmlElementType.XML_TEXT);
413 xmlText = null;
415 return xmlText;
418 private void parseCData() {
419 assert token() == XmlTokenType.XML_CDATA_START;
420 final PsiBuilder.Marker cdata = mark();
421 while (token() != XmlTokenType.XML_CDATA_END && !eof()) {
422 advance();
425 if (!eof()) {
426 advance();
429 cdata.done(XmlElementType.XML_CDATA);
432 protected void parseComment() {
433 final PsiBuilder.Marker comment = mark();
434 advance();
435 while (true) {
436 final IElementType tt = token();
437 if (tt == XmlTokenType.XML_COMMENT_CHARACTERS || tt == XmlTokenType.XML_CHAR_ENTITY_REF || tt == XmlTokenType.XML_CONDITIONAL_COMMENT_START
438 || tt == XmlTokenType.XML_CONDITIONAL_COMMENT_START_END || tt == XmlTokenType.XML_CONDITIONAL_COMMENT_END_START
439 || tt == XmlTokenType.XML_CONDITIONAL_COMMENT_END) {
440 advance();
441 continue;
443 else if (tt == XmlTokenType.XML_BAD_CHARACTER) {
444 final PsiBuilder.Marker error = mark();
445 advance();
446 error.error(XmlErrorMessages.message("xml.parsing.bad.character"));
447 continue;
449 if (tt == XmlTokenType.XML_COMMENT_END) {
450 advance();
452 break;
454 comment.done(XmlElementType.XML_COMMENT);
457 private void parseReference() {
458 if (token() == XmlTokenType.XML_CHAR_ENTITY_REF) {
459 advance();
461 else if (token() == XmlTokenType.XML_ENTITY_REF_TOKEN) {
462 final PsiBuilder.Marker ref = mark();
463 advance();
464 ref.done(XmlElementType.XML_ENTITY_REF);
466 else {
467 assert false : "Unexpected token";
471 private void parseAttribute() {
472 assert token() == XmlTokenType.XML_NAME;
473 final PsiBuilder.Marker att = mark();
474 advance();
475 if (token() == XmlTokenType.XML_EQ) {
476 advance();
477 parseAttributeValue();
478 att.done(XmlElementType.XML_ATTRIBUTE);
480 else {
481 att.done(XmlElementType.XML_ATTRIBUTE);
485 private void parseAttributeValue() {
486 final PsiBuilder.Marker attValue = mark();
487 if (token() == XmlTokenType.XML_ATTRIBUTE_VALUE_START_DELIMITER) {
488 while (true) {
489 final IElementType tt = token();
490 if (tt == null || tt == XmlTokenType.XML_ATTRIBUTE_VALUE_END_DELIMITER || tt == XmlTokenType.XML_END_TAG_START || tt == XmlTokenType
491 .XML_EMPTY_ELEMENT_END ||
492 tt == XmlTokenType.XML_START_TAG_START) {
493 break;
496 if (tt == XmlTokenType.XML_BAD_CHARACTER) {
497 final PsiBuilder.Marker error = mark();
498 advance();
499 error.error(XmlErrorMessages.message("unescaped.ampersand.or.nonterminated.character.entity.reference"));
501 else if (tt == XmlTokenType.XML_ENTITY_REF_TOKEN) {
502 parseReference();
504 else {
505 advance();
509 if (token() == XmlTokenType.XML_ATTRIBUTE_VALUE_END_DELIMITER) {
510 advance();
512 else {
513 error(XmlErrorMessages.message("xml.parsing.unclosed.attribute.value"));
516 else {
517 if (token() != XmlTokenType.XML_TAG_END && token() != XmlTokenType.XML_EMPTY_ELEMENT_END) {
518 advance(); // Single token att value
522 attValue.done(XmlElementType.XML_ATTRIBUTE_VALUE);
525 private void parseProlog() {
526 while (true) {
527 final IElementType tt = token();
528 if (tt == XmlTokenType.XML_COMMENT_START) {
529 parseComment();
531 else if (tt == XmlTokenType.XML_REAL_WHITE_SPACE) {
532 advance();
534 else {
535 break;
539 final PsiBuilder.Marker prolog = mark();
540 while (true) {
541 final IElementType tt = token();
542 if (tt == XmlTokenType.XML_PI_START) {
543 parseProcessingInstruction();
545 else if (tt == XmlTokenType.XML_DOCTYPE_START) {
546 parseDoctype();
548 else if (tt == XmlTokenType.XML_COMMENT_START) {
549 parseComment();
551 else if (tt == XmlTokenType.XML_REAL_WHITE_SPACE) {
552 advance();
554 else {
555 break;
558 prolog.done(XmlElementType.XML_PROLOG);
561 private void parseProcessingInstruction() {
562 assert token() == XmlTokenType.XML_PI_START;
563 final PsiBuilder.Marker pi = mark();
564 advance();
565 if (token() == XmlTokenType.XML_NAME || token() == XmlTokenType.XML_PI_TARGET) {
566 advance();
569 while (token() == XmlTokenType.XML_NAME) {
570 advance();
571 if (token() == XmlTokenType.XML_EQ) {
572 advance();
574 else {
575 error(XmlErrorMessages.message("expected.attribute.eq.sign"));
577 parseAttributeValue();
580 if (token() == XmlTokenType.XML_PI_END) {
581 advance();
583 else {
584 error(XmlErrorMessages.message("xml.parsing.unterminated.processing.instruction"));
587 pi.done(XmlElementType.XML_PROCESSING_INSTRUCTION);
590 protected final IElementType token() {
591 return myBuilder.getTokenType();
594 protected final boolean eof() {
595 return myBuilder.eof();
598 protected final void advance() {
599 myBuilder.advanceLexer();
602 private void error(final String message) {
603 myBuilder.error(message);
606 private static final int MAGIC_FRAME_COUNT = Helper.TOO_BIG_WALK_THRESHOULD + (int)(Math.pow(Math.E, Math.PI) * Math.sin(Math.random()));