1 // Copyright (c) 1994 James Clark
2 // See the file COPYING for copying permission.
3 #pragma ident "%Z%%M% %I% %E% SMI"
7 #include "ParserMessages.h"
8 #include "MessageArg.h"
9 #include "TokenMessageArg.h"
10 #include "StringVectorMessageArg.h"
15 namespace SP_NAMESPACE
{
// Starts parsing of the document instance.
// Compiles the instance recognition modes, switches the parser to the
// content phase and reads the first token in the current mode.  The visible
// code dispatches on that token (start-tag / end-tag name start), attempts to
// imply a tag with tryImplyTag(), queues any implied element events, may
// report instanceStartOmittag, and finally pushes the token back onto the
// input so that normal content parsing sees it again.
// NOTE(review): this listing omits several lines of the function, so the
// exact conditions guarding each step are not visible here.
18 void Parser::doInstanceStart()
24 // FIXME check here that we have a valid dtd
25 compileInstanceModes();
26 setPhase(contentPhase
);
// Peek at the first token of the instance; it is returned to the input below.
27 Token token
= getToken(currentMode());
30 case tokenStagoNameStart
:
33 case tokenEtagoNameStart
:
// State threaded through tryImplyTag() while implying tags.
39 unsigned startImpliedCount
= 0;
40 unsigned attributeListIndex
= 0;
42 IList
<Event
> eventList
;
43 if (!tryImplyTag(currentLocation(),
49 queueElementEvents(eventList
);
52 message(ParserMessages::instanceStartOmittag
);
// Give the peeked token back so it is parsed as ordinary content.
54 currentInput()->ungetToken();
// Performs end-of-instance checks.
// While any marked section is still open, reports unclosedMarkedSection
// together with the location at which the current marked section started.
// NOTE(review): the rest of the function is not shown in this listing.
57 void Parser::endInstance()
59 // Do checking before popping entity stack so that there's a
60 // current location for error messages.
62 while (markedSectionLevel() > 0) {
63 message(ParserMessages::unclosedMarkedSection
,
64 currentMarkedSectionStartLocation());
// Iterates over the ID table and, for every pending reference recorded on an
// id, reports missingId (with the id's name) at that reference's location.
// presumably an id only has pending references when it was referenced but
// never defined -- confirm against the Id class.
72 void Parser::checkIdrefs()
74 IdTableIter
iter(idTableIter());
76 while ((id
= iter
.next()) != 0) {
77 for (size_t i
= 0; i
< id
->pendingRefs().size(); i
++) {
// Attach the message to the location of the unresolved reference.
78 Messenger::setNextLocation(id
->pendingRefs()[i
]);
79 message(ParserMessages::missingId
, StringMessageArg(id
->name()));
// Main content-parsing loop of the instance.
// Repeatedly reads a token in the current mode and dispatches on it; the
// loop keeps running while the event queue is empty (see the closing
// `while (eventQueueEmpty())`).  The visible cases cover entity end,
// numeric and named character references, general entity references,
// end-tags, markup declarations, marked sections, processing instructions,
// start-tags, record start/end and separator handling, ignored characters,
// unrecognized characters, data-character delimiters and short references.
// NOTE(review): many case labels, braces and break statements are missing
// from this listing; the association of statements with cases below is
// partly inferred from ordering.
84 void Parser::doContent()
91 Token token
= getToken(currentMode());
// Entity end handling: input level 1 is treated specially, and ending an
// entity at the special-parse input level is an error.
94 if (inputLevel() == 1) {
98 if (inputLevel() == specialParseInputLevel()) {
99 // FIXME have separate messages for each type of special parse
100 // perhaps force end of marked section or element
101 message(ParserMessages::specialParseEntityEnd
);
103 if (eventsWanted().wantInstanceMarkup())
104 eventHandler().entityEnd(new (eventAllocator())
105 EntityEndEvent(currentLocation()));
106 if (afterDocumentElement())
107 message(ParserMessages::afterDocumentElementEntityEnd
);
// With integrally stored documents, an entity must end in the element in
// which it was referenced.
108 if (sd().integrallyStored()
110 && currentElement().index() != currentInputElementIndex())
111 message(ParserMessages::contentAsyncEntityRef
);
// Numeric character references (the boolean passed to parseNumericCharRef
// is true for the hexadecimal form).
115 case tokenHcroHexDigit
:
117 if (afterDocumentElement())
118 message(ParserMessages::characterReferenceAfterDocumentElement
);
121 if (parseNumericCharRef(token
== tokenHcroHexDigit
, ch
, loc
)) {
125 if (!translateNumericCharRef(ch
, isSgmlChar
))
128 eventHandler().nonSgmlChar(new (eventAllocator())
129 NonSgmlCharEvent(ch
, loc
));
132 eventHandler().data(new (eventAllocator())
133 ImmediateDataEvent(Event::characterData
,
// Named character references.
139 case tokenCroNameStart
:
140 if (afterDocumentElement())
141 message(ParserMessages::characterReferenceAfterDocumentElement
);
// General entity references.
145 case tokenEroNameStart
:
147 if (afterDocumentElement())
148 message(ParserMessages::entityReferenceAfterDocumentElement
);
149 ConstPtr
<Entity
> entity
;
150 Ptr
<EntityOrigin
> origin
;
151 if (parseEntityReference(0, token
== tokenEroGrpo
, entity
, origin
)) {
152 if (!entity
.isNull()) {
// Character-data entities must be acceptable as #PCDATA here.
153 if (entity
->isCharacterData())
154 acceptPcdata(Location(origin
.pointer(), 0));
// References made at the special-parse input level use rcdataReference;
// the contentReference call below is the other alternative.
155 if (inputLevel() == specialParseInputLevel())
156 entity
->rcdataReference(*this, origin
);
158 entity
->contentReference(*this, origin
);
// End-tags.
163 case tokenEtagoNameStart
:
164 acceptEndTag(parseEndTag());
// Markup declarations inside the instance.
172 case tokenMdoNameStart
:
173 if (startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation()))
174 currentMarkup()->addDelim(Syntax::dMDO
);
175 Syntax::ReservedName name
;
// Remember the input level so a bad declaration can be skipped to its end.
178 startLevel
= inputLevel();
179 if (parseDeclarationName(&name
)) {
181 case Syntax::rUSEMAP
:
182 if (afterDocumentElement())
183 message(ParserMessages::declarationAfterDocumentElement
,
184 StringMessageArg(syntax().reservedName(name
)));
185 result
= parseUsemapDecl();
187 case Syntax::rUSELINK
:
188 if (afterDocumentElement())
189 message(ParserMessages::declarationAfterDocumentElement
,
190 StringMessageArg(syntax().reservedName(name
)));
191 result
= parseUselinkDecl();
// Declaration types for which instanceDeclaration is reported.
193 case Syntax::rDOCTYPE
:
194 case Syntax::rLINKTYPE
:
195 case Syntax::rELEMENT
:
196 case Syntax::rATTLIST
:
197 case Syntax::rENTITY
:
198 case Syntax::rNOTATION
:
199 case Syntax::rSHORTREF
:
201 case Syntax::rIDLINK
:
202 message(ParserMessages::instanceDeclaration
,
203 StringMessageArg(syntax().reservedName(name
)));
207 message(ParserMessages::noSuchDeclarationType
,
208 StringMessageArg(syntax().reservedName(name
)));
216 skipDeclaration(startLevel
);
// Marked section start and end.
229 if (afterDocumentElement())
230 message(ParserMessages::markedSectionAfterDocumentElement
);
231 parseMarkedSectionDeclStart();
235 handleMarkedSectionEnd();
242 parseProcessingInstruction();
// Start-tags (named, empty and group forms).
244 case tokenStagoNameStart
:
248 parseEmptyStartTag();
251 parseGroupStartTag();
// Record end: must be acceptable as data, then is queued via queueRe().
254 acceptPcdata(currentLocation());
255 queueRe(currentLocation());
258 acceptPcdata(currentLocation());
260 if (eventsWanted().wantInstanceMarkup())
261 eventHandler().ignoredRs(new (eventAllocator())
262 IgnoredRsEvent(currentChar(),
267 if (eventsWanted().wantInstanceMarkup())
268 eventHandler().sSep(new (eventAllocator())
269 SSepEvent(currentInput()->currentTokenStart(),
270 currentInput()->currentTokenLength(),
274 case tokenIgnoredChar
:
276 if (eventsWanted().wantMarkedSections())
277 eventHandler().ignoredChars(new (eventAllocator())
278 IgnoredCharsEvent(currentInput()->currentTokenStart(),
279 currentInput()->currentTokenLength(),
283 case tokenUnrecognized
:
284 reportNonSgmlCharacter();
288 message(ParserMessages::dataCharDelim
,
289 StringMessageArg(StringC(currentInput()->currentTokenStart(),
290 currentInput()->currentTokenLength())));
// Remaining tokens are short references, numbered from tokenFirstShortref.
296 ASSERT(token
>= tokenFirstShortref
);
297 handleShortref(token
- tokenFirstShortref
);
// Keep consuming tokens until at least one event has been queued.
300 } while (eventQueueEmpty());
// Skips over the remainder of a declaration, reading tokens in mdMode.
// startLevel is the input level at which the declaration started; the
// visible tests compare the current input level against it.  skipMax and
// skipCount appear together with a record-end test in the last condition.
// NOTE(review): the actions taken for each condition are not shown in this
// listing, so the exact stopping rules cannot be confirmed here.
303 void Parser::skipDeclaration(unsigned startLevel
)
305 const unsigned skipMax
= 250;
306 unsigned skipCount
= 0;
308 Token token
= getToken(mdMode
);
309 if (inputLevel() == startLevel
)
312 case tokenUnrecognized
:
316 if (inputLevel() <= startLevel
)
321 if (inputLevel() == startLevel
)
// Condition: at the starting input level, skipCount has reached skipMax,
// and the current character is a record end.
325 if (inputLevel() == startLevel
&& skipCount
>= skipMax
326 && currentChar() == syntax().standardFunction(Syntax::fRE
))
// Handles a recognized short reference delimiter.
// index selects an entry in the current element's short reference map.
// When the map yields an entity, a reference to it is made through
// contentReference(), optionally recording the short reference in a Markup
// object.  The remaining code works on the characters of the current token:
// in econMode/econnetMode leading separator characters are reported through
// sSep(), and the rest is accepted as #PCDATA and emitted as data, with
// record start / record end characters treated individually unless
// sd().keeprsre() is set.
// NOTE(review): lines are missing from this listing (including the else
// branches), so the branch structure described above is partly inferred.
335 void Parser::handleShortref(int index
)
337 const ConstPtr
<Entity
> &entity
338 = currentElement().map()->entity(index
);
339 if (!entity
.isNull()) {
340 Owner
<Markup
> markupPtr
;
// Only build a Markup record when the application wants instance markup.
341 if (eventsWanted().wantInstanceMarkup()) {
342 markupPtr
= new Markup
;
343 markupPtr
->addShortref(currentInput());
345 Ptr
<EntityOrigin
> origin
346 = EntityOrigin::make(internalAllocator(),
349 currentInput()->currentTokenLength(),
351 entity
->contentReference(*this, origin
);
// Work directly on the characters of the short reference token.
354 InputSource
*in
= currentInput();
355 size_t length
= in
->currentTokenLength();
356 const Char
*s
= in
->currentTokenStart();
358 if (currentMode() == econMode
|| currentMode() == econnetMode
) {
359 // FIXME do this in advance (what about B sequence?)
// Count the leading separator characters of the token.
360 for (i
= 0; i
< length
&& syntax().isS(s
[i
]); i
++)
362 if (i
> 0 && eventsWanted().wantInstanceMarkup())
363 eventHandler().sSep(new (eventAllocator())
364 SSepEvent(s
, i
, currentLocation(), 0));
367 Location
location(currentLocation());
371 acceptPcdata(location
);
372 if (sd().keeprsre()) {
374 eventHandler().data(new (eventAllocator())
375 ImmediateDataEvent(Event::characterData
, s
, length
,
379 // FIXME speed this up
// Walk the token one character at a time, advancing the location with it.
380 for (; length
> 0; location
+= 1, length
--, s
++) {
381 if (*s
== syntax().standardFunction(Syntax::fRS
)) {
383 if (eventsWanted().wantInstanceMarkup())
384 eventHandler().ignoredRs(new (eventAllocator())
385 IgnoredRsEvent(*s
, location
));
387 else if (*s
== syntax().standardFunction(Syntax::fRE
))
391 eventHandler().data(new (eventAllocator())
392 ImmediateDataEvent(Event::characterData
, s
, 1,
// Handles a run of data characters in content.
// Checks that #PCDATA is acceptable at the current location and passes the
// characters of the current token to the event handler as character data.
// NOTE(review): some lines of this function are not shown in this listing.
399 void Parser::parsePcdata()
402 acceptPcdata(currentLocation());
404 eventHandler().data(new (eventAllocator())
405 ImmediateDataEvent(Event::characterData
,
406 currentInput()->currentTokenStart(),
407 currentInput()->currentTokenLength(),
// Parses a start-tag whose name start has just been recognized.
// Visible steps: start a markup record (STAGO), extend the token to the full
// name, fold it through the general substitution table into nameBuffer(),
// and look the name up as an element type.  The element may instead be
// obtained from completeRankStem() or lookupCreateUndefinedElement(), and
// ranked elements are passed to handleRankedElement().  An attribute list is
// allocated, then either the tag is closed immediately (TAGC) or an
// attribute specification is parsed; in both cases the tag length may be
// checked with checkTaglen().  A new attribute definition list produced
// while parsing attributes is installed on the element type.
// NOTE(review): lines are missing from this listing (the conditions around
// the element lookup fallbacks and the final event construction), so those
// parts are not described in detail.
412 void Parser::parseStartTag()
414 InputSource
*in
= currentInput();
415 Markup
*markup
= startMarkup(eventsWanted().wantInstanceMarkup(),
416 in
->currentLocation());
417 in
->discardInitial();
418 extendNameToken(syntax().namelen(), ParserMessages::nameLength
);
420 markup
->addDelim(Syntax::dSTAGO
);
// The generic identifier, after general name substitution.
423 StringC
&name
= nameBuffer();
424 getCurrentToken(syntax().generalSubstTable(), name
);
425 ElementType
*e
= currentDtdNonConst().lookupElementType(name
);
428 e
= completeRankStem(name
);
429 else if (e
->isRankedElement())
430 handleRankedElement(e
);
433 e
= lookupCreateUndefinedElement(name
, currentLocation(), currentDtdNonConst());
435 AttributeList
*attributes
= allocAttributeList(e
->attributeDef(), 0);
436 Token closeToken
= getToken(tagMode
);
// Tag closed straight after the name: no attribute specification.
437 if (closeToken
== tokenTagc
) {
438 if (name
.size() > syntax().taglen())
439 checkTaglen(markupLocation().index());
440 attributes
->finish(*this);
443 markup
->addDelim(Syntax::dTAGC
);
447 Ptr
<AttributeDefinitionList
> newAttDef
;
448 if (parseAttributeSpec(0, *attributes
, netEnabling
, newAttDef
)) {
449 // The difference between the indices will be the difference
450 // in offsets plus 1 for each named character reference.
451 if (in
->currentLocation().index() - markupLocation().index()
453 checkTaglen(markupLocation().index());
// Attribute parsing produced a new definition list: register it with the
// DTD and attach it to the element type.
457 if (!newAttDef
.isNull()) {
458 newAttDef
->setIndex(currentDtdNonConst().allocAttributeDefinitionListIndex());
459 e
->setAttributeDef(newAttDef
);
463 new (eventAllocator())
// Resolves a rank stem name to a full element type.
// Looks name up as a rank stem in the current DTD, copies the stem's name,
// appends the current rank with appendCurrentRank() and returns the element
// type found under the resulting name.  Reports noCurrentRank when
// appendCurrentRank() fails.
// NOTE(review): the guard on rankStem and the return value for the failure
// paths are on lines not shown in this listing.  The local `name` below
// shadows the parameter of the same name.
472 ElementType
*Parser::completeRankStem(const StringC
&name
)
474 const RankStem
*rankStem
= currentDtd().lookupRankStem(name
);
476 StringC
name(rankStem
->name());
477 if (!appendCurrentRank(name
, rankStem
))
478 message(ParserMessages::noCurrentRank
, StringMessageArg(name
));
480 return currentDtdNonConst().lookupElementType(name
);
// Updates the current ranks after a ranked element e has been seen.
// For every element definition associated with e's rank stem, and for every
// rank stem listed by that definition, sets the current rank to e's rank
// suffix.
485 void Parser::handleRankedElement(const ElementType
*e
)
487 StringC
rankSuffix(e
->definition()->rankSuffix());
488 const RankStem
*rankStem
= e
->rankedElementRankStem();
489 for (size_t i
= 0; i
< rankStem
->nDefinitions(); i
++) {
490 const ElementDefinition
*def
= rankStem
->definition(i
);
491 for (size_t j
= 0; j
< def
->nRankStems(); j
++)
492 setCurrentRank(def
->rankStem(j
), rankSuffix
);
// Checks the length of the tag that started at tagStartIndex.
// Computes the distance between the start offset of the current location and
// the start offset just past the STAGO delimiter of the tag, using the input
// source origin of the current location, and reports taglen with the
// syntax's TAGLEN quantity.
// NOTE(review): the right-hand side of the comparison is on a line not shown
// in this listing; presumably it is syntax().taglen() -- confirm.
496 void Parser::checkTaglen(Index tagStartIndex
)
498 const InputSourceOrigin
*origin
499 = currentLocation().origin()->asInputSourceOrigin();
501 if (origin
->startOffset(currentLocation().index())
502 - origin
->startOffset(tagStartIndex
503 + syntax().delimGeneral(Syntax::dSTAGO
).size())
505 message(ParserMessages::taglen
, NumberMessageArg(syntax().taglen()));
// Parses an empty start-tag (STAGO immediately followed by TAGC).
// Optionally warns (emptyStartTag), then chooses the element type the tag
// stands for: the visible candidates are the last ended element type, the
// type of the current element when tagLevel() > 0, and the document element
// type of the current DTD.  An attribute list for that type is allocated and
// finished, and the STAGO and TAGC delimiters are recorded as markup.
// NOTE(review): the condition selecting lastEndedElementType() and the
// construction of the resulting event are on lines not shown here.
508 void Parser::parseEmptyStartTag()
510 if (options().warnEmptyTag
)
511 message(ParserMessages::emptyStartTag
);
512 // FIXME error if not in base.
513 const ElementType
*e
= 0;
515 e
= lastEndedElementType();
516 else if (tagLevel() > 0)
517 e
= currentElement().type();
519 e
= currentDtd().documentElementType();
520 AttributeList
*attributes
= allocAttributeList(e
->attributeDef(), 0);
// No attributes are specified in an empty start-tag.
521 attributes
->finish(*this);
522 Markup
*markup
= startMarkup(eventsWanted().wantInstanceMarkup(),
525 markup
->addDelim(Syntax::dSTAGO
);
526 markup
->addDelim(Syntax::dTAGC
);
529 new (eventAllocator())
// Parses a start-tag that begins with a name group (STAGO followed by GRPO).
// Records the STAGO and GRPO delimiters as markup, parses the tag name group
// (which sets `active`), and in the visible code checks that a name start
// character follows (reporting startTagMissingName otherwise), extends the
// name token, records the name as markup and emits an IgnoredMarkupEvent.
// NOTE(review): the tests on `active` and the alternative branch are on
// lines not shown in this listing.
538 void Parser::parseGroupStartTag()
540 if (startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation())) {
541 currentMarkup()->addDelim(Syntax::dSTAGO
);
542 currentMarkup()->addDelim(Syntax::dGRPO
);
545 if (!parseTagNameGroup(active
))
547 InputSource
*in
= currentInput();
548 // Location startLocation = in->currentLocation();
// The generic identifier must start with a name start character.
550 Xchar c
= in
->tokenChar(messenger());
551 if (!syntax().isNameStartCharacter(c
)) {
552 message(ParserMessages::startTagMissingName
);
555 in
->discardInitial();
556 extendNameToken(syntax().namelen(), ParserMessages::nameLength
);
558 currentMarkup()->addName(currentInput());
561 eventHandler().ignoredMarkup(new (eventAllocator())
562 IgnoredMarkupEvent(markupLocation(),
// Parses an end-tag that begins with a name group (ETAGO followed by GRPO).
// Records the opening delimiters as markup, parses the tag name group (which
// sets `active`), and in the visible code checks that a name start character
// follows (reporting endTagMissingName otherwise), extends the name token,
// records the name as markup and emits an IgnoredMarkupEvent.
// NOTE(review): the tests on `active` and the alternative branch are on
// lines not shown in this listing.
567 void Parser::parseGroupEndTag()
569 if (startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation())) {
// An end-tag opens with ETAGO, not STAGO; record the delimiter that was
// actually read so the markup matches the source text (consistent with
// parseEndTag() and parseEmptyEndTag()).
570 currentMarkup()->addDelim(Syntax::dETAGO
);
571 currentMarkup()->addDelim(Syntax::dGRPO
);
574 if (!parseTagNameGroup(active
))
576 InputSource
*in
= currentInput();
577 // Location startLocation = in->currentLocation();
// The generic identifier must start with a name start character.
579 Xchar c
= in
->tokenChar(messenger());
580 if (!syntax().isNameStartCharacter(c
)) {
581 message(ParserMessages::endTagMissingName
);
584 in
->discardInitial();
585 extendNameToken(syntax().namelen(), ParserMessages::nameLength
);
587 currentMarkup()->addName(currentInput());
590 eventHandler().ignoredMarkup(new (eventAllocator())
591 IgnoredMarkupEvent(markupLocation(),
// Checks that #PCDATA is allowed at startLocation, implying tags if needed.
// First tries a direct #PCDATA transition in the current element.  If that
// is not possible and the parser is not already recovering, it repeatedly
// implies tags with tryImplyTag(); as soon as the #PCDATA transition
// succeeds the implied element events are queued.  Otherwise kept messages
// are discarded and pcdataNotAllowed is reported when validating or after
// the document element.
// NOTE(review): return statements, the undo of failed implications and the
// handling of the recovering state are on lines not shown in this listing.
596 void Parser::acceptPcdata(const Location
&startLocation
)
598 if (currentElement().tryTransitionPcdata())
600 // Need to test here since implying tags may turn off pcdataRecovering.
601 if (pcdataRecovering())
603 IList
<Undo
> undoList
;
604 IList
<Event
> eventList
;
605 unsigned startImpliedCount
= 0;
606 unsigned attributeListIndex
= 0;
// Keep implying tags until #PCDATA becomes acceptable or nothing more can
// be implied.
608 while (tryImplyTag(startLocation
, startImpliedCount
, attributeListIndex
,
609 undoList
, eventList
))
610 if (currentElement().tryTransitionPcdata()) {
611 queueElementEvents(eventList
);
614 discardKeptMessages();
616 if (validate() || afterDocumentElement())
617 message(ParserMessages::pcdataNotAllowed
);
// Accepts a parsed start-tag for element type e.
// Reports undefinedElement for undefined elements unless implydefElement()
// is set.  The element is pushed directly when the content model allows the
// transition or when it is an inclusion (the event is then marked as
// included).  Otherwise tags are implied with tryImplyTag() and the
// start-tag is retried with tryStartTag().  If that also fails, kept
// messages are discarded and either handleBadStartTag() is used (when
// validating a defined element) or elementNotAllowed is reported and the
// element is pushed anyway.
// NOTE(review): the third parameter, the handling of excluded elements and
// the return statements are on lines not shown in this listing.
621 void Parser::acceptStartTag(const ElementType
*e
,
622 StartElementEvent
*event
,
625 if (e
->definition()->undefined() && !implydefElement())
626 message(ParserMessages::undefinedElement
, StringMessageArg(e
->name()));
627 if (elementIsExcluded(e
)) {
// Allowed directly by the content model.
633 if (currentElement().tryTransition(e
)) {
634 pushElementCheck(e
, event
, netEnabling
);
// Allowed as an inclusion.
637 if (elementIsIncluded(e
)) {
638 event
->setIncluded();
639 pushElementCheck(e
, event
, netEnabling
);
644 IList
<Undo
> undoList
;
645 IList
<Event
> eventList
;
646 unsigned startImpliedCount
= 0;
// Implied tags start at attribute list index 1 here (0 is passed for the
// explicit tag's own attribute list in the tag-parsing functions).
647 unsigned attributeListIndex
= 1;
648 while (tryImplyTag(event
->location(), startImpliedCount
,
649 attributeListIndex
, undoList
, eventList
))
650 if (tryStartTag(e
, event
, netEnabling
, eventList
))
652 discardKeptMessages();
654 if (validate() && !e
->definition()->undefined())
655 handleBadStartTag(e
, event
, netEnabling
);
657 if (validate() ? implydefElement() : afterDocumentElement())
658 message(ParserMessages::elementNotAllowed
, StringMessageArg(e
->name()));
659 // If element couldn't occur because it was excluded, then
660 // do the transition here.
661 (void)currentElement().tryTransition(e
);
662 pushElementCheck(e
, event
, netEnabling
);
// Processes every entry of undoList, removing entries one at a time with
// get() until the list is empty.
// NOTE(review): what is done with each removed Undo object is on lines not
// shown in this listing.
666 void Parser::undo(IList
<Undo
> &undoList
)
668 while (!undoList
.empty()) {
669 Undo
*p
= undoList
.get();
// Delivers a list of implied element events to the event handler.
// Releases any kept messages, moves the events into a temporary list `tem`
// (see the FIXME: this stands in for a list reverse operation), then
// removes them one by one from `tem`, dispatching start-element events via
// noteStartElement()/startElement() and the others via
// noteEndElement()/endElement().  On return `events` is empty.
// NOTE(review): the declaration of `tem` and the else keyword are on lines
// not shown in this listing.
675 void Parser::queueElementEvents(IList
<Event
> &events
)
677 releaseKeptMessages();
678 // FIXME provide IList<T>::reverse function
681 while (!events
.empty())
682 tem
.insert(events
.get());
683 while (!tem
.empty()) {
684 Event
*e
= tem
.get();
685 if (e
->type() == Event::startElement
) {
686 noteStartElement(((StartElementEvent
*)e
)->included());
687 eventHandler().startElement((StartElementEvent
*)e
);
690 noteEndElement(((EndElementEvent
*)e
)->included());
691 eventHandler().endElement((EndElementEvent
*)e
);
// Checks whether excluding element type e is invalid in the current element.
// Asks the current element for the content token affected by the exclusion
// and reports invalidExclusion with the token's ordinal (typeIndex() + 1),
// the token's element name and the current element's name.
// NOTE(review): the test on `token` guarding the message is on a line not
// shown in this listing.
697 void Parser::checkExclusion(const ElementType
*e
)
699 const LeafContentToken
*token
= currentElement().invalidExclusion(e
);
701 message(ParserMessages::invalidExclusion
,
702 OrdinalMessageArg(token
->typeIndex() + 1),
703 StringMessageArg(token
->elementType()->name()),
704 StringMessageArg(currentElement().type()->name()));
// Tries to accept a start-tag for e after tags have been implied.
// If the content model allows the transition, or e is an inclusion, the
// implied events are queued first and then the element is pushed with
// pushElementCheck(); for an inclusion the event is marked as included.
// NOTE(review): the third parameter, the handling of excluded elements and
// the returned values are on lines not shown in this listing; presumably a
// true result means the tag was accepted -- confirm.
707 Boolean
Parser::tryStartTag(const ElementType
*e
,
708 StartElementEvent
*event
,
710 IList
<Event
> &impliedEvents
)
712 if (elementIsExcluded(e
)) {
716 if (currentElement().tryTransition(e
)) {
717 queueElementEvents(impliedEvents
);
718 pushElementCheck(e
, event
, netEnabling
);
721 if (elementIsIncluded(e
)) {
722 queueElementEvents(impliedEvents
);
723 event
->setIncluded();
724 pushElementCheck(e
, event
, netEnabling
);
// Tries to imply one tag (an end-tag or a start-tag) at location loc.
// When the current element is finished, an end-tag for it is implied: an
// EndElementEvent is added to eventList and an UndoEndTag entry (holding the
// popped element) to the undo list.  Otherwise the element's implied
// start-tag token is used: the required transition is taken (with an
// UndoTransition recorded), attributes are allocated at attributeListIndex
// (which is then incremented) and finished, and the new element is pushed
// through the undo/event-list form of pushElementCheck().  Diagnostics are
// issued for omissions the declarations do not permit.  Once
// startImpliedCount exceeds implyCheckLimit, checkImplyLoop() is consulted.
// NOTE(review): the undo-list parameter, the return statements and several
// guards are on lines not shown in this listing.
730 Boolean
Parser::tryImplyTag(const Location
&loc
,
731 unsigned &startImpliedCount
,
732 unsigned &attributeListIndex
,
734 IList
<Event
> &eventList
)
// Current element is complete: consider implying its end-tag.
738 if (currentElement().isFinished()) {
742 const ElementDefinition
*def
= currentElement().type()->definition();
743 if (def
&& !def
->canOmitEndTag())
747 if (startImpliedCount
> 0) {
748 message(ParserMessages::startTagEmptyElement
,
749 StringMessageArg(currentElement().type()->name()));
753 const ElementDefinition
*def
= currentElement().type()->definition();
754 if (def
&& !def
->canOmitEndTag())
755 message(ParserMessages::omitEndTagDeclare
,
756 StringMessageArg(currentElement().type()->name()),
757 currentElement().startLocation());
759 EndElementEvent
*event
760 = new (eventAllocator()) EndElementEvent(currentElement().type(),
764 eventList
.insert(event
);
// Record how to reopen the element if this implication is rolled back.
765 undo
.insert(new (internalAllocator()) UndoEndTag(popSaveElement()));
// Otherwise consider implying the start-tag of a required element.
768 const LeafContentToken
*token
= currentElement().impliedStartTag();
771 const ElementType
*e
= token
->elementType();
772 if (elementIsExcluded(e
))
773 message(ParserMessages::requiredElementExcluded
,
774 OrdinalMessageArg(token
->typeIndex() + 1),
775 StringMessageArg(e
->name()),
776 StringMessageArg(currentElement().type()->name()));
// Save the match state before taking the required transition.
778 undo
.insert(new (internalAllocator())
779 UndoTransition(currentElement().matchState()));
780 currentElement().doRequiredTransition();
781 const ElementDefinition
*def
= e
->definition();
782 if (def
->declaredContent() != ElementDefinition::modelGroup
783 && def
->declaredContent() != ElementDefinition::any
)
784 message(ParserMessages::omitStartTagDeclaredContent
,
785 StringMessageArg(e
->name()));
786 if (def
->undefined())
787 message(ParserMessages::undefinedElement
, StringMessageArg(e
->name()));
788 else if (!def
->canOmitStartTag())
789 message(ParserMessages::omitStartTagDeclare
, StringMessageArg(e
->name()));
790 AttributeList
*attributes
791 = allocAttributeList(e
->attributeDef(),
792 attributeListIndex
++);
793 // this will give an error if the element has a required attribute
794 attributes
->finish(*this);
796 StartElementEvent
*event
797 = new (eventAllocator()) StartElementEvent(e
,
802 pushElementCheck(e
, event
, undo
, eventList
);
803 const int implyCheckLimit
= 30; // this is fairly arbitrary
// Only pay for the loop check after many start-tags have been implied.
804 if (startImpliedCount
> implyCheckLimit
805 && !checkImplyLoop(startImpliedCount
))
// Opens element e and delivers its start-element event immediately.
// Reports taglvlOpenElements when the tag level equals the syntax's TAGLVL
// quantity and notes the start of the element.  If the event says the
// end-tag must be omitted, the element is closed at once: with
// sd().emptyElementNormal() through endTagEmptyElement(), otherwise by
// delivering the start event followed by a freshly created end event.
// In the remaining case the short reference map is chosen (the element's
// own map, or the current element's map), an OpenElement is pushed and the
// start event is delivered.
// NOTE(review): the third parameter, the declaration of `end` and several
// guards are on lines not shown in this listing.
810 void Parser::pushElementCheck(const ElementType
*e
, StartElementEvent
*event
,
813 if (tagLevel() == syntax().taglvl())
814 message(ParserMessages::taglvlOpenElements
, NumberMessageArg(syntax().taglvl()));
815 noteStartElement(event
->included());
816 if (event
->mustOmitEnd()) {
817 if (sd().emptyElementNormal()) {
// Copy what is needed from the event before handing it to the handler.
818 Boolean included
= event
->included();
819 Location
loc(event
->location());
820 eventHandler().startElement(event
);
821 endTagEmptyElement(e
, netEnabling
, included
, loc
);
825 = new (eventAllocator()) EndElementEvent(e
,
829 if (event
->included()) {
835 eventHandler().startElement(event
);
836 eventHandler().endElement(end
);
840 const ShortReferenceMap
*map
= e
->map();
842 map
= currentElement().map();
843 pushElement(new (internalAllocator()) OpenElement(e
,
848 // Can't access event after it's passed to the event handler.
849 eventHandler().startElement(event
);
// Handles what follows the start-tag of an element whose end-tag must be
// omitted, when empty elements are treated normally.
// Reads the next token in econnetMode (if netEnabling) or econMode.  The
// visible branches accept a NET delimiter, an empty end-tag (ETAGO TAGC,
// with an optional emptyEndTag warning), or a named end-tag: each delivers
// an end-element event and notes the end of the element.  A named end-tag
// for a different element reports elementNotOpen when that element is not
// open, and implyEmptyElementEnd() is used to close e.  For the final
// visible branch the end is implied and the token is pushed back.
// NOTE(review): case labels, the `included` parameter declaration and
// several statements are on lines not shown in this listing.
853 void Parser::endTagEmptyElement(const ElementType
*e
,
856 const Location
&startLoc
)
858 Token token
= getToken(netEnabling
? econnetMode
: econMode
);
// Null end-tag (NET).
862 Markup
*markup
= startMarkup(eventsWanted().wantInstanceMarkup(),
865 markup
->addDelim(Syntax::dNET
);
867 = new (eventAllocator()) EndElementEvent(e
,
873 eventHandler().endElement(end
);
874 noteEndElement(included
);
// Empty end-tag.
880 if (options().warnEmptyTag
)
881 message(ParserMessages::emptyEndTag
);
882 Markup
*markup
= startMarkup(eventsWanted().wantInstanceMarkup(),
885 markup
->addDelim(Syntax::dETAGO
);
886 markup
->addDelim(Syntax::dTAGC
);
889 = new (eventAllocator()) EndElementEvent(e
,
895 eventHandler().endElement(end
);
896 noteEndElement(included
);
// Named end-tag.
899 case tokenEtagoNameStart
:
901 EndElementEvent
*end
= parseEndTag();
902 if (end
->elementType() == e
) {
905 eventHandler().endElement(end
);
906 noteEndElement(included
);
909 if (!elementIsOpen(end
->elementType())) {
910 message(ParserMessages::elementNotOpen
,
911 StringMessageArg(end
->elementType()->name()));
915 implyEmptyElementEnd(e
, included
, startLoc
);
// Anything else: imply the end and let the token be parsed again.
922 implyEmptyElementEnd(e
, included
, startLoc
);
923 currentInput()->ungetToken();
// Implies the end-tag of element e, which was opened at startLoc.
// May report omitEndTagOmittag, reports omitEndTagDeclare when e's
// definition does not allow the end-tag to be omitted, then creates an
// end-element event, notes the end of the element and delivers the event.
// NOTE(review): the `included` parameter declaration, the guard on the first
// message and the declaration of `end` are on lines not shown in this
// listing.
926 void Parser::implyEmptyElementEnd(const ElementType
*e
,
928 const Location
&startLoc
)
931 message(ParserMessages::omitEndTagOmittag
,
932 StringMessageArg(e
->name()),
935 const ElementDefinition
*def
= e
->definition();
936 if (def
&& !def
->canOmitEndTag())
937 message(ParserMessages::omitEndTagDeclare
,
938 StringMessageArg(e
->name()),
942 = new (eventAllocator()) EndElementEvent(e
,
948 noteEndElement(included
);
949 eventHandler().endElement(end
);
// Opens element e tentatively while tags are being implied.
// Unlike the overload that delivers events immediately, this one appends
// the start event (and, for elements whose end-tag must be omitted, a newly
// created end event) to eventList, and records an UndoStartTag in undoList
// before pushing an OpenElement, so that the step can be rolled back.
// Reports taglvlOpenElements when the tag level equals the syntax's TAGLVL
// quantity.
// NOTE(review): the declaration of `end`, the else branch and the choice of
// short reference map are partly on lines not shown in this listing.
952 void Parser::pushElementCheck(const ElementType
*e
, StartElementEvent
*event
,
953 IList
<Undo
> &undoList
,
954 IList
<Event
> &eventList
)
956 if (tagLevel() == syntax().taglvl())
957 message(ParserMessages::taglvlOpenElements
, NumberMessageArg(syntax().taglvl()));
958 eventList
.insert(event
);
959 if (event
->mustOmitEnd()) {
961 = new (eventAllocator()) EndElementEvent(e
,
965 if (event
->included())
967 eventList
.insert(end
);
970 undoList
.insert(new (internalAllocator()) UndoStartTag
);
971 const ShortReferenceMap
*map
= e
->map();
973 map
= currentElement().map();
974 pushElement(new (internalAllocator()) OpenElement(e
,
// Parses a named end-tag and returns the event describing it.
// Starts a markup record (ETAGO plus the name), extends the token to the
// full name, folds it through the general substitution table into
// nameBuffer() and looks it up as an element type in the current DTD.  The
// element may instead come from completeRankStem() or
// lookupCreateUndefinedElement().  The returned object is allocated with
// the event allocator.
// NOTE(review): the conditions selecting the fallbacks, the call that parses
// the rest of the tag and the constructor arguments are on lines not shown
// in this listing.
982 EndElementEvent
*Parser::parseEndTag()
984 Markup
*markup
= startMarkup(eventsWanted().wantInstanceMarkup(),
986 currentInput()->discardInitial();
987 extendNameToken(syntax().namelen(), ParserMessages::nameLength
);
989 markup
->addDelim(Syntax::dETAGO
);
990 markup
->addName(currentInput());
// The generic identifier, after general name substitution.
992 StringC
&name
= nameBuffer();
993 getCurrentToken(syntax().generalSubstTable(), name
);
994 const ElementType
*e
= currentDtd().lookupElementType(name
);
997 e
= completeRankStem(name
);
1000 e
= lookupCreateUndefinedElement(name
, currentLocation(), currentDtdNonConst());
1002 return new (eventAllocator())
1004 currentDtdPointer(),
// Parses the part of an end-tag that follows the name, reading tokens in
// tagMode.  The visible handling covers: unrecognized characters
// (endTagCharacter unless reportNonSgmlCharacter() handled it), entity end
// (endTagEntityEnd), an unclosed end-tag (unclosedEndTagShorttag unless
// sd().endTagUnclosed(), with the token pushed back), the TAGC delimiter,
// separator characters (both recorded in the current markup when present),
// and any other token (endTagInvalidToken).
// NOTE(review): the case labels and loop structure are on lines not shown in
// this listing; the association above is inferred from message names.
1009 void Parser::parseEndTagClose()
1012 Token token
= getToken(tagMode
);
1014 case tokenUnrecognized
:
1015 if (!reportNonSgmlCharacter())
1016 message(ParserMessages::endTagCharacter
, StringMessageArg(currentToken()));
1019 message(ParserMessages::endTagEntityEnd
);
1023 if (!sd().endTagUnclosed())
1024 message(ParserMessages::unclosedEndTagShorttag
);
1025 currentInput()->ungetToken();
1028 if (currentMarkup())
1029 currentMarkup()->addDelim(Syntax::dTAGC
);
1032 if (currentMarkup())
1033 currentMarkup()->addS(currentChar());
1036 message(ParserMessages::endTagInvalidToken
,
1037 TokenMessageArg(token
, tagMode
, syntaxPointer(), sdPointer()));
// Parses an empty end-tag (ETAGO immediately followed by TAGC).
// Optionally warns (emptyEndTag) and reports emptyEndTagNoOpenElements when
// no element is open.  The visible code then records the ETAGO and TAGC
// delimiters as markup and passes an end-element event for the current
// element's type to acceptEndTag().
// NOTE(review): the else keyword and the remaining constructor arguments are
// on lines not shown in this listing.
1043 void Parser::parseEmptyEndTag()
1045 if (options().warnEmptyTag
)
1046 message(ParserMessages::emptyEndTag
);
1047 // FIXME what to do if not in base
1048 if (tagLevel() == 0)
1049 message(ParserMessages::emptyEndTagNoOpenElements
);
1051 Markup
*markup
= startMarkup(eventsWanted().wantInstanceMarkup(),
1054 markup
->addDelim(Syntax::dETAGO
);
1055 markup
->addDelim(Syntax::dTAGC
);
1057 acceptEndTag(new (eventAllocator()) EndElementEvent(currentElement().type(),
1058 currentDtdPointer(),
// Handles a null end-tag (NET) delimiter.
// Elements are examined from the innermost outwards: the visible code tests
// whether the current element is net-enabling; elements that are implicitly
// ended on the way are closed with implyCurrentElementEnd(), reporting
// elementNotFinished when validating and the element is not finished.  For
// the element that the NET closes, elementEndTagNotFinished is reported
// under the same condition, the NET delimiter is recorded as markup and an
// end-element event is passed to acceptEndTag().
// NOTE(review): the loop and break statements are on lines not shown in this
// listing.
1064 void Parser::parseNullEndTag()
1066 // If a null end tag was recognized, then there must be a net enabling
1067 // element on the stack.
1069 ASSERT(tagLevel() > 0);
1070 if (currentElement().netEnabling())
1072 if (!currentElement().isFinished() && validate())
1073 message(ParserMessages::elementNotFinished
,
1074 StringMessageArg(currentElement().type()->name()));
1075 implyCurrentElementEnd(currentLocation());
1077 if (!currentElement().isFinished() && validate())
1078 message(ParserMessages::elementEndTagNotFinished
,
1079 StringMessageArg(currentElement().type()->name()));
1080 Markup
*markup
= startMarkup(eventsWanted().wantInstanceMarkup(),
1083 markup
->addDelim(Syntax::dNET
);
1084 acceptEndTag(new (eventAllocator()) EndElementEvent(currentElement().type(),
1085 currentDtdPointer(),
// Closes every element that is still open at the end of the document.
// For each open element, reports elementNotFinishedDocumentEnd if its
// content is not finished and then implies its end-tag at the current
// location.  After the loop, noDocumentElement is reported when validating
// and the remaining (outermost) element state is not finished.
1090 void Parser::endAllElements()
1092 while (tagLevel() > 0) {
1093 if (!currentElement().isFinished())
1094 message(ParserMessages::elementNotFinishedDocumentEnd
,
1095 StringMessageArg(currentElement().type()->name()));
1096 implyCurrentElementEnd(currentLocation());
1098 if (!currentElement().isFinished() && validate())
1099 message(ParserMessages::noDocumentElement
);
// Accepts an end-tag described by event.
// Reports elementNotOpen when the named element is not open.  Otherwise
// elements are closed from the innermost outwards until the current element
// has the tag's type: each intervening element is ended with
// implyCurrentElementEnd() (reporting elementNotFinished when validating and
// it is unfinished).  For the matching element, elementEndTagNotFinished is
// reported under the same condition, the event inherits the element's
// included flag, the end is noted and the event is delivered.
// NOTE(review): the loop, the disposal of the event in the error case and
// the pop of the element are on lines not shown in this listing.
1102 void Parser::acceptEndTag(EndElementEvent
*event
)
1104 const ElementType
*e
= event
->elementType();
1105 if (!elementIsOpen(e
)) {
1106 message(ParserMessages::elementNotOpen
, StringMessageArg(e
->name()));
1111 if (currentElement().type() == e
)
1113 if (!currentElement().isFinished() && validate())
1114 message(ParserMessages::elementNotFinished
,
1115 StringMessageArg(currentElement().type()->name()));
1116 implyCurrentElementEnd(event
->location());
1118 if (!currentElement().isFinished() && validate())
1119 message(ParserMessages::elementEndTagNotFinished
,
1120 StringMessageArg(currentElement().type()->name()));
1121 if (currentElement().included())
1122 event
->setIncluded();
1123 noteEndElement(event
->included());
1124 eventHandler().endElement(event
);
// Implies the end-tag of the current element at location loc.
// Reports omitEndTagOmittag when the SGML declaration does not enable
// OMITTAG; the visible alternative reports omitEndTagDeclare when the
// element's definition does not allow its end-tag to be omitted.  Both
// messages carry the element's start location.  An end-element event is
// then created, marked as included if the element was, noted and delivered.
// NOTE(review): the else keyword, the remaining constructor arguments and
// the pop of the element are on lines not shown in this listing.
1128 void Parser::implyCurrentElementEnd(const Location
&loc
)
1130 if (!sd().omittag())
1131 message(ParserMessages::omitEndTagOmittag
,
1132 StringMessageArg(currentElement().type()->name()),
1133 currentElement().startLocation());
1135 const ElementDefinition
*def
= currentElement().type()->definition();
1136 if (def
&& !def
->canOmitEndTag())
1137 message(ParserMessages::omitEndTagDeclare
,
1138 StringMessageArg(currentElement().type()->name()),
1139 currentElement().startLocation());
1141 EndElementEvent
*event
1142 = new (eventAllocator()) EndElementEvent(currentElement().type(),
1143 currentDtdPointer(),
1146 if (currentElement().included())
1147 event
->setIncluded();
1148 noteEndElement(event
->included());
1149 eventHandler().endElement(event
);
// Extends the current token over following characters that are marked in
// normalMap(), then ends the token at the resulting length.
// NOTE(review): the loop body (presumably incrementing length) is on a line
// not shown in this listing.
1153 void Parser::extendData()
1155 XcharMap
<PackedBoolean
> isNormal(normalMap());
1156 InputSource
*in
= currentInput();
1157 size_t length
= in
->currentTokenLength();
1158 // This is one of the parser's inner loops, so it needs to be fast.
1159 while (isNormal
[in
->tokenChar(messenger())])
1161 in
->endToken(length
);
// Extends the current token over following characters that are both
// separator characters (syntax().isS) and marked in normalMap(); scanning
// stops at the first character failing either test, and the token is ended
// at the resulting length.
// NOTE(review): the loop construct and the length increment are on lines not
// shown in this listing.
1164 void Parser::extendContentS()
1166 InputSource
*in
= currentInput();
1167 size_t length
= in
->currentTokenLength();
1168 XcharMap
<PackedBoolean
> isNormal(normalMap());
1170 Xchar ch
= in
->tokenChar(messenger());
1171 if (!syntax().isS(ch
) || !isNormal
[ch
])
1175 in
->endToken(length
);
// Recovers from a start-tag for e that is not allowed at the current point.
// Uses findMissingTag() to find element types whose omitted start-tag would
// make e acceptable.  With exactly one candidate, the implied end events
// gathered so far are queued, missingElementInferred is reported, the
// missing element is opened with a freshly created start event and then e is
// opened inside it (either is marked as included when the content model
// transition fails).  With several candidates, missingElementMultiple is
// reported with all their names and e is pushed.  Otherwise, the visible
// code closes the current element tentatively (recording an end event and an
// UndoEndTag) so the search can continue one level up; when the search stops
// without a candidate, kept messages are discarded, elementNotAllowed is
// reported and e is pushed anyway.
// NOTE(review): the enclosing loop, the start of its exit condition and the
// undo call are on lines not shown in this listing.
1178 void Parser::handleBadStartTag(const ElementType
*e
,
1179 StartElementEvent
*event
,
1180 Boolean netEnabling
)
1182 IList
<Undo
> undoList
;
1183 IList
<Event
> eventList
;
1186 Vector
<const ElementType
*> missing
;
1187 findMissingTag(e
, missing
);
// Exactly one candidate: infer the missing start-tag.
1188 if (missing
.size() == 1) {
1189 queueElementEvents(eventList
);
1190 const ElementType
*m
= missing
[0];
1191 message(ParserMessages::missingElementInferred
,
1192 StringMessageArg(e
->name()),
1193 StringMessageArg(m
->name()));
1194 AttributeList
*attributes
1195 = allocAttributeList(m
->attributeDef(), 1);
1196 // this will give an error if the element has a required attribute
1197 attributes
->finish(*this);
1198 StartElementEvent
*inferEvent
1199 = new (eventAllocator()) StartElementEvent(m
,
1200 currentDtdPointer(),
1204 if (!currentElement().tryTransition(m
))
1205 inferEvent
->setIncluded();
1206 pushElementCheck(m
, inferEvent
, 0);
1207 if (!currentElement().tryTransition(e
))
1208 event
->setIncluded();
1209 pushElementCheck(e
, event
, netEnabling
);
// Several candidates: report them all and accept e where it is.
1212 if (missing
.size() > 0) {
1213 queueElementEvents(eventList
);
1214 Vector
<StringC
> missingNames
;
1215 for (size_t i
= 0; i
< missing
.size(); i
++)
1216 missingNames
.push_back(missing
[i
]->name());
1217 message(ParserMessages::missingElementMultiple
,
1218 StringMessageArg(e
->name()),
1219 StringVectorMessageArg(missingNames
));
1220 pushElementCheck(e
, event
, netEnabling
);
1224 || !currentElement().isFinished()
1226 || !currentElement().type()->definition()->canOmitEndTag())
// Tentatively close the current element and look again one level up.
1228 EndElementEvent
*endEvent
1229 = new (eventAllocator()) EndElementEvent(currentElement().type(),
1230 currentDtdPointer(),
1233 eventList
.insert(endEvent
);
1234 undoList
.insert(new (internalAllocator()) UndoEndTag(popSaveElement()));
1236 discardKeptMessages();
1238 message(ParserMessages::elementNotAllowed
, StringMessageArg(e
->name()));
1239 // If element couldn't occur because it was excluded, then
1240 // do the transition here.
1241 (void)currentElement().tryTransition(e
);
1242 pushElementCheck(e
, event
, netEnabling
);
// Collects into v the element types whose start-tag could have been omitted
// before e at the current position.
// When e is null, a null entry is pushed if the current element has no
// current position (lines partly not shown).  Otherwise the possible
// transitions of the current match state are taken as candidates; each
// non-excluded candidate is tested according to its declared content (for a
// model group: whether a fresh MatchState over its compiled group accepts
// #PCDATA or e, and whether e is among its inclusions or exclusions).
// Successful candidates are compacted to the front of v, and the result is
// insertion-sorted by element type index, i.e. by order of occurrence in
// the DTD.
// NOTE(review): many lines (branch bodies, the `success` assignments, the
// declaration of newSize and the final resize) are not shown in this
// listing, so the per-case outcomes are not described.
1245 void Parser::findMissingTag(const ElementType
*e
,
1246 Vector
<const ElementType
*> &v
)
1248 if (!currentElement().currentPosition()) {
1250 v
.push_back((const ElementType
*)0);
1253 if (elementIsExcluded(e
))
1256 currentElement().matchState().possibleTransitions(v
);
1257 // FIXME also get currentInclusions
1258 for (size_t i
= 0; i
< v
.size(); i
++) {
// Null entries and excluded element types are never candidates.
1259 if (v
[i
] && !elementIsExcluded(v
[i
])) {
1260 Boolean success
= 0;
1261 switch (v
[i
]->definition()->declaredContent()) {
1262 case ElementDefinition::modelGroup
:
1264 const CompiledModelGroup
*grp
1265 = v
[i
]->definition()->compiledModelGroup();
// Fresh match state at the start of the candidate's content model.
1266 MatchState
state(grp
);
1268 if (state
.tryTransitionPcdata())
1272 if (state
.tryTransition(e
))
1275 for (size_t j
= 0; j
< v
[i
]->definition()->nInclusions(); j
++)
1276 if (v
[i
]->definition()->inclusion(j
) == e
) {
1282 for (size_t j
= 0; j
< v
[i
]->definition()->nExclusions(); j
++)
1283 if (v
[i
]->definition()->exclusion(j
) == e
) {
1292 case ElementDefinition::any
:
1296 case ElementDefinition::cdata
:
1297 case ElementDefinition::rcdata
:
// Keep the candidate by moving it down into the compacted prefix of v.
1305 v
[newSize
++] = v
[i
];
1309 // Sort them according to the order of their occurrence in the DTD.
1310 // Do an insertion sort.
1311 for (size_t i
= 1; i
< v
.size(); i
++) {
1312 const ElementType
*tem
= v
[i
];
1314 for (j
= i
; j
> 0 && v
[j
- 1]->index() > tem
->index(); j
--)
1321 // This produces messages that are too verbose
1322 // This doesn't try to be very efficient.
1325 void Parser::getAllowedElementTypes(Vector
<const ElementType
*> &v
)
1328 // FIXME get a list of all inclusions first
1329 // getCurrentInclusions(v);
1330 // x says whether each element of v was excluded
1331 Vector
<PackedBoolean
> x(v
.size(), 0);
1332 unsigned startImpliedCount
= 0;
1333 IList
<Undo
> undoList
;
1335 if (currentElement().currentPosition()) {
1336 // have a model group
1337 size_t i
= v
.size();
1338 currentElement().matchState().possibleTransitions(v
);
1340 for (size_t j
= i
; j
< v
.size(); j
++)
1341 x
[j
] = (v
[j
] && elementIsExcluded(v
[j
]));
1342 if (!sd().omittag())
1344 // Try to imply a tag
1345 if (currentElement().isFinished()) {
1346 if (tagLevel() == 0)
1348 if (startImpliedCount
)
1350 const ElementDefinition
*def
= currentElement().type()->definition();
1351 if (def
&& def
->canOmitEndTag())
1352 undoList
.insert(new (internalAllocator())
1353 UndoEndTag(popSaveElement()));
1358 const LeafContentToken
*token
= currentElement().impliedStartTag();
1361 const ElementType
*e
= token
->elementType();
1362 if (elementIsExcluded(e
))
1364 const ElementDefinition
*def
= e
->definition();
1367 || (def
->declaredContent() != ElementDefinition::modelGroup
1368 && def
->declaredContent() != ElementDefinition::any
)
1369 || !def
->canOmitStartTag())
1371 undoList
.insert(new (internalAllocator()) UndoStartTag
);
1372 startImpliedCount
++;
1373 pushElement(new (internalAllocator()) OpenElement(e
,
1378 if (checkImplyLoop(startImpliedCount
))
1380 for (size_t i
= 0; i
< def
->nInclusions(); i
++)
1381 if (!elementIsExcluded(def
->inclusion(i
))) {
1382 v
.push_back(def
->inclusion(i
));
1388 // must be allowed #pcdata
1389 v
.push_back((const ElementType
*)0);
1390 x
.push_back((PackedBoolean
)0);
1395 // Remove exclusions and duplicates and undefined
1397 for (size_t i
= 0; i
< v
.size(); i
++)
1398 if (!x
[i
] && (!v
[i
] || !v
[i
]->definition()->undefined())) {
1400 for (size_t j
= 0; j
< newSize
; j
++)
1406 v
[newSize
++] = v
[i
];