3065 some functions in the tcp module can be static
[unleashed.git] / usr / src / cmd / man / src / util / nsgmls.src / lib / parseInstance.cxx
blob221afd9b918e199627ca256cb2b0e0da663fbd8d
1 // Copyright (c) 1994 James Clark
2 // See the file COPYING for copying permission.
3 #pragma ident "%Z%%M% %I% %E% SMI"
5 #include "splib.h"
6 #include "Parser.h"
7 #include "ParserMessages.h"
8 #include "MessageArg.h"
9 #include "TokenMessageArg.h"
10 #include "StringVectorMessageArg.h"
11 #include "token.h"
12 #include "macros.h"
14 #ifdef SP_NAMESPACE
15 namespace SP_NAMESPACE {
16 #endif
18 void Parser::doInstanceStart()
20 if (cancelled()) {
21 allDone();
22 return;
24 // FIXME check here that we have a valid dtd
25 compileInstanceModes();
26 setPhase(contentPhase);
27 Token token = getToken(currentMode());
28 switch (token) {
29 case tokenEe:
30 case tokenStagoNameStart:
31 case tokenStagoTagc:
32 case tokenStagoGrpo:
33 case tokenEtagoNameStart:
34 case tokenEtagoTagc:
35 case tokenEtagoGrpo:
36 break;
37 default:
38 if (sd().omittag()) {
39 unsigned startImpliedCount = 0;
40 unsigned attributeListIndex = 0;
41 IList<Undo> undoList;
42 IList<Event> eventList;
43 if (!tryImplyTag(currentLocation(),
44 startImpliedCount,
45 attributeListIndex,
46 undoList,
47 eventList))
48 CANNOT_HAPPEN();
49 queueElementEvents(eventList);
51 else
52 message(ParserMessages::instanceStartOmittag);
54 currentInput()->ungetToken();
57 void Parser::endInstance()
59 // Do checking before popping entity stack so that there's a
60 // current location for error messages.
61 endAllElements();
62 while (markedSectionLevel() > 0) {
63 message(ParserMessages::unclosedMarkedSection,
64 currentMarkedSectionStartLocation());
65 endMarkedSection();
67 checkIdrefs();
68 popInputStack();
69 allDone();
72 void Parser::checkIdrefs()
74 IdTableIter iter(idTableIter());
75 Id *id;
76 while ((id = iter.next()) != 0) {
77 for (size_t i = 0; i < id->pendingRefs().size(); i++) {
78 Messenger::setNextLocation(id->pendingRefs()[i]);
79 message(ParserMessages::missingId, StringMessageArg(id->name()));
84 void Parser::doContent()
86 do {
87 if (cancelled()) {
88 allDone();
89 return;
91 Token token = getToken(currentMode());
92 switch (token) {
93 case tokenEe:
94 if (inputLevel() == 1) {
95 endInstance();
96 return;
98 if (inputLevel() == specialParseInputLevel()) {
99 // FIXME have separate messages for each type of special parse
100 // perhaps force end of marked section or element
101 message(ParserMessages::specialParseEntityEnd);
103 if (eventsWanted().wantInstanceMarkup())
104 eventHandler().entityEnd(new (eventAllocator())
105 EntityEndEvent(currentLocation()));
106 if (afterDocumentElement())
107 message(ParserMessages::afterDocumentElementEntityEnd);
108 if (sd().integrallyStored()
109 && tagLevel()
110 && currentElement().index() != currentInputElementIndex())
111 message(ParserMessages::contentAsyncEntityRef);
112 popInputStack();
113 break;
114 case tokenCroDigit:
115 case tokenHcroHexDigit:
117 if (afterDocumentElement())
118 message(ParserMessages::characterReferenceAfterDocumentElement);
119 Char ch;
120 Location loc;
121 if (parseNumericCharRef(token == tokenHcroHexDigit, ch, loc)) {
122 acceptPcdata(loc);
123 noteData();
124 Boolean isSgmlChar;
125 if (!translateNumericCharRef(ch, isSgmlChar))
126 break;
127 if (!isSgmlChar) {
128 eventHandler().nonSgmlChar(new (eventAllocator())
129 NonSgmlCharEvent(ch, loc));
130 break;
132 eventHandler().data(new (eventAllocator())
133 ImmediateDataEvent(Event::characterData,
134 &ch, 1, loc, 1));
135 break;
138 break;
139 case tokenCroNameStart:
140 if (afterDocumentElement())
141 message(ParserMessages::characterReferenceAfterDocumentElement);
142 parseNamedCharRef();
143 break;
144 case tokenEroGrpo:
145 case tokenEroNameStart:
147 if (afterDocumentElement())
148 message(ParserMessages::entityReferenceAfterDocumentElement);
149 ConstPtr<Entity> entity;
150 Ptr<EntityOrigin> origin;
151 if (parseEntityReference(0, token == tokenEroGrpo, entity, origin)) {
152 if (!entity.isNull()) {
153 if (entity->isCharacterData())
154 acceptPcdata(Location(origin.pointer(), 0));
155 if (inputLevel() == specialParseInputLevel())
156 entity->rcdataReference(*this, origin);
157 else
158 entity->contentReference(*this, origin);
162 break;
163 case tokenEtagoNameStart:
164 acceptEndTag(parseEndTag());
165 break;
166 case tokenEtagoTagc:
167 parseEmptyEndTag();
168 break;
169 case tokenEtagoGrpo:
170 parseGroupEndTag();
171 break;
172 case tokenMdoNameStart:
173 if (startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation()))
174 currentMarkup()->addDelim(Syntax::dMDO);
175 Syntax::ReservedName name;
176 Boolean result;
177 unsigned startLevel;
178 startLevel = inputLevel();
179 if (parseDeclarationName(&name)) {
180 switch (name) {
181 case Syntax::rUSEMAP:
182 if (afterDocumentElement())
183 message(ParserMessages::declarationAfterDocumentElement,
184 StringMessageArg(syntax().reservedName(name)));
185 result = parseUsemapDecl();
186 break;
187 case Syntax::rUSELINK:
188 if (afterDocumentElement())
189 message(ParserMessages::declarationAfterDocumentElement,
190 StringMessageArg(syntax().reservedName(name)));
191 result = parseUselinkDecl();
192 break;
193 case Syntax::rDOCTYPE:
194 case Syntax::rLINKTYPE:
195 case Syntax::rELEMENT:
196 case Syntax::rATTLIST:
197 case Syntax::rENTITY:
198 case Syntax::rNOTATION:
199 case Syntax::rSHORTREF:
200 case Syntax::rLINK:
201 case Syntax::rIDLINK:
202 message(ParserMessages::instanceDeclaration,
203 StringMessageArg(syntax().reservedName(name)));
204 result = 0;
205 break;
206 default:
207 message(ParserMessages::noSuchDeclarationType,
208 StringMessageArg(syntax().reservedName(name)));
209 result = 0;
210 break;
213 else
214 result = 0;
215 if (!result)
216 skipDeclaration(startLevel);
217 noteMarkup();
218 break;
219 case tokenMdoMdc:
220 // empty comment
221 emptyCommentDecl();
222 noteMarkup();
223 break;
224 case tokenMdoCom:
225 parseCommentDecl();
226 noteMarkup();
227 break;
228 case tokenMdoDso:
229 if (afterDocumentElement())
230 message(ParserMessages::markedSectionAfterDocumentElement);
231 parseMarkedSectionDeclStart();
232 noteMarkup();
233 break;
234 case tokenMscMdc:
235 handleMarkedSectionEnd();
236 noteMarkup();
237 break;
238 case tokenNet:
239 parseNullEndTag();
240 break;
241 case tokenPio:
242 parseProcessingInstruction();
243 break;
244 case tokenStagoNameStart:
245 parseStartTag();
246 break;
247 case tokenStagoTagc:
248 parseEmptyStartTag();
249 break;
250 case tokenStagoGrpo:
251 parseGroupStartTag();
252 break;
253 case tokenRe:
254 acceptPcdata(currentLocation());
255 queueRe(currentLocation());
256 break;
257 case tokenRs:
258 acceptPcdata(currentLocation());
259 noteRs();
260 if (eventsWanted().wantInstanceMarkup())
261 eventHandler().ignoredRs(new (eventAllocator())
262 IgnoredRsEvent(currentChar(),
263 currentLocation()));
264 break;
265 case tokenS:
266 extendContentS();
267 if (eventsWanted().wantInstanceMarkup())
268 eventHandler().sSep(new (eventAllocator())
269 SSepEvent(currentInput()->currentTokenStart(),
270 currentInput()->currentTokenLength(),
271 currentLocation(),
272 0));
273 break;
274 case tokenIgnoredChar:
275 extendData();
276 if (eventsWanted().wantMarkedSections())
277 eventHandler().ignoredChars(new (eventAllocator())
278 IgnoredCharsEvent(currentInput()->currentTokenStart(),
279 currentInput()->currentTokenLength(),
280 currentLocation(),
281 0));
282 break;
283 case tokenUnrecognized:
284 reportNonSgmlCharacter();
285 parsePcdata();
286 break;
287 case tokenCharDelim:
288 message(ParserMessages::dataCharDelim,
289 StringMessageArg(StringC(currentInput()->currentTokenStart(),
290 currentInput()->currentTokenLength())));
291 // fall through
292 case tokenChar:
293 parsePcdata();
294 break;
295 default:
296 ASSERT(token >= tokenFirstShortref);
297 handleShortref(token - tokenFirstShortref);
298 break;
300 } while (eventQueueEmpty());
303 void Parser::skipDeclaration(unsigned startLevel)
305 const unsigned skipMax = 250;
306 unsigned skipCount = 0;
307 for (;;) {
308 Token token = getToken(mdMode);
309 if (inputLevel() == startLevel)
310 skipCount++;
311 switch (token) {
312 case tokenUnrecognized:
313 (void)getChar();
314 break;
315 case tokenEe:
316 if (inputLevel() <= startLevel)
317 return;
318 popInputStack();
319 return;
320 case tokenMdc:
321 if (inputLevel() == startLevel)
322 return;
323 break;
324 case tokenS:
325 if (inputLevel() == startLevel && skipCount >= skipMax
326 && currentChar() == syntax().standardFunction(Syntax::fRE))
327 return;
328 break;
329 default:
330 break;
335 void Parser::handleShortref(int index)
337 const ConstPtr<Entity> &entity
338 = currentElement().map()->entity(index);
339 if (!entity.isNull()) {
340 Owner<Markup> markupPtr;
341 if (eventsWanted().wantInstanceMarkup()) {
342 markupPtr = new Markup;
343 markupPtr->addShortref(currentInput());
345 Ptr<EntityOrigin> origin
346 = EntityOrigin::make(internalAllocator(),
347 entity,
348 currentLocation(),
349 currentInput()->currentTokenLength(),
350 markupPtr);
351 entity->contentReference(*this, origin);
352 return;
354 InputSource *in = currentInput();
355 size_t length = in->currentTokenLength();
356 const Char *s = in->currentTokenStart();
357 size_t i = 0;
358 if (currentMode() == econMode || currentMode() == econnetMode) {
359 // FIXME do this in advance (what about B sequence?)
360 for (i = 0; i < length && syntax().isS(s[i]); i++)
362 if (i > 0 && eventsWanted().wantInstanceMarkup())
363 eventHandler().sSep(new (eventAllocator())
364 SSepEvent(s, i, currentLocation(), 0));
366 if (i < length) {
367 Location location(currentLocation());
368 location += i;
369 s += i;
370 length -= i;
371 acceptPcdata(location);
372 if (sd().keeprsre()) {
373 noteData();
374 eventHandler().data(new (eventAllocator())
375 ImmediateDataEvent(Event::characterData, s, length,
376 location, 0));
377 return;
379 // FIXME speed this up
380 for (; length > 0; location += 1, length--, s++) {
381 if (*s == syntax().standardFunction(Syntax::fRS)) {
382 noteRs();
383 if (eventsWanted().wantInstanceMarkup())
384 eventHandler().ignoredRs(new (eventAllocator())
385 IgnoredRsEvent(*s, location));
387 else if (*s == syntax().standardFunction(Syntax::fRE))
388 queueRe(location);
389 else {
390 noteData();
391 eventHandler().data(new (eventAllocator())
392 ImmediateDataEvent(Event::characterData, s, 1,
393 location, 0));
399 void Parser::parsePcdata()
401 extendData();
402 acceptPcdata(currentLocation());
403 noteData();
404 eventHandler().data(new (eventAllocator())
405 ImmediateDataEvent(Event::characterData,
406 currentInput()->currentTokenStart(),
407 currentInput()->currentTokenLength(),
408 currentLocation(),
409 0));
412 void Parser::parseStartTag()
414 InputSource *in = currentInput();
415 Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
416 in->currentLocation());
417 in->discardInitial();
418 extendNameToken(syntax().namelen(), ParserMessages::nameLength);
419 if (markup) {
420 markup->addDelim(Syntax::dSTAGO);
421 markup->addName(in);
423 StringC &name = nameBuffer();
424 getCurrentToken(syntax().generalSubstTable(), name);
425 ElementType *e = currentDtdNonConst().lookupElementType(name);
426 if (sd().rank()) {
427 if (!e)
428 e = completeRankStem(name);
429 else if (e->isRankedElement())
430 handleRankedElement(e);
432 if (!e)
433 e = lookupCreateUndefinedElement(name, currentLocation(), currentDtdNonConst());
434 Boolean netEnabling;
435 AttributeList *attributes = allocAttributeList(e->attributeDef(), 0);
436 Token closeToken = getToken(tagMode);
437 if (closeToken == tokenTagc) {
438 if (name.size() > syntax().taglen())
439 checkTaglen(markupLocation().index());
440 attributes->finish(*this);
441 netEnabling = 0;
442 if (markup)
443 markup->addDelim(Syntax::dTAGC);
445 else {
446 in->ungetToken();
447 Ptr<AttributeDefinitionList> newAttDef;
448 if (parseAttributeSpec(0, *attributes, netEnabling, newAttDef)) {
449 // The difference between the indices will be the difference
450 // in offsets plus 1 for each named character reference.
451 if (in->currentLocation().index() - markupLocation().index()
452 > syntax().taglen())
453 checkTaglen(markupLocation().index());
455 else
456 netEnabling = 0;
457 if (!newAttDef.isNull()) {
458 newAttDef->setIndex(currentDtdNonConst().allocAttributeDefinitionListIndex());
459 e->setAttributeDef(newAttDef);
462 acceptStartTag(e,
463 new (eventAllocator())
464 StartElementEvent(e,
465 currentDtdPointer(),
466 attributes,
467 markupLocation(),
468 markup),
469 netEnabling);
472 ElementType *Parser::completeRankStem(const StringC &name)
474 const RankStem *rankStem = currentDtd().lookupRankStem(name);
475 if (rankStem) {
476 StringC name(rankStem->name());
477 if (!appendCurrentRank(name, rankStem))
478 message(ParserMessages::noCurrentRank, StringMessageArg(name));
479 else
480 return currentDtdNonConst().lookupElementType(name);
482 return 0;
485 void Parser::handleRankedElement(const ElementType *e)
487 StringC rankSuffix(e->definition()->rankSuffix());
488 const RankStem *rankStem = e->rankedElementRankStem();
489 for (size_t i = 0; i < rankStem->nDefinitions(); i++) {
490 const ElementDefinition *def = rankStem->definition(i);
491 for (size_t j = 0; j < def->nRankStems(); j++)
492 setCurrentRank(def->rankStem(j), rankSuffix);
496 void Parser::checkTaglen(Index tagStartIndex)
498 const InputSourceOrigin *origin
499 = currentLocation().origin()->asInputSourceOrigin();
500 ASSERT(origin != 0);
501 if (origin->startOffset(currentLocation().index())
502 - origin->startOffset(tagStartIndex
503 + syntax().delimGeneral(Syntax::dSTAGO).size())
504 > syntax().taglen())
505 message(ParserMessages::taglen, NumberMessageArg(syntax().taglen()));
508 void Parser::parseEmptyStartTag()
510 if (options().warnEmptyTag)
511 message(ParserMessages::emptyStartTag);
512 // FIXME error if not in base.
513 const ElementType *e = 0;
514 if (!sd().omittag())
515 e = lastEndedElementType();
516 else if (tagLevel() > 0)
517 e = currentElement().type();
518 if (!e)
519 e = currentDtd().documentElementType();
520 AttributeList *attributes = allocAttributeList(e->attributeDef(), 0);
521 attributes->finish(*this);
522 Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
523 currentLocation());
524 if (markup) {
525 markup->addDelim(Syntax::dSTAGO);
526 markup->addDelim(Syntax::dTAGC);
528 acceptStartTag(e,
529 new (eventAllocator())
530 StartElementEvent(e,
531 currentDtdPointer(),
532 attributes,
533 markupLocation(),
534 markup),
538 void Parser::parseGroupStartTag()
540 if (startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation())) {
541 currentMarkup()->addDelim(Syntax::dSTAGO);
542 currentMarkup()->addDelim(Syntax::dGRPO);
544 Boolean active;
545 if (!parseTagNameGroup(active))
546 return;
547 InputSource *in = currentInput();
548 // Location startLocation = in->currentLocation();
549 in->startToken();
550 Xchar c = in->tokenChar(messenger());
551 if (!syntax().isNameStartCharacter(c)) {
552 message(ParserMessages::startTagMissingName);
553 return;
555 in->discardInitial();
556 extendNameToken(syntax().namelen(), ParserMessages::nameLength);
557 if (currentMarkup())
558 currentMarkup()->addName(currentInput());
559 skipAttributeSpec();
560 if (currentMarkup())
561 eventHandler().ignoredMarkup(new (eventAllocator())
562 IgnoredMarkupEvent(markupLocation(),
563 currentMarkup()));
564 noteMarkup();
567 void Parser::parseGroupEndTag()
569 if (startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation())) {
570 currentMarkup()->addDelim(Syntax::dSTAGO);
571 currentMarkup()->addDelim(Syntax::dGRPO);
573 Boolean active;
574 if (!parseTagNameGroup(active))
575 return;
576 InputSource *in = currentInput();
577 // Location startLocation = in->currentLocation();
578 in->startToken();
579 Xchar c = in->tokenChar(messenger());
580 if (!syntax().isNameStartCharacter(c)) {
581 message(ParserMessages::endTagMissingName);
582 return;
584 in->discardInitial();
585 extendNameToken(syntax().namelen(), ParserMessages::nameLength);
586 if (currentMarkup())
587 currentMarkup()->addName(currentInput());
588 parseEndTagClose();
589 if (currentMarkup())
590 eventHandler().ignoredMarkup(new (eventAllocator())
591 IgnoredMarkupEvent(markupLocation(),
592 currentMarkup()));
593 noteMarkup();
596 void Parser::acceptPcdata(const Location &startLocation)
598 if (currentElement().tryTransitionPcdata())
599 return;
600 // Need to test here since implying tags may turn off pcdataRecovering.
601 if (pcdataRecovering())
602 return;
603 IList<Undo> undoList;
604 IList<Event> eventList;
605 unsigned startImpliedCount = 0;
606 unsigned attributeListIndex = 0;
607 keepMessages();
608 while (tryImplyTag(startLocation, startImpliedCount, attributeListIndex,
609 undoList, eventList))
610 if (currentElement().tryTransitionPcdata()) {
611 queueElementEvents(eventList);
612 return;
614 discardKeptMessages();
615 undo(undoList);
616 if (validate() || afterDocumentElement())
617 message(ParserMessages::pcdataNotAllowed);
618 pcdataRecover();
621 void Parser::acceptStartTag(const ElementType *e,
622 StartElementEvent *event,
623 Boolean netEnabling)
625 if (e->definition()->undefined() && !implydefElement())
626 message(ParserMessages::undefinedElement, StringMessageArg(e->name()));
627 if (elementIsExcluded(e)) {
628 keepMessages();
629 if (validate())
630 checkExclusion(e);
632 else {
633 if (currentElement().tryTransition(e)) {
634 pushElementCheck(e, event, netEnabling);
635 return;
637 if (elementIsIncluded(e)) {
638 event->setIncluded();
639 pushElementCheck(e, event, netEnabling);
640 return;
642 keepMessages();
644 IList<Undo> undoList;
645 IList<Event> eventList;
646 unsigned startImpliedCount = 0;
647 unsigned attributeListIndex = 1;
648 while (tryImplyTag(event->location(), startImpliedCount,
649 attributeListIndex, undoList, eventList))
650 if (tryStartTag(e, event, netEnabling, eventList))
651 return;
652 discardKeptMessages();
653 undo(undoList);
654 if (validate() && !e->definition()->undefined())
655 handleBadStartTag(e, event, netEnabling);
656 else {
657 if (validate() ? implydefElement() : afterDocumentElement())
658 message(ParserMessages::elementNotAllowed, StringMessageArg(e->name()));
659 // If element couldn't occur because it was excluded, then
660 // do the transition here.
661 (void)currentElement().tryTransition(e);
662 pushElementCheck(e, event, netEnabling);
666 void Parser::undo(IList<Undo> &undoList)
668 while (!undoList.empty()) {
669 Undo *p = undoList.get();
670 p->undo(this);
671 delete p;
675 void Parser::queueElementEvents(IList<Event> &events)
677 releaseKeptMessages();
678 // FIXME provide IList<T>::reverse function
679 // reverse it
680 IList<Event> tem;
681 while (!events.empty())
682 tem.insert(events.get());
683 while (!tem.empty()) {
684 Event *e = tem.get();
685 if (e->type() == Event::startElement) {
686 noteStartElement(((StartElementEvent *)e)->included());
687 eventHandler().startElement((StartElementEvent *)e);
689 else {
690 noteEndElement(((EndElementEvent *)e)->included());
691 eventHandler().endElement((EndElementEvent *)e);
697 void Parser::checkExclusion(const ElementType *e)
699 const LeafContentToken *token = currentElement().invalidExclusion(e);
700 if (token)
701 message(ParserMessages::invalidExclusion,
702 OrdinalMessageArg(token->typeIndex() + 1),
703 StringMessageArg(token->elementType()->name()),
704 StringMessageArg(currentElement().type()->name()));
707 Boolean Parser::tryStartTag(const ElementType *e,
708 StartElementEvent *event,
709 Boolean netEnabling,
710 IList<Event> &impliedEvents)
712 if (elementIsExcluded(e)) {
713 checkExclusion(e);
714 return 0;
716 if (currentElement().tryTransition(e)) {
717 queueElementEvents(impliedEvents);
718 pushElementCheck(e, event, netEnabling);
719 return 1;
721 if (elementIsIncluded(e)) {
722 queueElementEvents(impliedEvents);
723 event->setIncluded();
724 pushElementCheck(e, event, netEnabling);
725 return 1;
727 return 0;
730 Boolean Parser::tryImplyTag(const Location &loc,
731 unsigned &startImpliedCount,
732 unsigned &attributeListIndex,
733 IList<Undo> &undo,
734 IList<Event> &eventList)
736 if (!sd().omittag())
737 return 0;
738 if (currentElement().isFinished()) {
739 if (tagLevel() == 0)
740 return 0;
741 #if 1
742 const ElementDefinition *def = currentElement().type()->definition();
743 if (def && !def->canOmitEndTag())
744 return 0;
745 #endif
746 // imply an end tag
747 if (startImpliedCount > 0) {
748 message(ParserMessages::startTagEmptyElement,
749 StringMessageArg(currentElement().type()->name()));
750 startImpliedCount--;
752 #if 0
753 const ElementDefinition *def = currentElement().type()->definition();
754 if (def && !def->canOmitEndTag())
755 message(ParserMessages::omitEndTagDeclare,
756 StringMessageArg(currentElement().type()->name()),
757 currentElement().startLocation());
758 #endif
759 EndElementEvent *event
760 = new (eventAllocator()) EndElementEvent(currentElement().type(),
761 currentDtdPointer(),
762 loc,
764 eventList.insert(event);
765 undo.insert(new (internalAllocator()) UndoEndTag(popSaveElement()));
766 return 1;
768 const LeafContentToken *token = currentElement().impliedStartTag();
769 if (!token)
770 return 0;
771 const ElementType *e = token->elementType();
772 if (elementIsExcluded(e))
773 message(ParserMessages::requiredElementExcluded,
774 OrdinalMessageArg(token->typeIndex() + 1),
775 StringMessageArg(e->name()),
776 StringMessageArg(currentElement().type()->name()));
777 if (tagLevel() != 0)
778 undo.insert(new (internalAllocator())
779 UndoTransition(currentElement().matchState()));
780 currentElement().doRequiredTransition();
781 const ElementDefinition *def = e->definition();
782 if (def->declaredContent() != ElementDefinition::modelGroup
783 && def->declaredContent() != ElementDefinition::any)
784 message(ParserMessages::omitStartTagDeclaredContent,
785 StringMessageArg(e->name()));
786 if (def->undefined())
787 message(ParserMessages::undefinedElement, StringMessageArg(e->name()));
788 else if (!def->canOmitStartTag())
789 message(ParserMessages::omitStartTagDeclare, StringMessageArg(e->name()));
790 AttributeList *attributes
791 = allocAttributeList(e->attributeDef(),
792 attributeListIndex++);
793 // this will give an error if the element has a required attribute
794 attributes->finish(*this);
795 startImpliedCount++;
796 StartElementEvent *event
797 = new (eventAllocator()) StartElementEvent(e,
798 currentDtdPointer(),
799 attributes,
800 loc,
802 pushElementCheck(e, event, undo, eventList);
803 const int implyCheckLimit = 30; // this is fairly arbitrary
804 if (startImpliedCount > implyCheckLimit
805 && !checkImplyLoop(startImpliedCount))
806 return 0;
807 return 1;
810 void Parser::pushElementCheck(const ElementType *e, StartElementEvent *event,
811 Boolean netEnabling)
813 if (tagLevel() == syntax().taglvl())
814 message(ParserMessages::taglvlOpenElements, NumberMessageArg(syntax().taglvl()));
815 noteStartElement(event->included());
816 if (event->mustOmitEnd()) {
817 if (sd().emptyElementNormal()) {
818 Boolean included = event->included();
819 Location loc(event->location());
820 eventHandler().startElement(event);
821 endTagEmptyElement(e, netEnabling, included, loc);
823 else {
824 EndElementEvent *end
825 = new (eventAllocator()) EndElementEvent(e,
826 currentDtdPointer(),
827 event->location(),
829 if (event->included()) {
830 end->setIncluded();
831 noteEndElement(1);
833 else
834 noteEndElement(0);
835 eventHandler().startElement(event);
836 eventHandler().endElement(end);
839 else {
840 const ShortReferenceMap *map = e->map();
841 if (!map)
842 map = currentElement().map();
843 pushElement(new (internalAllocator()) OpenElement(e,
844 netEnabling,
845 event->included(),
846 map,
847 event->location()));
848 // Can't access event after it's passed to the event handler.
849 eventHandler().startElement(event);
853 void Parser::endTagEmptyElement(const ElementType *e,
854 Boolean netEnabling,
855 Boolean included,
856 const Location &startLoc)
858 Token token = getToken(netEnabling ? econnetMode : econMode);
859 switch (token) {
860 case tokenNet:
861 if (netEnabling) {
862 Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
863 currentLocation());
864 if (markup)
865 markup->addDelim(Syntax::dNET);
866 EndElementEvent *end
867 = new (eventAllocator()) EndElementEvent(e,
868 currentDtdPointer(),
869 currentLocation(),
870 markup);
871 if (included)
872 end->setIncluded();
873 eventHandler().endElement(end);
874 noteEndElement(included);
875 return;
877 break;
878 case tokenEtagoTagc:
880 if (options().warnEmptyTag)
881 message(ParserMessages::emptyEndTag);
882 Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
883 currentLocation());
884 if (markup) {
885 markup->addDelim(Syntax::dETAGO);
886 markup->addDelim(Syntax::dTAGC);
888 EndElementEvent *end
889 = new (eventAllocator()) EndElementEvent(e,
890 currentDtdPointer(),
891 currentLocation(),
892 markup);
893 if (included)
894 end->setIncluded();
895 eventHandler().endElement(end);
896 noteEndElement(included);
897 return;
899 case tokenEtagoNameStart:
901 EndElementEvent *end = parseEndTag();
902 if (end->elementType() == e) {
903 if (included)
904 end->setIncluded();
905 eventHandler().endElement(end);
906 noteEndElement(included);
907 return;
909 if (!elementIsOpen(end->elementType())) {
910 message(ParserMessages::elementNotOpen,
911 StringMessageArg(end->elementType()->name()));
912 delete end;
913 break;
915 implyEmptyElementEnd(e, included, startLoc);
916 acceptEndTag(end);
917 return;
919 default:
920 break;
922 implyEmptyElementEnd(e, included, startLoc);
923 currentInput()->ungetToken();
926 void Parser::implyEmptyElementEnd(const ElementType *e,
927 Boolean included,
928 const Location &startLoc)
930 if (!sd().omittag())
931 message(ParserMessages::omitEndTagOmittag,
932 StringMessageArg(e->name()),
933 startLoc);
934 else {
935 const ElementDefinition *def = e->definition();
936 if (def && !def->canOmitEndTag())
937 message(ParserMessages::omitEndTagDeclare,
938 StringMessageArg(e->name()),
939 startLoc);
941 EndElementEvent *end
942 = new (eventAllocator()) EndElementEvent(e,
943 currentDtdPointer(),
944 currentLocation(),
946 if (included)
947 end->setIncluded();
948 noteEndElement(included);
949 eventHandler().endElement(end);
952 void Parser::pushElementCheck(const ElementType *e, StartElementEvent *event,
953 IList<Undo> &undoList,
954 IList<Event> &eventList)
956 if (tagLevel() == syntax().taglvl())
957 message(ParserMessages::taglvlOpenElements, NumberMessageArg(syntax().taglvl()));
958 eventList.insert(event);
959 if (event->mustOmitEnd()) {
960 EndElementEvent *end
961 = new (eventAllocator()) EndElementEvent(e,
962 currentDtdPointer(),
963 event->location(),
965 if (event->included())
966 end->setIncluded();
967 eventList.insert(end);
969 else {
970 undoList.insert(new (internalAllocator()) UndoStartTag);
971 const ShortReferenceMap *map = e->map();
972 if (!map)
973 map = currentElement().map();
974 pushElement(new (internalAllocator()) OpenElement(e,
976 event->included(),
977 map,
978 event->location()));
982 EndElementEvent *Parser::parseEndTag()
984 Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
985 currentLocation());
986 currentInput()->discardInitial();
987 extendNameToken(syntax().namelen(), ParserMessages::nameLength);
988 if (markup) {
989 markup->addDelim(Syntax::dETAGO);
990 markup->addName(currentInput());
992 StringC &name = nameBuffer();
993 getCurrentToken(syntax().generalSubstTable(), name);
994 const ElementType *e = currentDtd().lookupElementType(name);
995 if (sd().rank()) {
996 if (!e)
997 e = completeRankStem(name);
999 if (!e)
1000 e = lookupCreateUndefinedElement(name, currentLocation(), currentDtdNonConst());
1001 parseEndTagClose();
1002 return new (eventAllocator())
1003 EndElementEvent(e,
1004 currentDtdPointer(),
1005 markupLocation(),
1006 markup);
1009 void Parser::parseEndTagClose()
1011 for (;;) {
1012 Token token = getToken(tagMode);
1013 switch (token) {
1014 case tokenUnrecognized:
1015 if (!reportNonSgmlCharacter())
1016 message(ParserMessages::endTagCharacter, StringMessageArg(currentToken()));
1017 return;
1018 case tokenEe:
1019 message(ParserMessages::endTagEntityEnd);
1020 return;
1021 case tokenEtago:
1022 case tokenStago:
1023 if (!sd().endTagUnclosed())
1024 message(ParserMessages::unclosedEndTagShorttag);
1025 currentInput()->ungetToken();
1026 return;
1027 case tokenTagc:
1028 if (currentMarkup())
1029 currentMarkup()->addDelim(Syntax::dTAGC);
1030 return;
1031 case tokenS:
1032 if (currentMarkup())
1033 currentMarkup()->addS(currentChar());
1034 break;
1035 default:
1036 message(ParserMessages::endTagInvalidToken,
1037 TokenMessageArg(token, tagMode, syntaxPointer(), sdPointer()));
1038 return;
1043 void Parser::parseEmptyEndTag()
1045 if (options().warnEmptyTag)
1046 message(ParserMessages::emptyEndTag);
1047 // FIXME what to do if not in base
1048 if (tagLevel() == 0)
1049 message(ParserMessages::emptyEndTagNoOpenElements);
1050 else {
1051 Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
1052 currentLocation());
1053 if (markup) {
1054 markup->addDelim(Syntax::dETAGO);
1055 markup->addDelim(Syntax::dTAGC);
1057 acceptEndTag(new (eventAllocator()) EndElementEvent(currentElement().type(),
1058 currentDtdPointer(),
1059 currentLocation(),
1060 markup));
1064 void Parser::parseNullEndTag()
1066 // If a null end tag was recognized, then there must be a net enabling
1067 // element on the stack.
1068 for (;;) {
1069 ASSERT(tagLevel() > 0);
1070 if (currentElement().netEnabling())
1071 break;
1072 if (!currentElement().isFinished() && validate())
1073 message(ParserMessages::elementNotFinished,
1074 StringMessageArg(currentElement().type()->name()));
1075 implyCurrentElementEnd(currentLocation());
1077 if (!currentElement().isFinished() && validate())
1078 message(ParserMessages::elementEndTagNotFinished,
1079 StringMessageArg(currentElement().type()->name()));
1080 Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
1081 currentLocation());
1082 if (markup)
1083 markup->addDelim(Syntax::dNET);
1084 acceptEndTag(new (eventAllocator()) EndElementEvent(currentElement().type(),
1085 currentDtdPointer(),
1086 currentLocation(),
1087 markup));
1090 void Parser::endAllElements()
1092 while (tagLevel() > 0) {
1093 if (!currentElement().isFinished())
1094 message(ParserMessages::elementNotFinishedDocumentEnd,
1095 StringMessageArg(currentElement().type()->name()));
1096 implyCurrentElementEnd(currentLocation());
1098 if (!currentElement().isFinished() && validate())
1099 message(ParserMessages::noDocumentElement);
1102 void Parser::acceptEndTag(EndElementEvent *event)
1104 const ElementType *e = event->elementType();
1105 if (!elementIsOpen(e)) {
1106 message(ParserMessages::elementNotOpen, StringMessageArg(e->name()));
1107 delete event;
1108 return;
1110 for (;;){
1111 if (currentElement().type() == e)
1112 break;
1113 if (!currentElement().isFinished() && validate())
1114 message(ParserMessages::elementNotFinished,
1115 StringMessageArg(currentElement().type()->name()));
1116 implyCurrentElementEnd(event->location());
1118 if (!currentElement().isFinished() && validate())
1119 message(ParserMessages::elementEndTagNotFinished,
1120 StringMessageArg(currentElement().type()->name()));
1121 if (currentElement().included())
1122 event->setIncluded();
1123 noteEndElement(event->included());
1124 eventHandler().endElement(event);
1125 popElement();
1128 void Parser::implyCurrentElementEnd(const Location &loc)
1130 if (!sd().omittag())
1131 message(ParserMessages::omitEndTagOmittag,
1132 StringMessageArg(currentElement().type()->name()),
1133 currentElement().startLocation());
1134 else {
1135 const ElementDefinition *def = currentElement().type()->definition();
1136 if (def && !def->canOmitEndTag())
1137 message(ParserMessages::omitEndTagDeclare,
1138 StringMessageArg(currentElement().type()->name()),
1139 currentElement().startLocation());
1141 EndElementEvent *event
1142 = new (eventAllocator()) EndElementEvent(currentElement().type(),
1143 currentDtdPointer(),
1144 loc,
1146 if (currentElement().included())
1147 event->setIncluded();
1148 noteEndElement(event->included());
1149 eventHandler().endElement(event);
1150 popElement();
1153 void Parser::extendData()
1155 XcharMap<PackedBoolean> isNormal(normalMap());
1156 InputSource *in = currentInput();
1157 size_t length = in->currentTokenLength();
1158 // This is one of the parser's inner loops, so it needs to be fast.
1159 while (isNormal[in->tokenChar(messenger())])
1160 length++;
1161 in->endToken(length);
1164 void Parser::extendContentS()
1166 InputSource *in = currentInput();
1167 size_t length = in->currentTokenLength();
1168 XcharMap<PackedBoolean> isNormal(normalMap());
1169 for (;;) {
1170 Xchar ch = in->tokenChar(messenger());
1171 if (!syntax().isS(ch) || !isNormal[ch])
1172 break;
1173 length++;
1175 in->endToken(length);
1178 void Parser::handleBadStartTag(const ElementType *e,
1179 StartElementEvent *event,
1180 Boolean netEnabling)
1182 IList<Undo> undoList;
1183 IList<Event> eventList;
1184 keepMessages();
1185 for (;;) {
1186 Vector<const ElementType *> missing;
1187 findMissingTag(e, missing);
1188 if (missing.size() == 1) {
1189 queueElementEvents(eventList);
1190 const ElementType *m = missing[0];
1191 message(ParserMessages::missingElementInferred,
1192 StringMessageArg(e->name()),
1193 StringMessageArg(m->name()));
1194 AttributeList *attributes
1195 = allocAttributeList(m->attributeDef(), 1);
1196 // this will give an error if the element has a required attribute
1197 attributes->finish(*this);
1198 StartElementEvent *inferEvent
1199 = new (eventAllocator()) StartElementEvent(m,
1200 currentDtdPointer(),
1201 attributes,
1202 event->location(),
1204 if (!currentElement().tryTransition(m))
1205 inferEvent->setIncluded();
1206 pushElementCheck(m, inferEvent, 0);
1207 if (!currentElement().tryTransition(e))
1208 event->setIncluded();
1209 pushElementCheck(e, event, netEnabling);
1210 return;
1212 if (missing.size() > 0) {
1213 queueElementEvents(eventList);
1214 Vector<StringC> missingNames;
1215 for (size_t i = 0; i < missing.size(); i++)
1216 missingNames.push_back(missing[i]->name());
1217 message(ParserMessages::missingElementMultiple,
1218 StringMessageArg(e->name()),
1219 StringVectorMessageArg(missingNames));
1220 pushElementCheck(e, event, netEnabling);
1221 return;
1223 if (!sd().omittag()
1224 || !currentElement().isFinished()
1225 || tagLevel() == 0
1226 || !currentElement().type()->definition()->canOmitEndTag())
1227 break;
1228 EndElementEvent *endEvent
1229 = new (eventAllocator()) EndElementEvent(currentElement().type(),
1230 currentDtdPointer(),
1231 event->location(),
1233 eventList.insert(endEvent);
1234 undoList.insert(new (internalAllocator()) UndoEndTag(popSaveElement()));
1236 discardKeptMessages();
1237 undo(undoList);
1238 message(ParserMessages::elementNotAllowed, StringMessageArg(e->name()));
1239 // If element couldn't occur because it was excluded, then
1240 // do the transition here.
1241 (void)currentElement().tryTransition(e);
1242 pushElementCheck(e, event, netEnabling);
1245 void Parser::findMissingTag(const ElementType *e,
1246 Vector<const ElementType *> &v)
1248 if (!currentElement().currentPosition()) {
1249 if (!e)
1250 v.push_back((const ElementType *)0);
1251 return;
1253 if (elementIsExcluded(e))
1254 return;
1255 size_t newSize = 0;
1256 currentElement().matchState().possibleTransitions(v);
1257 // FIXME also get currentInclusions
1258 for (size_t i = 0; i < v.size(); i++) {
1259 if (v[i] && !elementIsExcluded(v[i])) {
1260 Boolean success = 0;
1261 switch (v[i]->definition()->declaredContent()) {
1262 case ElementDefinition::modelGroup:
1264 const CompiledModelGroup *grp
1265 = v[i]->definition()->compiledModelGroup();
1266 MatchState state(grp);
1267 if (!e) {
1268 if (state.tryTransitionPcdata())
1269 success = 1;
1271 else {
1272 if (state.tryTransition(e))
1273 success = 1;
1274 if (!success) {
1275 for (size_t j = 0; j < v[i]->definition()->nInclusions(); j++)
1276 if (v[i]->definition()->inclusion(j) == e) {
1277 success = 1;
1278 break;
1281 if (success) {
1282 for (size_t j = 0; j < v[i]->definition()->nExclusions(); j++)
1283 if (v[i]->definition()->exclusion(j) == e) {
1284 success = 0;
1285 break;
1290 break;
1291 #if 0
1292 case ElementDefinition::any:
1293 success = 1;
1294 break;
1295 #endif
1296 case ElementDefinition::cdata:
1297 case ElementDefinition::rcdata:
1298 if (e == 0)
1299 success = 1;
1300 break;
1301 default:
1302 break;
1304 if (success)
1305 v[newSize++] = v[i];
1308 v.resize(newSize);
1309 // Sort them according to the order of their occurrence in the DTD.
1310 // Do an insertion sort.
1311 for (size_t i = 1; i < v.size(); i++) {
1312 const ElementType *tem = v[i];
1313 size_t j;
1314 for (j = i; j > 0 && v[j - 1]->index() > tem->index(); j--)
1315 v[j] = v[j - 1];
1316 v[j] = tem;
#if 0
// This produces messages that are too verbose
// This doesn't try to be very efficient.
// 0 for #pcdata

// Disabled code, kept for reference.  Fills v with the element types
// (0 meaning #pcdata) that could occur at the current position,
// tentatively implying start and end tags to look further, then
// undoing those changes before filtering the result.
void Parser::getAllowedElementTypes(Vector<const ElementType *> &v)
{
  v.clear();
  // FIXME get a list of all inclusions first
  // getCurrentInclusions(v);
  // x says whether each element of v was excluded
  Vector<PackedBoolean> x(v.size(), 0);
  unsigned startImpliedCount = 0;
  IList<Undo> undoList;
  for (;;) {
    if (currentElement().currentPosition()) {
      // have a model group
      size_t i = v.size();
      currentElement().matchState().possibleTransitions(v);
      x.resize(v.size());
      for (size_t j = i; j < v.size(); j++)
        x[j] = (v[j] && elementIsExcluded(v[j]));
      if (!sd().omittag())
        break;
      // Try to imply a tag
      if (currentElement().isFinished()) {
        if (tagLevel() == 0)
          break;
        if (startImpliedCount)
          break;
        const ElementDefinition *def = currentElement().type()->definition();
        if (def && def->canOmitEndTag())
          undoList.insert(new (internalAllocator())
                          UndoEndTag(popSaveElement()));
        else
          break;
      }
      else {
        const LeafContentToken *token = currentElement().impliedStartTag();
        if (!token)
          break;
        const ElementType *e = token->elementType();
        if (elementIsExcluded(e))
          break;
        const ElementDefinition *def = e->definition();
        if (!def
            || def->undefined()
            || (def->declaredContent() != ElementDefinition::modelGroup
                && def->declaredContent() != ElementDefinition::any)
            || !def->canOmitStartTag())
          break;
        undoList.insert(new (internalAllocator()) UndoStartTag);
        startImpliedCount++;
        pushElement(new (internalAllocator()) OpenElement(e,
                                                          0,
                                                          0,
                                                          0,
                                                          Location()));
        if (checkImplyLoop(startImpliedCount))
          break;
        for (size_t i = 0; i < def->nInclusions(); i++)
          if (!elementIsExcluded(def->inclusion(i))) {
            v.push_back(def->inclusion(i));
            x.push_back(0);
          }
      }
    }
    else {
      // must be allowed #pcdata
      v.push_back((const ElementType *)0);
      x.push_back((PackedBoolean)0);
      break;
    }
  }
  undo(undoList);
  // Remove exclusions and duplicates and undefined
  size_t newSize = 0;
  for (size_t i = 0; i < v.size(); i++)
    if (!x[i] && (!v[i] || !v[i]->definition()->undefined())) {
      Boolean dup = 0;
      for (size_t j = 0; j < newSize; j++)
        if (v[i] == v[j]) {
          dup = 1;
          break;
        }
      if (!dup)
        v[newSize++] = v[i];
    }
  v.resize(newSize);
}
#endif
// Close the namespace opened at the top of the file when SP_NAMESPACE
// is defined.
#ifdef SP_NAMESPACE
}
#endif