krnl386.exe16: Don't overflow when calculating size.
[wine.git] / libs / xml2 / parser.c
blob794799794f1401c78cfaf9dac4d48acaabf8a1cd
1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
28 * See Copyright for the status of this software.
30 * daniel@veillard.com
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
38 #define IN_LIBXML
39 #include "libxml.h"
41 #if defined(_WIN32)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <ctype.h>
53 #include <stdlib.h>
54 #include <libxml/xmlmemory.h>
55 #include <libxml/threads.h>
56 #include <libxml/globals.h>
57 #include <libxml/tree.h>
58 #include <libxml/parser.h>
59 #include <libxml/parserInternals.h>
60 #include <libxml/valid.h>
61 #include <libxml/entities.h>
62 #include <libxml/xmlerror.h>
63 #include <libxml/encoding.h>
64 #include <libxml/xmlIO.h>
65 #include <libxml/uri.h>
66 #ifdef LIBXML_CATALOG_ENABLED
67 #include <libxml/catalog.h>
68 #endif
69 #ifdef LIBXML_SCHEMAS_ENABLED
70 #include <libxml/xmlschemastypes.h>
71 #include <libxml/relaxng.h>
72 #endif
74 #include "buf.h"
75 #include "enc.h"
77 struct _xmlStartTag {
78 const xmlChar *prefix;
79 const xmlChar *URI;
80 int line;
81 int nsNr;
84 static void
85 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
87 static xmlParserCtxtPtr
88 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
89 const xmlChar *base, xmlParserCtxtPtr pctx);
91 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
93 static int
94 xmlParseElementStart(xmlParserCtxtPtr ctxt);
96 static void
97 xmlParseElementEnd(xmlParserCtxtPtr ctxt);
/************************************************************************
 *									*
 *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
 *									*
 ************************************************************************/

#define XML_MAX_HUGE_LENGTH 1000000000

#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 *    replacement over the size in byte of the input indicates that you have
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
 *    replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
119 * xmlParserEntityCheck
121 * Function to check non-linear entity expansion behaviour
122 * This is here to detect and stop exponential linear entity expansion
123 * This is not a limitation of the parser but a safety
124 * boundary feature. It can be disabled with the XML_PARSE_HUGE
125 * parser option.
127 static int
128 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
129 xmlEntityPtr ent, size_t replacement)
131 size_t consumed = 0;
132 int i;
134 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
135 return (0);
136 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
137 return (1);
140 * This may look absurd but is needed to detect
141 * entities problems
143 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
144 (ent->content != NULL) && (ent->checked == 0) &&
145 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
146 unsigned long oldnbent = ctxt->nbentities, diff;
147 xmlChar *rep;
149 ent->checked = 1;
151 ++ctxt->depth;
152 rep = xmlStringDecodeEntities(ctxt, ent->content,
153 XML_SUBSTITUTE_REF, 0, 0, 0);
154 --ctxt->depth;
155 if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
156 ent->content[0] = 0;
159 diff = ctxt->nbentities - oldnbent + 1;
160 if (diff > INT_MAX / 2)
161 diff = INT_MAX / 2;
162 ent->checked = diff * 2;
163 if (rep != NULL) {
164 if (xmlStrchr(rep, '<'))
165 ent->checked |= 1;
166 xmlFree(rep);
167 rep = NULL;
172 * Prevent entity exponential check, not just replacement while
173 * parsing the DTD
174 * The check is potentially costly so do that only once in a thousand
176 if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
177 (ctxt->nbentities % 1024 == 0)) {
178 for (i = 0;i < ctxt->inputNr;i++) {
179 consumed += ctxt->inputTab[i]->consumed +
180 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
182 if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
183 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
184 ctxt->instate = XML_PARSER_EOF;
185 return (1);
187 consumed = 0;
192 if (replacement != 0) {
193 if (replacement < XML_MAX_TEXT_LENGTH)
194 return(0);
197 * If the volume of entity copy reaches 10 times the
198 * amount of parsed data and over the large text threshold
199 * then that's very likely to be an abuse.
201 if (ctxt->input != NULL) {
202 consumed = ctxt->input->consumed +
203 (ctxt->input->cur - ctxt->input->base);
205 consumed += ctxt->sizeentities;
207 if (replacement < XML_PARSER_NON_LINEAR * consumed)
208 return(0);
209 } else if (size != 0) {
211 * Do the check based on the replacement size of the entity
213 if (size < XML_PARSER_BIG_ENTITY)
214 return(0);
217 * A limit on the amount of text data reasonably used
219 if (ctxt->input != NULL) {
220 consumed = ctxt->input->consumed +
221 (ctxt->input->cur - ctxt->input->base);
223 consumed += ctxt->sizeentities;
225 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
226 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
227 return (0);
228 } else if (ent != NULL) {
230 * use the number of parsed entities in the replacement
232 size = ent->checked / 2;
235 * The amount of data parsed counting entities size only once
237 if (ctxt->input != NULL) {
238 consumed = ctxt->input->consumed +
239 (ctxt->input->cur - ctxt->input->base);
241 consumed += ctxt->sizeentities;
244 * Check the density of entities for the amount of data
245 * knowing an entity reference will take at least 3 bytes
247 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
248 return (0);
249 } else {
251 * strange we got no data for checking
253 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
254 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
255 (ctxt->nbentities <= 10000))
256 return (0);
258 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
259 return (1);
/**
 * xmlParserMaxDepth:
 *
 * arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW that's the minimal amount of data
 * the parser expected to have received. It is not a hard
 * limit but an optimization when reading strings like Names
 * It is not strictly needed as long as inputs available characters
 * are followed by 0, which should be provided by the I/O level
 */
#define XML_PARSER_CHUNK_SIZE 100
/*
 * List of XML prefixed PI allowed by W3C specs
 * (NULL-terminated).
 */

static const char* const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
301 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
302 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
303 const xmlChar **str);
305 static xmlParserErrors
306 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
307 xmlSAXHandlerPtr sax,
308 void *user_data, int depth, const xmlChar *URL,
309 const xmlChar *ID, xmlNodePtr *list);
311 static int
312 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
313 const char *encoding);
314 #ifdef LIBXML_LEGACY_ENABLED
315 static void
316 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
317 xmlNodePtr lastNode);
318 #endif /* LIBXML_LEGACY_ENABLED */
320 static xmlParserErrors
321 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
322 const xmlChar *string, void *user_data, xmlNodePtr *lst);
324 static int
325 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
327 /************************************************************************
329 * Some factorized error routines *
331 ************************************************************************/
334 * xmlErrAttributeDup:
335 * @ctxt: an XML parser context
336 * @prefix: the attribute prefix
337 * @localname: the attribute localname
339 * Handle a redefinition of attribute error
341 static void
342 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
343 const xmlChar * localname)
345 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
346 (ctxt->instate == XML_PARSER_EOF))
347 return;
348 if (ctxt != NULL)
349 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
351 if (prefix == NULL)
352 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
353 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
354 (const char *) localname, NULL, NULL, 0, 0,
355 "Attribute %s redefined\n", localname);
356 else
357 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
358 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
359 (const char *) prefix, (const char *) localname,
360 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
361 localname);
362 if (ctxt != NULL) {
363 ctxt->wellFormed = 0;
364 if (ctxt->recovery == 0)
365 ctxt->disableSAX = 1;
370 * xmlFatalErr:
371 * @ctxt: an XML parser context
372 * @error: the error number
373 * @extra: extra information string
375 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
377 static void
378 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
380 const char *errmsg;
382 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
383 (ctxt->instate == XML_PARSER_EOF))
384 return;
385 switch (error) {
386 case XML_ERR_INVALID_HEX_CHARREF:
387 errmsg = "CharRef: invalid hexadecimal value";
388 break;
389 case XML_ERR_INVALID_DEC_CHARREF:
390 errmsg = "CharRef: invalid decimal value";
391 break;
392 case XML_ERR_INVALID_CHARREF:
393 errmsg = "CharRef: invalid value";
394 break;
395 case XML_ERR_INTERNAL_ERROR:
396 errmsg = "internal error";
397 break;
398 case XML_ERR_PEREF_AT_EOF:
399 errmsg = "PEReference at end of document";
400 break;
401 case XML_ERR_PEREF_IN_PROLOG:
402 errmsg = "PEReference in prolog";
403 break;
404 case XML_ERR_PEREF_IN_EPILOG:
405 errmsg = "PEReference in epilog";
406 break;
407 case XML_ERR_PEREF_NO_NAME:
408 errmsg = "PEReference: no name";
409 break;
410 case XML_ERR_PEREF_SEMICOL_MISSING:
411 errmsg = "PEReference: expecting ';'";
412 break;
413 case XML_ERR_ENTITY_LOOP:
414 errmsg = "Detected an entity reference loop";
415 break;
416 case XML_ERR_ENTITY_NOT_STARTED:
417 errmsg = "EntityValue: \" or ' expected";
418 break;
419 case XML_ERR_ENTITY_PE_INTERNAL:
420 errmsg = "PEReferences forbidden in internal subset";
421 break;
422 case XML_ERR_ENTITY_NOT_FINISHED:
423 errmsg = "EntityValue: \" or ' expected";
424 break;
425 case XML_ERR_ATTRIBUTE_NOT_STARTED:
426 errmsg = "AttValue: \" or ' expected";
427 break;
428 case XML_ERR_LT_IN_ATTRIBUTE:
429 errmsg = "Unescaped '<' not allowed in attributes values";
430 break;
431 case XML_ERR_LITERAL_NOT_STARTED:
432 errmsg = "SystemLiteral \" or ' expected";
433 break;
434 case XML_ERR_LITERAL_NOT_FINISHED:
435 errmsg = "Unfinished System or Public ID \" or ' expected";
436 break;
437 case XML_ERR_MISPLACED_CDATA_END:
438 errmsg = "Sequence ']]>' not allowed in content";
439 break;
440 case XML_ERR_URI_REQUIRED:
441 errmsg = "SYSTEM or PUBLIC, the URI is missing";
442 break;
443 case XML_ERR_PUBID_REQUIRED:
444 errmsg = "PUBLIC, the Public Identifier is missing";
445 break;
446 case XML_ERR_HYPHEN_IN_COMMENT:
447 errmsg = "Comment must not contain '--' (double-hyphen)";
448 break;
449 case XML_ERR_PI_NOT_STARTED:
450 errmsg = "xmlParsePI : no target name";
451 break;
452 case XML_ERR_RESERVED_XML_NAME:
453 errmsg = "Invalid PI name";
454 break;
455 case XML_ERR_NOTATION_NOT_STARTED:
456 errmsg = "NOTATION: Name expected here";
457 break;
458 case XML_ERR_NOTATION_NOT_FINISHED:
459 errmsg = "'>' required to close NOTATION declaration";
460 break;
461 case XML_ERR_VALUE_REQUIRED:
462 errmsg = "Entity value required";
463 break;
464 case XML_ERR_URI_FRAGMENT:
465 errmsg = "Fragment not allowed";
466 break;
467 case XML_ERR_ATTLIST_NOT_STARTED:
468 errmsg = "'(' required to start ATTLIST enumeration";
469 break;
470 case XML_ERR_NMTOKEN_REQUIRED:
471 errmsg = "NmToken expected in ATTLIST enumeration";
472 break;
473 case XML_ERR_ATTLIST_NOT_FINISHED:
474 errmsg = "')' required to finish ATTLIST enumeration";
475 break;
476 case XML_ERR_MIXED_NOT_STARTED:
477 errmsg = "MixedContentDecl : '|' or ')*' expected";
478 break;
479 case XML_ERR_PCDATA_REQUIRED:
480 errmsg = "MixedContentDecl : '#PCDATA' expected";
481 break;
482 case XML_ERR_ELEMCONTENT_NOT_STARTED:
483 errmsg = "ContentDecl : Name or '(' expected";
484 break;
485 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
486 errmsg = "ContentDecl : ',' '|' or ')' expected";
487 break;
488 case XML_ERR_PEREF_IN_INT_SUBSET:
489 errmsg =
490 "PEReference: forbidden within markup decl in internal subset";
491 break;
492 case XML_ERR_GT_REQUIRED:
493 errmsg = "expected '>'";
494 break;
495 case XML_ERR_CONDSEC_INVALID:
496 errmsg = "XML conditional section '[' expected";
497 break;
498 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
499 errmsg = "Content error in the external subset";
500 break;
501 case XML_ERR_CONDSEC_INVALID_KEYWORD:
502 errmsg =
503 "conditional section INCLUDE or IGNORE keyword expected";
504 break;
505 case XML_ERR_CONDSEC_NOT_FINISHED:
506 errmsg = "XML conditional section not closed";
507 break;
508 case XML_ERR_XMLDECL_NOT_STARTED:
509 errmsg = "Text declaration '<?xml' required";
510 break;
511 case XML_ERR_XMLDECL_NOT_FINISHED:
512 errmsg = "parsing XML declaration: '?>' expected";
513 break;
514 case XML_ERR_EXT_ENTITY_STANDALONE:
515 errmsg = "external parsed entities cannot be standalone";
516 break;
517 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
518 errmsg = "EntityRef: expecting ';'";
519 break;
520 case XML_ERR_DOCTYPE_NOT_FINISHED:
521 errmsg = "DOCTYPE improperly terminated";
522 break;
523 case XML_ERR_LTSLASH_REQUIRED:
524 errmsg = "EndTag: '</' not found";
525 break;
526 case XML_ERR_EQUAL_REQUIRED:
527 errmsg = "expected '='";
528 break;
529 case XML_ERR_STRING_NOT_CLOSED:
530 errmsg = "String not closed expecting \" or '";
531 break;
532 case XML_ERR_STRING_NOT_STARTED:
533 errmsg = "String not started expecting ' or \"";
534 break;
535 case XML_ERR_ENCODING_NAME:
536 errmsg = "Invalid XML encoding name";
537 break;
538 case XML_ERR_STANDALONE_VALUE:
539 errmsg = "standalone accepts only 'yes' or 'no'";
540 break;
541 case XML_ERR_DOCUMENT_EMPTY:
542 errmsg = "Document is empty";
543 break;
544 case XML_ERR_DOCUMENT_END:
545 errmsg = "Extra content at the end of the document";
546 break;
547 case XML_ERR_NOT_WELL_BALANCED:
548 errmsg = "chunk is not well balanced";
549 break;
550 case XML_ERR_EXTRA_CONTENT:
551 errmsg = "extra content at the end of well balanced chunk";
552 break;
553 case XML_ERR_VERSION_MISSING:
554 errmsg = "Malformed declaration expecting version";
555 break;
556 case XML_ERR_NAME_TOO_LONG:
557 errmsg = "Name too long";
558 break;
559 #if 0
560 case:
561 errmsg = "";
562 break;
563 #endif
564 default:
565 errmsg = "Unregistered error message";
567 if (ctxt != NULL)
568 ctxt->errNo = error;
569 if (info == NULL) {
570 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
571 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
572 errmsg);
573 } else {
574 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
575 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
576 errmsg, info);
578 if (ctxt != NULL) {
579 ctxt->wellFormed = 0;
580 if (ctxt->recovery == 0)
581 ctxt->disableSAX = 1;
586 * xmlFatalErrMsg:
587 * @ctxt: an XML parser context
588 * @error: the error number
589 * @msg: the error message
591 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
593 static void LIBXML_ATTR_FORMAT(3,0)
594 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595 const char *msg)
597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
600 if (ctxt != NULL)
601 ctxt->errNo = error;
602 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
603 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
604 if (ctxt != NULL) {
605 ctxt->wellFormed = 0;
606 if (ctxt->recovery == 0)
607 ctxt->disableSAX = 1;
612 * xmlWarningMsg:
613 * @ctxt: an XML parser context
614 * @error: the error number
615 * @msg: the error message
616 * @str1: extra data
617 * @str2: extra data
619 * Handle a warning.
621 static void LIBXML_ATTR_FORMAT(3,0)
622 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
623 const char *msg, const xmlChar *str1, const xmlChar *str2)
625 xmlStructuredErrorFunc schannel = NULL;
627 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
628 (ctxt->instate == XML_PARSER_EOF))
629 return;
630 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
631 (ctxt->sax->initialized == XML_SAX2_MAGIC))
632 schannel = ctxt->sax->serror;
633 if (ctxt != NULL) {
634 __xmlRaiseError(schannel,
635 (ctxt->sax) ? ctxt->sax->warning : NULL,
636 ctxt->userData,
637 ctxt, NULL, XML_FROM_PARSER, error,
638 XML_ERR_WARNING, NULL, 0,
639 (const char *) str1, (const char *) str2, NULL, 0, 0,
640 msg, (const char *) str1, (const char *) str2);
641 } else {
642 __xmlRaiseError(schannel, NULL, NULL,
643 ctxt, NULL, XML_FROM_PARSER, error,
644 XML_ERR_WARNING, NULL, 0,
645 (const char *) str1, (const char *) str2, NULL, 0, 0,
646 msg, (const char *) str1, (const char *) str2);
651 * xmlValidityError:
652 * @ctxt: an XML parser context
653 * @error: the error number
654 * @msg: the error message
655 * @str1: extra data
657 * Handle a validity error.
659 static void LIBXML_ATTR_FORMAT(3,0)
660 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
661 const char *msg, const xmlChar *str1, const xmlChar *str2)
663 xmlStructuredErrorFunc schannel = NULL;
665 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
666 (ctxt->instate == XML_PARSER_EOF))
667 return;
668 if (ctxt != NULL) {
669 ctxt->errNo = error;
670 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
671 schannel = ctxt->sax->serror;
673 if (ctxt != NULL) {
674 __xmlRaiseError(schannel,
675 ctxt->vctxt.error, ctxt->vctxt.userData,
676 ctxt, NULL, XML_FROM_DTD, error,
677 XML_ERR_ERROR, NULL, 0, (const char *) str1,
678 (const char *) str2, NULL, 0, 0,
679 msg, (const char *) str1, (const char *) str2);
680 ctxt->valid = 0;
681 } else {
682 __xmlRaiseError(schannel, NULL, NULL,
683 ctxt, NULL, XML_FROM_DTD, error,
684 XML_ERR_ERROR, NULL, 0, (const char *) str1,
685 (const char *) str2, NULL, 0, 0,
686 msg, (const char *) str1, (const char *) str2);
691 * xmlFatalErrMsgInt:
692 * @ctxt: an XML parser context
693 * @error: the error number
694 * @msg: the error message
695 * @val: an integer value
697 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
699 static void LIBXML_ATTR_FORMAT(3,0)
700 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
701 const char *msg, int val)
703 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
704 (ctxt->instate == XML_PARSER_EOF))
705 return;
706 if (ctxt != NULL)
707 ctxt->errNo = error;
708 __xmlRaiseError(NULL, NULL, NULL,
709 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
710 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
711 if (ctxt != NULL) {
712 ctxt->wellFormed = 0;
713 if (ctxt->recovery == 0)
714 ctxt->disableSAX = 1;
719 * xmlFatalErrMsgStrIntStr:
720 * @ctxt: an XML parser context
721 * @error: the error number
722 * @msg: the error message
723 * @str1: an string info
724 * @val: an integer value
725 * @str2: an string info
727 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
729 static void LIBXML_ATTR_FORMAT(3,0)
730 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
731 const char *msg, const xmlChar *str1, int val,
732 const xmlChar *str2)
734 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
735 (ctxt->instate == XML_PARSER_EOF))
736 return;
737 if (ctxt != NULL)
738 ctxt->errNo = error;
739 __xmlRaiseError(NULL, NULL, NULL,
740 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
741 NULL, 0, (const char *) str1, (const char *) str2,
742 NULL, val, 0, msg, str1, val, str2);
743 if (ctxt != NULL) {
744 ctxt->wellFormed = 0;
745 if (ctxt->recovery == 0)
746 ctxt->disableSAX = 1;
751 * xmlFatalErrMsgStr:
752 * @ctxt: an XML parser context
753 * @error: the error number
754 * @msg: the error message
755 * @val: a string value
757 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
759 static void LIBXML_ATTR_FORMAT(3,0)
760 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
761 const char *msg, const xmlChar * val)
763 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
764 (ctxt->instate == XML_PARSER_EOF))
765 return;
766 if (ctxt != NULL)
767 ctxt->errNo = error;
768 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
769 XML_FROM_PARSER, error, XML_ERR_FATAL,
770 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
771 val);
772 if (ctxt != NULL) {
773 ctxt->wellFormed = 0;
774 if (ctxt->recovery == 0)
775 ctxt->disableSAX = 1;
780 * xmlErrMsgStr:
781 * @ctxt: an XML parser context
782 * @error: the error number
783 * @msg: the error message
784 * @val: a string value
786 * Handle a non fatal parser error
788 static void LIBXML_ATTR_FORMAT(3,0)
789 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
790 const char *msg, const xmlChar * val)
792 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
793 (ctxt->instate == XML_PARSER_EOF))
794 return;
795 if (ctxt != NULL)
796 ctxt->errNo = error;
797 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
798 XML_FROM_PARSER, error, XML_ERR_ERROR,
799 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
800 val);
804 * xmlNsErr:
805 * @ctxt: an XML parser context
806 * @error: the error number
807 * @msg: the message
808 * @info1: extra information string
809 * @info2: extra information string
811 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
813 static void LIBXML_ATTR_FORMAT(3,0)
814 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
815 const char *msg,
816 const xmlChar * info1, const xmlChar * info2,
817 const xmlChar * info3)
819 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
820 (ctxt->instate == XML_PARSER_EOF))
821 return;
822 if (ctxt != NULL)
823 ctxt->errNo = error;
824 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
825 XML_ERR_ERROR, NULL, 0, (const char *) info1,
826 (const char *) info2, (const char *) info3, 0, 0, msg,
827 info1, info2, info3);
828 if (ctxt != NULL)
829 ctxt->nsWellFormed = 0;
833 * xmlNsWarn
834 * @ctxt: an XML parser context
835 * @error: the error number
836 * @msg: the message
837 * @info1: extra information string
838 * @info2: extra information string
840 * Handle a namespace warning error
842 static void LIBXML_ATTR_FORMAT(3,0)
843 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
844 const char *msg,
845 const xmlChar * info1, const xmlChar * info2,
846 const xmlChar * info3)
848 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
849 (ctxt->instate == XML_PARSER_EOF))
850 return;
851 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
852 XML_ERR_WARNING, NULL, 0, (const char *) info1,
853 (const char *) info2, (const char *) info3, 0, 0, msg,
854 info1, info2, info3);
857 /************************************************************************
859 * Library wide options *
861 ************************************************************************/
864 * xmlHasFeature:
865 * @feature: the feature to be examined
867 * Examines if the library has been compiled with a given feature.
869 * Returns a non-zero value if the feature exist, otherwise zero.
870 * Returns zero (0) if the feature does not exist or an unknown
871 * unknown feature is requested, non-zero otherwise.
874 xmlHasFeature(xmlFeature feature)
876 switch (feature) {
877 case XML_WITH_THREAD:
878 #ifdef LIBXML_THREAD_ENABLED
879 return(1);
880 #else
881 return(0);
882 #endif
883 case XML_WITH_TREE:
884 #ifdef LIBXML_TREE_ENABLED
885 return(1);
886 #else
887 return(0);
888 #endif
889 case XML_WITH_OUTPUT:
890 #ifdef LIBXML_OUTPUT_ENABLED
891 return(1);
892 #else
893 return(0);
894 #endif
895 case XML_WITH_PUSH:
896 #ifdef LIBXML_PUSH_ENABLED
897 return(1);
898 #else
899 return(0);
900 #endif
901 case XML_WITH_READER:
902 #ifdef LIBXML_READER_ENABLED
903 return(1);
904 #else
905 return(0);
906 #endif
907 case XML_WITH_PATTERN:
908 #ifdef LIBXML_PATTERN_ENABLED
909 return(1);
910 #else
911 return(0);
912 #endif
913 case XML_WITH_WRITER:
914 #ifdef LIBXML_WRITER_ENABLED
915 return(1);
916 #else
917 return(0);
918 #endif
919 case XML_WITH_SAX1:
920 #ifdef LIBXML_SAX1_ENABLED
921 return(1);
922 #else
923 return(0);
924 #endif
925 case XML_WITH_FTP:
926 #ifdef LIBXML_FTP_ENABLED
927 return(1);
928 #else
929 return(0);
930 #endif
931 case XML_WITH_HTTP:
932 #ifdef LIBXML_HTTP_ENABLED
933 return(1);
934 #else
935 return(0);
936 #endif
937 case XML_WITH_VALID:
938 #ifdef LIBXML_VALID_ENABLED
939 return(1);
940 #else
941 return(0);
942 #endif
943 case XML_WITH_HTML:
944 #ifdef LIBXML_HTML_ENABLED
945 return(1);
946 #else
947 return(0);
948 #endif
949 case XML_WITH_LEGACY:
950 #ifdef LIBXML_LEGACY_ENABLED
951 return(1);
952 #else
953 return(0);
954 #endif
955 case XML_WITH_C14N:
956 #ifdef LIBXML_C14N_ENABLED
957 return(1);
958 #else
959 return(0);
960 #endif
961 case XML_WITH_CATALOG:
962 #ifdef LIBXML_CATALOG_ENABLED
963 return(1);
964 #else
965 return(0);
966 #endif
967 case XML_WITH_XPATH:
968 #ifdef LIBXML_XPATH_ENABLED
969 return(1);
970 #else
971 return(0);
972 #endif
973 case XML_WITH_XPTR:
974 #ifdef LIBXML_XPTR_ENABLED
975 return(1);
976 #else
977 return(0);
978 #endif
979 case XML_WITH_XINCLUDE:
980 #ifdef LIBXML_XINCLUDE_ENABLED
981 return(1);
982 #else
983 return(0);
984 #endif
985 case XML_WITH_ICONV:
986 #ifdef LIBXML_ICONV_ENABLED
987 return(1);
988 #else
989 return(0);
990 #endif
991 case XML_WITH_ISO8859X:
992 #ifdef LIBXML_ISO8859X_ENABLED
993 return(1);
994 #else
995 return(0);
996 #endif
997 case XML_WITH_UNICODE:
998 #ifdef LIBXML_UNICODE_ENABLED
999 return(1);
1000 #else
1001 return(0);
1002 #endif
1003 case XML_WITH_REGEXP:
1004 #ifdef LIBXML_REGEXP_ENABLED
1005 return(1);
1006 #else
1007 return(0);
1008 #endif
1009 case XML_WITH_AUTOMATA:
1010 #ifdef LIBXML_AUTOMATA_ENABLED
1011 return(1);
1012 #else
1013 return(0);
1014 #endif
1015 case XML_WITH_EXPR:
1016 #ifdef LIBXML_EXPR_ENABLED
1017 return(1);
1018 #else
1019 return(0);
1020 #endif
1021 case XML_WITH_SCHEMAS:
1022 #ifdef LIBXML_SCHEMAS_ENABLED
1023 return(1);
1024 #else
1025 return(0);
1026 #endif
1027 case XML_WITH_SCHEMATRON:
1028 #ifdef LIBXML_SCHEMATRON_ENABLED
1029 return(1);
1030 #else
1031 return(0);
1032 #endif
1033 case XML_WITH_MODULES:
1034 #ifdef LIBXML_MODULES_ENABLED
1035 return(1);
1036 #else
1037 return(0);
1038 #endif
1039 case XML_WITH_DEBUG:
1040 #ifdef LIBXML_DEBUG_ENABLED
1041 return(1);
1042 #else
1043 return(0);
1044 #endif
1045 case XML_WITH_DEBUG_MEM:
1046 #ifdef DEBUG_MEMORY_LOCATION
1047 return(1);
1048 #else
1049 return(0);
1050 #endif
1051 case XML_WITH_DEBUG_RUN:
1052 #ifdef LIBXML_DEBUG_RUNTIME
1053 return(1);
1054 #else
1055 return(0);
1056 #endif
1057 case XML_WITH_ZLIB:
1058 #ifdef LIBXML_ZLIB_ENABLED
1059 return(1);
1060 #else
1061 return(0);
1062 #endif
1063 case XML_WITH_LZMA:
1064 #ifdef LIBXML_LZMA_ENABLED
1065 return(1);
1066 #else
1067 return(0);
1068 #endif
1069 case XML_WITH_ICU:
1070 #ifdef LIBXML_ICU_ENABLED
1071 return(1);
1072 #else
1073 return(0);
1074 #endif
1075 default:
1076 break;
1078 return(0);
1081 /************************************************************************
1083 * SAX2 defaulted attributes handling *
1085 ************************************************************************/
1088 * xmlDetectSAX2:
1089 * @ctxt: an XML parser context
1091 * Do the SAX2 detection and specific initialization
1093 static void
1094 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1095 xmlSAXHandlerPtr sax;
1097 /* Avoid unused variable warning if features are disabled. */
1098 (void) sax;
1100 if (ctxt == NULL) return;
1101 sax = ctxt->sax;
1102 #ifdef LIBXML_SAX1_ENABLED
1103 if ((sax) && (sax->initialized == XML_SAX2_MAGIC) &&
1104 ((sax->startElementNs != NULL) ||
1105 (sax->endElementNs != NULL) ||
1106 ((sax->startElement == NULL) && (sax->endElement == NULL))))
1107 ctxt->sax2 = 1;
1108 #else
1109 ctxt->sax2 = 1;
1110 #endif /* LIBXML_SAX1_ENABLED */
1112 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1113 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1114 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1115 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1116 (ctxt->str_xml_ns == NULL)) {
1117 xmlErrMemory(ctxt, NULL);
1121 typedef struct _xmlDefAttrs xmlDefAttrs;
1122 typedef xmlDefAttrs *xmlDefAttrsPtr;
1123 struct _xmlDefAttrs {
1124 int nbAttrs; /* number of defaulted attributes on that element */
1125 int maxAttrs; /* the size of the array */
1126 #if __STDC_VERSION__ >= 199901L
1127 /* Using a C99 flexible array member avoids UBSan errors. */
1128 const xmlChar *values[]; /* array of localname/prefix/values/external */
1129 #else
1130 const xmlChar *values[5];
1131 #endif
/**
 * xmlAttrNormalizeSpace:
 * @src: the source string
 * @dst: the target string
 *
 * Normalize the space in non CDATA attribute values:
 * If the attribute type is not CDATA, then the XML processor MUST further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 * Note that the size of dst need to be at least src, and if one doesn't need
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
 * passing src as dst is just fine.
 *
 * Returns a pointer to the start of the normalized value (@dst), or NULL
 * if an argument is NULL or if the normalization was done in place
 * (@src == @dst) and left the string unchanged.
 */
static xmlChar *
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
{
    xmlChar *out;

    if ((src == NULL) || (dst == NULL))
        return(NULL);

    /* keep @dst untouched so the start of the result can be returned */
    out = dst;

    /* drop leading spaces */
    while (*src == 0x20) src++;
    while (*src != 0) {
        if (*src == 0x20) {
            /* collapse a run of spaces; emit one only if text follows */
            while (*src == 0x20) src++;
            if (*src != 0)
                *out++ = 0x20;
        } else {
            *out++ = *src++;
        }
    }
    *out = 0;
    /*
     * The read and write cursors can only meet when working in place and
     * nothing was removed: no conversion was needed.
     */
    if (out == src)
        return(NULL);
    return(dst);
}
1174 * xmlAttrNormalizeSpace2:
1175 * @src: the source string
1177 * Normalize the space in non CDATA attribute values, a slightly more complex
1178 * front end to avoid allocation problems when running on attribute values
1179 * coming from the input.
1181 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1182 * is needed.
1184 static const xmlChar *
1185 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1187 int i;
1188 int remove_head = 0;
1189 int need_realloc = 0;
1190 const xmlChar *cur;
1192 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1193 return(NULL);
1194 i = *len;
1195 if (i <= 0)
1196 return(NULL);
1198 cur = src;
1199 while (*cur == 0x20) {
1200 cur++;
1201 remove_head++;
1203 while (*cur != 0) {
1204 if (*cur == 0x20) {
1205 cur++;
1206 if ((*cur == 0x20) || (*cur == 0)) {
1207 need_realloc = 1;
1208 break;
1210 } else
1211 cur++;
1213 if (need_realloc) {
1214 xmlChar *ret;
1216 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1217 if (ret == NULL) {
1218 xmlErrMemory(ctxt, NULL);
1219 return(NULL);
1221 xmlAttrNormalizeSpace(ret, ret);
1222 *len = (int) strlen((const char *)ret);
1223 return(ret);
1224 } else if (remove_head) {
1225 *len -= remove_head;
1226 memmove(src, src + remove_head, 1 + *len);
1227 return(src);
1229 return(NULL);
1233 * xmlAddDefAttrs:
1234 * @ctxt: an XML parser context
1235 * @fullname: the element fullname
1236 * @fullattr: the attribute fullname
1237 * @value: the attribute value
1239 * Add a defaulted attribute for an element
1241 static void
1242 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1243 const xmlChar *fullname,
1244 const xmlChar *fullattr,
1245 const xmlChar *value) {
1246 xmlDefAttrsPtr defaults;
1247 int len;
1248 const xmlChar *name;
1249 const xmlChar *prefix;
1252 * Allows to detect attribute redefinitions
1254 if (ctxt->attsSpecial != NULL) {
1255 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1256 return;
1259 if (ctxt->attsDefault == NULL) {
1260 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1261 if (ctxt->attsDefault == NULL)
1262 goto mem_error;
1266 * split the element name into prefix:localname , the string found
1267 * are within the DTD and then not associated to namespace names.
1269 name = xmlSplitQName3(fullname, &len);
1270 if (name == NULL) {
1271 name = xmlDictLookup(ctxt->dict, fullname, -1);
1272 prefix = NULL;
1273 } else {
1274 name = xmlDictLookup(ctxt->dict, name, -1);
1275 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1279 * make sure there is some storage
1281 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1282 if (defaults == NULL) {
1283 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1284 (4 * 5) * sizeof(const xmlChar *));
1285 if (defaults == NULL)
1286 goto mem_error;
1287 defaults->nbAttrs = 0;
1288 defaults->maxAttrs = 4;
1289 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1290 defaults, NULL) < 0) {
1291 xmlFree(defaults);
1292 goto mem_error;
1294 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1295 xmlDefAttrsPtr temp;
1297 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1298 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1299 if (temp == NULL)
1300 goto mem_error;
1301 defaults = temp;
1302 defaults->maxAttrs *= 2;
1303 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1304 defaults, NULL) < 0) {
1305 xmlFree(defaults);
1306 goto mem_error;
1311 * Split the element name into prefix:localname , the string found
1312 * are within the DTD and hen not associated to namespace names.
1314 name = xmlSplitQName3(fullattr, &len);
1315 if (name == NULL) {
1316 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1317 prefix = NULL;
1318 } else {
1319 name = xmlDictLookup(ctxt->dict, name, -1);
1320 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1323 defaults->values[5 * defaults->nbAttrs] = name;
1324 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1325 /* intern the string and precompute the end */
1326 len = xmlStrlen(value);
1327 value = xmlDictLookup(ctxt->dict, value, len);
1328 defaults->values[5 * defaults->nbAttrs + 2] = value;
1329 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1330 if (ctxt->external)
1331 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1332 else
1333 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1334 defaults->nbAttrs++;
1336 return;
1338 mem_error:
1339 xmlErrMemory(ctxt, NULL);
1340 return;
1344 * xmlAddSpecialAttr:
1345 * @ctxt: an XML parser context
1346 * @fullname: the element fullname
1347 * @fullattr: the attribute fullname
1348 * @type: the attribute type
1350 * Register this attribute type
1352 static void
1353 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1354 const xmlChar *fullname,
1355 const xmlChar *fullattr,
1356 int type)
1358 if (ctxt->attsSpecial == NULL) {
1359 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1360 if (ctxt->attsSpecial == NULL)
1361 goto mem_error;
1364 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1365 return;
1367 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1368 (void *) (ptrdiff_t) type);
1369 return;
1371 mem_error:
1372 xmlErrMemory(ctxt, NULL);
1373 return;
1377 * xmlCleanSpecialAttrCallback:
1379 * Removes CDATA attributes from the special attribute table
1381 static void
1382 xmlCleanSpecialAttrCallback(void *payload, void *data,
1383 const xmlChar *fullname, const xmlChar *fullattr,
1384 const xmlChar *unused ATTRIBUTE_UNUSED) {
1385 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1387 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1388 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1393 * xmlCleanSpecialAttr:
1394 * @ctxt: an XML parser context
1396 * Trim the list of attributes defined to remove all those of type
1397 * CDATA as they are not special. This call should be done when finishing
1398 * to parse the DTD and before starting to parse the document root.
1400 static void
1401 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1403 if (ctxt->attsSpecial == NULL)
1404 return;
1406 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1408 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1409 xmlHashFree(ctxt->attsSpecial, NULL);
1410 ctxt->attsSpecial = NULL;
1412 return;
1416 * xmlCheckLanguageID:
1417 * @lang: pointer to the string value
1419 * Checks that the value conforms to the LanguageID production:
1421 * NOTE: this is somewhat deprecated, those productions were removed from
1422 * the XML Second edition.
1424 * [33] LanguageID ::= Langcode ('-' Subcode)*
1425 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1426 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1427 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1428 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1429 * [38] Subcode ::= ([a-z] | [A-Z])+
1431 * The current REC reference the successors of RFC 1766, currently 5646
1433 * http://www.rfc-editor.org/rfc/rfc5646.txt
1434 * langtag = language
1435 * ["-" script]
1436 * ["-" region]
1437 * *("-" variant)
1438 * *("-" extension)
1439 * ["-" privateuse]
1440 * language = 2*3ALPHA ; shortest ISO 639 code
1441 * ["-" extlang] ; sometimes followed by
1442 * ; extended language subtags
1443 * / 4ALPHA ; or reserved for future use
1444 * / 5*8ALPHA ; or registered language subtag
1446 * extlang = 3ALPHA ; selected ISO 639 codes
1447 * *2("-" 3ALPHA) ; permanently reserved
1449 * script = 4ALPHA ; ISO 15924 code
1451 * region = 2ALPHA ; ISO 3166-1 code
1452 * / 3DIGIT ; UN M.49 code
1454 * variant = 5*8alphanum ; registered variants
1455 * / (DIGIT 3alphanum)
1457 * extension = singleton 1*("-" (2*8alphanum))
1459 * ; Single alphanumerics
1460 * ; "x" reserved for private use
1461 * singleton = DIGIT ; 0 - 9
1462 * / %x41-57 ; A - W
1463 * / %x59-5A ; Y - Z
1464 * / %x61-77 ; a - w
1465 * / %x79-7A ; y - z
1467 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1468 * The parser below doesn't try to cope with extension or privateuse
1469 * that could be added but that's not interoperable anyway
1471 * Returns 1 if correct 0 otherwise
1474 xmlCheckLanguageID(const xmlChar * lang)
1476 const xmlChar *cur = lang, *nxt;
1478 if (cur == NULL)
1479 return (0);
1480 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1481 ((cur[0] == 'I') && (cur[1] == '-')) ||
1482 ((cur[0] == 'x') && (cur[1] == '-')) ||
1483 ((cur[0] == 'X') && (cur[1] == '-'))) {
1485 * Still allow IANA code and user code which were coming
1486 * from the previous version of the XML-1.0 specification
1487 * it's deprecated but we should not fail
1489 cur += 2;
1490 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1491 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1492 cur++;
1493 return(cur[0] == 0);
1495 nxt = cur;
1496 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1497 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1498 nxt++;
1499 if (nxt - cur >= 4) {
1501 * Reserved
1503 if ((nxt - cur > 8) || (nxt[0] != 0))
1504 return(0);
1505 return(1);
1507 if (nxt - cur < 2)
1508 return(0);
1509 /* we got an ISO 639 code */
1510 if (nxt[0] == 0)
1511 return(1);
1512 if (nxt[0] != '-')
1513 return(0);
1515 nxt++;
1516 cur = nxt;
1517 /* now we can have extlang or script or region or variant */
1518 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1519 goto region_m49;
1521 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1522 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1523 nxt++;
1524 if (nxt - cur == 4)
1525 goto script;
1526 if (nxt - cur == 2)
1527 goto region;
1528 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1529 goto variant;
1530 if (nxt - cur != 3)
1531 return(0);
1532 /* we parsed an extlang */
1533 if (nxt[0] == 0)
1534 return(1);
1535 if (nxt[0] != '-')
1536 return(0);
1538 nxt++;
1539 cur = nxt;
1540 /* now we can have script or region or variant */
1541 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1542 goto region_m49;
1544 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1545 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1546 nxt++;
1547 if (nxt - cur == 2)
1548 goto region;
1549 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1550 goto variant;
1551 if (nxt - cur != 4)
1552 return(0);
1553 /* we parsed a script */
1554 script:
1555 if (nxt[0] == 0)
1556 return(1);
1557 if (nxt[0] != '-')
1558 return(0);
1560 nxt++;
1561 cur = nxt;
1562 /* now we can have region or variant */
1563 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1564 goto region_m49;
1566 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1567 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1568 nxt++;
1570 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1571 goto variant;
1572 if (nxt - cur != 2)
1573 return(0);
1574 /* we parsed a region */
1575 region:
1576 if (nxt[0] == 0)
1577 return(1);
1578 if (nxt[0] != '-')
1579 return(0);
1581 nxt++;
1582 cur = nxt;
1583 /* now we can just have a variant */
1584 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1585 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1586 nxt++;
1588 if ((nxt - cur < 5) || (nxt - cur > 8))
1589 return(0);
1591 /* we parsed a variant */
1592 variant:
1593 if (nxt[0] == 0)
1594 return(1);
1595 if (nxt[0] != '-')
1596 return(0);
1597 /* extensions and private use subtags not checked */
1598 return (1);
1600 region_m49:
1601 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1602 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1603 nxt += 3;
1604 goto region;
1606 return(0);
/************************************************************************
 *									*
 *		Parser stacks related functions and macros		*
 *									*
 ************************************************************************/
1615 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1616 const xmlChar ** str);
1618 #ifdef SAX2
1620 * nsPush:
1621 * @ctxt: an XML parser context
1622 * @prefix: the namespace prefix or NULL
1623 * @URL: the namespace name
1625 * Pushes a new parser namespace on top of the ns stack
1627 * Returns -1 in case of error, -2 if the namespace should be discarded
1628 * and the index in the stack otherwise.
1630 static int
1631 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1633 if (ctxt->options & XML_PARSE_NSCLEAN) {
1634 int i;
1635 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1636 if (ctxt->nsTab[i] == prefix) {
1637 /* in scope */
1638 if (ctxt->nsTab[i + 1] == URL)
1639 return(-2);
1640 /* out of scope keep it */
1641 break;
1645 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1646 ctxt->nsMax = 10;
1647 ctxt->nsNr = 0;
1648 ctxt->nsTab = (const xmlChar **)
1649 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1650 if (ctxt->nsTab == NULL) {
1651 xmlErrMemory(ctxt, NULL);
1652 ctxt->nsMax = 0;
1653 return (-1);
1655 } else if (ctxt->nsNr >= ctxt->nsMax) {
1656 const xmlChar ** tmp;
1657 ctxt->nsMax *= 2;
1658 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1659 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1660 if (tmp == NULL) {
1661 xmlErrMemory(ctxt, NULL);
1662 ctxt->nsMax /= 2;
1663 return (-1);
1665 ctxt->nsTab = tmp;
1667 ctxt->nsTab[ctxt->nsNr++] = prefix;
1668 ctxt->nsTab[ctxt->nsNr++] = URL;
1669 return (ctxt->nsNr);
1672 * nsPop:
1673 * @ctxt: an XML parser context
1674 * @nr: the number to pop
1676 * Pops the top @nr parser prefix/namespace from the ns stack
1678 * Returns the number of namespaces removed
1680 static int
1681 nsPop(xmlParserCtxtPtr ctxt, int nr)
1683 int i;
1685 if (ctxt->nsTab == NULL) return(0);
1686 if (ctxt->nsNr < nr) {
1687 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1688 nr = ctxt->nsNr;
1690 if (ctxt->nsNr <= 0)
1691 return (0);
1693 for (i = 0;i < nr;i++) {
1694 ctxt->nsNr--;
1695 ctxt->nsTab[ctxt->nsNr] = NULL;
1697 return(nr);
1699 #endif
1701 static int
1702 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1703 const xmlChar **atts;
1704 int *attallocs;
1705 int maxatts;
1707 if (ctxt->atts == NULL) {
1708 maxatts = 55; /* allow for 10 attrs by default */
1709 atts = (const xmlChar **)
1710 xmlMalloc(maxatts * sizeof(xmlChar *));
1711 if (atts == NULL) goto mem_error;
1712 ctxt->atts = atts;
1713 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1714 if (attallocs == NULL) goto mem_error;
1715 ctxt->attallocs = attallocs;
1716 ctxt->maxatts = maxatts;
1717 } else if (nr + 5 > ctxt->maxatts) {
1718 maxatts = (nr + 5) * 2;
1719 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1720 maxatts * sizeof(const xmlChar *));
1721 if (atts == NULL) goto mem_error;
1722 ctxt->atts = atts;
1723 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1724 (maxatts / 5) * sizeof(int));
1725 if (attallocs == NULL) goto mem_error;
1726 ctxt->attallocs = attallocs;
1727 ctxt->maxatts = maxatts;
1729 return(ctxt->maxatts);
1730 mem_error:
1731 xmlErrMemory(ctxt, NULL);
1732 return(-1);
1736 * inputPush:
1737 * @ctxt: an XML parser context
1738 * @value: the parser input
1740 * Pushes a new parser input on top of the input stack
1742 * Returns -1 in case of error, the index in the stack otherwise
1745 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1747 if ((ctxt == NULL) || (value == NULL))
1748 return(-1);
1749 if (ctxt->inputNr >= ctxt->inputMax) {
1750 ctxt->inputMax *= 2;
1751 ctxt->inputTab =
1752 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1753 ctxt->inputMax *
1754 sizeof(ctxt->inputTab[0]));
1755 if (ctxt->inputTab == NULL) {
1756 xmlErrMemory(ctxt, NULL);
1757 ctxt->inputMax /= 2;
1758 return (-1);
1761 ctxt->inputTab[ctxt->inputNr] = value;
1762 ctxt->input = value;
1763 return (ctxt->inputNr++);
1766 * inputPop:
1767 * @ctxt: an XML parser context
1769 * Pops the top parser input from the input stack
1771 * Returns the input just removed
1773 xmlParserInputPtr
1774 inputPop(xmlParserCtxtPtr ctxt)
1776 xmlParserInputPtr ret;
1778 if (ctxt == NULL)
1779 return(NULL);
1780 if (ctxt->inputNr <= 0)
1781 return (NULL);
1782 ctxt->inputNr--;
1783 if (ctxt->inputNr > 0)
1784 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1785 else
1786 ctxt->input = NULL;
1787 ret = ctxt->inputTab[ctxt->inputNr];
1788 ctxt->inputTab[ctxt->inputNr] = NULL;
1789 return (ret);
1792 * nodePush:
1793 * @ctxt: an XML parser context
1794 * @value: the element node
1796 * Pushes a new element node on top of the node stack
1798 * Returns -1 in case of error, the index in the stack otherwise
1801 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1803 if (ctxt == NULL) return(0);
1804 if (ctxt->nodeNr >= ctxt->nodeMax) {
1805 xmlNodePtr *tmp;
1807 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1808 ctxt->nodeMax * 2 *
1809 sizeof(ctxt->nodeTab[0]));
1810 if (tmp == NULL) {
1811 xmlErrMemory(ctxt, NULL);
1812 return (-1);
1814 ctxt->nodeTab = tmp;
1815 ctxt->nodeMax *= 2;
1817 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1818 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1819 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1820 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1821 xmlParserMaxDepth);
1822 xmlHaltParser(ctxt);
1823 return(-1);
1825 ctxt->nodeTab[ctxt->nodeNr] = value;
1826 ctxt->node = value;
1827 return (ctxt->nodeNr++);
1831 * nodePop:
1832 * @ctxt: an XML parser context
1834 * Pops the top element node from the node stack
1836 * Returns the node just removed
1838 xmlNodePtr
1839 nodePop(xmlParserCtxtPtr ctxt)
1841 xmlNodePtr ret;
1843 if (ctxt == NULL) return(NULL);
1844 if (ctxt->nodeNr <= 0)
1845 return (NULL);
1846 ctxt->nodeNr--;
1847 if (ctxt->nodeNr > 0)
1848 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1849 else
1850 ctxt->node = NULL;
1851 ret = ctxt->nodeTab[ctxt->nodeNr];
1852 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1853 return (ret);
1857 * nameNsPush:
1858 * @ctxt: an XML parser context
1859 * @value: the element name
1860 * @prefix: the element prefix
1861 * @URI: the element namespace name
1862 * @line: the current line number for error messages
1863 * @nsNr: the number of namespaces pushed on the namespace table
1865 * Pushes a new element name/prefix/URL on top of the name stack
1867 * Returns -1 in case of error, the index in the stack otherwise
1869 static int
1870 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1871 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1873 xmlStartTag *tag;
1875 if (ctxt->nameNr >= ctxt->nameMax) {
1876 const xmlChar * *tmp;
1877 xmlStartTag *tmp2;
1878 ctxt->nameMax *= 2;
1879 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1880 ctxt->nameMax *
1881 sizeof(ctxt->nameTab[0]));
1882 if (tmp == NULL) {
1883 ctxt->nameMax /= 2;
1884 goto mem_error;
1886 ctxt->nameTab = tmp;
1887 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1888 ctxt->nameMax *
1889 sizeof(ctxt->pushTab[0]));
1890 if (tmp2 == NULL) {
1891 ctxt->nameMax /= 2;
1892 goto mem_error;
1894 ctxt->pushTab = tmp2;
1895 } else if (ctxt->pushTab == NULL) {
1896 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1897 sizeof(ctxt->pushTab[0]));
1898 if (ctxt->pushTab == NULL)
1899 goto mem_error;
1901 ctxt->nameTab[ctxt->nameNr] = value;
1902 ctxt->name = value;
1903 tag = &ctxt->pushTab[ctxt->nameNr];
1904 tag->prefix = prefix;
1905 tag->URI = URI;
1906 tag->line = line;
1907 tag->nsNr = nsNr;
1908 return (ctxt->nameNr++);
1909 mem_error:
1910 xmlErrMemory(ctxt, NULL);
1911 return (-1);
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Pops the top element/prefix/URI name from the name stack. Only
 * ctxt->nameTab is cleared; the matching ctxt->pushTab entry is left as is.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return (NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;
    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return (popped);
}
#endif /* LIBXML_PUSH_ENABLED */
1941 * namePush:
1942 * @ctxt: an XML parser context
1943 * @value: the element name
1945 * Pushes a new element name on top of the name stack
1947 * Returns -1 in case of error, the index in the stack otherwise
1950 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1952 if (ctxt == NULL) return (-1);
1954 if (ctxt->nameNr >= ctxt->nameMax) {
1955 const xmlChar * *tmp;
1956 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1957 ctxt->nameMax * 2 *
1958 sizeof(ctxt->nameTab[0]));
1959 if (tmp == NULL) {
1960 goto mem_error;
1962 ctxt->nameTab = tmp;
1963 ctxt->nameMax *= 2;
1965 ctxt->nameTab[ctxt->nameNr] = value;
1966 ctxt->name = value;
1967 return (ctxt->nameNr++);
1968 mem_error:
1969 xmlErrMemory(ctxt, NULL);
1970 return (-1);
1973 * namePop:
1974 * @ctxt: an XML parser context
1976 * Pops the top element name from the name stack
1978 * Returns the name just removed
1980 const xmlChar *
1981 namePop(xmlParserCtxtPtr ctxt)
1983 const xmlChar *ret;
1985 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1986 return (NULL);
1987 ctxt->nameNr--;
1988 if (ctxt->nameNr > 0)
1989 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1990 else
1991 ctxt->name = NULL;
1992 ret = ctxt->nameTab[ctxt->nameNr];
1993 ctxt->nameTab[ctxt->nameNr] = NULL;
1994 return (ret);
1997 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1998 if (ctxt->spaceNr >= ctxt->spaceMax) {
1999 int *tmp;
2001 ctxt->spaceMax *= 2;
2002 tmp = (int *) xmlRealloc(ctxt->spaceTab,
2003 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
2004 if (tmp == NULL) {
2005 xmlErrMemory(ctxt, NULL);
2006 ctxt->spaceMax /=2;
2007 return(-1);
2009 ctxt->spaceTab = tmp;
2011 ctxt->spaceTab[ctxt->spaceNr] = val;
2012 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
2013 return(ctxt->spaceNr++);
2016 static int spacePop(xmlParserCtxtPtr ctxt) {
2017 int ret;
2018 if (ctxt->spaceNr <= 0) return(0);
2019 ctxt->spaceNr--;
2020 if (ctxt->spaceNr > 0)
2021 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
2022 else
2023 ctxt->space = &ctxt->spaceTab[0];
2024 ret = ctxt->spaceTab[ctxt->spaceNr];
2025 ctxt->spaceTab[ctxt->spaceNr] = -1;
2026 return(ret);
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them. All of them expect a variable named ctxt to be in scope.
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 *   CMPn(s, c1, ..., cn) true when the n bytes at s equal c1 ... cn.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/* (s) is parenthesized so that any pointer expression can be passed */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == c1 && ((unsigned char *) (s))[ 1 ] == c2 && \
    ((unsigned char *) (s))[ 2 ] == c3 && ((unsigned char *) (s))[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == c10 )
/*
 * SKIP(val): advance the current input by val xmlChars and bump the column
 * accordingly, then ask for more input if the terminator was reached.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks one xmlChar at a time so that line
 * and column stay correct when newlines are skipped.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2106 #define SHRINK if ((ctxt->progressive == 0) && \
2107 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2108 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2109 xmlSHRINK (ctxt);
2111 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2112 xmlParserInputShrink(ctxt->input);
2113 if (*ctxt->input->cur == 0)
2114 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2117 #define GROW if ((ctxt->progressive == 0) && \
2118 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2119 xmlGROW (ctxt);
2121 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2122 ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2123 ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2125 if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2126 (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2127 ((ctxt->input->buf) &&
2128 (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
2129 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2130 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2131 xmlHaltParser(ctxt);
2132 return;
2134 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2135 if ((ctxt->input->cur > ctxt->input->end) ||
2136 (ctxt->input->cur < ctxt->input->base)) {
2137 xmlHaltParser(ctxt);
2138 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2139 return;
2141 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2142 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
/* Skip blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Skip to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: skip one xmlChar and bump the column, then ask for more input if
 * the terminator was reached. Expands to a brace block.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/* NEXTL(l): skip the current character, l xmlChars long, updating line/col */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

/* l must be an lvalue: its address is passed to receive a length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and advance
 * i; l == 1 selects a plain one byte store. Expands to a bare if/else
 * statement.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))

/* input->consumed plus the offset of the cursor in the current buffer */
#define CUR_CONSUMED \
    (ctxt->input->consumed + (ctxt->input->cur - ctxt->input->base))
2174 * xmlSkipBlankChars:
2175 * @ctxt: the XML parser context
2177 * skip all blanks character found at that point in the input streams.
2178 * It pops up finished entities in the process if allowable at that point.
2180 * Returns the number of space chars skipped
2184 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2185 int res = 0;
2188 * It's Okay to use CUR/NEXT here since all the blanks are on
2189 * the ASCII range.
2191 if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2192 (ctxt->instate == XML_PARSER_START)) {
2193 const xmlChar *cur;
2195 * if we are in the document content, go really fast
2197 cur = ctxt->input->cur;
2198 while (IS_BLANK_CH(*cur)) {
2199 if (*cur == '\n') {
2200 ctxt->input->line++; ctxt->input->col = 1;
2201 } else {
2202 ctxt->input->col++;
2204 cur++;
2205 if (res < INT_MAX)
2206 res++;
2207 if (*cur == 0) {
2208 ctxt->input->cur = cur;
2209 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2210 cur = ctxt->input->cur;
2213 ctxt->input->cur = cur;
2214 } else {
2215 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2217 while (1) {
2218 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2219 NEXT;
2220 } else if (CUR == '%') {
2222 * Need to handle support of entities branching here
2224 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2225 break;
2226 xmlParsePEReference(ctxt);
2227 } else if (CUR == 0) {
2228 if (ctxt->inputNr <= 1)
2229 break;
2230 xmlPopInput(ctxt);
2231 } else {
2232 break;
2236 * Also increase the counter when entering or exiting a PERef.
2237 * The spec says: "When a parameter-entity reference is recognized
2238 * in the DTD and included, its replacement text MUST be enlarged
2239 * by the attachment of one leading and one following space (#x20)
2240 * character."
2242 if (res < INT_MAX)
2243 res++;
2246 return(res);
/************************************************************************
 *									*
 *		Commodity functions to handle entities			*
 *									*
 ************************************************************************/
2256 * xmlPopInput:
2257 * @ctxt: an XML parser context
2259 * xmlPopInput: the current input pointed by ctxt->input came to an end
2260 * pop it and return the next char.
2262 * Returns the current xmlChar in the parser context
2264 xmlChar
2265 xmlPopInput(xmlParserCtxtPtr ctxt) {
2266 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2267 if (xmlParserDebugEntities)
2268 xmlGenericError(xmlGenericErrorContext,
2269 "Popping input %d\n", ctxt->inputNr);
2270 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2271 (ctxt->instate != XML_PARSER_EOF))
2272 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2273 "Unfinished entity outside the DTD");
2274 xmlFreeInputStream(inputPop(ctxt));
2275 if (*ctxt->input->cur == 0)
2276 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2277 return(CUR);
2281 * xmlPushInput:
2282 * @ctxt: an XML parser context
2283 * @input: an XML parser input fragment (entity, XML fragment ...).
2285 * xmlPushInput: switch to a new input stream which is stacked on top
2286 * of the previous one(s).
2287 * Returns -1 in case of error or the index in the input stack
2290 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2291 int ret;
2292 if (input == NULL) return(-1);
2294 if (xmlParserDebugEntities) {
2295 if ((ctxt->input != NULL) && (ctxt->input->filename))
2296 xmlGenericError(xmlGenericErrorContext,
2297 "%s(%d): ", ctxt->input->filename,
2298 ctxt->input->line);
2299 xmlGenericError(xmlGenericErrorContext,
2300 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2302 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2303 (ctxt->inputNr > 1024)) {
2304 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2305 while (ctxt->inputNr > 1)
2306 xmlFreeInputStream(inputPop(ctxt));
2307 return(-1);
2309 ret = inputPush(ctxt, input);
2310 if (ctxt->instate == XML_PARSER_EOF)
2311 return(-1);
2312 GROW;
2313 return(ret);
2317 * xmlParseCharRef:
2318 * @ctxt: an XML parser context
2320 * parse Reference declarations
2322 * [66] CharRef ::= '&#' [0-9]+ ';' |
2323 * '&#x' [0-9a-fA-F]+ ';'
2325 * [ WFC: Legal Character ]
2326 * Characters referred to using character references must match the
2327 * production for Char.
2329 * Returns the value parsed (as an int), 0 in case of error
2332 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2333 int val = 0;
2334 int count = 0;
2337 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2339 if ((RAW == '&') && (NXT(1) == '#') &&
2340 (NXT(2) == 'x')) {
2341 SKIP(3);
2342 GROW;
2343 while (RAW != ';') { /* loop blocked by count */
2344 if (count++ > 20) {
2345 count = 0;
2346 GROW;
2347 if (ctxt->instate == XML_PARSER_EOF)
2348 return(0);
2350 if ((RAW >= '0') && (RAW <= '9'))
2351 val = val * 16 + (CUR - '0');
2352 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2353 val = val * 16 + (CUR - 'a') + 10;
2354 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2355 val = val * 16 + (CUR - 'A') + 10;
2356 else {
2357 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2358 val = 0;
2359 break;
2361 if (val > 0x110000)
2362 val = 0x110000;
2364 NEXT;
2365 count++;
2367 if (RAW == ';') {
2368 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2369 ctxt->input->col++;
2370 ctxt->input->cur++;
2372 } else if ((RAW == '&') && (NXT(1) == '#')) {
2373 SKIP(2);
2374 GROW;
2375 while (RAW != ';') { /* loop blocked by count */
2376 if (count++ > 20) {
2377 count = 0;
2378 GROW;
2379 if (ctxt->instate == XML_PARSER_EOF)
2380 return(0);
2382 if ((RAW >= '0') && (RAW <= '9'))
2383 val = val * 10 + (CUR - '0');
2384 else {
2385 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2386 val = 0;
2387 break;
2389 if (val > 0x110000)
2390 val = 0x110000;
2392 NEXT;
2393 count++;
2395 if (RAW == ';') {
2396 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2397 ctxt->input->col++;
2398 ctxt->input->cur++;
2400 } else {
2401 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2405 * [ WFC: Legal Character ]
2406 * Characters referred to using character references must match the
2407 * production for Char.
2409 if (val >= 0x110000) {
2410 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2411 "xmlParseCharRef: character reference out of bounds\n",
2412 val);
2413 } else if (IS_CHAR(val)) {
2414 return(val);
2415 } else {
2416 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2417 "xmlParseCharRef: invalid xmlChar value %d\n",
2418 val);
2420 return(0);
2424 * xmlParseStringCharRef:
2425 * @ctxt: an XML parser context
2426 * @str: a pointer to an index in the string
2428 * parse Reference declarations, variant parsing from a string rather
2429 * than an an input flow.
2431 * [66] CharRef ::= '&#' [0-9]+ ';' |
2432 * '&#x' [0-9a-fA-F]+ ';'
2434 * [ WFC: Legal Character ]
2435 * Characters referred to using character references must match the
2436 * production for Char.
2438 * Returns the value parsed (as an int), 0 in case of error, str will be
2439 * updated to the current value of the index
2441 static int
2442 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2443 const xmlChar *ptr;
2444 xmlChar cur;
2445 int val = 0;
2447 if ((str == NULL) || (*str == NULL)) return(0);
2448 ptr = *str;
2449 cur = *ptr;
2450 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2451 ptr += 3;
2452 cur = *ptr;
2453 while (cur != ';') { /* Non input consuming loop */
2454 if ((cur >= '0') && (cur <= '9'))
2455 val = val * 16 + (cur - '0');
2456 else if ((cur >= 'a') && (cur <= 'f'))
2457 val = val * 16 + (cur - 'a') + 10;
2458 else if ((cur >= 'A') && (cur <= 'F'))
2459 val = val * 16 + (cur - 'A') + 10;
2460 else {
2461 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2462 val = 0;
2463 break;
2465 if (val > 0x110000)
2466 val = 0x110000;
2468 ptr++;
2469 cur = *ptr;
2471 if (cur == ';')
2472 ptr++;
2473 } else if ((cur == '&') && (ptr[1] == '#')){
2474 ptr += 2;
2475 cur = *ptr;
2476 while (cur != ';') { /* Non input consuming loops */
2477 if ((cur >= '0') && (cur <= '9'))
2478 val = val * 10 + (cur - '0');
2479 else {
2480 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2481 val = 0;
2482 break;
2484 if (val > 0x110000)
2485 val = 0x110000;
2487 ptr++;
2488 cur = *ptr;
2490 if (cur == ';')
2491 ptr++;
2492 } else {
2493 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2494 return(0);
2496 *str = ptr;
2499 * [ WFC: Legal Character ]
2500 * Characters referred to using character references must match the
2501 * production for Char.
2503 if (val >= 0x110000) {
2504 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2505 "xmlParseStringCharRef: character reference out of bounds\n",
2506 val);
2507 } else if (IS_CHAR(val)) {
2508 return(val);
2509 } else {
2510 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2511 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2512 val);
2514 return(0);
2518 * xmlParserHandlePEReference:
2519 * @ctxt: the parser context
2521 * [69] PEReference ::= '%' Name ';'
2523 * [ WFC: No Recursion ]
2524 * A parsed entity must not contain a recursive
2525 * reference to itself, either directly or indirectly.
2527 * [ WFC: Entity Declared ]
2528 * In a document without any DTD, a document with only an internal DTD
2529 * subset which contains no parameter entity references, or a document
2530 * with "standalone='yes'", ... ... The declaration of a parameter
2531 * entity must precede any reference to it...
2533 * [ VC: Entity Declared ]
2534 * In a document with an external subset or external parameter entities
2535 * with "standalone='no'", ... ... The declaration of a parameter entity
2536 * must precede any reference to it...
2538 * [ WFC: In DTD ]
2539 * Parameter-entity references may only appear in the DTD.
2540 * NOTE: misleading but this is handled.
2542 * A PEReference may have been detected in the current input stream
2543 * the handling is done accordingly to
2544 * http://www.w3.org/TR/REC-xml#entproc
2545 * i.e.
2546 * - Included in literal in entity values
2547 * - Included as Parameter Entity reference within DTDs
2549 void
2550 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2551 switch(ctxt->instate) {
2552 case XML_PARSER_CDATA_SECTION:
2553 return;
2554 case XML_PARSER_COMMENT:
2555 return;
2556 case XML_PARSER_START_TAG:
2557 return;
2558 case XML_PARSER_END_TAG:
2559 return;
2560 case XML_PARSER_EOF:
2561 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2562 return;
2563 case XML_PARSER_PROLOG:
2564 case XML_PARSER_START:
2565 case XML_PARSER_MISC:
2566 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2567 return;
2568 case XML_PARSER_ENTITY_DECL:
2569 case XML_PARSER_CONTENT:
2570 case XML_PARSER_ATTRIBUTE_VALUE:
2571 case XML_PARSER_PI:
2572 case XML_PARSER_SYSTEM_LITERAL:
2573 case XML_PARSER_PUBLIC_LITERAL:
2574 /* we just ignore it there */
2575 return;
2576 case XML_PARSER_EPILOG:
2577 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2578 return;
2579 case XML_PARSER_ENTITY_VALUE:
2581 * NOTE: in the case of entity values, we don't do the
2582 * substitution here since we need the literal
2583 * entity value to be able to save the internal
2584 * subset of the document.
2585 * This will be handled by xmlStringDecodeEntities
2587 return;
2588 case XML_PARSER_DTD:
2590 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2591 * In the internal DTD subset, parameter-entity references
2592 * can occur only where markup declarations can occur, not
2593 * within markup declarations.
2594 * In that case this is handled in xmlParseMarkupDecl
2596 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2597 return;
2598 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2599 return;
2600 break;
2601 case XML_PARSER_IGNORE:
2602 return;
2605 xmlParsePEReference(ctxt);
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new capacity is twice the old one plus n. If that computation
 * wraps around, or if the reallocation fails, control jumps to the
 * caller's mem_error label with buffer and buffer##_size unchanged.
 */
#define growBuffer(buffer, n) {                                         \
    size_t gb_new_size = buffer##_size * 2 + n;                         \
    xmlChar *gb_tmp = NULL;                                             \
    if (gb_new_size >= buffer##_size)                                   \
        gb_tmp = (xmlChar *) xmlRealloc(buffer, gb_new_size);           \
    if (gb_tmp == NULL) goto mem_error;                                 \
    buffer = gb_tmp;                                                    \
    buffer##_size = gb_new_size;                                        \
}
2624 * xmlStringLenDecodeEntities:
2625 * @ctxt: the parser context
2626 * @str: the input string
2627 * @len: the string length
2628 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2629 * @end: an end marker xmlChar, 0 if none
2630 * @end2: an end marker xmlChar, 0 if none
2631 * @end3: an end marker xmlChar, 0 if none
2633 * Takes a entity string content and process to do the adequate substitutions.
2635 * [67] Reference ::= EntityRef | CharRef
2637 * [69] PEReference ::= '%' Name ';'
2639 * Returns A newly allocated string with the substitution done. The caller
2640 * must deallocate it !
2642 xmlChar *
2643 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2644 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2645 xmlChar *buffer = NULL;
2646 size_t buffer_size = 0;
2647 size_t nbchars = 0;
2649 xmlChar *current = NULL;
2650 xmlChar *rep = NULL;
2651 const xmlChar *last;
2652 xmlEntityPtr ent;
2653 int c,l;
2655 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2656 return(NULL);
2657 last = str + len;
2659 if (((ctxt->depth > 40) &&
2660 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2661 (ctxt->depth > 1024)) {
2662 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2663 return(NULL);
2667 * allocate a translation buffer.
2669 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2670 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2671 if (buffer == NULL) goto mem_error;
2674 * OK loop until we reach one of the ending char or a size limit.
2675 * we are operating on already parsed values.
2677 if (str < last)
2678 c = CUR_SCHAR(str, l);
2679 else
2680 c = 0;
2681 while ((c != 0) && (c != end) && /* non input consuming loop */
2682 (c != end2) && (c != end3) &&
2683 (ctxt->instate != XML_PARSER_EOF)) {
2685 if (c == 0) break;
2686 if ((c == '&') && (str[1] == '#')) {
2687 int val = xmlParseStringCharRef(ctxt, &str);
2688 if (val == 0)
2689 goto int_error;
2690 COPY_BUF(0,buffer,nbchars,val);
2691 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2692 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2694 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2695 if (xmlParserDebugEntities)
2696 xmlGenericError(xmlGenericErrorContext,
2697 "String decoding Entity Reference: %.30s\n",
2698 str);
2699 ent = xmlParseStringEntityRef(ctxt, &str);
2700 xmlParserEntityCheck(ctxt, 0, ent, 0);
2701 if (ent != NULL)
2702 ctxt->nbentities += ent->checked / 2;
2703 if ((ent != NULL) &&
2704 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2705 if (ent->content != NULL) {
2706 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2707 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2708 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2710 } else {
2711 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2712 "predefined entity has no content\n");
2713 goto int_error;
2715 } else if ((ent != NULL) && (ent->content != NULL)) {
2716 ctxt->depth++;
2717 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2718 0, 0, 0);
2719 ctxt->depth--;
2720 if (rep == NULL) {
2721 ent->content[0] = 0;
2722 goto int_error;
2725 current = rep;
2726 while (*current != 0) { /* non input consuming loop */
2727 buffer[nbchars++] = *current++;
2728 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2729 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2730 goto int_error;
2731 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2734 xmlFree(rep);
2735 rep = NULL;
2736 } else if (ent != NULL) {
2737 int i = xmlStrlen(ent->name);
2738 const xmlChar *cur = ent->name;
2740 buffer[nbchars++] = '&';
2741 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2742 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2744 for (;i > 0;i--)
2745 buffer[nbchars++] = *cur++;
2746 buffer[nbchars++] = ';';
2748 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2749 if (xmlParserDebugEntities)
2750 xmlGenericError(xmlGenericErrorContext,
2751 "String decoding PE Reference: %.30s\n", str);
2752 ent = xmlParseStringPEReference(ctxt, &str);
2753 xmlParserEntityCheck(ctxt, 0, ent, 0);
2754 if (ent != NULL)
2755 ctxt->nbentities += ent->checked / 2;
2756 if (ent != NULL) {
2757 if (ent->content == NULL) {
2759 * Note: external parsed entities will not be loaded,
2760 * it is not required for a non-validating parser to
2761 * complete external PEReferences coming from the
2762 * internal subset
2764 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2765 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2766 (ctxt->validate != 0)) {
2767 xmlLoadEntityContent(ctxt, ent);
2768 } else {
2769 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2770 "not validating will not read content for PE entity %s\n",
2771 ent->name, NULL);
2774 ctxt->depth++;
2775 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2776 0, 0, 0);
2777 ctxt->depth--;
2778 if (rep == NULL) {
2779 if (ent->content != NULL)
2780 ent->content[0] = 0;
2781 goto int_error;
2783 current = rep;
2784 while (*current != 0) { /* non input consuming loop */
2785 buffer[nbchars++] = *current++;
2786 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2787 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2788 goto int_error;
2789 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2792 xmlFree(rep);
2793 rep = NULL;
2795 } else {
2796 COPY_BUF(l,buffer,nbchars,c);
2797 str += l;
2798 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2799 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2802 if (str < last)
2803 c = CUR_SCHAR(str, l);
2804 else
2805 c = 0;
2807 buffer[nbchars] = 0;
2808 return(buffer);
2810 mem_error:
2811 xmlErrMemory(ctxt, NULL);
2812 int_error:
2813 if (rep != NULL)
2814 xmlFree(rep);
2815 if (buffer != NULL)
2816 xmlFree(buffer);
2817 return(NULL);
2821 * xmlStringDecodeEntities:
2822 * @ctxt: the parser context
2823 * @str: the input string
2824 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2825 * @end: an end marker xmlChar, 0 if none
2826 * @end2: an end marker xmlChar, 0 if none
2827 * @end3: an end marker xmlChar, 0 if none
2829 * Takes a entity string content and process to do the adequate substitutions.
2831 * [67] Reference ::= EntityRef | CharRef
2833 * [69] PEReference ::= '%' Name ';'
2835 * Returns A newly allocated string with the substitution done. The caller
2836 * must deallocate it !
2838 xmlChar *
2839 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2840 xmlChar end, xmlChar end2, xmlChar end3) {
2841 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2842 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2843 end, end2, end3));
2846 /************************************************************************
2848 * Commodity functions, cleanup needed ? *
2850 ************************************************************************/
2853 * areBlanks:
2854 * @ctxt: an XML parser context
2855 * @str: a xmlChar *
2856 * @len: the size of @str
2857 * @blank_chars: we know the chars are blanks
2859 * Is this a sequence of blank chars that one can ignore ?
2861 * Returns 1 if ignorable 0 otherwise.
2864 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2865 int blank_chars) {
2866 int i, ret;
2867 xmlNodePtr lastChild;
2870 * Don't spend time trying to differentiate them, the same callback is
2871 * used !
2873 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2874 return(0);
2877 * Check for xml:space value.
2879 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2880 (*(ctxt->space) == -2))
2881 return(0);
2884 * Check that the string is made of blanks
2886 if (blank_chars == 0) {
2887 for (i = 0;i < len;i++)
2888 if (!(IS_BLANK_CH(str[i]))) return(0);
2892 * Look if the element is mixed content in the DTD if available
2894 if (ctxt->node == NULL) return(0);
2895 if (ctxt->myDoc != NULL) {
2896 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2897 if (ret == 0) return(1);
2898 if (ret == 1) return(0);
2902 * Otherwise, heuristic :-\
2904 if ((RAW != '<') && (RAW != 0xD)) return(0);
2905 if ((ctxt->node->children == NULL) &&
2906 (RAW == '<') && (NXT(1) == '/')) return(0);
2908 lastChild = xmlGetLastChild(ctxt->node);
2909 if (lastChild == NULL) {
2910 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2911 (ctxt->node->content != NULL)) return(0);
2912 } else if (xmlNodeIsText(lastChild))
2913 return(0);
2914 else if ((ctxt->node->children != NULL) &&
2915 (xmlNodeIsText(ctxt->node->children)))
2916 return(0);
2917 return(1);
2920 /************************************************************************
2922 * Extra stuff for namespace support *
2923 * Relates to http://www.w3.org/TR/WD-xml-names *
2925 ************************************************************************/
2928 * xmlSplitQName:
2929 * @ctxt: an XML parser context
2930 * @name: an XML parser context
2931 * @prefix: a xmlChar **
2933 * parse an UTF8 encoded XML qualified name string
2935 * [NS 5] QName ::= (Prefix ':')? LocalPart
2937 * [NS 6] Prefix ::= NCName
2939 * [NS 7] LocalPart ::= NCName
2941 * Returns the local part, and prefix is updated
2942 * to get the Prefix if any.
2945 xmlChar *
2946 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2947 xmlChar buf[XML_MAX_NAMELEN + 5];
2948 xmlChar *buffer = NULL;
2949 int len = 0;
2950 int max = XML_MAX_NAMELEN;
2951 xmlChar *ret = NULL;
2952 const xmlChar *cur = name;
2953 int c;
2955 if (prefix == NULL) return(NULL);
2956 *prefix = NULL;
2958 if (cur == NULL) return(NULL);
2960 #ifndef XML_XML_NAMESPACE
2961 /* xml: prefix is not really a namespace */
2962 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2963 (cur[2] == 'l') && (cur[3] == ':'))
2964 return(xmlStrdup(name));
2965 #endif
2967 /* nasty but well=formed */
2968 if (cur[0] == ':')
2969 return(xmlStrdup(name));
2971 c = *cur++;
2972 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2973 buf[len++] = c;
2974 c = *cur++;
2976 if (len >= max) {
2978 * Okay someone managed to make a huge name, so he's ready to pay
2979 * for the processing speed.
2981 max = len * 2;
2983 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2984 if (buffer == NULL) {
2985 xmlErrMemory(ctxt, NULL);
2986 return(NULL);
2988 memcpy(buffer, buf, len);
2989 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2990 if (len + 10 > max) {
2991 xmlChar *tmp;
2993 max *= 2;
2994 tmp = (xmlChar *) xmlRealloc(buffer,
2995 max * sizeof(xmlChar));
2996 if (tmp == NULL) {
2997 xmlFree(buffer);
2998 xmlErrMemory(ctxt, NULL);
2999 return(NULL);
3001 buffer = tmp;
3003 buffer[len++] = c;
3004 c = *cur++;
3006 buffer[len] = 0;
3009 if ((c == ':') && (*cur == 0)) {
3010 if (buffer != NULL)
3011 xmlFree(buffer);
3012 *prefix = NULL;
3013 return(xmlStrdup(name));
3016 if (buffer == NULL)
3017 ret = xmlStrndup(buf, len);
3018 else {
3019 ret = buffer;
3020 buffer = NULL;
3021 max = XML_MAX_NAMELEN;
3025 if (c == ':') {
3026 c = *cur;
3027 *prefix = ret;
3028 if (c == 0) {
3029 return(xmlStrndup(BAD_CAST "", 0));
3031 len = 0;
3034 * Check that the first character is proper to start
3035 * a new name
3037 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3038 ((c >= 0x41) && (c <= 0x5A)) ||
3039 (c == '_') || (c == ':'))) {
3040 int l;
3041 int first = CUR_SCHAR(cur, l);
3043 if (!IS_LETTER(first) && (first != '_')) {
3044 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3045 "Name %s is not XML Namespace compliant\n",
3046 name);
3049 cur++;
3051 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3052 buf[len++] = c;
3053 c = *cur++;
3055 if (len >= max) {
3057 * Okay someone managed to make a huge name, so he's ready to pay
3058 * for the processing speed.
3060 max = len * 2;
3062 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3063 if (buffer == NULL) {
3064 xmlErrMemory(ctxt, NULL);
3065 return(NULL);
3067 memcpy(buffer, buf, len);
3068 while (c != 0) { /* tested bigname2.xml */
3069 if (len + 10 > max) {
3070 xmlChar *tmp;
3072 max *= 2;
3073 tmp = (xmlChar *) xmlRealloc(buffer,
3074 max * sizeof(xmlChar));
3075 if (tmp == NULL) {
3076 xmlErrMemory(ctxt, NULL);
3077 xmlFree(buffer);
3078 return(NULL);
3080 buffer = tmp;
3082 buffer[len++] = c;
3083 c = *cur++;
3085 buffer[len] = 0;
3088 if (buffer == NULL)
3089 ret = xmlStrndup(buf, len);
3090 else {
3091 ret = buffer;
3095 return(ret);
/************************************************************************
 *                                                                      *
 *                      The parser itself                               *
 *      Relates to http://www.w3.org/TR/REC-xml                         *
 *                                                                      *
 ************************************************************************/

/************************************************************************
 *                                                                      *
 *      Routines to parse Name, NCName and NmToken                      *
 *                                                                      *
 ************************************************************************/
#ifdef DEBUG
/*
 * Counters only compiled into DEBUG builds; the parsing routines
 * increment them under the same #ifdef.
 */
/* Name */
static unsigned long nbParseName = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
/* NCName */
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
/* NmToken */
static unsigned long nbParseNmToken = 0;
#endif
3120 * The two following functions are related to the change of accepted
3121 * characters for Name and NmToken in the Revision 5 of XML-1.0
3122 * They correspond to the modified production [4] and the new production [4a]
3123 * changes in that revision. Also note that the macros used for the
3124 * productions Letter, Digit, CombiningChar and Extender are not needed
3125 * anymore.
3126 * We still keep compatibility to pre-revision5 parsing semantic if the
3127 * new XML_PARSE_OLD10 option is given to the parser.
3129 static int
3130 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3131 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3133 * Use the new checks of production [4] [4a] amd [5] of the
3134 * Update 5 of XML-1.0
3136 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3137 (((c >= 'a') && (c <= 'z')) ||
3138 ((c >= 'A') && (c <= 'Z')) ||
3139 (c == '_') || (c == ':') ||
3140 ((c >= 0xC0) && (c <= 0xD6)) ||
3141 ((c >= 0xD8) && (c <= 0xF6)) ||
3142 ((c >= 0xF8) && (c <= 0x2FF)) ||
3143 ((c >= 0x370) && (c <= 0x37D)) ||
3144 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3145 ((c >= 0x200C) && (c <= 0x200D)) ||
3146 ((c >= 0x2070) && (c <= 0x218F)) ||
3147 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3148 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3149 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3150 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3151 ((c >= 0x10000) && (c <= 0xEFFFF))))
3152 return(1);
3153 } else {
3154 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3155 return(1);
3157 return(0);
3160 static int
3161 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3162 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3164 * Use the new checks of production [4] [4a] amd [5] of the
3165 * Update 5 of XML-1.0
3167 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3168 (((c >= 'a') && (c <= 'z')) ||
3169 ((c >= 'A') && (c <= 'Z')) ||
3170 ((c >= '0') && (c <= '9')) || /* !start */
3171 (c == '_') || (c == ':') ||
3172 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3173 ((c >= 0xC0) && (c <= 0xD6)) ||
3174 ((c >= 0xD8) && (c <= 0xF6)) ||
3175 ((c >= 0xF8) && (c <= 0x2FF)) ||
3176 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3177 ((c >= 0x370) && (c <= 0x37D)) ||
3178 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3179 ((c >= 0x200C) && (c <= 0x200D)) ||
3180 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3181 ((c >= 0x2070) && (c <= 0x218F)) ||
3182 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3183 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3184 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3185 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3186 ((c >= 0x10000) && (c <= 0xEFFFF))))
3187 return(1);
3188 } else {
3189 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3190 (c == '.') || (c == '-') ||
3191 (c == '_') || (c == ':') ||
3192 (IS_COMBINING(c)) ||
3193 (IS_EXTENDER(c)))
3194 return(1);
3196 return(0);
3199 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3200 int *len, int *alloc, int normalize);
3202 static const xmlChar *
3203 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3204 int len = 0, l;
3205 int c;
3206 int count = 0;
3207 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3208 XML_MAX_TEXT_LENGTH :
3209 XML_MAX_NAME_LENGTH;
3211 #ifdef DEBUG
3212 nbParseNameComplex++;
3213 #endif
3216 * Handler for more complex cases
3218 GROW;
3219 if (ctxt->instate == XML_PARSER_EOF)
3220 return(NULL);
3221 c = CUR_CHAR(l);
3222 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3224 * Use the new checks of production [4] [4a] amd [5] of the
3225 * Update 5 of XML-1.0
3227 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3228 (!(((c >= 'a') && (c <= 'z')) ||
3229 ((c >= 'A') && (c <= 'Z')) ||
3230 (c == '_') || (c == ':') ||
3231 ((c >= 0xC0) && (c <= 0xD6)) ||
3232 ((c >= 0xD8) && (c <= 0xF6)) ||
3233 ((c >= 0xF8) && (c <= 0x2FF)) ||
3234 ((c >= 0x370) && (c <= 0x37D)) ||
3235 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3236 ((c >= 0x200C) && (c <= 0x200D)) ||
3237 ((c >= 0x2070) && (c <= 0x218F)) ||
3238 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3239 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3240 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3241 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3242 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3243 return(NULL);
3245 len += l;
3246 NEXTL(l);
3247 c = CUR_CHAR(l);
3248 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3249 (((c >= 'a') && (c <= 'z')) ||
3250 ((c >= 'A') && (c <= 'Z')) ||
3251 ((c >= '0') && (c <= '9')) || /* !start */
3252 (c == '_') || (c == ':') ||
3253 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3254 ((c >= 0xC0) && (c <= 0xD6)) ||
3255 ((c >= 0xD8) && (c <= 0xF6)) ||
3256 ((c >= 0xF8) && (c <= 0x2FF)) ||
3257 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3258 ((c >= 0x370) && (c <= 0x37D)) ||
3259 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3260 ((c >= 0x200C) && (c <= 0x200D)) ||
3261 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3262 ((c >= 0x2070) && (c <= 0x218F)) ||
3263 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3264 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3265 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3266 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3267 ((c >= 0x10000) && (c <= 0xEFFFF))
3268 )) {
3269 if (count++ > XML_PARSER_CHUNK_SIZE) {
3270 count = 0;
3271 GROW;
3272 if (ctxt->instate == XML_PARSER_EOF)
3273 return(NULL);
3275 if (len <= INT_MAX - l)
3276 len += l;
3277 NEXTL(l);
3278 c = CUR_CHAR(l);
3280 } else {
3281 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3282 (!IS_LETTER(c) && (c != '_') &&
3283 (c != ':'))) {
3284 return(NULL);
3286 len += l;
3287 NEXTL(l);
3288 c = CUR_CHAR(l);
3290 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3291 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3292 (c == '.') || (c == '-') ||
3293 (c == '_') || (c == ':') ||
3294 (IS_COMBINING(c)) ||
3295 (IS_EXTENDER(c)))) {
3296 if (count++ > XML_PARSER_CHUNK_SIZE) {
3297 count = 0;
3298 GROW;
3299 if (ctxt->instate == XML_PARSER_EOF)
3300 return(NULL);
3302 if (len <= INT_MAX - l)
3303 len += l;
3304 NEXTL(l);
3305 c = CUR_CHAR(l);
3308 if (len > maxLength) {
3309 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3310 return(NULL);
3312 if (ctxt->input->cur - ctxt->input->base < len) {
3314 * There were a couple of bugs where PERefs lead to to a change
3315 * of the buffer. Check the buffer size to avoid passing an invalid
3316 * pointer to xmlDictLookup.
3318 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3319 "unexpected change of input buffer");
3320 return (NULL);
3322 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3323 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3324 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3328 * xmlParseName:
3329 * @ctxt: an XML parser context
3331 * parse an XML name.
3333 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3334 * CombiningChar | Extender
3336 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3338 * [6] Names ::= Name (#x20 Name)*
3340 * Returns the Name parsed or NULL
3343 const xmlChar *
3344 xmlParseName(xmlParserCtxtPtr ctxt) {
3345 const xmlChar *in;
3346 const xmlChar *ret;
3347 size_t count = 0;
3348 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3349 XML_MAX_TEXT_LENGTH :
3350 XML_MAX_NAME_LENGTH;
3352 GROW;
3354 #ifdef DEBUG
3355 nbParseName++;
3356 #endif
3359 * Accelerator for simple ASCII names
3361 in = ctxt->input->cur;
3362 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3363 ((*in >= 0x41) && (*in <= 0x5A)) ||
3364 (*in == '_') || (*in == ':')) {
3365 in++;
3366 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3367 ((*in >= 0x41) && (*in <= 0x5A)) ||
3368 ((*in >= 0x30) && (*in <= 0x39)) ||
3369 (*in == '_') || (*in == '-') ||
3370 (*in == ':') || (*in == '.'))
3371 in++;
3372 if ((*in > 0) && (*in < 0x80)) {
3373 count = in - ctxt->input->cur;
3374 if (count > maxLength) {
3375 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3376 return(NULL);
3378 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3379 ctxt->input->cur = in;
3380 ctxt->input->col += count;
3381 if (ret == NULL)
3382 xmlErrMemory(ctxt, NULL);
3383 return(ret);
3386 /* accelerator for special cases */
3387 return(xmlParseNameComplex(ctxt));
3390 static const xmlChar *
3391 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3392 int len = 0, l;
3393 int c;
3394 int count = 0;
3395 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3396 XML_MAX_TEXT_LENGTH :
3397 XML_MAX_NAME_LENGTH;
3398 size_t startPosition = 0;
3400 #ifdef DEBUG
3401 nbParseNCNameComplex++;
3402 #endif
3405 * Handler for more complex cases
3407 GROW;
3408 startPosition = CUR_PTR - BASE_PTR;
3409 c = CUR_CHAR(l);
3410 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3411 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3412 return(NULL);
3415 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3416 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3417 if (count++ > XML_PARSER_CHUNK_SIZE) {
3418 count = 0;
3419 GROW;
3420 if (ctxt->instate == XML_PARSER_EOF)
3421 return(NULL);
3423 if (len <= INT_MAX - l)
3424 len += l;
3425 NEXTL(l);
3426 c = CUR_CHAR(l);
3427 if (c == 0) {
3428 count = 0;
3430 * when shrinking to extend the buffer we really need to preserve
3431 * the part of the name we already parsed. Hence rolling back
3432 * by current length.
3434 ctxt->input->cur -= l;
3435 GROW;
3436 if (ctxt->instate == XML_PARSER_EOF)
3437 return(NULL);
3438 ctxt->input->cur += l;
3439 c = CUR_CHAR(l);
3442 if (len > maxLength) {
3443 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3444 return(NULL);
3446 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3450 * xmlParseNCName:
3451 * @ctxt: an XML parser context
3452 * @len: length of the string parsed
3454 * parse an XML name.
3456 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3457 * CombiningChar | Extender
3459 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3461 * Returns the Name parsed or NULL
3464 static const xmlChar *
3465 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3466 const xmlChar *in, *e;
3467 const xmlChar *ret;
3468 size_t count = 0;
3469 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3470 XML_MAX_TEXT_LENGTH :
3471 XML_MAX_NAME_LENGTH;
3473 #ifdef DEBUG
3474 nbParseNCName++;
3475 #endif
3478 * Accelerator for simple ASCII names
3480 in = ctxt->input->cur;
3481 e = ctxt->input->end;
3482 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3483 ((*in >= 0x41) && (*in <= 0x5A)) ||
3484 (*in == '_')) && (in < e)) {
3485 in++;
3486 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3487 ((*in >= 0x41) && (*in <= 0x5A)) ||
3488 ((*in >= 0x30) && (*in <= 0x39)) ||
3489 (*in == '_') || (*in == '-') ||
3490 (*in == '.')) && (in < e))
3491 in++;
3492 if (in >= e)
3493 goto complex;
3494 if ((*in > 0) && (*in < 0x80)) {
3495 count = in - ctxt->input->cur;
3496 if (count > maxLength) {
3497 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3498 return(NULL);
3500 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3501 ctxt->input->cur = in;
3502 ctxt->input->col += count;
3503 if (ret == NULL) {
3504 xmlErrMemory(ctxt, NULL);
3506 return(ret);
3509 complex:
3510 return(xmlParseNCNameComplex(ctxt));
3514 * xmlParseNameAndCompare:
3515 * @ctxt: an XML parser context
3517 * parse an XML name and compares for match
3518 * (specialized for endtag parsing)
3520 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3521 * and the name for mismatch
3524 static const xmlChar *
3525 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3526 register const xmlChar *cmp = other;
3527 register const xmlChar *in;
3528 const xmlChar *ret;
3530 GROW;
3531 if (ctxt->instate == XML_PARSER_EOF)
3532 return(NULL);
3534 in = ctxt->input->cur;
3535 while (*in != 0 && *in == *cmp) {
3536 ++in;
3537 ++cmp;
3539 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3540 /* success */
3541 ctxt->input->col += in - ctxt->input->cur;
3542 ctxt->input->cur = in;
3543 return (const xmlChar*) 1;
3545 /* failure (or end of input buffer), check with full function */
3546 ret = xmlParseName (ctxt);
3547 /* strings coming from the dictionary direct compare possible */
3548 if (ret == other) {
3549 return (const xmlChar*) 1;
3551 return ret;
3555 * xmlParseStringName:
3556 * @ctxt: an XML parser context
3557 * @str: a pointer to the string pointer (IN/OUT)
3559 * parse an XML name.
3561 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3562 * CombiningChar | Extender
3564 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3566 * [6] Names ::= Name (#x20 Name)*
3568 * Returns the Name parsed or NULL. The @str pointer
3569 * is updated to the current location in the string.
3572 static xmlChar *
3573 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3574 xmlChar buf[XML_MAX_NAMELEN + 5];
3575 const xmlChar *cur = *str;
3576 int len = 0, l;
3577 int c;
3578 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3579 XML_MAX_TEXT_LENGTH :
3580 XML_MAX_NAME_LENGTH;
3582 #ifdef DEBUG
3583 nbParseStringName++;
3584 #endif
3586 c = CUR_SCHAR(cur, l);
3587 if (!xmlIsNameStartChar(ctxt, c)) {
3588 return(NULL);
3591 COPY_BUF(l,buf,len,c);
3592 cur += l;
3593 c = CUR_SCHAR(cur, l);
3594 while (xmlIsNameChar(ctxt, c)) {
3595 COPY_BUF(l,buf,len,c);
3596 cur += l;
3597 c = CUR_SCHAR(cur, l);
3598 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3600 * Okay someone managed to make a huge name, so he's ready to pay
3601 * for the processing speed.
3603 xmlChar *buffer;
3604 int max = len * 2;
3606 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3607 if (buffer == NULL) {
3608 xmlErrMemory(ctxt, NULL);
3609 return(NULL);
3611 memcpy(buffer, buf, len);
3612 while (xmlIsNameChar(ctxt, c)) {
3613 if (len + 10 > max) {
3614 xmlChar *tmp;
3616 max *= 2;
3617 tmp = (xmlChar *) xmlRealloc(buffer,
3618 max * sizeof(xmlChar));
3619 if (tmp == NULL) {
3620 xmlErrMemory(ctxt, NULL);
3621 xmlFree(buffer);
3622 return(NULL);
3624 buffer = tmp;
3626 COPY_BUF(l,buffer,len,c);
3627 cur += l;
3628 c = CUR_SCHAR(cur, l);
3629 if (len > maxLength) {
3630 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3631 xmlFree(buffer);
3632 return(NULL);
3635 buffer[len] = 0;
3636 *str = cur;
3637 return(buffer);
3640 if (len > maxLength) {
3641 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3642 return(NULL);
3644 *str = cur;
3645 return(xmlStrndup(buf, len));
3649 * xmlParseNmtoken:
3650 * @ctxt: an XML parser context
3652 * parse an XML Nmtoken.
3654 * [7] Nmtoken ::= (NameChar)+
3656 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3658 * Returns the Nmtoken parsed or NULL
3661 xmlChar *
3662 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3663 xmlChar buf[XML_MAX_NAMELEN + 5];
3664 int len = 0, l;
3665 int c;
3666 int count = 0;
3667 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3668 XML_MAX_TEXT_LENGTH :
3669 XML_MAX_NAME_LENGTH;
3671 #ifdef DEBUG
3672 nbParseNmToken++;
3673 #endif
3675 GROW;
3676 if (ctxt->instate == XML_PARSER_EOF)
3677 return(NULL);
3678 c = CUR_CHAR(l);
3680 while (xmlIsNameChar(ctxt, c)) {
3681 if (count++ > XML_PARSER_CHUNK_SIZE) {
3682 count = 0;
3683 GROW;
3685 COPY_BUF(l,buf,len,c);
3686 NEXTL(l);
3687 c = CUR_CHAR(l);
3688 if (c == 0) {
3689 count = 0;
3690 GROW;
3691 if (ctxt->instate == XML_PARSER_EOF)
3692 return(NULL);
3693 c = CUR_CHAR(l);
3695 if (len >= XML_MAX_NAMELEN) {
3697 * Okay someone managed to make a huge token, so he's ready to pay
3698 * for the processing speed.
3700 xmlChar *buffer;
3701 int max = len * 2;
3703 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3704 if (buffer == NULL) {
3705 xmlErrMemory(ctxt, NULL);
3706 return(NULL);
3708 memcpy(buffer, buf, len);
3709 while (xmlIsNameChar(ctxt, c)) {
3710 if (count++ > XML_PARSER_CHUNK_SIZE) {
3711 count = 0;
3712 GROW;
3713 if (ctxt->instate == XML_PARSER_EOF) {
3714 xmlFree(buffer);
3715 return(NULL);
3718 if (len + 10 > max) {
3719 xmlChar *tmp;
3721 max *= 2;
3722 tmp = (xmlChar *) xmlRealloc(buffer,
3723 max * sizeof(xmlChar));
3724 if (tmp == NULL) {
3725 xmlErrMemory(ctxt, NULL);
3726 xmlFree(buffer);
3727 return(NULL);
3729 buffer = tmp;
3731 COPY_BUF(l,buffer,len,c);
3732 NEXTL(l);
3733 c = CUR_CHAR(l);
3734 if (len > maxLength) {
3735 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3736 xmlFree(buffer);
3737 return(NULL);
3740 buffer[len] = 0;
3741 return(buffer);
3744 if (len == 0)
3745 return(NULL);
3746 if (len > maxLength) {
3747 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3748 return(NULL);
3750 return(xmlStrndup(buf, len));
3754 * xmlParseEntityValue:
3755 * @ctxt: an XML parser context
3756 * @orig: if non-NULL store a copy of the original entity value
3758 * parse a value for ENTITY declarations
3760 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3761 * "'" ([^%&'] | PEReference | Reference)* "'"
3763 * Returns the EntityValue parsed with reference substituted or NULL
3766 xmlChar *
3767 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3768 xmlChar *buf = NULL;
3769 int len = 0;
3770 int size = XML_PARSER_BUFFER_SIZE;
3771 int c, l;
3772 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3773 XML_MAX_HUGE_LENGTH :
3774 XML_MAX_TEXT_LENGTH;
3775 xmlChar stop;
3776 xmlChar *ret = NULL;
3777 const xmlChar *cur = NULL;
3778 xmlParserInputPtr input;
3780 if (RAW == '"') stop = '"';
3781 else if (RAW == '\'') stop = '\'';
3782 else {
3783 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3784 return(NULL);
3786 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3787 if (buf == NULL) {
3788 xmlErrMemory(ctxt, NULL);
3789 return(NULL);
3793 * The content of the entity definition is copied in a buffer.
3796 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3797 input = ctxt->input;
3798 GROW;
3799 if (ctxt->instate == XML_PARSER_EOF)
3800 goto error;
3801 NEXT;
3802 c = CUR_CHAR(l);
3804 * NOTE: 4.4.5 Included in Literal
3805 * When a parameter entity reference appears in a literal entity
3806 * value, ... a single or double quote character in the replacement
3807 * text is always treated as a normal data character and will not
3808 * terminate the literal.
3809 * In practice it means we stop the loop only when back at parsing
3810 * the initial entity and the quote is found
3812 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3813 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3814 if (len + 5 >= size) {
3815 xmlChar *tmp;
3817 size *= 2;
3818 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3819 if (tmp == NULL) {
3820 xmlErrMemory(ctxt, NULL);
3821 goto error;
3823 buf = tmp;
3825 COPY_BUF(l,buf,len,c);
3826 NEXTL(l);
3828 GROW;
3829 c = CUR_CHAR(l);
3830 if (c == 0) {
3831 GROW;
3832 c = CUR_CHAR(l);
3835 if (len > maxLength) {
3836 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3837 "entity value too long\n");
3838 goto error;
3841 buf[len] = 0;
3842 if (ctxt->instate == XML_PARSER_EOF)
3843 goto error;
3844 if (c != stop) {
3845 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3846 goto error;
3848 NEXT;
3851 * Raise problem w.r.t. '&' and '%' being used in non-entities
3852 * reference constructs. Note Charref will be handled in
3853 * xmlStringDecodeEntities()
3855 cur = buf;
3856 while (*cur != 0) { /* non input consuming */
3857 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3858 xmlChar *name;
3859 xmlChar tmp = *cur;
3860 int nameOk = 0;
3862 cur++;
3863 name = xmlParseStringName(ctxt, &cur);
3864 if (name != NULL) {
3865 nameOk = 1;
3866 xmlFree(name);
3868 if ((nameOk == 0) || (*cur != ';')) {
3869 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3870 "EntityValue: '%c' forbidden except for entities references\n",
3871 tmp);
3872 goto error;
3874 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3875 (ctxt->inputNr == 1)) {
3876 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3877 goto error;
3879 if (*cur == 0)
3880 break;
3882 cur++;
3886 * Then PEReference entities are substituted.
3888 * NOTE: 4.4.7 Bypassed
3889 * When a general entity reference appears in the EntityValue in
3890 * an entity declaration, it is bypassed and left as is.
3891 * so XML_SUBSTITUTE_REF is not set here.
3893 ++ctxt->depth;
3894 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3895 0, 0, 0);
3896 --ctxt->depth;
3897 if (orig != NULL) {
3898 *orig = buf;
3899 buf = NULL;
3902 error:
3903 if (buf != NULL)
3904 xmlFree(buf);
3905 return(ret);
3909 * xmlParseAttValueComplex:
3910 * @ctxt: an XML parser context
3911 * @len: the resulting attribute len
3912 * @normalize: whether to apply the inner normalization
3914 * parse a value for an attribute, this is the fallback function
3915 * of xmlParseAttValue() when the attribute parsing requires handling
3916 * of non-ASCII characters, or normalization compaction.
3918 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3920 static xmlChar *
3921 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3922 xmlChar limit = 0;
3923 xmlChar *buf = NULL;
3924 xmlChar *rep = NULL;
3925 size_t len = 0;
3926 size_t buf_size = 0;
3927 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3928 XML_MAX_HUGE_LENGTH :
3929 XML_MAX_TEXT_LENGTH;
3930 int c, l, in_space = 0;
3931 xmlChar *current = NULL;
3932 xmlEntityPtr ent;
3934 if (NXT(0) == '"') {
3935 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3936 limit = '"';
3937 NEXT;
3938 } else if (NXT(0) == '\'') {
3939 limit = '\'';
3940 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3941 NEXT;
3942 } else {
3943 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3944 return(NULL);
3948 * allocate a translation buffer.
3950 buf_size = XML_PARSER_BUFFER_SIZE;
3951 buf = (xmlChar *) xmlMallocAtomic(buf_size);
3952 if (buf == NULL) goto mem_error;
3955 * OK loop until we reach one of the ending char or a size limit.
3957 c = CUR_CHAR(l);
3958 while (((NXT(0) != limit) && /* checked */
3959 (IS_CHAR(c)) && (c != '<')) &&
3960 (ctxt->instate != XML_PARSER_EOF)) {
3961 if (c == '&') {
3962 in_space = 0;
3963 if (NXT(1) == '#') {
3964 int val = xmlParseCharRef(ctxt);
3966 if (val == '&') {
3967 if (ctxt->replaceEntities) {
3968 if (len + 10 > buf_size) {
3969 growBuffer(buf, 10);
3971 buf[len++] = '&';
3972 } else {
3974 * The reparsing will be done in xmlStringGetNodeList()
3975 * called by the attribute() function in SAX.c
3977 if (len + 10 > buf_size) {
3978 growBuffer(buf, 10);
3980 buf[len++] = '&';
3981 buf[len++] = '#';
3982 buf[len++] = '3';
3983 buf[len++] = '8';
3984 buf[len++] = ';';
3986 } else if (val != 0) {
3987 if (len + 10 > buf_size) {
3988 growBuffer(buf, 10);
3990 len += xmlCopyChar(0, &buf[len], val);
3992 } else {
3993 ent = xmlParseEntityRef(ctxt);
3994 ctxt->nbentities++;
3995 if (ent != NULL)
3996 ctxt->nbentities += ent->owner;
3997 if ((ent != NULL) &&
3998 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3999 if (len + 10 > buf_size) {
4000 growBuffer(buf, 10);
4002 if ((ctxt->replaceEntities == 0) &&
4003 (ent->content[0] == '&')) {
4004 buf[len++] = '&';
4005 buf[len++] = '#';
4006 buf[len++] = '3';
4007 buf[len++] = '8';
4008 buf[len++] = ';';
4009 } else {
4010 buf[len++] = ent->content[0];
4012 } else if ((ent != NULL) &&
4013 (ctxt->replaceEntities != 0)) {
4014 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4015 ++ctxt->depth;
4016 rep = xmlStringDecodeEntities(ctxt, ent->content,
4017 XML_SUBSTITUTE_REF,
4018 0, 0, 0);
4019 --ctxt->depth;
4020 if (rep != NULL) {
4021 current = rep;
4022 while (*current != 0) { /* non input consuming */
4023 if ((*current == 0xD) || (*current == 0xA) ||
4024 (*current == 0x9)) {
4025 buf[len++] = 0x20;
4026 current++;
4027 } else
4028 buf[len++] = *current++;
4029 if (len + 10 > buf_size) {
4030 growBuffer(buf, 10);
4033 xmlFree(rep);
4034 rep = NULL;
4036 } else {
4037 if (len + 10 > buf_size) {
4038 growBuffer(buf, 10);
4040 if (ent->content != NULL)
4041 buf[len++] = ent->content[0];
4043 } else if (ent != NULL) {
4044 int i = xmlStrlen(ent->name);
4045 const xmlChar *cur = ent->name;
4048 * This may look absurd but is needed to detect
4049 * entities problems
4051 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4052 (ent->content != NULL) && (ent->checked == 0)) {
4053 unsigned long oldnbent = ctxt->nbentities, diff;
4055 ++ctxt->depth;
4056 rep = xmlStringDecodeEntities(ctxt, ent->content,
4057 XML_SUBSTITUTE_REF, 0, 0, 0);
4058 --ctxt->depth;
4060 diff = ctxt->nbentities - oldnbent + 1;
4061 if (diff > INT_MAX / 2)
4062 diff = INT_MAX / 2;
4063 ent->checked = diff * 2;
4064 if (rep != NULL) {
4065 if (xmlStrchr(rep, '<'))
4066 ent->checked |= 1;
4067 xmlFree(rep);
4068 rep = NULL;
4069 } else {
4070 ent->content[0] = 0;
4075 * Just output the reference
4077 buf[len++] = '&';
4078 while (len + i + 10 > buf_size) {
4079 growBuffer(buf, i + 10);
4081 for (;i > 0;i--)
4082 buf[len++] = *cur++;
4083 buf[len++] = ';';
4086 } else {
4087 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4088 if ((len != 0) || (!normalize)) {
4089 if ((!normalize) || (!in_space)) {
4090 COPY_BUF(l,buf,len,0x20);
4091 while (len + 10 > buf_size) {
4092 growBuffer(buf, 10);
4095 in_space = 1;
4097 } else {
4098 in_space = 0;
4099 COPY_BUF(l,buf,len,c);
4100 if (len + 10 > buf_size) {
4101 growBuffer(buf, 10);
4104 NEXTL(l);
4106 GROW;
4107 c = CUR_CHAR(l);
4108 if (len > maxLength) {
4109 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4110 "AttValue length too long\n");
4111 goto mem_error;
4114 if (ctxt->instate == XML_PARSER_EOF)
4115 goto error;
4117 if ((in_space) && (normalize)) {
4118 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4120 buf[len] = 0;
4121 if (RAW == '<') {
4122 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4123 } else if (RAW != limit) {
4124 if ((c != 0) && (!IS_CHAR(c))) {
4125 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4126 "invalid character in attribute value\n");
4127 } else {
4128 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4129 "AttValue: ' expected\n");
4131 } else
4132 NEXT;
4134 if (attlen != NULL) *attlen = (int) len;
4135 return(buf);
4137 mem_error:
4138 xmlErrMemory(ctxt, NULL);
4139 error:
4140 if (buf != NULL)
4141 xmlFree(buf);
4142 if (rep != NULL)
4143 xmlFree(rep);
4144 return(NULL);
4148 * xmlParseAttValue:
4149 * @ctxt: an XML parser context
4151 * parse a value for an attribute
4152 * Note: the parser won't do substitution of entities here, this
4153 * will be handled later in xmlStringGetNodeList
4155 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4156 * "'" ([^<&'] | Reference)* "'"
4158 * 3.3.3 Attribute-Value Normalization:
4159 * Before the value of an attribute is passed to the application or
4160 * checked for validity, the XML processor must normalize it as follows:
4161 * - a character reference is processed by appending the referenced
4162 * character to the attribute value
4163 * - an entity reference is processed by recursively processing the
4164 * replacement text of the entity
4165 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4166 * appending #x20 to the normalized value, except that only a single
4167 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4168 * parsed entity or the literal entity value of an internal parsed entity
4169 * - other characters are processed by appending them to the normalized value
4170 * If the declared value is not CDATA, then the XML processor must further
4171 * process the normalized attribute value by discarding any leading and
4172 * trailing space (#x20) characters, and by replacing sequences of space
4173 * (#x20) characters by a single space (#x20) character.
4174 * All attributes for which no declaration has been read should be treated
4175 * by a non-validating parser as if declared CDATA.
4177 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4181 xmlChar *
4182 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4183 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4184 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4188 * xmlParseSystemLiteral:
4189 * @ctxt: an XML parser context
4191 * parse an XML Literal
4193 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4195 * Returns the SystemLiteral parsed or NULL
4198 xmlChar *
4199 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4200 xmlChar *buf = NULL;
4201 int len = 0;
4202 int size = XML_PARSER_BUFFER_SIZE;
4203 int cur, l;
4204 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4205 XML_MAX_TEXT_LENGTH :
4206 XML_MAX_NAME_LENGTH;
4207 xmlChar stop;
4208 int state = ctxt->instate;
4209 int count = 0;
4211 SHRINK;
4212 if (RAW == '"') {
4213 NEXT;
4214 stop = '"';
4215 } else if (RAW == '\'') {
4216 NEXT;
4217 stop = '\'';
4218 } else {
4219 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4220 return(NULL);
4223 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4224 if (buf == NULL) {
4225 xmlErrMemory(ctxt, NULL);
4226 return(NULL);
4228 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4229 cur = CUR_CHAR(l);
4230 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4231 if (len + 5 >= size) {
4232 xmlChar *tmp;
4234 size *= 2;
4235 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4236 if (tmp == NULL) {
4237 xmlFree(buf);
4238 xmlErrMemory(ctxt, NULL);
4239 ctxt->instate = (xmlParserInputState) state;
4240 return(NULL);
4242 buf = tmp;
4244 count++;
4245 if (count > 50) {
4246 SHRINK;
4247 GROW;
4248 count = 0;
4249 if (ctxt->instate == XML_PARSER_EOF) {
4250 xmlFree(buf);
4251 return(NULL);
4254 COPY_BUF(l,buf,len,cur);
4255 NEXTL(l);
4256 cur = CUR_CHAR(l);
4257 if (cur == 0) {
4258 GROW;
4259 SHRINK;
4260 cur = CUR_CHAR(l);
4262 if (len > maxLength) {
4263 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4264 xmlFree(buf);
4265 ctxt->instate = (xmlParserInputState) state;
4266 return(NULL);
4269 buf[len] = 0;
4270 ctxt->instate = (xmlParserInputState) state;
4271 if (!IS_CHAR(cur)) {
4272 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4273 } else {
4274 NEXT;
4276 return(buf);
4280 * xmlParsePubidLiteral:
4281 * @ctxt: an XML parser context
4283 * parse an XML public literal
4285 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4287 * Returns the PubidLiteral parsed or NULL.
4290 xmlChar *
4291 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4292 xmlChar *buf = NULL;
4293 int len = 0;
4294 int size = XML_PARSER_BUFFER_SIZE;
4295 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4296 XML_MAX_TEXT_LENGTH :
4297 XML_MAX_NAME_LENGTH;
4298 xmlChar cur;
4299 xmlChar stop;
4300 int count = 0;
4301 xmlParserInputState oldstate = ctxt->instate;
4303 SHRINK;
4304 if (RAW == '"') {
4305 NEXT;
4306 stop = '"';
4307 } else if (RAW == '\'') {
4308 NEXT;
4309 stop = '\'';
4310 } else {
4311 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4312 return(NULL);
4314 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4315 if (buf == NULL) {
4316 xmlErrMemory(ctxt, NULL);
4317 return(NULL);
4319 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4320 cur = CUR;
4321 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4322 if (len + 1 >= size) {
4323 xmlChar *tmp;
4325 size *= 2;
4326 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4327 if (tmp == NULL) {
4328 xmlErrMemory(ctxt, NULL);
4329 xmlFree(buf);
4330 return(NULL);
4332 buf = tmp;
4334 buf[len++] = cur;
4335 count++;
4336 if (count > 50) {
4337 SHRINK;
4338 GROW;
4339 count = 0;
4340 if (ctxt->instate == XML_PARSER_EOF) {
4341 xmlFree(buf);
4342 return(NULL);
4345 NEXT;
4346 cur = CUR;
4347 if (cur == 0) {
4348 GROW;
4349 SHRINK;
4350 cur = CUR;
4352 if (len > maxLength) {
4353 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4354 xmlFree(buf);
4355 return(NULL);
4358 buf[len] = 0;
4359 if (cur != stop) {
4360 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4361 } else {
4362 NEXT;
4364 ctxt->instate = oldstate;
4365 return(buf);
4368 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
/*
 * Lookup table used for the test in the inner loop of the char data
 * scanning (xmlParseCharData() keeps advancing while the entry for the
 * current byte is non-zero).
 *
 * An entry equals its own index for 0x09 and for 0x20..0x7F, except for
 * '&' (0x26), '<' (0x3C) and ']' (0x5D) which are zero.  All other control
 * bytes, CR and LF included, are zero, and so is every byte >= 0x80: the
 * initializer stops after 0x7F and the remaining 128 entries are
 * zero-initialized.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB passes, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, implicitly zero */
};
4409 * xmlParseCharData:
4410 * @ctxt: an XML parser context
4411 * @cdata: int indicating whether we are within a CDATA section
4413 * parse a CharData section.
4414 * if we are within a CDATA section ']]>' marks an end of section.
4416 * The right angle bracket (>) may be represented using the string "&gt;",
4417 * and must, for compatibility, be escaped using "&gt;" or a character
4418 * reference when it appears in the string "]]>" in content, when that
4419 * string is not marking the end of a CDATA section.
4421 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4424 void
4425 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4426 const xmlChar *in;
4427 int nbchar = 0;
4428 int line = ctxt->input->line;
4429 int col = ctxt->input->col;
4430 int ccol;
4432 SHRINK;
4433 GROW;
4435 * Accelerated common case where input don't need to be
4436 * modified before passing it to the handler.
4438 if (!cdata) {
4439 in = ctxt->input->cur;
4440 do {
4441 get_more_space:
4442 while (*in == 0x20) { in++; ctxt->input->col++; }
4443 if (*in == 0xA) {
4444 do {
4445 ctxt->input->line++; ctxt->input->col = 1;
4446 in++;
4447 } while (*in == 0xA);
4448 goto get_more_space;
4450 if (*in == '<') {
4451 nbchar = in - ctxt->input->cur;
4452 if (nbchar > 0) {
4453 const xmlChar *tmp = ctxt->input->cur;
4454 ctxt->input->cur = in;
4456 if ((ctxt->sax != NULL) &&
4457 (ctxt->sax->ignorableWhitespace !=
4458 ctxt->sax->characters)) {
4459 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4460 if (ctxt->sax->ignorableWhitespace != NULL)
4461 ctxt->sax->ignorableWhitespace(ctxt->userData,
4462 tmp, nbchar);
4463 } else {
4464 if (ctxt->sax->characters != NULL)
4465 ctxt->sax->characters(ctxt->userData,
4466 tmp, nbchar);
4467 if (*ctxt->space == -1)
4468 *ctxt->space = -2;
4470 } else if ((ctxt->sax != NULL) &&
4471 (ctxt->sax->characters != NULL)) {
4472 ctxt->sax->characters(ctxt->userData,
4473 tmp, nbchar);
4476 return;
4479 get_more:
4480 ccol = ctxt->input->col;
4481 while (test_char_data[*in]) {
4482 in++;
4483 ccol++;
4485 ctxt->input->col = ccol;
4486 if (*in == 0xA) {
4487 do {
4488 ctxt->input->line++; ctxt->input->col = 1;
4489 in++;
4490 } while (*in == 0xA);
4491 goto get_more;
4493 if (*in == ']') {
4494 if ((in[1] == ']') && (in[2] == '>')) {
4495 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4496 ctxt->input->cur = in + 1;
4497 return;
4499 in++;
4500 ctxt->input->col++;
4501 goto get_more;
4503 nbchar = in - ctxt->input->cur;
4504 if (nbchar > 0) {
4505 if ((ctxt->sax != NULL) &&
4506 (ctxt->sax->ignorableWhitespace !=
4507 ctxt->sax->characters) &&
4508 (IS_BLANK_CH(*ctxt->input->cur))) {
4509 const xmlChar *tmp = ctxt->input->cur;
4510 ctxt->input->cur = in;
4512 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4513 if (ctxt->sax->ignorableWhitespace != NULL)
4514 ctxt->sax->ignorableWhitespace(ctxt->userData,
4515 tmp, nbchar);
4516 } else {
4517 if (ctxt->sax->characters != NULL)
4518 ctxt->sax->characters(ctxt->userData,
4519 tmp, nbchar);
4520 if (*ctxt->space == -1)
4521 *ctxt->space = -2;
4523 line = ctxt->input->line;
4524 col = ctxt->input->col;
4525 } else if (ctxt->sax != NULL) {
4526 if (ctxt->sax->characters != NULL)
4527 ctxt->sax->characters(ctxt->userData,
4528 ctxt->input->cur, nbchar);
4529 line = ctxt->input->line;
4530 col = ctxt->input->col;
4532 /* something really bad happened in the SAX callback */
4533 if (ctxt->instate != XML_PARSER_CONTENT)
4534 return;
4536 ctxt->input->cur = in;
4537 if (*in == 0xD) {
4538 in++;
4539 if (*in == 0xA) {
4540 ctxt->input->cur = in;
4541 in++;
4542 ctxt->input->line++; ctxt->input->col = 1;
4543 continue; /* while */
4545 in--;
4547 if (*in == '<') {
4548 return;
4550 if (*in == '&') {
4551 return;
4553 SHRINK;
4554 GROW;
4555 if (ctxt->instate == XML_PARSER_EOF)
4556 return;
4557 in = ctxt->input->cur;
4558 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4559 nbchar = 0;
4561 ctxt->input->line = line;
4562 ctxt->input->col = col;
4563 xmlParseCharDataComplex(ctxt, cdata);
4567 * xmlParseCharDataComplex:
4568 * @ctxt: an XML parser context
4569 * @cdata: int indicating whether we are within a CDATA section
4571 * parse a CharData section.this is the fallback function
4572 * of xmlParseCharData() when the parsing requires handling
4573 * of non-ASCII characters.
4575 static void
4576 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4577 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4578 int nbchar = 0;
4579 int cur, l;
4580 int count = 0;
4582 SHRINK;
4583 GROW;
4584 cur = CUR_CHAR(l);
4585 while ((cur != '<') && /* checked */
4586 (cur != '&') &&
4587 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4588 if ((cur == ']') && (NXT(1) == ']') &&
4589 (NXT(2) == '>')) {
4590 if (cdata) break;
4591 else {
4592 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4595 COPY_BUF(l,buf,nbchar,cur);
4596 /* move current position before possible calling of ctxt->sax->characters */
4597 NEXTL(l);
4598 cur = CUR_CHAR(l);
4599 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4600 buf[nbchar] = 0;
4603 * OK the segment is to be consumed as chars.
4605 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4606 if (areBlanks(ctxt, buf, nbchar, 0)) {
4607 if (ctxt->sax->ignorableWhitespace != NULL)
4608 ctxt->sax->ignorableWhitespace(ctxt->userData,
4609 buf, nbchar);
4610 } else {
4611 if (ctxt->sax->characters != NULL)
4612 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4613 if ((ctxt->sax->characters !=
4614 ctxt->sax->ignorableWhitespace) &&
4615 (*ctxt->space == -1))
4616 *ctxt->space = -2;
4619 nbchar = 0;
4620 /* something really bad happened in the SAX callback */
4621 if (ctxt->instate != XML_PARSER_CONTENT)
4622 return;
4624 count++;
4625 if (count > 50) {
4626 SHRINK;
4627 GROW;
4628 count = 0;
4629 if (ctxt->instate == XML_PARSER_EOF)
4630 return;
4633 if (nbchar != 0) {
4634 buf[nbchar] = 0;
4636 * OK the segment is to be consumed as chars.
4638 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4639 if (areBlanks(ctxt, buf, nbchar, 0)) {
4640 if (ctxt->sax->ignorableWhitespace != NULL)
4641 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4642 } else {
4643 if (ctxt->sax->characters != NULL)
4644 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4645 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4646 (*ctxt->space == -1))
4647 *ctxt->space = -2;
4651 if ((cur != 0) && (!IS_CHAR(cur))) {
4652 /* Generate the error and skip the offending character */
4653 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4654 "PCDATA invalid Char value %d\n",
4655 cur);
4656 NEXTL(l);
4661 * xmlParseExternalID:
4662 * @ctxt: an XML parser context
4663 * @publicID: a xmlChar** receiving PubidLiteral
4664 * @strict: indicate whether we should restrict parsing to only
4665 * production [75], see NOTE below
4667 * Parse an External ID or a Public ID
4669 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4670 * 'PUBLIC' S PubidLiteral S SystemLiteral
4672 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4673 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4675 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4677 * Returns the function returns SystemLiteral and in the second
4678 * case publicID receives PubidLiteral, is strict is off
4679 * it is possible to return NULL and have publicID set.
4682 xmlChar *
4683 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4684 xmlChar *URI = NULL;
4686 SHRINK;
4688 *publicID = NULL;
4689 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4690 SKIP(6);
4691 if (SKIP_BLANKS == 0) {
4692 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4693 "Space required after 'SYSTEM'\n");
4695 URI = xmlParseSystemLiteral(ctxt);
4696 if (URI == NULL) {
4697 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4699 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4700 SKIP(6);
4701 if (SKIP_BLANKS == 0) {
4702 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4703 "Space required after 'PUBLIC'\n");
4705 *publicID = xmlParsePubidLiteral(ctxt);
4706 if (*publicID == NULL) {
4707 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4709 if (strict) {
4711 * We don't handle [83] so "S SystemLiteral" is required.
4713 if (SKIP_BLANKS == 0) {
4714 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4715 "Space required after the Public Identifier\n");
4717 } else {
4719 * We handle [83] so we return immediately, if
4720 * "S SystemLiteral" is not detected. We skip blanks if no
4721 * system literal was found, but this is harmless since we must
4722 * be at the end of a NotationDecl.
4724 if (SKIP_BLANKS == 0) return(NULL);
4725 if ((CUR != '\'') && (CUR != '"')) return(NULL);
4727 URI = xmlParseSystemLiteral(ctxt);
4728 if (URI == NULL) {
4729 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4732 return(URI);
4736 * xmlParseCommentComplex:
4737 * @ctxt: an XML parser context
4738 * @buf: the already parsed part of the buffer
4739 * @len: number of bytes in the buffer
4740 * @size: allocated size of the buffer
4742 * Skip an XML (SGML) comment <!-- .... -->
4743 * The spec says that "For compatibility, the string "--" (double-hyphen)
4744 * must not occur within comments. "
4745 * This is the slow routine in case the accelerator for ascii didn't work
4747 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
/*
 * Character-by-character comment parser, entered from xmlParseComment once
 * its byte-wise scan stops on input it cannot handle.
 *
 * buf/len/size describe the text already collected by the caller; a NULL
 * buf makes this routine allocate a fresh buffer.  Every exit path below
 * either frees buf or returns before buf was successfully allocated, so
 * the caller must not touch buf afterwards.
 *
 * The loop keeps a window of three code points (q, r, cur) with their byte
 * lengths (ql, rl, l).  q is appended to buf on each iteration and the
 * window slides by one, until the window reads - - > or cur stops being a
 * valid Char.
 */
4749 static void
4750 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4751 size_t len, size_t size) {
4752 int q, ql;
4753 int r, rl;
4754 int cur, l;
4755 size_t count = 0;
/* Upper bound on collected bytes; the larger limit applies with XML_PARSE_HUGE. */
4756 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4757 XML_MAX_HUGE_LENGTH :
4758 XML_MAX_TEXT_LENGTH;
4759 int inputid;
/* Remember the starting input so an entity-boundary crossing can be reported at the end. */
4761 inputid = ctxt->input->id;
/* Nothing collected by the caller: start with an empty buffer of the default size. */
4763 if (buf == NULL) {
4764 len = 0;
4765 size = XML_PARSER_BUFFER_SIZE;
4766 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4767 if (buf == NULL) {
4768 xmlErrMemory(ctxt, NULL);
4769 return;
4772 GROW; /* Assure there's enough input data */
/* Prime the window: read q, then r, then cur, validating each one. */
4773 q = CUR_CHAR(ql);
4774 if (q == 0)
4775 goto not_terminated;
4776 if (!IS_CHAR(q)) {
4777 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4778 "xmlParseComment: invalid xmlChar value %d\n",
4780 xmlFree (buf);
4781 return;
4783 NEXTL(ql);
4784 r = CUR_CHAR(rl);
4785 if (r == 0)
4786 goto not_terminated;
4787 if (!IS_CHAR(r)) {
4788 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4789 "xmlParseComment: invalid xmlChar value %d\n",
4791 xmlFree (buf);
4792 return;
4794 NEXTL(rl);
4795 cur = CUR_CHAR(l);
4796 if (cur == 0)
4797 goto not_terminated;
/* Continue while cur is a valid Char and the window is not the closing sequence. */
4798 while (IS_CHAR(cur) && /* checked */
4799 ((cur != '>') ||
4800 (r != '-') || (q != '-'))) {
/* Two hyphens not followed by the closing bracket: reported, parsing goes on. */
4801 if ((r == '-') && (q == '-')) {
4802 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
/* Double the buffer once size - len has dropped to 5 or less. */
4804 if (len + 5 >= size) {
4805 xmlChar *new_buf;
4806 size_t new_size;
4808 new_size = size * 2;
4809 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4810 if (new_buf == NULL) {
4811 xmlFree (buf);
4812 xmlErrMemory(ctxt, NULL);
4813 return;
4815 buf = new_buf;
4816 size = new_size;
/* Emit the oldest code point, then slide the window by one. */
4818 COPY_BUF(ql,buf,len,q);
4819 q = r;
4820 ql = rl;
4821 r = cur;
4822 rl = l;
/* After more than 50 iterations, run SHRINK/GROW on the input and bail out if the parser reached EOF state. */
4824 count++;
4825 if (count > 50) {
4826 SHRINK;
4827 GROW;
4828 count = 0;
4829 if (ctxt->instate == XML_PARSER_EOF) {
4830 xmlFree(buf);
4831 return;
4834 NEXTL(l);
4835 cur = CUR_CHAR(l);
/* A zero here may only mean the current chunk is exhausted: refill and read again. */
4836 if (cur == 0) {
4837 SHRINK;
4838 GROW;
4839 cur = CUR_CHAR(l);
/* Enforce the size cap chosen at the top of the function. */
4842 if (len > maxLength) {
4843 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4844 "Comment too big found", NULL);
4845 xmlFree (buf);
4846 return;
/* Loop finished: terminate the text, then classify why the loop stopped. */
4849 buf[len] = 0;
4850 if (cur == 0) {
4851 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4852 "Comment not terminated \n<!--%.50s\n", buf);
4853 } else if (!IS_CHAR(cur)) {
4854 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4855 "xmlParseComment: invalid xmlChar value %d\n",
4856 cur);
4857 } else {
/* Proper terminator found: check the entity boundary, consume the bracket, deliver to SAX. */
4858 if (inputid != ctxt->input->id) {
4859 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4860 "Comment doesn't start and stop in the same"
4861 " entity\n");
4863 NEXT;
4864 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4865 (!ctxt->disableSAX))
4866 ctxt->sax->comment(ctxt->userData, buf);
4868 xmlFree(buf);
4869 return;
/* Shared exit for input that ended while the window was still being primed. */
4870 not_terminated:
4871 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4872 "Comment not terminated\n", NULL);
4873 xmlFree(buf);
4874 return;
4878 * xmlParseComment:
4879 * @ctxt: an XML parser context
4881 * Skip an XML (SGML) comment <!-- .... -->
4882 * The spec says that "For compatibility, the string "--" (double-hyphen)
4883 * must not occur within comments. "
4885 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
/*
 * Parse a comment starting at the current position and report its text
 * through ctxt->sax->comment.
 *
 * Does nothing unless the input starts with the four-byte comment opener.
 * While parsing, ctxt->instate is XML_PARSER_COMMENT; the saved state is
 * put back on the normal exits and on allocation failure.
 *
 * Fast path: bytes are scanned in place through the pointer in.  The inner
 * scan accepts tab and 0x20..0x7F except the hyphen, and line feeds are
 * consumed separately while updating line/col.  Each scanned run is
 * appended to buf only when a SAX comment handler is installed.
 *
 * When the outer do/while condition fails (a byte outside tab, line feed
 * and 0x20..0x7F), the text collected so far is handed to
 * xmlParseCommentComplex, which takes over buf.
 *
 * NOTE(review): the Comment too big exit and the XML_PARSER_EOF exits
 * return without restoring ctxt->instate. For the EOF exits that looks
 * deliberate; confirm for the size-limit exit.
 */
4887 void
4888 xmlParseComment(xmlParserCtxtPtr ctxt) {
4889 xmlChar *buf = NULL;
4890 size_t size = XML_PARSER_BUFFER_SIZE;
4891 size_t len = 0;
/* Upper bound on collected bytes; the larger limit applies with XML_PARSE_HUGE. */
4892 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4893 XML_MAX_HUGE_LENGTH :
4894 XML_MAX_TEXT_LENGTH;
4895 xmlParserInputState state;
4896 const xmlChar *in;
4897 size_t nbchar = 0;
4898 int ccol;
4899 int inputid;
4902 * Check that there is a comment right here.
4904 if ((RAW != '<') || (NXT(1) != '!') ||
4905 (NXT(2) != '-') || (NXT(3) != '-')) return;
/* Save the caller state and the input id, then step over the opener. */
4906 state = ctxt->instate;
4907 ctxt->instate = XML_PARSER_COMMENT;
4908 inputid = ctxt->input->id;
4909 SKIP(4);
4910 SHRINK;
4911 GROW;
4914 * Accelerated common case where input don't need to be
4915 * modified before passing it to the handler.
4917 in = ctxt->input->cur;
4918 do {
4919 if (*in == 0xA) {
4920 do {
4921 ctxt->input->line++; ctxt->input->col = 1;
4922 in++;
4923 } while (*in == 0xA);
4925 get_more:
4926 ccol = ctxt->input->col;
4927 while (((*in > '-') && (*in <= 0x7F)) ||
4928 ((*in >= 0x20) && (*in < '-')) ||
4929 (*in == 0x09)) {
4930 in++;
4931 ccol++;
4933 ctxt->input->col = ccol;
4934 if (*in == 0xA) {
4935 do {
4936 ctxt->input->line++; ctxt->input->col = 1;
4937 in++;
4938 } while (*in == 0xA);
4939 goto get_more;
4941 nbchar = in - ctxt->input->cur;
4943 * save current set of data
4945 if (nbchar > 0) {
4946 if ((ctxt->sax != NULL) &&
4947 (ctxt->sax->comment != NULL)) {
4948 if (buf == NULL) {
4949 if ((*in == '-') && (in[1] == '-'))
4950 size = nbchar + 1;
4951 else
4952 size = XML_PARSER_BUFFER_SIZE + nbchar;
4953 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4954 if (buf == NULL) {
4955 xmlErrMemory(ctxt, NULL);
4956 ctxt->instate = state;
4957 return;
4959 len = 0;
4960 } else if (len + nbchar + 1 >= size) {
4961 xmlChar *new_buf;
4962 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4963 new_buf = (xmlChar *) xmlRealloc(buf,
4964 size * sizeof(xmlChar));
4965 if (new_buf == NULL) {
4966 xmlFree (buf);
4967 xmlErrMemory(ctxt, NULL);
4968 ctxt->instate = state;
4969 return;
4971 buf = new_buf;
4973 memcpy(&buf[len], ctxt->input->cur, nbchar);
4974 len += nbchar;
4975 buf[len] = 0;
4978 if (len > maxLength) {
4979 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4980 "Comment too big found", NULL);
4981 xmlFree (buf);
4982 return;
4984 ctxt->input->cur = in;
4985 if (*in == 0xA) {
4986 in++;
4987 ctxt->input->line++; ctxt->input->col = 1;
4989 if (*in == 0xD) {
4990 in++;
4991 if (*in == 0xA) {
4992 ctxt->input->cur = in;
4993 in++;
4994 ctxt->input->line++; ctxt->input->col = 1;
4995 goto get_more;
4997 in--;
4999 SHRINK;
5000 GROW;
5001 if (ctxt->instate == XML_PARSER_EOF) {
5002 xmlFree(buf);
5003 return;
5005 in = ctxt->input->cur;
5006 if (*in == '-') {
5007 if (in[1] == '-') {
5008 if (in[2] == '>') {
5009 if (ctxt->input->id != inputid) {
5010 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5011 "comment doesn't start and stop in the"
5012 " same entity\n");
5014 SKIP(3);
5015 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5016 (!ctxt->disableSAX)) {
5017 if (buf != NULL)
5018 ctxt->sax->comment(ctxt->userData, buf);
5019 else
5020 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5022 if (buf != NULL)
5023 xmlFree(buf);
5024 if (ctxt->instate != XML_PARSER_EOF)
5025 ctxt->instate = state;
5026 return;
5028 if (buf != NULL) {
5029 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5030 "Double hyphen within comment: "
5031 "<!--%.50s\n",
5032 buf);
5033 } else
5034 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5035 "Double hyphen within comment\n", NULL);
5036 if (ctxt->instate == XML_PARSER_EOF) {
5037 xmlFree(buf);
5038 return;
5040 in++;
5041 ctxt->input->col++;
5043 in++;
5044 ctxt->input->col++;
5045 goto get_more;
5047 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5048 xmlParseCommentComplex(ctxt, buf, len, size);
5049 ctxt->instate = state;
5050 return;
5055 * xmlParsePITarget:
5056 * @ctxt: an XML parser context
5058 * parse the name of a PI
5060 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5062 * Returns the PITarget name or NULL
5065 const xmlChar *
5066 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5067 const xmlChar *name;
5069 name = xmlParseName(ctxt);
5070 if ((name != NULL) &&
5071 ((name[0] == 'x') || (name[0] == 'X')) &&
5072 ((name[1] == 'm') || (name[1] == 'M')) &&
5073 ((name[2] == 'l') || (name[2] == 'L'))) {
5074 int i;
5075 if ((name[0] == 'x') && (name[1] == 'm') &&
5076 (name[2] == 'l') && (name[3] == 0)) {
5077 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5078 "XML declaration allowed only at the start of the document\n");
5079 return(name);
5080 } else if (name[3] == 0) {
5081 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5082 return(name);
5084 for (i = 0;;i++) {
5085 if (xmlW3CPIs[i] == NULL) break;
5086 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5087 return(name);
5089 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5090 "xmlParsePITarget: invalid name prefix 'xml'\n",
5091 NULL, NULL);
5093 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5094 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5095 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5097 return(name);
5100 #ifdef LIBXML_CATALOG_ENABLED
5102 * xmlParseCatalogPI:
5103 * @ctxt: an XML parser context
5104 * @catalog: the PI value string
5106 * parse an XML Catalog Processing Instruction.
5108 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5110 * Occurs only if allowed by the user and if happening in the Misc
5111 * part of the document before any doctype information
5112 * This will add the given catalog to the parsing context in order
5113 * to be used if there is a resolution need further down in the document
5116 static void
5117 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5118 xmlChar *URL = NULL;
5119 const xmlChar *tmp, *base;
5120 xmlChar marker;
5122 tmp = catalog;
5123 while (IS_BLANK_CH(*tmp)) tmp++;
5124 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5125 goto error;
5126 tmp += 7;
5127 while (IS_BLANK_CH(*tmp)) tmp++;
5128 if (*tmp != '=') {
5129 return;
5131 tmp++;
5132 while (IS_BLANK_CH(*tmp)) tmp++;
5133 marker = *tmp;
5134 if ((marker != '\'') && (marker != '"'))
5135 goto error;
5136 tmp++;
5137 base = tmp;
5138 while ((*tmp != 0) && (*tmp != marker)) tmp++;
5139 if (*tmp == 0)
5140 goto error;
5141 URL = xmlStrndup(base, tmp - base);
5142 tmp++;
5143 while (IS_BLANK_CH(*tmp)) tmp++;
5144 if (*tmp != 0)
5145 goto error;
5147 if (URL != NULL) {
5148 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5149 xmlFree(URL);
5151 return;
5153 error:
5154 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5155 "Catalog PI syntax error: %s\n",
5156 catalog, NULL);
5157 if (URL != NULL)
5158 xmlFree(URL);
5160 #endif
5163 * xmlParsePI:
5164 * @ctxt: an XML parser context
5166 * parse an XML Processing Instruction.
5168 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5170 * The processing is transferred to SAX once parsed.
/*
 * Parse a processing instruction and report it through
 * ctxt->sax->processingInstruction.
 *
 * Nothing happens unless the input starts with the two-byte PI opener.
 * While parsing, ctxt->instate is XML_PARSER_PI; the saved state is put
 * back at the end unless the parser has switched to XML_PARSER_EOF.
 *
 * Two shapes are handled once the target name is parsed:
 *   - target immediately followed by the closer: reported with NULL data;
 *   - target, blanks, then content: the content is accumulated into a
 *     growing buffer until the closer, then reported as the data string.
 *
 * When catalog support is compiled in and the PI was met in
 * XML_PARSER_MISC or XML_PARSER_START state with the XML_CATALOG_PI
 * target, the content is also passed to xmlParseCatalogPI, provided
 * xmlCatalogGetDefaults allows document or all catalogs.
 */
5173 void
5174 xmlParsePI(xmlParserCtxtPtr ctxt) {
5175 xmlChar *buf = NULL;
5176 size_t len = 0;
5177 size_t size = XML_PARSER_BUFFER_SIZE;
/* Upper bound on collected bytes; the larger limit applies with XML_PARSE_HUGE. */
5178 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5179 XML_MAX_HUGE_LENGTH :
5180 XML_MAX_TEXT_LENGTH;
5181 int cur, l;
5182 const xmlChar *target;
5183 xmlParserInputState state;
5184 int count = 0;
5186 if ((RAW == '<') && (NXT(1) == '?')) {
5187 int inputid = ctxt->input->id;
5188 state = ctxt->instate;
5189 ctxt->instate = XML_PARSER_PI;
5191 * this is a Processing Instruction.
5193 SKIP(2);
5194 SHRINK;
5197 * Parse the target name and check for special support like
5198 * namespace.
5200 target = xmlParsePITarget(ctxt);
5201 if (target != NULL) {
/* Empty PI: the closer follows the target directly, so there is no data. */
5202 if ((RAW == '?') && (NXT(1) == '>')) {
5203 if (inputid != ctxt->input->id) {
5204 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5205 "PI declaration doesn't start and stop in"
5206 " the same entity\n");
5208 SKIP(2);
5211 * SAX: PI detected.
5213 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5214 (ctxt->sax->processingInstruction != NULL))
5215 ctxt->sax->processingInstruction(ctxt->userData,
5216 target, NULL);
5217 if (ctxt->instate != XML_PARSER_EOF)
5218 ctxt->instate = state;
5219 return;
/* PI with content: allocate the accumulation buffer. */
5221 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5222 if (buf == NULL) {
5223 xmlErrMemory(ctxt, NULL);
5224 ctxt->instate = state;
5225 return;
/* A blank is required between target and content; its absence is reported but parsing goes on. */
5227 if (SKIP_BLANKS == 0) {
5228 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5229 "ParsePI: PI %s space expected\n", target);
5231 cur = CUR_CHAR(l);
/* Copy characters until the closer or until cur is not a valid Char. */
5232 while (IS_CHAR(cur) && /* checked */
5233 ((cur != '?') || (NXT(1) != '>'))) {
/* Double the buffer once size - len has dropped to 5 or less. */
5234 if (len + 5 >= size) {
5235 xmlChar *tmp;
5236 size_t new_size = size * 2;
5237 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5238 if (tmp == NULL) {
5239 xmlErrMemory(ctxt, NULL);
5240 xmlFree(buf);
5241 ctxt->instate = state;
5242 return;
5244 buf = tmp;
5245 size = new_size;
/* After more than 50 iterations, run SHRINK/GROW on the input and bail out if the parser reached EOF state. */
5247 count++;
5248 if (count > 50) {
5249 SHRINK;
5250 GROW;
5251 if (ctxt->instate == XML_PARSER_EOF) {
5252 xmlFree(buf);
5253 return;
5255 count = 0;
5257 COPY_BUF(l,buf,len,cur);
5258 NEXTL(l);
5259 cur = CUR_CHAR(l);
/* A zero here may only mean the current chunk is exhausted: refill and read again. */
5260 if (cur == 0) {
5261 SHRINK;
5262 GROW;
5263 cur = CUR_CHAR(l);
/* Enforce the size cap chosen at the top of the function. */
5265 if (len > maxLength) {
5266 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5267 "PI %s too big found", target);
5268 xmlFree(buf);
5269 ctxt->instate = state;
5270 return;
5273 buf[len] = 0;
/* The loop can only end properly on the question mark of the closer. */
5274 if (cur != '?') {
5275 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5276 "ParsePI: PI %s never end ...\n", target);
5277 } else {
5278 if (inputid != ctxt->input->id) {
5279 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5280 "PI declaration doesn't start and stop in"
5281 " the same entity\n");
5283 SKIP(2);
5285 #ifdef LIBXML_CATALOG_ENABLED
/* Catalog PIs are honoured only when met in MISC or START state. */
5286 if (((state == XML_PARSER_MISC) ||
5287 (state == XML_PARSER_START)) &&
5288 (xmlStrEqual(target, XML_CATALOG_PI))) {
5289 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5290 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5291 (allow == XML_CATA_ALLOW_ALL))
5292 xmlParseCatalogPI(ctxt, buf);
5294 #endif
5298 * SAX: PI detected.
5300 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5301 (ctxt->sax->processingInstruction != NULL))
5302 ctxt->sax->processingInstruction(ctxt->userData,
5303 target, buf);
5305 xmlFree(buf);
5306 } else {
5307 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5309 if (ctxt->instate != XML_PARSER_EOF)
5310 ctxt->instate = state;
5315 * xmlParseNotationDecl:
5316 * @ctxt: an XML parser context
5318 * parse a notation declaration
5320 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5322 * Hence there are actually 3 choices:
5323 * 'PUBLIC' S PubidLiteral
5324 * 'PUBLIC' S PubidLiteral S SystemLiteral
5325 * and 'SYSTEM' S SystemLiteral
5327 * See the NOTE on xmlParseExternalID().
5330 void
5331 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5332 const xmlChar *name;
5333 xmlChar *Pubid;
5334 xmlChar *Systemid;
5336 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5337 int inputid = ctxt->input->id;
5338 SHRINK;
5339 SKIP(10);
5340 if (SKIP_BLANKS == 0) {
5341 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5342 "Space required after '<!NOTATION'\n");
5343 return;
5346 name = xmlParseName(ctxt);
5347 if (name == NULL) {
5348 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5349 return;
5351 if (xmlStrchr(name, ':') != NULL) {
5352 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5353 "colons are forbidden from notation names '%s'\n",
5354 name, NULL, NULL);
5356 if (SKIP_BLANKS == 0) {
5357 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5358 "Space required after the NOTATION name'\n");
5359 return;
5363 * Parse the IDs.
5365 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5366 SKIP_BLANKS;
5368 if (RAW == '>') {
5369 if (inputid != ctxt->input->id) {
5370 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5371 "Notation declaration doesn't start and stop"
5372 " in the same entity\n");
5374 NEXT;
5375 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5376 (ctxt->sax->notationDecl != NULL))
5377 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5378 } else {
5379 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5381 if (Systemid != NULL) xmlFree(Systemid);
5382 if (Pubid != NULL) xmlFree(Pubid);
5387 * xmlParseEntityDecl:
5388 * @ctxt: an XML parser context
5390 * parse <!ENTITY declarations
5392 * [70] EntityDecl ::= GEDecl | PEDecl
5394 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5396 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5398 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5400 * [74] PEDef ::= EntityValue | ExternalID
5402 * [76] NDataDecl ::= S 'NDATA' S Name
5404 * [ VC: Notation Declared ]
5405 * The Name must match the declared name of a notation.
5408 void
5409 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5410 const xmlChar *name = NULL;
5411 xmlChar *value = NULL;
5412 xmlChar *URI = NULL, *literal = NULL;
5413 const xmlChar *ndata = NULL;
5414 int isParameter = 0;
5415 xmlChar *orig = NULL;
5417 /* GROW; done in the caller */
5418 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5419 int inputid = ctxt->input->id;
5420 SHRINK;
5421 SKIP(8);
5422 if (SKIP_BLANKS == 0) {
5423 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5424 "Space required after '<!ENTITY'\n");
5427 if (RAW == '%') {
5428 NEXT;
5429 if (SKIP_BLANKS == 0) {
5430 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5431 "Space required after '%%'\n");
5433 isParameter = 1;
5436 name = xmlParseName(ctxt);
5437 if (name == NULL) {
5438 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5439 "xmlParseEntityDecl: no name\n");
5440 return;
5442 if (xmlStrchr(name, ':') != NULL) {
5443 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5444 "colons are forbidden from entities names '%s'\n",
5445 name, NULL, NULL);
5447 if (SKIP_BLANKS == 0) {
5448 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5449 "Space required after the entity name\n");
5452 ctxt->instate = XML_PARSER_ENTITY_DECL;
5454 * handle the various case of definitions...
5456 if (isParameter) {
5457 if ((RAW == '"') || (RAW == '\'')) {
5458 value = xmlParseEntityValue(ctxt, &orig);
5459 if (value) {
5460 if ((ctxt->sax != NULL) &&
5461 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5462 ctxt->sax->entityDecl(ctxt->userData, name,
5463 XML_INTERNAL_PARAMETER_ENTITY,
5464 NULL, NULL, value);
5466 } else {
5467 URI = xmlParseExternalID(ctxt, &literal, 1);
5468 if ((URI == NULL) && (literal == NULL)) {
5469 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5471 if (URI) {
5472 xmlURIPtr uri;
5474 uri = xmlParseURI((const char *) URI);
5475 if (uri == NULL) {
5476 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5477 "Invalid URI: %s\n", URI);
5479 * This really ought to be a well formedness error
5480 * but the XML Core WG decided otherwise c.f. issue
5481 * E26 of the XML erratas.
5483 } else {
5484 if (uri->fragment != NULL) {
5486 * Okay this is foolish to block those but not
5487 * invalid URIs.
5489 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5490 } else {
5491 if ((ctxt->sax != NULL) &&
5492 (!ctxt->disableSAX) &&
5493 (ctxt->sax->entityDecl != NULL))
5494 ctxt->sax->entityDecl(ctxt->userData, name,
5495 XML_EXTERNAL_PARAMETER_ENTITY,
5496 literal, URI, NULL);
5498 xmlFreeURI(uri);
5502 } else {
5503 if ((RAW == '"') || (RAW == '\'')) {
5504 value = xmlParseEntityValue(ctxt, &orig);
5505 if ((ctxt->sax != NULL) &&
5506 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5507 ctxt->sax->entityDecl(ctxt->userData, name,
5508 XML_INTERNAL_GENERAL_ENTITY,
5509 NULL, NULL, value);
5511 * For expat compatibility in SAX mode.
5513 if ((ctxt->myDoc == NULL) ||
5514 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5515 if (ctxt->myDoc == NULL) {
5516 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5517 if (ctxt->myDoc == NULL) {
5518 xmlErrMemory(ctxt, "New Doc failed");
5519 return;
5521 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5523 if (ctxt->myDoc->intSubset == NULL)
5524 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5525 BAD_CAST "fake", NULL, NULL);
5527 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5528 NULL, NULL, value);
5530 } else {
5531 URI = xmlParseExternalID(ctxt, &literal, 1);
5532 if ((URI == NULL) && (literal == NULL)) {
5533 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5535 if (URI) {
5536 xmlURIPtr uri;
5538 uri = xmlParseURI((const char *)URI);
5539 if (uri == NULL) {
5540 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5541 "Invalid URI: %s\n", URI);
5543 * This really ought to be a well formedness error
5544 * but the XML Core WG decided otherwise c.f. issue
5545 * E26 of the XML erratas.
5547 } else {
5548 if (uri->fragment != NULL) {
5550 * Okay this is foolish to block those but not
5551 * invalid URIs.
5553 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5555 xmlFreeURI(uri);
5558 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5559 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5560 "Space required before 'NDATA'\n");
5562 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5563 SKIP(5);
5564 if (SKIP_BLANKS == 0) {
5565 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5566 "Space required after 'NDATA'\n");
5568 ndata = xmlParseName(ctxt);
5569 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5570 (ctxt->sax->unparsedEntityDecl != NULL))
5571 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5572 literal, URI, ndata);
5573 } else {
5574 if ((ctxt->sax != NULL) &&
5575 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5576 ctxt->sax->entityDecl(ctxt->userData, name,
5577 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5578 literal, URI, NULL);
5580 * For expat compatibility in SAX mode.
5581 * assuming the entity replacement was asked for
5583 if ((ctxt->replaceEntities != 0) &&
5584 ((ctxt->myDoc == NULL) ||
5585 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5586 if (ctxt->myDoc == NULL) {
5587 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588 if (ctxt->myDoc == NULL) {
5589 xmlErrMemory(ctxt, "New Doc failed");
5590 return;
5592 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5595 if (ctxt->myDoc->intSubset == NULL)
5596 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5597 BAD_CAST "fake", NULL, NULL);
5598 xmlSAX2EntityDecl(ctxt, name,
5599 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5600 literal, URI, NULL);
5605 if (ctxt->instate == XML_PARSER_EOF)
5606 goto done;
5607 SKIP_BLANKS;
5608 if (RAW != '>') {
5609 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5610 "xmlParseEntityDecl: entity %s not terminated\n", name);
5611 xmlHaltParser(ctxt);
5612 } else {
5613 if (inputid != ctxt->input->id) {
5614 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5615 "Entity declaration doesn't start and stop in"
5616 " the same entity\n");
5618 NEXT;
5620 if (orig != NULL) {
5622 * Ugly mechanism to save the raw entity value.
5624 xmlEntityPtr cur = NULL;
5626 if (isParameter) {
5627 if ((ctxt->sax != NULL) &&
5628 (ctxt->sax->getParameterEntity != NULL))
5629 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5630 } else {
5631 if ((ctxt->sax != NULL) &&
5632 (ctxt->sax->getEntity != NULL))
5633 cur = ctxt->sax->getEntity(ctxt->userData, name);
5634 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5635 cur = xmlSAX2GetEntity(ctxt, name);
5638 if ((cur != NULL) && (cur->orig == NULL)) {
5639 cur->orig = orig;
5640 orig = NULL;
5644 done:
5645 if (value != NULL) xmlFree(value);
5646 if (URI != NULL) xmlFree(URI);
5647 if (literal != NULL) xmlFree(literal);
5648 if (orig != NULL) xmlFree(orig);
5653 * xmlParseDefaultDecl:
5654 * @ctxt: an XML parser context
5655 * @value: Receive a possible fixed default value for the attribute
5657 * Parse an attribute default declaration
5659 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5661 * [ VC: Required Attribute ]
5662 * if the default declaration is the keyword #REQUIRED, then the
5663 * attribute must be specified for all elements of the type in the
5664 * attribute-list declaration.
5666 * [ VC: Attribute Default Legal ]
5667 * The declared default value must meet the lexical constraints of
5668 * the declared attribute type c.f. xmlValidateAttributeDecl()
5670 * [ VC: Fixed Attribute Default ]
5671 * if an attribute has a default value declared with the #FIXED
5672 * keyword, instances of that attribute must match the default value.
5674 * [ WFC: No < in Attribute Values ]
5675 * handled in xmlParseAttValue()
5677 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5678 * or XML_ATTRIBUTE_FIXED.
5682 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5683 int val;
5684 xmlChar *ret;
5686 *value = NULL;
5687 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5688 SKIP(9);
5689 return(XML_ATTRIBUTE_REQUIRED);
5691 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5692 SKIP(8);
5693 return(XML_ATTRIBUTE_IMPLIED);
5695 val = XML_ATTRIBUTE_NONE;
5696 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5697 SKIP(6);
5698 val = XML_ATTRIBUTE_FIXED;
5699 if (SKIP_BLANKS == 0) {
5700 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5701 "Space required after '#FIXED'\n");
5704 ret = xmlParseAttValue(ctxt);
5705 ctxt->instate = XML_PARSER_DTD;
5706 if (ret == NULL) {
5707 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5708 "Attribute default value declaration error\n");
5709 } else
5710 *value = ret;
5711 return(val);
5715 * xmlParseNotationType:
5716 * @ctxt: an XML parser context
5718 * parse a Notation attribute type.
5720 * Note: the leading 'NOTATION' S part has already been parsed...
5722 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5724 * [ VC: Notation Attributes ]
5725 * Values of this type must match one of the notation names included
5726 * in the declaration; all notation names in the declaration must be declared.
5728 * Returns: the notation attribute tree built while parsing
5731 xmlEnumerationPtr
5732 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5733 const xmlChar *name;
5734 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5736 if (RAW != '(') {
5737 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5738 return(NULL);
5740 SHRINK;
5741 do {
5742 NEXT;
5743 SKIP_BLANKS;
5744 name = xmlParseName(ctxt);
5745 if (name == NULL) {
5746 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5747 "Name expected in NOTATION declaration\n");
5748 xmlFreeEnumeration(ret);
5749 return(NULL);
5751 tmp = ret;
5752 while (tmp != NULL) {
5753 if (xmlStrEqual(name, tmp->name)) {
5754 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5755 "standalone: attribute notation value token %s duplicated\n",
5756 name, NULL);
5757 if (!xmlDictOwns(ctxt->dict, name))
5758 xmlFree((xmlChar *) name);
5759 break;
5761 tmp = tmp->next;
5763 if (tmp == NULL) {
5764 cur = xmlCreateEnumeration(name);
5765 if (cur == NULL) {
5766 xmlFreeEnumeration(ret);
5767 return(NULL);
5769 if (last == NULL) ret = last = cur;
5770 else {
5771 last->next = cur;
5772 last = cur;
5775 SKIP_BLANKS;
5776 } while (RAW == '|');
5777 if (RAW != ')') {
5778 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5779 xmlFreeEnumeration(ret);
5780 return(NULL);
5782 NEXT;
5783 return(ret);
5787 * xmlParseEnumerationType:
5788 * @ctxt: an XML parser context
5790 * parse an Enumeration attribute type.
5792 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5794 * [ VC: Enumeration ]
5795 * Values of this type must match one of the Nmtoken tokens in
5796 * the declaration
5798 * Returns: the enumeration attribute tree built while parsing
5801 xmlEnumerationPtr
5802 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5803 xmlChar *name;
5804 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5806 if (RAW != '(') {
5807 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5808 return(NULL);
5810 SHRINK;
5811 do {
5812 NEXT;
5813 SKIP_BLANKS;
5814 name = xmlParseNmtoken(ctxt);
5815 if (name == NULL) {
5816 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5817 return(ret);
5819 tmp = ret;
5820 while (tmp != NULL) {
5821 if (xmlStrEqual(name, tmp->name)) {
5822 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5823 "standalone: attribute enumeration value token %s duplicated\n",
5824 name, NULL);
5825 if (!xmlDictOwns(ctxt->dict, name))
5826 xmlFree(name);
5827 break;
5829 tmp = tmp->next;
5831 if (tmp == NULL) {
5832 cur = xmlCreateEnumeration(name);
5833 if (!xmlDictOwns(ctxt->dict, name))
5834 xmlFree(name);
5835 if (cur == NULL) {
5836 xmlFreeEnumeration(ret);
5837 return(NULL);
5839 if (last == NULL) ret = last = cur;
5840 else {
5841 last->next = cur;
5842 last = cur;
5845 SKIP_BLANKS;
5846 } while (RAW == '|');
5847 if (RAW != ')') {
5848 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5849 return(ret);
5851 NEXT;
5852 return(ret);
5856 * xmlParseEnumeratedType:
5857 * @ctxt: an XML parser context
5858 * @tree: the enumeration tree built while parsing
5860 * parse an Enumerated attribute type.
5862 * [57] EnumeratedType ::= NotationType | Enumeration
5864 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5867 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5871 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5872 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5873 SKIP(8);
5874 if (SKIP_BLANKS == 0) {
5875 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5876 "Space required after 'NOTATION'\n");
5877 return(0);
5879 *tree = xmlParseNotationType(ctxt);
5880 if (*tree == NULL) return(0);
5881 return(XML_ATTRIBUTE_NOTATION);
5883 *tree = xmlParseEnumerationType(ctxt);
5884 if (*tree == NULL) return(0);
5885 return(XML_ATTRIBUTE_ENUMERATION);
5889 * xmlParseAttributeType:
5890 * @ctxt: an XML parser context
5891 * @tree: the enumeration tree built while parsing
5893 * parse the Attribute list def for an element
5895 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5897 * [55] StringType ::= 'CDATA'
5899 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5900 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5902 * Validity constraints for attribute values syntax are checked in
5903 * xmlValidateAttributeValue()
5905 * [ VC: ID ]
5906 * Values of type ID must match the Name production. A name must not
5907 * appear more than once in an XML document as a value of this type;
5908 * i.e., ID values must uniquely identify the elements which bear them.
5910 * [ VC: One ID per Element Type ]
5911 * No element type may have more than one ID attribute specified.
5913 * [ VC: ID Attribute Default ]
5914 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5916 * [ VC: IDREF ]
5917 * Values of type IDREF must match the Name production, and values
5918 * of type IDREFS must match Names; each IDREF Name must match the value
5919 * of an ID attribute on some element in the XML document; i.e. IDREF
5920 * values must match the value of some ID attribute.
5922 * [ VC: Entity Name ]
5923 * Values of type ENTITY must match the Name production, values
5924 * of type ENTITIES must match Names; each Entity Name must match the
5925 * name of an unparsed entity declared in the DTD.
5927 * [ VC: Name Token ]
5928 * Values of type NMTOKEN must match the Nmtoken production; values
5929 * of type NMTOKENS must match Nmtokens.
5931 * Returns the attribute type
5934 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5935 SHRINK;
5936 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5937 SKIP(5);
5938 return(XML_ATTRIBUTE_CDATA);
5939 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5940 SKIP(6);
5941 return(XML_ATTRIBUTE_IDREFS);
5942 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5943 SKIP(5);
5944 return(XML_ATTRIBUTE_IDREF);
5945 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5946 SKIP(2);
5947 return(XML_ATTRIBUTE_ID);
5948 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5949 SKIP(6);
5950 return(XML_ATTRIBUTE_ENTITY);
5951 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5952 SKIP(8);
5953 return(XML_ATTRIBUTE_ENTITIES);
5954 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5955 SKIP(8);
5956 return(XML_ATTRIBUTE_NMTOKENS);
5957 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5958 SKIP(7);
5959 return(XML_ATTRIBUTE_NMTOKEN);
5961 return(xmlParseEnumeratedType(ctxt, tree));
5965 * xmlParseAttributeListDecl:
5966 * @ctxt: an XML parser context
5968 * parse the Attribute list def for an element
5970 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5972 * [53] AttDef ::= S Name S AttType S DefaultDecl
5975 void
5976 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5977 const xmlChar *elemName;
5978 const xmlChar *attrName;
5979 xmlEnumerationPtr tree;
5981 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5982 int inputid = ctxt->input->id;
5984 SKIP(9);
5985 if (SKIP_BLANKS == 0) {
5986 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5987 "Space required after '<!ATTLIST'\n");
5989 elemName = xmlParseName(ctxt);
5990 if (elemName == NULL) {
5991 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5992 "ATTLIST: no name for Element\n");
5993 return;
5995 SKIP_BLANKS;
5996 GROW;
5997 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5998 int type;
5999 int def;
6000 xmlChar *defaultValue = NULL;
6002 GROW;
6003 tree = NULL;
6004 attrName = xmlParseName(ctxt);
6005 if (attrName == NULL) {
6006 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6007 "ATTLIST: no name for Attribute\n");
6008 break;
6010 GROW;
6011 if (SKIP_BLANKS == 0) {
6012 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6013 "Space required after the attribute name\n");
6014 break;
6017 type = xmlParseAttributeType(ctxt, &tree);
6018 if (type <= 0) {
6019 break;
6022 GROW;
6023 if (SKIP_BLANKS == 0) {
6024 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6025 "Space required after the attribute type\n");
6026 if (tree != NULL)
6027 xmlFreeEnumeration(tree);
6028 break;
6031 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6032 if (def <= 0) {
6033 if (defaultValue != NULL)
6034 xmlFree(defaultValue);
6035 if (tree != NULL)
6036 xmlFreeEnumeration(tree);
6037 break;
6039 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6040 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6042 GROW;
6043 if (RAW != '>') {
6044 if (SKIP_BLANKS == 0) {
6045 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6046 "Space required after the attribute default value\n");
6047 if (defaultValue != NULL)
6048 xmlFree(defaultValue);
6049 if (tree != NULL)
6050 xmlFreeEnumeration(tree);
6051 break;
6054 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6055 (ctxt->sax->attributeDecl != NULL))
6056 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6057 type, def, defaultValue, tree);
6058 else if (tree != NULL)
6059 xmlFreeEnumeration(tree);
6061 if ((ctxt->sax2) && (defaultValue != NULL) &&
6062 (def != XML_ATTRIBUTE_IMPLIED) &&
6063 (def != XML_ATTRIBUTE_REQUIRED)) {
6064 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6066 if (ctxt->sax2) {
6067 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6069 if (defaultValue != NULL)
6070 xmlFree(defaultValue);
6071 GROW;
6073 if (RAW == '>') {
6074 if (inputid != ctxt->input->id) {
6075 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6076 "Attribute list declaration doesn't start and"
6077 " stop in the same entity\n");
6079 NEXT;
6085 * xmlParseElementMixedContentDecl:
6086 * @ctxt: an XML parser context
6087 * @inputchk: the input used for the current entity, needed for boundary checks
6089 * parse the declaration for a Mixed Element content
6090 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6092 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6093 * '(' S? '#PCDATA' S? ')'
6095 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6097 * [ VC: No Duplicate Types ]
6098 * The same name must not appear more than once in a single
6099 * mixed-content declaration.
6101 * returns: the list of the xmlElementContentPtr describing the element choices
6103 xmlElementContentPtr
6104 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6105 xmlElementContentPtr ret = NULL, cur = NULL, n;
6106 const xmlChar *elem = NULL;
6108 GROW;
6109 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6110 SKIP(7);
6111 SKIP_BLANKS;
6112 SHRINK;
6113 if (RAW == ')') {
6114 if (ctxt->input->id != inputchk) {
6115 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6116 "Element content declaration doesn't start and"
6117 " stop in the same entity\n");
6119 NEXT;
6120 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6121 if (ret == NULL)
6122 return(NULL);
6123 if (RAW == '*') {
6124 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6125 NEXT;
6127 return(ret);
6129 if ((RAW == '(') || (RAW == '|')) {
6130 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6131 if (ret == NULL) return(NULL);
6133 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6134 NEXT;
6135 if (elem == NULL) {
6136 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6137 if (ret == NULL) {
6138 xmlFreeDocElementContent(ctxt->myDoc, cur);
6139 return(NULL);
6141 ret->c1 = cur;
6142 if (cur != NULL)
6143 cur->parent = ret;
6144 cur = ret;
6145 } else {
6146 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6147 if (n == NULL) {
6148 xmlFreeDocElementContent(ctxt->myDoc, ret);
6149 return(NULL);
6151 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6152 if (n->c1 != NULL)
6153 n->c1->parent = n;
6154 cur->c2 = n;
6155 if (n != NULL)
6156 n->parent = cur;
6157 cur = n;
6159 SKIP_BLANKS;
6160 elem = xmlParseName(ctxt);
6161 if (elem == NULL) {
6162 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6163 "xmlParseElementMixedContentDecl : Name expected\n");
6164 xmlFreeDocElementContent(ctxt->myDoc, ret);
6165 return(NULL);
6167 SKIP_BLANKS;
6168 GROW;
6170 if ((RAW == ')') && (NXT(1) == '*')) {
6171 if (elem != NULL) {
6172 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6173 XML_ELEMENT_CONTENT_ELEMENT);
6174 if (cur->c2 != NULL)
6175 cur->c2->parent = cur;
6177 if (ret != NULL)
6178 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6179 if (ctxt->input->id != inputchk) {
6180 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6181 "Element content declaration doesn't start and"
6182 " stop in the same entity\n");
6184 SKIP(2);
6185 } else {
6186 xmlFreeDocElementContent(ctxt->myDoc, ret);
6187 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6188 return(NULL);
6191 } else {
6192 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6194 return(ret);
6198 * xmlParseElementChildrenContentDeclPriv:
6199 * @ctxt: an XML parser context
6200 * @inputchk: the input used for the current entity, needed for boundary checks
6201 * @depth: the level of recursion
6203 * parse the declaration for an Element children content (choice or sequence)
6204 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6207 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6209 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6211 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6213 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6215 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6216 * TODO Parameter-entity replacement text must be properly nested
6217 * with parenthesized groups. That is to say, if either of the
6218 * opening or closing parentheses in a choice, seq, or Mixed
6219 * construct is contained in the replacement text for a parameter
6220 * entity, both must be contained in the same replacement text. For
6221 * interoperability, if a parameter-entity reference appears in a
6222 * choice, seq, or Mixed construct, its replacement text should not
6223 * be empty, and neither the first nor last non-blank character of
6224 * the replacement text should be a connector (| or ,).
6226 * Returns the tree of xmlElementContentPtr describing the element
6227 * hierarchy.
/*
 * Recursive worker that parses one parenthesized group of a children
 * content model and returns the root of the resulting content tree, or
 * NULL after raising an error.
 *
 *   ctxt     - parser context
 *   inputchk - input id that the end of the group is compared against
 *   depth    - current recursion level, used to bound nesting
 *
 * Local roles: ret is the root that is returned; cur is the operator
 * node appended most recently; last is the most recently parsed operand
 * that is not linked under an operator yet; op is the SEQ/OR node being
 * created; type remembers the first separator seen (',' or '|').
 */
6229 static xmlElementContentPtr
6230 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6231 int depth) {
6232 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6233 const xmlChar *elem;
6234 xmlChar type = 0;
/* Nesting limit: 128 levels unless XML_PARSE_HUGE is set, 2048 always. */
6236 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6237 (depth > 2048)) {
6238 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6239 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6240 depth);
6241 return(NULL);
6243 SKIP_BLANKS;
6244 GROW;
/* First operand: either a nested group or a single element name. */
6245 if (RAW == '(') {
6246 int inputid = ctxt->input->id;
6248 /* Recurse on first child */
6249 NEXT;
6250 SKIP_BLANKS;
6251 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6252 depth + 1);
6253 if (cur == NULL)
6254 return(NULL);
6255 SKIP_BLANKS;
6256 GROW;
6257 } else {
6258 elem = xmlParseName(ctxt);
6259 if (elem == NULL) {
6260 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6261 return(NULL);
6263 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6264 if (cur == NULL) {
6265 xmlErrMemory(ctxt, NULL);
6266 return(NULL);
6268 GROW;
/* Optional occurrence indicator directly after the name. */
6269 if (RAW == '?') {
6270 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6271 NEXT;
6272 } else if (RAW == '*') {
6273 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6274 NEXT;
6275 } else if (RAW == '+') {
6276 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6277 NEXT;
6278 } else {
6279 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6281 GROW;
6283 SKIP_BLANKS;
6284 SHRINK;
/* Remaining operands, until ')' is reached or the parser is halted. */
6285 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6287 * Each loop we parse one separator and one element.
6289 if (RAW == ',') {
6290 if (type == 0) type = CUR;
6293 * Detect "Name | Name , Name" error
6295 else if (type != CUR) {
6296 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6297 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6298 type);
/* last is not linked into ret at this point, so it is freed separately. */
6299 if ((last != NULL) && (last != ret))
6300 xmlFreeDocElementContent(ctxt->myDoc, last);
6301 if (ret != NULL)
6302 xmlFreeDocElementContent(ctxt->myDoc, ret);
6303 return(NULL);
6305 NEXT;
6307 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6308 if (op == NULL) {
6309 if ((last != NULL) && (last != ret))
6310 xmlFreeDocElementContent(ctxt->myDoc, last);
6311 xmlFreeDocElementContent(ctxt->myDoc, ret);
6312 return(NULL);
/*
 * First separator: op becomes the new root with the first operand as c1.
 * Later separators: op is hung as c2 of the previous operator and takes
 * the pending operand as its c1, giving a right-leaning chain.
 */
6314 if (last == NULL) {
6315 op->c1 = ret;
6316 if (ret != NULL)
6317 ret->parent = op;
6318 ret = cur = op;
6319 } else {
6320 cur->c2 = op;
6321 if (op != NULL)
6322 op->parent = cur;
6323 op->c1 = last;
6324 if (last != NULL)
6325 last->parent = op;
6326 cur =op;
6327 last = NULL;
/* Same handling as the ',' branch, but building OR nodes. */
6329 } else if (RAW == '|') {
6330 if (type == 0) type = CUR;
6333 * Detect "Name , Name | Name" error
6335 else if (type != CUR) {
6336 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6337 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6338 type);
6339 if ((last != NULL) && (last != ret))
6340 xmlFreeDocElementContent(ctxt->myDoc, last);
6341 if (ret != NULL)
6342 xmlFreeDocElementContent(ctxt->myDoc, ret);
6343 return(NULL);
6345 NEXT;
6347 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6348 if (op == NULL) {
6349 if ((last != NULL) && (last != ret))
6350 xmlFreeDocElementContent(ctxt->myDoc, last);
6351 if (ret != NULL)
6352 xmlFreeDocElementContent(ctxt->myDoc, ret);
6353 return(NULL);
6355 if (last == NULL) {
6356 op->c1 = ret;
6357 if (ret != NULL)
6358 ret->parent = op;
6359 ret = cur = op;
6360 } else {
6361 cur->c2 = op;
6362 if (op != NULL)
6363 op->parent = cur;
6364 op->c1 = last;
6365 if (last != NULL)
6366 last->parent = op;
6367 cur =op;
6368 last = NULL;
/* Anything other than ',' or '|' here is an unfinished content model. */
6370 } else {
6371 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6372 if ((last != NULL) && (last != ret))
6373 xmlFreeDocElementContent(ctxt->myDoc, last);
6374 if (ret != NULL)
6375 xmlFreeDocElementContent(ctxt->myDoc, ret);
6376 return(NULL);
6378 GROW;
6379 SKIP_BLANKS;
6380 GROW;
/* Operand following the separator; it is kept in last until linked. */
6381 if (RAW == '(') {
6382 int inputid = ctxt->input->id;
6383 /* Recurse on second child */
6384 NEXT;
6385 SKIP_BLANKS;
6386 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6387 depth + 1);
6388 if (last == NULL) {
6389 if (ret != NULL)
6390 xmlFreeDocElementContent(ctxt->myDoc, ret);
6391 return(NULL);
6393 SKIP_BLANKS;
6394 } else {
6395 elem = xmlParseName(ctxt);
6396 if (elem == NULL) {
6397 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6398 if (ret != NULL)
6399 xmlFreeDocElementContent(ctxt->myDoc, ret);
6400 return(NULL);
6402 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6403 if (last == NULL) {
6404 if (ret != NULL)
6405 xmlFreeDocElementContent(ctxt->myDoc, ret);
6406 return(NULL);
6408 if (RAW == '?') {
6409 last->ocur = XML_ELEMENT_CONTENT_OPT;
6410 NEXT;
6411 } else if (RAW == '*') {
6412 last->ocur = XML_ELEMENT_CONTENT_MULT;
6413 NEXT;
6414 } else if (RAW == '+') {
6415 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6416 NEXT;
6417 } else {
6418 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6421 SKIP_BLANKS;
6422 GROW;
/* Link the final pending operand as the right child of the last operator. */
6424 if ((cur != NULL) && (last != NULL)) {
6425 cur->c2 = last;
6426 if (last != NULL)
6427 last->parent = cur;
/* The group must end in the input the caller passed as inputchk. */
6429 if (ctxt->input->id != inputchk) {
6430 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6431 "Element content declaration doesn't start and stop in"
6432 " the same entity\n");
/* Advance past the current character (the ')' unless the loop ended on XML_PARSER_EOF). */
6434 NEXT;
/* Occurrence indicator on the whole group, merged with any ocur already on ret. */
6435 if (RAW == '?') {
6436 if (ret != NULL) {
6437 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6438 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6439 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6440 else
6441 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6443 NEXT;
6444 } else if (RAW == '*') {
6445 if (ret != NULL) {
6446 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6447 cur = ret;
6449 * Some normalization:
6450 * (a | b* | c?)* == (a | b | c)*
/* Walks the OR chain from the root through c2, resetting OPT/MULT children to ONCE. */
6452 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6453 if ((cur->c1 != NULL) &&
6454 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6455 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6456 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6457 if ((cur->c2 != NULL) &&
6458 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6459 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6460 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6461 cur = cur->c2;
6464 NEXT;
6465 } else if (RAW == '+') {
6466 if (ret != NULL) {
6467 int found = 0;
6469 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6470 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6471 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6472 else
6473 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6475 * Some normalization:
6476 * (a | b*)+ == (a | b)*
6477 * (a | b?)+ == (a | b)*
/*
 * NOTE(review): unlike the '*' branch, cur is not reset to ret before
 * this walk, so it starts at the last operator appended by the loop.
 * For choices of three or more alternatives the leading alternatives
 * appear to be left unnormalized - confirm whether that is intended.
 */
6479 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6480 if ((cur->c1 != NULL) &&
6481 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6482 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6483 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6484 found = 1;
6486 if ((cur->c2 != NULL) &&
6487 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6488 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6489 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6490 found = 1;
6492 cur = cur->c2;
/* An optional alternative inside a '+' group makes the whole group '*'. */
6494 if (found)
6495 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6497 NEXT;
6499 return(ret);
6503 * xmlParseElementChildrenContentDecl:
6504 * @ctxt: an XML parser context
6505 * @inputchk: the input used for the current entity, needed for boundary checks
6507 * parse the declaration for a Mixed Element content
6508 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6510 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6512 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6514 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6516 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6518 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6519 * TODO Parameter-entity replacement text must be properly nested
6520 * with parenthesized groups. That is to say, if either of the
6521 * opening or closing parentheses in a choice, seq, or Mixed
6522 * construct is contained in the replacement text for a parameter
6523 * entity, both must be contained in the same replacement text. For
6524 * interoperability, if a parameter-entity reference appears in a
6525 * choice, seq, or Mixed construct, its replacement text should not
6526 * be empty, and neither the first nor last non-blank character of
6527 * the replacement text should be a connector (| or ,).
6529 * Returns the tree of xmlElementContentPtr describing the element
6530 * hierarchy.
6532 xmlElementContentPtr
6533 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6534 /* stub left for API/ABI compat */
6535 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6539 * xmlParseElementContentDecl:
6540 * @ctxt: an XML parser context
6541 * @name: the name of the element being defined.
6542 * @result: the Element Content pointer will be stored here if any
6544 * parse the declaration for an Element content either Mixed or Children,
6545 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6547 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6549 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6553 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6554 xmlElementContentPtr *result) {
6556 xmlElementContentPtr tree = NULL;
6557 int inputid = ctxt->input->id;
6558 int res;
6560 *result = NULL;
6562 if (RAW != '(') {
6563 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6564 "xmlParseElementContentDecl : %s '(' expected\n", name);
6565 return(-1);
6567 NEXT;
6568 GROW;
6569 if (ctxt->instate == XML_PARSER_EOF)
6570 return(-1);
6571 SKIP_BLANKS;
6572 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6573 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6574 res = XML_ELEMENT_TYPE_MIXED;
6575 } else {
6576 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6577 res = XML_ELEMENT_TYPE_ELEMENT;
6579 SKIP_BLANKS;
6580 *result = tree;
6581 return(res);
6585 * xmlParseElementDecl:
6586 * @ctxt: an XML parser context
6588 * parse an Element declaration.
6590 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6592 * [ VC: Unique Element Type Declaration ]
6593 * No element type may be declared more than once
6595 * Returns the type of the element, or -1 in case of error
6598 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6599 const xmlChar *name;
6600 int ret = -1;
6601 xmlElementContentPtr content = NULL;
6603 /* GROW; done in the caller */
6604 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6605 int inputid = ctxt->input->id;
6607 SKIP(9);
6608 if (SKIP_BLANKS == 0) {
6609 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6610 "Space required after 'ELEMENT'\n");
6611 return(-1);
6613 name = xmlParseName(ctxt);
6614 if (name == NULL) {
6615 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6616 "xmlParseElementDecl: no name for Element\n");
6617 return(-1);
6619 if (SKIP_BLANKS == 0) {
6620 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6621 "Space required after the element name\n");
6623 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6624 SKIP(5);
6626 * Element must always be empty.
6628 ret = XML_ELEMENT_TYPE_EMPTY;
6629 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6630 (NXT(2) == 'Y')) {
6631 SKIP(3);
6633 * Element is a generic container.
6635 ret = XML_ELEMENT_TYPE_ANY;
6636 } else if (RAW == '(') {
6637 ret = xmlParseElementContentDecl(ctxt, name, &content);
6638 } else {
6640 * [ WFC: PEs in Internal Subset ] error handling.
6642 if ((RAW == '%') && (ctxt->external == 0) &&
6643 (ctxt->inputNr == 1)) {
6644 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6645 "PEReference: forbidden within markup decl in internal subset\n");
6646 } else {
6647 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6648 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6650 return(-1);
6653 SKIP_BLANKS;
6655 if (RAW != '>') {
6656 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6657 if (content != NULL) {
6658 xmlFreeDocElementContent(ctxt->myDoc, content);
6660 } else {
6661 if (inputid != ctxt->input->id) {
6662 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6663 "Element declaration doesn't start and stop in"
6664 " the same entity\n");
6667 NEXT;
6668 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6669 (ctxt->sax->elementDecl != NULL)) {
6670 if (content != NULL)
6671 content->parent = NULL;
6672 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6673 content);
6674 if ((content != NULL) && (content->parent == NULL)) {
6676 * this is a trick: if xmlAddElementDecl is called,
6677 * instead of copying the full tree it is plugged directly
6678 * if called from the parser. Avoid duplicating the
6679 * interfaces or change the API/ABI
6681 xmlFreeDocElementContent(ctxt->myDoc, content);
6683 } else if (content != NULL) {
6684 xmlFreeDocElementContent(ctxt->myDoc, content);
6688 return(ret);
6692 * xmlParseConditionalSections
6693 * @ctxt: an XML parser context
6695 * [61] conditionalSect ::= includeSect | ignoreSect
6696 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6697 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6698 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6699 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6702 static void
6703 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6704 int *inputIds = NULL;
6705 size_t inputIdsSize = 0;
6706 size_t depth = 0;
6708 while (ctxt->instate != XML_PARSER_EOF) {
6709 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6710 int id = ctxt->input->id;
6712 SKIP(3);
6713 SKIP_BLANKS;
6715 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6716 SKIP(7);
6717 SKIP_BLANKS;
6718 if (RAW != '[') {
6719 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6720 xmlHaltParser(ctxt);
6721 goto error;
6723 if (ctxt->input->id != id) {
6724 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6725 "All markup of the conditional section is"
6726 " not in the same entity\n");
6728 NEXT;
6730 if (inputIdsSize <= depth) {
6731 int *tmp;
6733 inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6734 tmp = (int *) xmlRealloc(inputIds,
6735 inputIdsSize * sizeof(int));
6736 if (tmp == NULL) {
6737 xmlErrMemory(ctxt, NULL);
6738 goto error;
6740 inputIds = tmp;
6742 inputIds[depth] = id;
6743 depth++;
6744 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6745 int state;
6746 xmlParserInputState instate;
6747 size_t ignoreDepth = 0;
6749 SKIP(6);
6750 SKIP_BLANKS;
6751 if (RAW != '[') {
6752 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6753 xmlHaltParser(ctxt);
6754 goto error;
6756 if (ctxt->input->id != id) {
6757 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6758 "All markup of the conditional section is"
6759 " not in the same entity\n");
6761 NEXT;
6764 * Parse up to the end of the conditional section but disable
6765 * SAX event generating DTD building in the meantime
6767 state = ctxt->disableSAX;
6768 instate = ctxt->instate;
6769 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6770 ctxt->instate = XML_PARSER_IGNORE;
6772 while (RAW != 0) {
6773 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6774 SKIP(3);
6775 ignoreDepth++;
6776 /* Check for integer overflow */
6777 if (ignoreDepth == 0) {
6778 xmlErrMemory(ctxt, NULL);
6779 goto error;
6781 } else if ((RAW == ']') && (NXT(1) == ']') &&
6782 (NXT(2) == '>')) {
6783 if (ignoreDepth == 0)
6784 break;
6785 SKIP(3);
6786 ignoreDepth--;
6787 } else {
6788 NEXT;
6792 ctxt->disableSAX = state;
6793 ctxt->instate = instate;
6795 if (RAW == 0) {
6796 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6797 goto error;
6799 if (ctxt->input->id != id) {
6800 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6801 "All markup of the conditional section is"
6802 " not in the same entity\n");
6804 SKIP(3);
6805 } else {
6806 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6807 xmlHaltParser(ctxt);
6808 goto error;
6810 } else if ((depth > 0) &&
6811 (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6812 depth--;
6813 if (ctxt->input->id != inputIds[depth]) {
6814 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6815 "All markup of the conditional section is not"
6816 " in the same entity\n");
6818 SKIP(3);
6819 } else {
6820 int id = ctxt->input->id;
6821 unsigned long cons = CUR_CONSUMED;
6823 xmlParseMarkupDecl(ctxt);
6825 if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
6826 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6827 xmlHaltParser(ctxt);
6828 goto error;
6832 if (depth == 0)
6833 break;
6835 SKIP_BLANKS;
6836 GROW;
6839 error:
6840 xmlFree(inputIds);
6844 * xmlParseMarkupDecl:
6845 * @ctxt: an XML parser context
6847 * parse Markup declarations
6849 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6850 * NotationDecl | PI | Comment
6852 * [ VC: Proper Declaration/PE Nesting ]
6853 * Parameter-entity replacement text must be properly nested with
6854 * markup declarations. That is to say, if either the first character
6855 * or the last character of a markup declaration (markupdecl above) is
6856 * contained in the replacement text for a parameter-entity reference,
6857 * both must be contained in the same replacement text.
6859 * [ WFC: PEs in Internal Subset ]
6860 * In the internal DTD subset, parameter-entity references can occur
6861 * only where markup declarations can occur, not within markup declarations.
6862 * (This does not apply to references that occur in external parameter
6863 * entities or to the external subset.)
6865 void
6866 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6867 GROW;
6868 if (CUR == '<') {
6869 if (NXT(1) == '!') {
6870 switch (NXT(2)) {
6871 case 'E':
6872 if (NXT(3) == 'L')
6873 xmlParseElementDecl(ctxt);
6874 else if (NXT(3) == 'N')
6875 xmlParseEntityDecl(ctxt);
6876 break;
6877 case 'A':
6878 xmlParseAttributeListDecl(ctxt);
6879 break;
6880 case 'N':
6881 xmlParseNotationDecl(ctxt);
6882 break;
6883 case '-':
6884 xmlParseComment(ctxt);
6885 break;
6886 default:
6887 /* there is an error but it will be detected later */
6888 break;
6890 } else if (NXT(1) == '?') {
6891 xmlParsePI(ctxt);
6896 * detect requirement to exit there and act accordingly
6897 * and avoid having instate overridden later on
6899 if (ctxt->instate == XML_PARSER_EOF)
6900 return;
6902 ctxt->instate = XML_PARSER_DTD;
6906 * xmlParseTextDecl:
6907 * @ctxt: an XML parser context
6909 * parse an XML declaration header for external entities
6911 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6914 void
6915 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6916 xmlChar *version;
6917 const xmlChar *encoding;
6918 int oldstate;
6921 * We know that '<?xml' is here.
6923 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6924 SKIP(5);
6925 } else {
6926 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6927 return;
6930 /* Avoid expansion of parameter entities when skipping blanks. */
6931 oldstate = ctxt->instate;
6932 ctxt->instate = XML_PARSER_START;
6934 if (SKIP_BLANKS == 0) {
6935 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6936 "Space needed after '<?xml'\n");
6940 * We may have the VersionInfo here.
6942 version = xmlParseVersionInfo(ctxt);
6943 if (version == NULL)
6944 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6945 else {
6946 if (SKIP_BLANKS == 0) {
6947 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6948 "Space needed here\n");
6951 ctxt->input->version = version;
6954 * We must have the encoding declaration
6956 encoding = xmlParseEncodingDecl(ctxt);
6957 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6959 * The XML REC instructs us to stop parsing right here
6961 ctxt->instate = oldstate;
6962 return;
6964 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6965 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6966 "Missing encoding in text declaration\n");
6969 SKIP_BLANKS;
6970 if ((RAW == '?') && (NXT(1) == '>')) {
6971 SKIP(2);
6972 } else if (RAW == '>') {
6973 /* Deprecated old WD ... */
6974 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6975 NEXT;
6976 } else {
6977 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6978 MOVETO_ENDTAG(CUR_PTR);
6979 NEXT;
6982 ctxt->instate = oldstate;
6986 * xmlParseExternalSubset:
6987 * @ctxt: an XML parser context
6988 * @ExternalID: the external identifier
6989 * @SystemID: the system identifier (or URL)
6991 * parse Markup declarations from an external subset
6993 * [30] extSubset ::= textDecl? extSubsetDecl
6995 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6997 void
6998 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6999 const xmlChar *SystemID) {
7000 xmlDetectSAX2(ctxt);
7001 GROW;
7003 if ((ctxt->encoding == NULL) &&
7004 (ctxt->input->end - ctxt->input->cur >= 4)) {
7005 xmlChar start[4];
7006 xmlCharEncoding enc;
7008 start[0] = RAW;
7009 start[1] = NXT(1);
7010 start[2] = NXT(2);
7011 start[3] = NXT(3);
7012 enc = xmlDetectCharEncoding(start, 4);
7013 if (enc != XML_CHAR_ENCODING_NONE)
7014 xmlSwitchEncoding(ctxt, enc);
7017 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7018 xmlParseTextDecl(ctxt);
7019 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7021 * The XML REC instructs us to stop parsing right here
7023 xmlHaltParser(ctxt);
7024 return;
7027 if (ctxt->myDoc == NULL) {
7028 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7029 if (ctxt->myDoc == NULL) {
7030 xmlErrMemory(ctxt, "New Doc failed");
7031 return;
7033 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7035 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7036 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7038 ctxt->instate = XML_PARSER_DTD;
7039 ctxt->external = 1;
7040 SKIP_BLANKS;
7041 while (((RAW == '<') && (NXT(1) == '?')) ||
7042 ((RAW == '<') && (NXT(1) == '!')) ||
7043 (RAW == '%')) {
7044 int id = ctxt->input->id;
7045 unsigned long cons = CUR_CONSUMED;
7047 GROW;
7048 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7049 xmlParseConditionalSections(ctxt);
7050 } else
7051 xmlParseMarkupDecl(ctxt);
7052 SKIP_BLANKS;
7054 if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
7055 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7056 break;
7060 if (RAW != 0) {
7061 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7067 * xmlParseReference:
7068 * @ctxt: an XML parser context
7070 * parse and handle entity references in content, depending on the SAX
7071 * interface, this may end-up in a call to character() if this is a
7072 * CharRef, a predefined entity, if there is no reference() callback.
7073 * or if the parser was asked to switch to that mode.
7075 * [67] Reference ::= EntityRef | CharRef
7077 void
7078 xmlParseReference(xmlParserCtxtPtr ctxt) {
7079 xmlEntityPtr ent;
7080 xmlChar *val;
7081 int was_checked;
7082 xmlNodePtr list = NULL;
7083 xmlParserErrors ret = XML_ERR_OK;
7086 if (RAW != '&')
7087 return;
7090 * Simple case of a CharRef
7092 if (NXT(1) == '#') {
7093 int i = 0;
7094 xmlChar out[16];
7095 int hex = NXT(2);
7096 int value = xmlParseCharRef(ctxt);
7098 if (value == 0)
7099 return;
7100 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7102 * So we are using non-UTF-8 buffers
7103 * Check that the char fit on 8bits, if not
7104 * generate a CharRef.
7106 if (value <= 0xFF) {
7107 out[0] = value;
7108 out[1] = 0;
7109 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7110 (!ctxt->disableSAX))
7111 ctxt->sax->characters(ctxt->userData, out, 1);
7112 } else {
7113 if ((hex == 'x') || (hex == 'X'))
7114 snprintf((char *)out, sizeof(out), "#x%X", value);
7115 else
7116 snprintf((char *)out, sizeof(out), "#%d", value);
7117 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7118 (!ctxt->disableSAX))
7119 ctxt->sax->reference(ctxt->userData, out);
7121 } else {
7123 * Just encode the value in UTF-8
7125 COPY_BUF(0 ,out, i, value);
7126 out[i] = 0;
7127 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7128 (!ctxt->disableSAX))
7129 ctxt->sax->characters(ctxt->userData, out, i);
7131 return;
7135 * We are seeing an entity reference
7137 ent = xmlParseEntityRef(ctxt);
7138 if (ent == NULL) return;
7139 if (!ctxt->wellFormed)
7140 return;
7141 was_checked = ent->checked;
7143 /* special case of predefined entities */
7144 if ((ent->name == NULL) ||
7145 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7146 val = ent->content;
7147 if (val == NULL) return;
7149 * inline the entity.
7151 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7152 (!ctxt->disableSAX))
7153 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7154 return;
7158 * The first reference to the entity trigger a parsing phase
7159 * where the ent->children is filled with the result from
7160 * the parsing.
7161 * Note: external parsed entities will not be loaded, it is not
7162 * required for a non-validating parser, unless the parsing option
7163 * of validating, or substituting entities were given. Doing so is
7164 * far more secure as the parser will only process data coming from
7165 * the document entity by default.
7167 if (((ent->checked == 0) ||
7168 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7169 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7170 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7171 unsigned long oldnbent = ctxt->nbentities, diff;
7174 * This is a bit hackish but this seems the best
7175 * way to make sure both SAX and DOM entity support
7176 * behaves okay.
7178 void *user_data;
7179 if (ctxt->userData == ctxt)
7180 user_data = NULL;
7181 else
7182 user_data = ctxt->userData;
7185 * Check that this entity is well formed
7186 * 4.3.2: An internal general parsed entity is well-formed
7187 * if its replacement text matches the production labeled
7188 * content.
7190 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7191 ctxt->depth++;
7192 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7193 user_data, &list);
7194 ctxt->depth--;
7196 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7197 ctxt->depth++;
7198 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7199 user_data, ctxt->depth, ent->URI,
7200 ent->ExternalID, &list);
7201 ctxt->depth--;
7202 } else {
7203 ret = XML_ERR_ENTITY_PE_INTERNAL;
7204 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7205 "invalid entity type found\n", NULL);
7209 * Store the number of entities needing parsing for this entity
7210 * content and do checkings
7212 diff = ctxt->nbentities - oldnbent + 1;
7213 if (diff > INT_MAX / 2)
7214 diff = INT_MAX / 2;
7215 ent->checked = diff * 2;
7216 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7217 ent->checked |= 1;
7218 if (ret == XML_ERR_ENTITY_LOOP) {
7219 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7220 xmlHaltParser(ctxt);
7221 xmlFreeNodeList(list);
7222 return;
7224 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7225 xmlFreeNodeList(list);
7226 return;
7229 if ((ret == XML_ERR_OK) && (list != NULL)) {
7230 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7231 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7232 (ent->children == NULL)) {
7233 ent->children = list;
7235 * Prune it directly in the generated document
7236 * except for single text nodes.
7238 if ((ctxt->replaceEntities == 0) ||
7239 (ctxt->parseMode == XML_PARSE_READER) ||
7240 ((list->type == XML_TEXT_NODE) &&
7241 (list->next == NULL))) {
7242 ent->owner = 1;
7243 while (list != NULL) {
7244 list->parent = (xmlNodePtr) ent;
7245 if (list->doc != ent->doc)
7246 xmlSetTreeDoc(list, ent->doc);
7247 if (list->next == NULL)
7248 ent->last = list;
7249 list = list->next;
7251 list = NULL;
7252 } else {
7253 ent->owner = 0;
7254 while (list != NULL) {
7255 list->parent = (xmlNodePtr) ctxt->node;
7256 list->doc = ctxt->myDoc;
7257 if (list->next == NULL)
7258 ent->last = list;
7259 list = list->next;
7261 list = ent->children;
7262 #ifdef LIBXML_LEGACY_ENABLED
7263 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7264 xmlAddEntityReference(ent, list, NULL);
7265 #endif /* LIBXML_LEGACY_ENABLED */
7267 } else {
7268 xmlFreeNodeList(list);
7269 list = NULL;
7271 } else if ((ret != XML_ERR_OK) &&
7272 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7273 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7274 "Entity '%s' failed to parse\n", ent->name);
7275 if (ent->content != NULL)
7276 ent->content[0] = 0;
7277 xmlParserEntityCheck(ctxt, 0, ent, 0);
7278 } else if (list != NULL) {
7279 xmlFreeNodeList(list);
7280 list = NULL;
7282 if (ent->checked == 0)
7283 ent->checked = 2;
7285 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7286 was_checked = 0;
7287 } else if (ent->checked != 1) {
7288 ctxt->nbentities += ent->checked / 2;
7292 * Now that the entity content has been gathered
7293 * provide it to the application, this can take different forms based
7294 * on the parsing modes.
7296 if (ent->children == NULL) {
7298 * Probably running in SAX mode and the callbacks don't
7299 * build the entity content. So unless we already went
7300 * though parsing for first checking go though the entity
7301 * content to generate callbacks associated to the entity
7303 if (was_checked != 0) {
7304 void *user_data;
7306 * This is a bit hackish but this seems the best
7307 * way to make sure both SAX and DOM entity support
7308 * behaves okay.
7310 if (ctxt->userData == ctxt)
7311 user_data = NULL;
7312 else
7313 user_data = ctxt->userData;
7315 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7316 ctxt->depth++;
7317 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7318 ent->content, user_data, NULL);
7319 ctxt->depth--;
7320 } else if (ent->etype ==
7321 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7322 ctxt->depth++;
7323 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7324 ctxt->sax, user_data, ctxt->depth,
7325 ent->URI, ent->ExternalID, NULL);
7326 ctxt->depth--;
7327 } else {
7328 ret = XML_ERR_ENTITY_PE_INTERNAL;
7329 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7330 "invalid entity type found\n", NULL);
7332 if (ret == XML_ERR_ENTITY_LOOP) {
7333 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7334 return;
7337 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7338 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7340 * Entity reference callback comes second, it's somewhat
7341 * superfluous but a compatibility to historical behaviour
7343 ctxt->sax->reference(ctxt->userData, ent->name);
7345 return;
7349 * If we didn't get any children for the entity being built
7351 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7352 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7354 * Create a node.
7356 ctxt->sax->reference(ctxt->userData, ent->name);
7357 return;
7360 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7362 * There is a problem on the handling of _private for entities
7363 * (bug 155816): Should we copy the content of the field from
7364 * the entity (possibly overwriting some value set by the user
7365 * when a copy is created), should we leave it alone, or should
7366 * we try to take care of different situations? The problem
7367 * is exacerbated by the usage of this field by the xmlReader.
7368 * To fix this bug, we look at _private on the created node
7369 * and, if it's NULL, we copy in whatever was in the entity.
7370 * If it's not NULL we leave it alone. This is somewhat of a
7371 * hack - maybe we should have further tests to determine
7372 * what to do.
7374 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7376 * Seems we are generating the DOM content, do
7377 * a simple tree copy for all references except the first
7378 * In the first occurrence list contains the replacement.
7380 if (((list == NULL) && (ent->owner == 0)) ||
7381 (ctxt->parseMode == XML_PARSE_READER)) {
7382 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7385 * We are copying here, make sure there is no abuse
7387 ctxt->sizeentcopy += ent->length + 5;
7388 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7389 return;
7392 * when operating on a reader, the entities definitions
7393 * are always owning the entities subtree.
7394 if (ctxt->parseMode == XML_PARSE_READER)
7395 ent->owner = 1;
7398 cur = ent->children;
7399 while (cur != NULL) {
7400 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7401 if (nw != NULL) {
7402 if (nw->_private == NULL)
7403 nw->_private = cur->_private;
7404 if (firstChild == NULL){
7405 firstChild = nw;
7407 nw = xmlAddChild(ctxt->node, nw);
7409 if (cur == ent->last) {
7411 * needed to detect some strange empty
7412 * node cases in the reader tests
7414 if ((ctxt->parseMode == XML_PARSE_READER) &&
7415 (nw != NULL) &&
7416 (nw->type == XML_ELEMENT_NODE) &&
7417 (nw->children == NULL))
7418 nw->extra = 1;
7420 break;
7422 cur = cur->next;
7424 #ifdef LIBXML_LEGACY_ENABLED
7425 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7426 xmlAddEntityReference(ent, firstChild, nw);
7427 #endif /* LIBXML_LEGACY_ENABLED */
7428 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7429 xmlNodePtr nw = NULL, cur, next, last,
7430 firstChild = NULL;
7433 * We are copying here, make sure there is no abuse
7435 ctxt->sizeentcopy += ent->length + 5;
7436 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7437 return;
7440 * Copy the entity child list and make it the new
7441 * entity child list. The goal is to make sure any
7442 * ID or REF referenced will be the one from the
7443 * document content and not the entity copy.
7445 cur = ent->children;
7446 ent->children = NULL;
7447 last = ent->last;
7448 ent->last = NULL;
7449 while (cur != NULL) {
7450 next = cur->next;
7451 cur->next = NULL;
7452 cur->parent = NULL;
7453 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7454 if (nw != NULL) {
7455 if (nw->_private == NULL)
7456 nw->_private = cur->_private;
7457 if (firstChild == NULL){
7458 firstChild = cur;
7460 xmlAddChild((xmlNodePtr) ent, nw);
7461 xmlAddChild(ctxt->node, cur);
7463 if (cur == last)
7464 break;
7465 cur = next;
7467 if (ent->owner == 0)
7468 ent->owner = 1;
7469 #ifdef LIBXML_LEGACY_ENABLED
7470 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7471 xmlAddEntityReference(ent, firstChild, nw);
7472 #endif /* LIBXML_LEGACY_ENABLED */
7473 } else {
7474 const xmlChar *nbktext;
7477 * the name change is to avoid coalescing of the
7478 * node with a possible previous text one which
7479 * would make ent->children a dangling pointer
7481 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7482 -1);
7483 if (ent->children->type == XML_TEXT_NODE)
7484 ent->children->name = nbktext;
7485 if ((ent->last != ent->children) &&
7486 (ent->last->type == XML_TEXT_NODE))
7487 ent->last->name = nbktext;
7488 xmlAddChildList(ctxt->node, ent->children);
7492 * This is to avoid a nasty side effect, see
7493 * characters() in SAX.c
7495 ctxt->nodemem = 0;
7496 ctxt->nodelen = 0;
7497 return;
7503 * xmlParseEntityRef:
7504 * @ctxt: an XML parser context
7506 * parse ENTITY references declarations
7508 * [68] EntityRef ::= '&' Name ';'
7510 * [ WFC: Entity Declared ]
7511 * In a document without any DTD, a document with only an internal DTD
7512 * subset which contains no parameter entity references, or a document
7513 * with "standalone='yes'", the Name given in the entity reference
7514 * must match that in an entity declaration, except that well-formed
7515 * documents need not declare any of the following entities: amp, lt,
7516 * gt, apos, quot. The declaration of a parameter entity must precede
7517 * any reference to it. Similarly, the declaration of a general entity
7518 * must precede any reference to it which appears in a default value in an
7519 * attribute-list declaration. Note that if entities are declared in the
7520 * external subset or in external parameter entities, a non-validating
7521 * processor is not obligated to read and process their declarations;
7522 * for such documents, the rule that an entity must be declared is a
7523 * well-formedness constraint only if standalone='yes'.
7525 * [ WFC: Parsed Entity ]
7526 * An entity reference must not contain the name of an unparsed entity
7528 * Returns the xmlEntityPtr if found, or NULL otherwise.
7530 xmlEntityPtr
7531 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7532 const xmlChar *name;
7533 xmlEntityPtr ent = NULL;
7535 GROW;
7536 if (ctxt->instate == XML_PARSER_EOF)
7537 return(NULL);
7539 if (RAW != '&')
7540 return(NULL);
7541 NEXT;
7542 name = xmlParseName(ctxt);
7543 if (name == NULL) {
7544 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7545 "xmlParseEntityRef: no name\n");
7546 return(NULL);
7548 if (RAW != ';') {
7549 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7550 return(NULL);
7552 NEXT;
7555 * Predefined entities override any extra definition
7557 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7558 ent = xmlGetPredefinedEntity(name);
7559 if (ent != NULL)
7560 return(ent);
7564 * Increase the number of entity references parsed
7566 ctxt->nbentities++;
7569 * Ask first SAX for entity resolution, otherwise try the
7570 * entities which may have stored in the parser context.
7572 if (ctxt->sax != NULL) {
7573 if (ctxt->sax->getEntity != NULL)
7574 ent = ctxt->sax->getEntity(ctxt->userData, name);
7575 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7576 (ctxt->options & XML_PARSE_OLDSAX))
7577 ent = xmlGetPredefinedEntity(name);
7578 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7579 (ctxt->userData==ctxt)) {
7580 ent = xmlSAX2GetEntity(ctxt, name);
7583 if (ctxt->instate == XML_PARSER_EOF)
7584 return(NULL);
7586 * [ WFC: Entity Declared ]
7587 * In a document without any DTD, a document with only an
7588 * internal DTD subset which contains no parameter entity
7589 * references, or a document with "standalone='yes'", the
7590 * Name given in the entity reference must match that in an
7591 * entity declaration, except that well-formed documents
7592 * need not declare any of the following entities: amp, lt,
7593 * gt, apos, quot.
7594 * The declaration of a parameter entity must precede any
7595 * reference to it.
7596 * Similarly, the declaration of a general entity must
7597 * precede any reference to it which appears in a default
7598 * value in an attribute-list declaration. Note that if
7599 * entities are declared in the external subset or in
7600 * external parameter entities, a non-validating processor
7601 * is not obligated to read and process their declarations;
7602 * for such documents, the rule that an entity must be
7603 * declared is a well-formedness constraint only if
7604 * standalone='yes'.
7606 if (ent == NULL) {
7607 if ((ctxt->standalone == 1) ||
7608 ((ctxt->hasExternalSubset == 0) &&
7609 (ctxt->hasPErefs == 0))) {
7610 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7611 "Entity '%s' not defined\n", name);
7612 } else {
7613 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7614 "Entity '%s' not defined\n", name);
7615 if ((ctxt->inSubset == 0) &&
7616 (ctxt->sax != NULL) &&
7617 (ctxt->sax->reference != NULL)) {
7618 ctxt->sax->reference(ctxt->userData, name);
7621 xmlParserEntityCheck(ctxt, 0, ent, 0);
7622 ctxt->valid = 0;
7626 * [ WFC: Parsed Entity ]
7627 * An entity reference must not contain the name of an
7628 * unparsed entity
7630 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7631 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7632 "Entity reference to unparsed entity %s\n", name);
7636 * [ WFC: No External Entity References ]
7637 * Attribute values cannot contain direct or indirect
7638 * entity references to external entities.
7640 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7641 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7642 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7643 "Attribute references external entity '%s'\n", name);
7646 * [ WFC: No < in Attribute Values ]
7647 * The replacement text of any entity referred to directly or
7648 * indirectly in an attribute value (other than "&lt;") must
7649 * not contain a <.
7651 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7652 (ent != NULL) &&
7653 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7654 if (((ent->checked & 1) || (ent->checked == 0)) &&
7655 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7656 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7657 "'<' in entity '%s' is not allowed in attributes values\n", name);
7662 * Internal check, no parameter entities here ...
7664 else {
7665 switch (ent->etype) {
7666 case XML_INTERNAL_PARAMETER_ENTITY:
7667 case XML_EXTERNAL_PARAMETER_ENTITY:
7668 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7669 "Attempt to reference the parameter entity '%s'\n",
7670 name);
7671 break;
7672 default:
7673 break;
7678 * [ WFC: No Recursion ]
7679 * A parsed entity must not contain a recursive reference
7680 * to itself, either directly or indirectly.
7681 * Done somewhere else
7683 return(ent);
7687 * xmlParseStringEntityRef:
7688 * @ctxt: an XML parser context
7689 * @str: a pointer to an index in the string
7691 * parse ENTITY references declarations, but this version parses it from
7692 * a string value.
7694 * [68] EntityRef ::= '&' Name ';'
7696 * [ WFC: Entity Declared ]
7697 * In a document without any DTD, a document with only an internal DTD
7698 * subset which contains no parameter entity references, or a document
7699 * with "standalone='yes'", the Name given in the entity reference
7700 * must match that in an entity declaration, except that well-formed
7701 * documents need not declare any of the following entities: amp, lt,
7702 * gt, apos, quot. The declaration of a parameter entity must precede
7703 * any reference to it. Similarly, the declaration of a general entity
7704 * must precede any reference to it which appears in a default value in an
7705 * attribute-list declaration. Note that if entities are declared in the
7706 * external subset or in external parameter entities, a non-validating
7707 * processor is not obligated to read and process their declarations;
7708 * for such documents, the rule that an entity must be declared is a
7709 * well-formedness constraint only if standalone='yes'.
7711 * [ WFC: Parsed Entity ]
7712 * An entity reference must not contain the name of an unparsed entity
7714 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7715 * is updated to the current location in the string.
7717 static xmlEntityPtr
7718 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7719 xmlChar *name;
7720 const xmlChar *ptr;
7721 xmlChar cur;
7722 xmlEntityPtr ent = NULL;
7724 if ((str == NULL) || (*str == NULL))
7725 return(NULL);
7726 ptr = *str;
7727 cur = *ptr;
7728 if (cur != '&')
7729 return(NULL);
7731 ptr++;
7732 name = xmlParseStringName(ctxt, &ptr);
7733 if (name == NULL) {
7734 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7735 "xmlParseStringEntityRef: no name\n");
7736 *str = ptr;
7737 return(NULL);
7739 if (*ptr != ';') {
7740 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7741 xmlFree(name);
7742 *str = ptr;
7743 return(NULL);
7745 ptr++;
7749 * Predefined entities override any extra definition
7751 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7752 ent = xmlGetPredefinedEntity(name);
7753 if (ent != NULL) {
7754 xmlFree(name);
7755 *str = ptr;
7756 return(ent);
7761 * Increase the number of entity references parsed
7763 ctxt->nbentities++;
7766 * Ask first SAX for entity resolution, otherwise try the
7767 * entities which may have stored in the parser context.
7769 if (ctxt->sax != NULL) {
7770 if (ctxt->sax->getEntity != NULL)
7771 ent = ctxt->sax->getEntity(ctxt->userData, name);
7772 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7773 ent = xmlGetPredefinedEntity(name);
7774 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7775 ent = xmlSAX2GetEntity(ctxt, name);
7778 if (ctxt->instate == XML_PARSER_EOF) {
7779 xmlFree(name);
7780 return(NULL);
7784 * [ WFC: Entity Declared ]
7785 * In a document without any DTD, a document with only an
7786 * internal DTD subset which contains no parameter entity
7787 * references, or a document with "standalone='yes'", the
7788 * Name given in the entity reference must match that in an
7789 * entity declaration, except that well-formed documents
7790 * need not declare any of the following entities: amp, lt,
7791 * gt, apos, quot.
7792 * The declaration of a parameter entity must precede any
7793 * reference to it.
7794 * Similarly, the declaration of a general entity must
7795 * precede any reference to it which appears in a default
7796 * value in an attribute-list declaration. Note that if
7797 * entities are declared in the external subset or in
7798 * external parameter entities, a non-validating processor
7799 * is not obligated to read and process their declarations;
7800 * for such documents, the rule that an entity must be
7801 * declared is a well-formedness constraint only if
7802 * standalone='yes'.
7804 if (ent == NULL) {
7805 if ((ctxt->standalone == 1) ||
7806 ((ctxt->hasExternalSubset == 0) &&
7807 (ctxt->hasPErefs == 0))) {
7808 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7809 "Entity '%s' not defined\n", name);
7810 } else {
7811 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7812 "Entity '%s' not defined\n",
7813 name);
7815 xmlParserEntityCheck(ctxt, 0, ent, 0);
7816 /* TODO ? check regressions ctxt->valid = 0; */
7820 * [ WFC: Parsed Entity ]
7821 * An entity reference must not contain the name of an
7822 * unparsed entity
7824 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7825 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7826 "Entity reference to unparsed entity %s\n", name);
7830 * [ WFC: No External Entity References ]
7831 * Attribute values cannot contain direct or indirect
7832 * entity references to external entities.
7834 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7835 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7836 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7837 "Attribute references external entity '%s'\n", name);
7840 * [ WFC: No < in Attribute Values ]
7841 * The replacement text of any entity referred to directly or
7842 * indirectly in an attribute value (other than "&lt;") must
7843 * not contain a <.
7845 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7846 (ent != NULL) && (ent->content != NULL) &&
7847 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7848 (xmlStrchr(ent->content, '<'))) {
7849 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7850 "'<' in entity '%s' is not allowed in attributes values\n",
7851 name);
7855 * Internal check, no parameter entities here ...
7857 else {
7858 switch (ent->etype) {
7859 case XML_INTERNAL_PARAMETER_ENTITY:
7860 case XML_EXTERNAL_PARAMETER_ENTITY:
7861 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7862 "Attempt to reference the parameter entity '%s'\n",
7863 name);
7864 break;
7865 default:
7866 break;
7871 * [ WFC: No Recursion ]
7872 * A parsed entity must not contain a recursive reference
7873 * to itself, either directly or indirectly.
7874 * Done somewhere else
7877 xmlFree(name);
7878 *str = ptr;
7879 return(ent);
7883 * xmlParsePEReference:
7884 * @ctxt: an XML parser context
7886 * parse PEReference declarations
7887 * The entity content is handled directly by pushing it's content as
7888 * a new input stream.
7890 * [69] PEReference ::= '%' Name ';'
7892 * [ WFC: No Recursion ]
7893 * A parsed entity must not contain a recursive
7894 * reference to itself, either directly or indirectly.
7896 * [ WFC: Entity Declared ]
7897 * In a document without any DTD, a document with only an internal DTD
7898 * subset which contains no parameter entity references, or a document
7899 * with "standalone='yes'", ... ... The declaration of a parameter
7900 * entity must precede any reference to it...
7902 * [ VC: Entity Declared ]
7903 * In a document with an external subset or external parameter entities
7904 * with "standalone='no'", ... ... The declaration of a parameter entity
7905 * must precede any reference to it...
7907 * [ WFC: In DTD ]
7908 * Parameter-entity references may only appear in the DTD.
7909 * NOTE: misleading but this is handled.
7911 void
7912 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7914 const xmlChar *name;
7915 xmlEntityPtr entity = NULL;
7916 xmlParserInputPtr input;
7918 if (RAW != '%')
7919 return;
7920 NEXT;
7921 name = xmlParseName(ctxt);
7922 if (name == NULL) {
7923 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7924 return;
7926 if (xmlParserDebugEntities)
7927 xmlGenericError(xmlGenericErrorContext,
7928 "PEReference: %s\n", name);
7929 if (RAW != ';') {
7930 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7931 return;
7934 NEXT;
7937 * Increase the number of entity references parsed
7939 ctxt->nbentities++;
7942 * Request the entity from SAX
7944 if ((ctxt->sax != NULL) &&
7945 (ctxt->sax->getParameterEntity != NULL))
7946 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7947 if (ctxt->instate == XML_PARSER_EOF)
7948 return;
7949 if (entity == NULL) {
7951 * [ WFC: Entity Declared ]
7952 * In a document without any DTD, a document with only an
7953 * internal DTD subset which contains no parameter entity
7954 * references, or a document with "standalone='yes'", ...
7955 * ... The declaration of a parameter entity must precede
7956 * any reference to it...
7958 if ((ctxt->standalone == 1) ||
7959 ((ctxt->hasExternalSubset == 0) &&
7960 (ctxt->hasPErefs == 0))) {
7961 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7962 "PEReference: %%%s; not found\n",
7963 name);
7964 } else {
7966 * [ VC: Entity Declared ]
7967 * In a document with an external subset or external
7968 * parameter entities with "standalone='no'", ...
7969 * ... The declaration of a parameter entity must
7970 * precede any reference to it...
7972 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7973 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7974 "PEReference: %%%s; not found\n",
7975 name, NULL);
7976 } else
7977 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7978 "PEReference: %%%s; not found\n",
7979 name, NULL);
7980 ctxt->valid = 0;
7982 xmlParserEntityCheck(ctxt, 0, NULL, 0);
7983 } else {
7985 * Internal checking in case the entity quest barfed
7987 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7988 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7989 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7990 "Internal: %%%s; is not a parameter entity\n",
7991 name, NULL);
7992 } else {
7993 xmlChar start[4];
7994 xmlCharEncoding enc;
7996 if (xmlParserEntityCheck(ctxt, 0, entity, 0))
7997 return;
7999 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8000 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8001 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8002 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8003 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8004 (ctxt->replaceEntities == 0) &&
8005 (ctxt->validate == 0))
8006 return;
8008 input = xmlNewEntityInputStream(ctxt, entity);
8009 if (xmlPushInput(ctxt, input) < 0) {
8010 xmlFreeInputStream(input);
8011 return;
8014 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8016 * Get the 4 first bytes and decode the charset
8017 * if enc != XML_CHAR_ENCODING_NONE
8018 * plug some encoding conversion routines.
8019 * Note that, since we may have some non-UTF8
8020 * encoding (like UTF16, bug 135229), the 'length'
8021 * is not known, but we can calculate based upon
8022 * the amount of data in the buffer.
8024 GROW
8025 if (ctxt->instate == XML_PARSER_EOF)
8026 return;
8027 if ((ctxt->input->end - ctxt->input->cur)>=4) {
8028 start[0] = RAW;
8029 start[1] = NXT(1);
8030 start[2] = NXT(2);
8031 start[3] = NXT(3);
8032 enc = xmlDetectCharEncoding(start, 4);
8033 if (enc != XML_CHAR_ENCODING_NONE) {
8034 xmlSwitchEncoding(ctxt, enc);
8038 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8039 (IS_BLANK_CH(NXT(5)))) {
8040 xmlParseTextDecl(ctxt);
8045 ctxt->hasPErefs = 1;
8049 * xmlLoadEntityContent:
8050 * @ctxt: an XML parser context
8051 * @entity: an unloaded system entity
8053 * Load the original content of the given system entity from the
8054 * ExternalID/SystemID given. This is to be used for Included in Literal
8055 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8057 * Returns 0 in case of success and -1 in case of failure
8059 static int
8060 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8061 xmlParserInputPtr input;
8062 xmlBufferPtr buf;
8063 int l, c;
8064 int count = 0;
8066 if ((ctxt == NULL) || (entity == NULL) ||
8067 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8068 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8069 (entity->content != NULL)) {
8070 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8071 "xmlLoadEntityContent parameter error");
8072 return(-1);
8075 if (xmlParserDebugEntities)
8076 xmlGenericError(xmlGenericErrorContext,
8077 "Reading %s entity content input\n", entity->name);
8079 buf = xmlBufferCreate();
8080 if (buf == NULL) {
8081 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8082 "xmlLoadEntityContent parameter error");
8083 return(-1);
8085 xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8087 input = xmlNewEntityInputStream(ctxt, entity);
8088 if (input == NULL) {
8089 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8090 "xmlLoadEntityContent input error");
8091 xmlBufferFree(buf);
8092 return(-1);
8096 * Push the entity as the current input, read char by char
8097 * saving to the buffer until the end of the entity or an error
8099 if (xmlPushInput(ctxt, input) < 0) {
8100 xmlBufferFree(buf);
8101 xmlFreeInputStream(input);
8102 return(-1);
8105 GROW;
8106 c = CUR_CHAR(l);
8107 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8108 (IS_CHAR(c))) {
8109 xmlBufferAdd(buf, ctxt->input->cur, l);
8110 if (count++ > XML_PARSER_CHUNK_SIZE) {
8111 count = 0;
8112 GROW;
8113 if (ctxt->instate == XML_PARSER_EOF) {
8114 xmlBufferFree(buf);
8115 return(-1);
8118 NEXTL(l);
8119 c = CUR_CHAR(l);
8120 if (c == 0) {
8121 count = 0;
8122 GROW;
8123 if (ctxt->instate == XML_PARSER_EOF) {
8124 xmlBufferFree(buf);
8125 return(-1);
8127 c = CUR_CHAR(l);
8131 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8132 xmlPopInput(ctxt);
8133 } else if (!IS_CHAR(c)) {
8134 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8135 "xmlLoadEntityContent: invalid char value %d\n",
8137 xmlBufferFree(buf);
8138 return(-1);
8140 entity->content = buf->content;
8141 buf->content = NULL;
8142 xmlBufferFree(buf);
8144 return(0);
8148 * xmlParseStringPEReference:
8149 * @ctxt: an XML parser context
8150 * @str: a pointer to an index in the string
8152 * parse PEReference declarations
8154 * [69] PEReference ::= '%' Name ';'
8156 * [ WFC: No Recursion ]
8157 * A parsed entity must not contain a recursive
8158 * reference to itself, either directly or indirectly.
8160 * [ WFC: Entity Declared ]
8161 * In a document without any DTD, a document with only an internal DTD
8162 * subset which contains no parameter entity references, or a document
8163 * with "standalone='yes'", ... ... The declaration of a parameter
8164 * entity must precede any reference to it...
8166 * [ VC: Entity Declared ]
8167 * In a document with an external subset or external parameter entities
8168 * with "standalone='no'", ... ... The declaration of a parameter entity
8169 * must precede any reference to it...
8171 * [ WFC: In DTD ]
8172 * Parameter-entity references may only appear in the DTD.
8173 * NOTE: misleading but this is handled.
8175 * Returns the string of the entity content.
8176 * str is updated to the current value of the index
8178 static xmlEntityPtr
8179 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8180 const xmlChar *ptr;
8181 xmlChar cur;
8182 xmlChar *name;
8183 xmlEntityPtr entity = NULL;
8185 if ((str == NULL) || (*str == NULL)) return(NULL);
8186 ptr = *str;
8187 cur = *ptr;
8188 if (cur != '%')
8189 return(NULL);
8190 ptr++;
8191 name = xmlParseStringName(ctxt, &ptr);
8192 if (name == NULL) {
8193 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8194 "xmlParseStringPEReference: no name\n");
8195 *str = ptr;
8196 return(NULL);
8198 cur = *ptr;
8199 if (cur != ';') {
8200 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8201 xmlFree(name);
8202 *str = ptr;
8203 return(NULL);
8205 ptr++;
8208 * Increase the number of entity references parsed
8210 ctxt->nbentities++;
8213 * Request the entity from SAX
8215 if ((ctxt->sax != NULL) &&
8216 (ctxt->sax->getParameterEntity != NULL))
8217 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8218 if (ctxt->instate == XML_PARSER_EOF) {
8219 xmlFree(name);
8220 *str = ptr;
8221 return(NULL);
8223 if (entity == NULL) {
8225 * [ WFC: Entity Declared ]
8226 * In a document without any DTD, a document with only an
8227 * internal DTD subset which contains no parameter entity
8228 * references, or a document with "standalone='yes'", ...
8229 * ... The declaration of a parameter entity must precede
8230 * any reference to it...
8232 if ((ctxt->standalone == 1) ||
8233 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8234 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8235 "PEReference: %%%s; not found\n", name);
8236 } else {
8238 * [ VC: Entity Declared ]
8239 * In a document with an external subset or external
8240 * parameter entities with "standalone='no'", ...
8241 * ... The declaration of a parameter entity must
8242 * precede any reference to it...
8244 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8245 "PEReference: %%%s; not found\n",
8246 name, NULL);
8247 ctxt->valid = 0;
8249 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8250 } else {
8252 * Internal checking in case the entity quest barfed
8254 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8255 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8256 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8257 "%%%s; is not a parameter entity\n",
8258 name, NULL);
8261 ctxt->hasPErefs = 1;
8262 xmlFree(name);
8263 *str = ptr;
8264 return(entity);
8268 * xmlParseDocTypeDecl:
8269 * @ctxt: an XML parser context
8271 * parse a DOCTYPE declaration
8273 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8274 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8276 * [ VC: Root Element Type ]
8277 * The Name in the document type declaration must match the element
8278 * type of the root element.
8281 void
8282 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8283 const xmlChar *name = NULL;
8284 xmlChar *ExternalID = NULL;
8285 xmlChar *URI = NULL;
8288 * We know that '<!DOCTYPE' has been detected.
8290 SKIP(9);
8292 SKIP_BLANKS;
8295 * Parse the DOCTYPE name.
8297 name = xmlParseName(ctxt);
8298 if (name == NULL) {
8299 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8300 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8302 ctxt->intSubName = name;
8304 SKIP_BLANKS;
8307 * Check for SystemID and ExternalID
8309 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8311 if ((URI != NULL) || (ExternalID != NULL)) {
8312 ctxt->hasExternalSubset = 1;
8314 ctxt->extSubURI = URI;
8315 ctxt->extSubSystem = ExternalID;
8317 SKIP_BLANKS;
8320 * Create and update the internal subset.
8322 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8323 (!ctxt->disableSAX))
8324 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8325 if (ctxt->instate == XML_PARSER_EOF)
8326 return;
8329 * Is there any internal subset declarations ?
8330 * they are handled separately in xmlParseInternalSubset()
8332 if (RAW == '[')
8333 return;
8336 * We should be at the end of the DOCTYPE declaration.
8338 if (RAW != '>') {
8339 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8341 NEXT;
8345 * xmlParseInternalSubset:
8346 * @ctxt: an XML parser context
8348 * parse the internal subset declaration
8350 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8353 static void
8354 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8356 * Is there any DTD definition ?
8358 if (RAW == '[') {
8359 int baseInputNr = ctxt->inputNr;
8360 ctxt->instate = XML_PARSER_DTD;
8361 NEXT;
8363 * Parse the succession of Markup declarations and
8364 * PEReferences.
8365 * Subsequence (markupdecl | PEReference | S)*
8367 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8368 (ctxt->instate != XML_PARSER_EOF)) {
8369 int id = ctxt->input->id;
8370 unsigned long cons = CUR_CONSUMED;
8372 SKIP_BLANKS;
8373 xmlParseMarkupDecl(ctxt);
8374 xmlParsePEReference(ctxt);
8377 * Conditional sections are allowed from external entities included
8378 * by PE References in the internal subset.
8380 if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8381 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8382 xmlParseConditionalSections(ctxt);
8385 if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
8386 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8387 "xmlParseInternalSubset: error detected in Markup declaration\n");
8388 if (ctxt->inputNr > baseInputNr)
8389 xmlPopInput(ctxt);
8390 else
8391 break;
8394 if (RAW == ']') {
8395 NEXT;
8396 SKIP_BLANKS;
8401 * We should be at the end of the DOCTYPE declaration.
8403 if (RAW != '>') {
8404 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8405 return;
8407 NEXT;
8410 #ifdef LIBXML_SAX1_ENABLED
8412 * xmlParseAttribute:
8413 * @ctxt: an XML parser context
8414 * @value: a xmlChar ** used to store the value of the attribute
8416 * parse an attribute
8418 * [41] Attribute ::= Name Eq AttValue
8420 * [ WFC: No External Entity References ]
8421 * Attribute values cannot contain direct or indirect entity references
8422 * to external entities.
8424 * [ WFC: No < in Attribute Values ]
8425 * The replacement text of any entity referred to directly or indirectly in
8426 * an attribute value (other than "&lt;") must not contain a <.
8428 * [ VC: Attribute Value Type ]
8429 * The attribute must have been declared; the value must be of the type
8430 * declared for it.
8432 * [25] Eq ::= S? '=' S?
8434 * With namespace:
8436 * [NS 11] Attribute ::= QName Eq AttValue
8438 * Also the case QName == xmlns:??? is handled independently as a namespace
8439 * definition.
8441 * Returns the attribute name, and the value in *value.
8444 const xmlChar *
8445 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8446 const xmlChar *name;
8447 xmlChar *val;
8449 *value = NULL;
8450 GROW;
8451 name = xmlParseName(ctxt);
8452 if (name == NULL) {
8453 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8454 "error parsing attribute name\n");
8455 return(NULL);
8459 * read the value
8461 SKIP_BLANKS;
8462 if (RAW == '=') {
8463 NEXT;
8464 SKIP_BLANKS;
8465 val = xmlParseAttValue(ctxt);
8466 ctxt->instate = XML_PARSER_CONTENT;
8467 } else {
8468 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8469 "Specification mandates value for attribute %s\n", name);
8470 return(NULL);
8474 * Check that xml:lang conforms to the specification
8475 * No more registered as an error, just generate a warning now
8476 * since this was deprecated in XML second edition
8478 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8479 if (!xmlCheckLanguageID(val)) {
8480 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8481 "Malformed value for xml:lang : %s\n",
8482 val, NULL);
8487 * Check that xml:space conforms to the specification
8489 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8490 if (xmlStrEqual(val, BAD_CAST "default"))
8491 *(ctxt->space) = 0;
8492 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8493 *(ctxt->space) = 1;
8494 else {
8495 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8496 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8497 val, NULL);
8501 *value = val;
8502 return(name);
8506 * xmlParseStartTag:
8507 * @ctxt: an XML parser context
8509 * parse a start of tag either for rule element or
8510 * EmptyElement. In both case we don't parse the tag closing chars.
8512 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8514 * [ WFC: Unique Att Spec ]
8515 * No attribute name may appear more than once in the same start-tag or
8516 * empty-element tag.
8518 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8520 * [ WFC: Unique Att Spec ]
8521 * No attribute name may appear more than once in the same start-tag or
8522 * empty-element tag.
8524 * With namespace:
8526 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8528 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8530 * Returns the element name parsed
8533 const xmlChar *
8534 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8535 const xmlChar *name;
8536 const xmlChar *attname;
8537 xmlChar *attvalue;
8538 const xmlChar **atts = ctxt->atts;
8539 int nbatts = 0;
8540 int maxatts = ctxt->maxatts;
8541 int i;
8543 if (RAW != '<') return(NULL);
8544 NEXT1;
8546 name = xmlParseName(ctxt);
8547 if (name == NULL) {
8548 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8549 "xmlParseStartTag: invalid element name\n");
8550 return(NULL);
8554 * Now parse the attributes, it ends up with the ending
8556 * (S Attribute)* S?
8558 SKIP_BLANKS;
8559 GROW;
8561 while (((RAW != '>') &&
8562 ((RAW != '/') || (NXT(1) != '>')) &&
8563 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8564 int id = ctxt->input->id;
8565 unsigned long cons = CUR_CONSUMED;
8567 attname = xmlParseAttribute(ctxt, &attvalue);
8568 if ((attname != NULL) && (attvalue != NULL)) {
8570 * [ WFC: Unique Att Spec ]
8571 * No attribute name may appear more than once in the same
8572 * start-tag or empty-element tag.
8574 for (i = 0; i < nbatts;i += 2) {
8575 if (xmlStrEqual(atts[i], attname)) {
8576 xmlErrAttributeDup(ctxt, NULL, attname);
8577 xmlFree(attvalue);
8578 goto failed;
8582 * Add the pair to atts
8584 if (atts == NULL) {
8585 maxatts = 22; /* allow for 10 attrs by default */
8586 atts = (const xmlChar **)
8587 xmlMalloc(maxatts * sizeof(xmlChar *));
8588 if (atts == NULL) {
8589 xmlErrMemory(ctxt, NULL);
8590 if (attvalue != NULL)
8591 xmlFree(attvalue);
8592 goto failed;
8594 ctxt->atts = atts;
8595 ctxt->maxatts = maxatts;
8596 } else if (nbatts + 4 > maxatts) {
8597 const xmlChar **n;
8599 maxatts *= 2;
8600 n = (const xmlChar **) xmlRealloc((void *) atts,
8601 maxatts * sizeof(const xmlChar *));
8602 if (n == NULL) {
8603 xmlErrMemory(ctxt, NULL);
8604 if (attvalue != NULL)
8605 xmlFree(attvalue);
8606 goto failed;
8608 atts = n;
8609 ctxt->atts = atts;
8610 ctxt->maxatts = maxatts;
8612 atts[nbatts++] = attname;
8613 atts[nbatts++] = attvalue;
8614 atts[nbatts] = NULL;
8615 atts[nbatts + 1] = NULL;
8616 } else {
8617 if (attvalue != NULL)
8618 xmlFree(attvalue);
8621 failed:
8623 GROW
8624 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8625 break;
8626 if (SKIP_BLANKS == 0) {
8627 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8628 "attributes construct error\n");
8630 if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
8631 (attname == NULL) && (attvalue == NULL)) {
8632 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8633 "xmlParseStartTag: problem parsing attributes\n");
8634 break;
8636 SHRINK;
8637 GROW;
8641 * SAX: Start of Element !
8643 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8644 (!ctxt->disableSAX)) {
8645 if (nbatts > 0)
8646 ctxt->sax->startElement(ctxt->userData, name, atts);
8647 else
8648 ctxt->sax->startElement(ctxt->userData, name, NULL);
8651 if (atts != NULL) {
8652 /* Free only the content strings */
8653 for (i = 1;i < nbatts;i+=2)
8654 if (atts[i] != NULL)
8655 xmlFree((xmlChar *) atts[i]);
8657 return(name);
8661 * xmlParseEndTag1:
8662 * @ctxt: an XML parser context
8663 * @line: line of the start tag
8664 * @nsNr: number of namespaces on the start tag
8666 * parse an end of tag
8668 * [42] ETag ::= '</' Name S? '>'
8670 * With namespace
8672 * [NS 9] ETag ::= '</' QName S? '>'
8675 static void
8676 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8677 const xmlChar *name;
8679 GROW;
8680 if ((RAW != '<') || (NXT(1) != '/')) {
8681 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8682 "xmlParseEndTag: '</' not found\n");
8683 return;
8685 SKIP(2);
8687 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8690 * We should definitely be at the ending "S? '>'" part
8692 GROW;
8693 SKIP_BLANKS;
8694 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8695 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8696 } else
8697 NEXT1;
8700 * [ WFC: Element Type Match ]
8701 * The Name in an element's end-tag must match the element type in the
8702 * start-tag.
8705 if (name != (xmlChar*)1) {
8706 if (name == NULL) name = BAD_CAST "unparsable";
8707 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8708 "Opening and ending tag mismatch: %s line %d and %s\n",
8709 ctxt->name, line, name);
8713 * SAX: End of Tag
8715 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8716 (!ctxt->disableSAX))
8717 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8719 namePop(ctxt);
8720 spacePop(ctxt);
8721 return;
8725 * xmlParseEndTag:
8726 * @ctxt: an XML parser context
8728 * parse an end of tag
8730 * [42] ETag ::= '</' Name S? '>'
8732 * With namespace
8734 * [NS 9] ETag ::= '</' QName S? '>'
8737 void
8738 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8739 xmlParseEndTag1(ctxt, 0);
8741 #endif /* LIBXML_SAX1_ENABLED */
8743 /************************************************************************
8745 * SAX 2 specific operations *
8747 ************************************************************************/
8750 * xmlGetNamespace:
8751 * @ctxt: an XML parser context
8752 * @prefix: the prefix to lookup
8754 * Lookup the namespace name for the @prefix (which ca be NULL)
8755 * The prefix must come from the @ctxt->dict dictionary
8757 * Returns the namespace name or NULL if not bound
8759 static const xmlChar *
8760 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8761 int i;
8763 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8764 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8765 if (ctxt->nsTab[i] == prefix) {
8766 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8767 return(NULL);
8768 return(ctxt->nsTab[i + 1]);
8770 return(NULL);
8774 * xmlParseQName:
8775 * @ctxt: an XML parser context
8776 * @prefix: pointer to store the prefix part
8778 * parse an XML Namespace QName
8780 * [6] QName ::= (Prefix ':')? LocalPart
8781 * [7] Prefix ::= NCName
8782 * [8] LocalPart ::= NCName
8784 * Returns the Name parsed or NULL
8787 static const xmlChar *
8788 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8789 const xmlChar *l, *p;
8791 GROW;
8793 l = xmlParseNCName(ctxt);
8794 if (l == NULL) {
8795 if (CUR == ':') {
8796 l = xmlParseName(ctxt);
8797 if (l != NULL) {
8798 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8799 "Failed to parse QName '%s'\n", l, NULL, NULL);
8800 *prefix = NULL;
8801 return(l);
8804 return(NULL);
8806 if (CUR == ':') {
8807 NEXT;
8808 p = l;
8809 l = xmlParseNCName(ctxt);
8810 if (l == NULL) {
8811 xmlChar *tmp;
8813 if (ctxt->instate == XML_PARSER_EOF)
8814 return(NULL);
8815 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8816 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8817 l = xmlParseNmtoken(ctxt);
8818 if (l == NULL) {
8819 if (ctxt->instate == XML_PARSER_EOF)
8820 return(NULL);
8821 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8822 } else {
8823 tmp = xmlBuildQName(l, p, NULL, 0);
8824 xmlFree((char *)l);
8826 p = xmlDictLookup(ctxt->dict, tmp, -1);
8827 if (tmp != NULL) xmlFree(tmp);
8828 *prefix = NULL;
8829 return(p);
8831 if (CUR == ':') {
8832 xmlChar *tmp;
8834 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8835 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8836 NEXT;
8837 tmp = (xmlChar *) xmlParseName(ctxt);
8838 if (tmp != NULL) {
8839 tmp = xmlBuildQName(tmp, l, NULL, 0);
8840 l = xmlDictLookup(ctxt->dict, tmp, -1);
8841 if (tmp != NULL) xmlFree(tmp);
8842 *prefix = p;
8843 return(l);
8845 if (ctxt->instate == XML_PARSER_EOF)
8846 return(NULL);
8847 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8848 l = xmlDictLookup(ctxt->dict, tmp, -1);
8849 if (tmp != NULL) xmlFree(tmp);
8850 *prefix = p;
8851 return(l);
8853 *prefix = p;
8854 } else
8855 *prefix = NULL;
8856 return(l);
8860 * xmlParseQNameAndCompare:
8861 * @ctxt: an XML parser context
8862 * @name: the localname
8863 * @prefix: the prefix, if any.
8865 * parse an XML name and compares for match
8866 * (specialized for endtag parsing)
8868 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8869 * and the name for mismatch
8872 static const xmlChar *
8873 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8874 xmlChar const *prefix) {
8875 const xmlChar *cmp;
8876 const xmlChar *in;
8877 const xmlChar *ret;
8878 const xmlChar *prefix2;
8880 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8882 GROW;
8883 in = ctxt->input->cur;
8885 cmp = prefix;
8886 while (*in != 0 && *in == *cmp) {
8887 ++in;
8888 ++cmp;
8890 if ((*cmp == 0) && (*in == ':')) {
8891 in++;
8892 cmp = name;
8893 while (*in != 0 && *in == *cmp) {
8894 ++in;
8895 ++cmp;
8897 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8898 /* success */
8899 ctxt->input->col += in - ctxt->input->cur;
8900 ctxt->input->cur = in;
8901 return((const xmlChar*) 1);
8905 * all strings coms from the dictionary, equality can be done directly
8907 ret = xmlParseQName (ctxt, &prefix2);
8908 if ((ret == name) && (prefix == prefix2))
8909 return((const xmlChar*) 1);
8910 return ret;
8914 * xmlParseAttValueInternal:
8915 * @ctxt: an XML parser context
8916 * @len: attribute len result
8917 * @alloc: whether the attribute was reallocated as a new string
8918 * @normalize: if 1 then further non-CDATA normalization must be done
8920 * parse a value for an attribute.
8921 * NOTE: if no normalization is needed, the routine will return pointers
8922 * directly from the data buffer.
8924 * 3.3.3 Attribute-Value Normalization:
8925 * Before the value of an attribute is passed to the application or
8926 * checked for validity, the XML processor must normalize it as follows:
8927 * - a character reference is processed by appending the referenced
8928 * character to the attribute value
8929 * - an entity reference is processed by recursively processing the
8930 * replacement text of the entity
8931 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8932 * appending #x20 to the normalized value, except that only a single
8933 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8934 * parsed entity or the literal entity value of an internal parsed entity
8935 * - other characters are processed by appending them to the normalized value
8936 * If the declared value is not CDATA, then the XML processor must further
8937 * process the normalized attribute value by discarding any leading and
8938 * trailing space (#x20) characters, and by replacing sequences of space
8939 * (#x20) characters by a single space (#x20) character.
8940 * All attributes for which no declaration has been read should be treated
8941 * by a non-validating parser as if declared CDATA.
8943 * Returns the AttValue parsed or NULL. The value has to be freed by the
8944 * caller if it was copied, this can be detected by val[*len] == 0.
8947 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8948 const xmlChar *oldbase = ctxt->input->base;\
8949 GROW;\
8950 if (ctxt->instate == XML_PARSER_EOF)\
8951 return(NULL);\
8952 if (oldbase != ctxt->input->base) {\
8953 ptrdiff_t delta = ctxt->input->base - oldbase;\
8954 start = start + delta;\
8955 in = in + delta;\
8957 end = ctxt->input->end;
8959 static xmlChar *
8960 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8961 int normalize)
8963 xmlChar limit = 0;
8964 const xmlChar *in = NULL, *start, *end, *last;
8965 xmlChar *ret = NULL;
8966 int line, col;
8967 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
8968 XML_MAX_HUGE_LENGTH :
8969 XML_MAX_TEXT_LENGTH;
8971 GROW;
8972 in = (xmlChar *) CUR_PTR;
8973 line = ctxt->input->line;
8974 col = ctxt->input->col;
8975 if (*in != '"' && *in != '\'') {
8976 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8977 return (NULL);
8979 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8982 * try to handle in this routine the most common case where no
8983 * allocation of a new string is required and where content is
8984 * pure ASCII.
8986 limit = *in++;
8987 col++;
8988 end = ctxt->input->end;
8989 start = in;
8990 if (in >= end) {
8991 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8993 if (normalize) {
8995 * Skip any leading spaces
8997 while ((in < end) && (*in != limit) &&
8998 ((*in == 0x20) || (*in == 0x9) ||
8999 (*in == 0xA) || (*in == 0xD))) {
9000 if (*in == 0xA) {
9001 line++; col = 1;
9002 } else {
9003 col++;
9005 in++;
9006 start = in;
9007 if (in >= end) {
9008 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9009 if ((in - start) > maxLength) {
9010 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9011 "AttValue length too long\n");
9012 return(NULL);
9016 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9017 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9018 col++;
9019 if ((*in++ == 0x20) && (*in == 0x20)) break;
9020 if (in >= end) {
9021 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9022 if ((in - start) > maxLength) {
9023 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9024 "AttValue length too long\n");
9025 return(NULL);
9029 last = in;
9031 * skip the trailing blanks
9033 while ((last[-1] == 0x20) && (last > start)) last--;
9034 while ((in < end) && (*in != limit) &&
9035 ((*in == 0x20) || (*in == 0x9) ||
9036 (*in == 0xA) || (*in == 0xD))) {
9037 if (*in == 0xA) {
9038 line++, col = 1;
9039 } else {
9040 col++;
9042 in++;
9043 if (in >= end) {
9044 const xmlChar *oldbase = ctxt->input->base;
9045 GROW;
9046 if (ctxt->instate == XML_PARSER_EOF)
9047 return(NULL);
9048 if (oldbase != ctxt->input->base) {
9049 ptrdiff_t delta = ctxt->input->base - oldbase;
9050 start = start + delta;
9051 in = in + delta;
9052 last = last + delta;
9054 end = ctxt->input->end;
9055 if ((in - start) > maxLength) {
9056 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9057 "AttValue length too long\n");
9058 return(NULL);
9062 if ((in - start) > maxLength) {
9063 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9064 "AttValue length too long\n");
9065 return(NULL);
9067 if (*in != limit) goto need_complex;
9068 } else {
9069 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9070 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9071 in++;
9072 col++;
9073 if (in >= end) {
9074 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9075 if ((in - start) > maxLength) {
9076 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9077 "AttValue length too long\n");
9078 return(NULL);
9082 last = in;
9083 if ((in - start) > maxLength) {
9084 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9085 "AttValue length too long\n");
9086 return(NULL);
9088 if (*in != limit) goto need_complex;
9090 in++;
9091 col++;
9092 if (len != NULL) {
9093 *len = last - start;
9094 ret = (xmlChar *) start;
9095 } else {
9096 if (alloc) *alloc = 1;
9097 ret = xmlStrndup(start, last - start);
9099 CUR_PTR = in;
9100 ctxt->input->line = line;
9101 ctxt->input->col = col;
9102 if (alloc) *alloc = 0;
9103 return ret;
9104 need_complex:
9105 if (alloc) *alloc = 1;
9106 return xmlParseAttValueComplex(ctxt, len, normalize);
9110 * xmlParseAttribute2:
9111 * @ctxt: an XML parser context
9112 * @pref: the element prefix
9113 * @elem: the element name
9114 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9115 * @value: a xmlChar ** used to store the value of the attribute
9116 * @len: an int * to save the length of the attribute
9117 * @alloc: an int * to indicate if the attribute was allocated
9119 * parse an attribute in the new SAX2 framework.
9121 * Returns the attribute name, and the value in *value, .
9124 static const xmlChar *
9125 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9126 const xmlChar * pref, const xmlChar * elem,
9127 const xmlChar ** prefix, xmlChar ** value,
9128 int *len, int *alloc)
9130 const xmlChar *name;
9131 xmlChar *val, *internal_val = NULL;
9132 int normalize = 0;
9134 *value = NULL;
9135 GROW;
9136 name = xmlParseQName(ctxt, prefix);
9137 if (name == NULL) {
9138 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9139 "error parsing attribute name\n");
9140 return (NULL);
9144 * get the type if needed
9146 if (ctxt->attsSpecial != NULL) {
9147 int type;
9149 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9150 pref, elem, *prefix, name);
9151 if (type != 0)
9152 normalize = 1;
9156 * read the value
9158 SKIP_BLANKS;
9159 if (RAW == '=') {
9160 NEXT;
9161 SKIP_BLANKS;
9162 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9163 if (normalize) {
9165 * Sometimes a second normalisation pass for spaces is needed
9166 * but that only happens if charrefs or entities references
9167 * have been used in the attribute value, i.e. the attribute
9168 * value have been extracted in an allocated string already.
9170 if (*alloc) {
9171 const xmlChar *val2;
9173 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9174 if ((val2 != NULL) && (val2 != val)) {
9175 xmlFree(val);
9176 val = (xmlChar *) val2;
9180 ctxt->instate = XML_PARSER_CONTENT;
9181 } else {
9182 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9183 "Specification mandates value for attribute %s\n",
9184 name);
9185 return (NULL);
9188 if (*prefix == ctxt->str_xml) {
9190 * Check that xml:lang conforms to the specification
9191 * No more registered as an error, just generate a warning now
9192 * since this was deprecated in XML second edition
9194 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9195 internal_val = xmlStrndup(val, *len);
9196 if (!xmlCheckLanguageID(internal_val)) {
9197 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9198 "Malformed value for xml:lang : %s\n",
9199 internal_val, NULL);
9204 * Check that xml:space conforms to the specification
9206 if (xmlStrEqual(name, BAD_CAST "space")) {
9207 internal_val = xmlStrndup(val, *len);
9208 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9209 *(ctxt->space) = 0;
9210 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9211 *(ctxt->space) = 1;
9212 else {
9213 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9214 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9215 internal_val, NULL);
9218 if (internal_val) {
9219 xmlFree(internal_val);
9223 *value = val;
9224 return (name);
9227 * xmlParseStartTag2:
9228 * @ctxt: an XML parser context
9230 * parse a start of tag either for rule element or
9231 * EmptyElement. In both case we don't parse the tag closing chars.
9232 * This routine is called when running SAX2 parsing
9234 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9236 * [ WFC: Unique Att Spec ]
9237 * No attribute name may appear more than once in the same start-tag or
9238 * empty-element tag.
9240 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9242 * [ WFC: Unique Att Spec ]
9243 * No attribute name may appear more than once in the same start-tag or
9244 * empty-element tag.
9246 * With namespace:
9248 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9250 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9252 * Returns the element name parsed
9255 static const xmlChar *
9256 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9257 const xmlChar **URI, int *tlen) {
9258 const xmlChar *localname;
9259 const xmlChar *prefix;
9260 const xmlChar *attname;
9261 const xmlChar *aprefix;
9262 const xmlChar *nsname;
9263 xmlChar *attvalue;
9264 const xmlChar **atts = ctxt->atts;
9265 int maxatts = ctxt->maxatts;
9266 int nratts, nbatts, nbdef, inputid;
9267 int i, j, nbNs, attval;
9268 unsigned long cur;
9269 int nsNr = ctxt->nsNr;
9271 if (RAW != '<') return(NULL);
9272 NEXT1;
9275 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9276 * point since the attribute values may be stored as pointers to
9277 * the buffer and calling SHRINK would destroy them !
9278 * The Shrinking is only possible once the full set of attribute
9279 * callbacks have been done.
9281 SHRINK;
9282 cur = ctxt->input->cur - ctxt->input->base;
9283 inputid = ctxt->input->id;
9284 nbatts = 0;
9285 nratts = 0;
9286 nbdef = 0;
9287 nbNs = 0;
9288 attval = 0;
9289 /* Forget any namespaces added during an earlier parse of this element. */
9290 ctxt->nsNr = nsNr;
9292 localname = xmlParseQName(ctxt, &prefix);
9293 if (localname == NULL) {
9294 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9295 "StartTag: invalid element name\n");
9296 return(NULL);
9298 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9301 * Now parse the attributes, it ends up with the ending
9303 * (S Attribute)* S?
9305 SKIP_BLANKS;
9306 GROW;
9308 while (((RAW != '>') &&
9309 ((RAW != '/') || (NXT(1) != '>')) &&
9310 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9311 int id = ctxt->input->id;
9312 unsigned long cons = CUR_CONSUMED;
9313 int len = -1, alloc = 0;
9315 attname = xmlParseAttribute2(ctxt, prefix, localname,
9316 &aprefix, &attvalue, &len, &alloc);
9317 if ((attname == NULL) || (attvalue == NULL))
9318 goto next_attr;
9319 if (len < 0) len = xmlStrlen(attvalue);
9321 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9322 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9323 xmlURIPtr uri;
9325 if (URL == NULL) {
9326 xmlErrMemory(ctxt, "dictionary allocation failure");
9327 if ((attvalue != NULL) && (alloc != 0))
9328 xmlFree(attvalue);
9329 localname = NULL;
9330 goto done;
9332 if (*URL != 0) {
9333 uri = xmlParseURI((const char *) URL);
9334 if (uri == NULL) {
9335 xmlNsErr(ctxt, XML_WAR_NS_URI,
9336 "xmlns: '%s' is not a valid URI\n",
9337 URL, NULL, NULL);
9338 } else {
9339 if (uri->scheme == NULL) {
9340 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9341 "xmlns: URI %s is not absolute\n",
9342 URL, NULL, NULL);
9344 xmlFreeURI(uri);
9346 if (URL == ctxt->str_xml_ns) {
9347 if (attname != ctxt->str_xml) {
9348 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9349 "xml namespace URI cannot be the default namespace\n",
9350 NULL, NULL, NULL);
9352 goto next_attr;
9354 if ((len == 29) &&
9355 (xmlStrEqual(URL,
9356 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9357 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9358 "reuse of the xmlns namespace name is forbidden\n",
9359 NULL, NULL, NULL);
9360 goto next_attr;
9364 * check that it's not a defined namespace
9366 for (j = 1;j <= nbNs;j++)
9367 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9368 break;
9369 if (j <= nbNs)
9370 xmlErrAttributeDup(ctxt, NULL, attname);
9371 else
9372 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9374 } else if (aprefix == ctxt->str_xmlns) {
9375 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9376 xmlURIPtr uri;
9378 if (attname == ctxt->str_xml) {
9379 if (URL != ctxt->str_xml_ns) {
9380 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9381 "xml namespace prefix mapped to wrong URI\n",
9382 NULL, NULL, NULL);
9385 * Do not keep a namespace definition node
9387 goto next_attr;
9389 if (URL == ctxt->str_xml_ns) {
9390 if (attname != ctxt->str_xml) {
9391 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9392 "xml namespace URI mapped to wrong prefix\n",
9393 NULL, NULL, NULL);
9395 goto next_attr;
9397 if (attname == ctxt->str_xmlns) {
9398 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9399 "redefinition of the xmlns prefix is forbidden\n",
9400 NULL, NULL, NULL);
9401 goto next_attr;
9403 if ((len == 29) &&
9404 (xmlStrEqual(URL,
9405 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9406 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9407 "reuse of the xmlns namespace name is forbidden\n",
9408 NULL, NULL, NULL);
9409 goto next_attr;
9411 if ((URL == NULL) || (URL[0] == 0)) {
9412 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9413 "xmlns:%s: Empty XML namespace is not allowed\n",
9414 attname, NULL, NULL);
9415 goto next_attr;
9416 } else {
9417 uri = xmlParseURI((const char *) URL);
9418 if (uri == NULL) {
9419 xmlNsErr(ctxt, XML_WAR_NS_URI,
9420 "xmlns:%s: '%s' is not a valid URI\n",
9421 attname, URL, NULL);
9422 } else {
9423 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9424 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9425 "xmlns:%s: URI %s is not absolute\n",
9426 attname, URL, NULL);
9428 xmlFreeURI(uri);
9433 * check that it's not a defined namespace
9435 for (j = 1;j <= nbNs;j++)
9436 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9437 break;
9438 if (j <= nbNs)
9439 xmlErrAttributeDup(ctxt, aprefix, attname);
9440 else
9441 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9443 } else {
9445 * Add the pair to atts
9447 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9448 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9449 goto next_attr;
9451 maxatts = ctxt->maxatts;
9452 atts = ctxt->atts;
9454 ctxt->attallocs[nratts++] = alloc;
9455 atts[nbatts++] = attname;
9456 atts[nbatts++] = aprefix;
9458 * The namespace URI field is used temporarily to point at the
9459 * base of the current input buffer for non-alloced attributes.
9460 * When the input buffer is reallocated, all the pointers become
9461 * invalid, but they can be reconstructed later.
9463 if (alloc)
9464 atts[nbatts++] = NULL;
9465 else
9466 atts[nbatts++] = ctxt->input->base;
9467 atts[nbatts++] = attvalue;
9468 attvalue += len;
9469 atts[nbatts++] = attvalue;
9471 * tag if some deallocation is needed
9473 if (alloc != 0) attval = 1;
9474 attvalue = NULL; /* moved into atts */
9477 next_attr:
9478 if ((attvalue != NULL) && (alloc != 0)) {
9479 xmlFree(attvalue);
9480 attvalue = NULL;
9483 GROW
9484 if (ctxt->instate == XML_PARSER_EOF)
9485 break;
9486 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9487 break;
9488 if (SKIP_BLANKS == 0) {
9489 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9490 "attributes construct error\n");
9491 break;
9493 if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
9494 (attname == NULL) && (attvalue == NULL)) {
9495 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9496 "xmlParseStartTag: problem parsing attributes\n");
9497 break;
9499 GROW;
9502 if (ctxt->input->id != inputid) {
9503 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9504 "Unexpected change of input\n");
9505 localname = NULL;
9506 goto done;
9509 /* Reconstruct attribute value pointers. */
9510 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9511 if (atts[i+2] != NULL) {
9513 * Arithmetic on dangling pointers is technically undefined
9514 * behavior, but well...
9516 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9517 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9518 atts[i+3] += offset; /* value */
9519 atts[i+4] += offset; /* valuend */
9524 * The attributes defaulting
9526 if (ctxt->attsDefault != NULL) {
9527 xmlDefAttrsPtr defaults;
9529 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9530 if (defaults != NULL) {
9531 for (i = 0;i < defaults->nbAttrs;i++) {
9532 attname = defaults->values[5 * i];
9533 aprefix = defaults->values[5 * i + 1];
9536 * special work for namespaces defaulted defs
9538 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9540 * check that it's not a defined namespace
9542 for (j = 1;j <= nbNs;j++)
9543 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9544 break;
9545 if (j <= nbNs) continue;
9547 nsname = xmlGetNamespace(ctxt, NULL);
9548 if (nsname != defaults->values[5 * i + 2]) {
9549 if (nsPush(ctxt, NULL,
9550 defaults->values[5 * i + 2]) > 0)
9551 nbNs++;
9553 } else if (aprefix == ctxt->str_xmlns) {
9555 * check that it's not a defined namespace
9557 for (j = 1;j <= nbNs;j++)
9558 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9559 break;
9560 if (j <= nbNs) continue;
9562 nsname = xmlGetNamespace(ctxt, attname);
9563 if (nsname != defaults->values[2]) {
9564 if (nsPush(ctxt, attname,
9565 defaults->values[5 * i + 2]) > 0)
9566 nbNs++;
9568 } else {
9570 * check that it's not a defined attribute
9572 for (j = 0;j < nbatts;j+=5) {
9573 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9574 break;
9576 if (j < nbatts) continue;
9578 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9579 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9580 localname = NULL;
9581 goto done;
9583 maxatts = ctxt->maxatts;
9584 atts = ctxt->atts;
9586 atts[nbatts++] = attname;
9587 atts[nbatts++] = aprefix;
9588 if (aprefix == NULL)
9589 atts[nbatts++] = NULL;
9590 else
9591 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9592 atts[nbatts++] = defaults->values[5 * i + 2];
9593 atts[nbatts++] = defaults->values[5 * i + 3];
9594 if ((ctxt->standalone == 1) &&
9595 (defaults->values[5 * i + 4] != NULL)) {
9596 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9597 "standalone: attribute %s on %s defaulted from external subset\n",
9598 attname, localname);
9600 nbdef++;
9607 * The attributes checkings
9609 for (i = 0; i < nbatts;i += 5) {
9611 * The default namespace does not apply to attribute names.
9613 if (atts[i + 1] != NULL) {
9614 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9615 if (nsname == NULL) {
9616 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9617 "Namespace prefix %s for %s on %s is not defined\n",
9618 atts[i + 1], atts[i], localname);
9620 atts[i + 2] = nsname;
9621 } else
9622 nsname = NULL;
9624 * [ WFC: Unique Att Spec ]
9625 * No attribute name may appear more than once in the same
9626 * start-tag or empty-element tag.
9627 * As extended by the Namespace in XML REC.
9629 for (j = 0; j < i;j += 5) {
9630 if (atts[i] == atts[j]) {
9631 if (atts[i+1] == atts[j+1]) {
9632 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9633 break;
9635 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9636 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9637 "Namespaced Attribute %s in '%s' redefined\n",
9638 atts[i], nsname, NULL);
9639 break;
9645 nsname = xmlGetNamespace(ctxt, prefix);
9646 if ((prefix != NULL) && (nsname == NULL)) {
9647 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9648 "Namespace prefix %s on %s is not defined\n",
9649 prefix, localname, NULL);
9651 *pref = prefix;
9652 *URI = nsname;
9655 * SAX: Start of Element !
9657 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9658 (!ctxt->disableSAX)) {
9659 if (nbNs > 0)
9660 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9661 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9662 nbatts / 5, nbdef, atts);
9663 else
9664 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9665 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9668 done:
9670 * Free up attribute allocated strings if needed
9672 if (attval != 0) {
9673 for (i = 3,j = 0; j < nratts;i += 5,j++)
9674 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9675 xmlFree((xmlChar *) atts[i]);
9678 return(localname);
9682 * xmlParseEndTag2:
9683 * @ctxt: an XML parser context
9684 * @line: line of the start tag
9685 * @nsNr: number of namespaces on the start tag
9687 * parse an end of tag
9689 * [42] ETag ::= '</' Name S? '>'
9691 * With namespace
9693 * [NS 9] ETag ::= '</' QName S? '>'
9696 static void
9697 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9698 const xmlChar *name;
9700 GROW;
9701 if ((RAW != '<') || (NXT(1) != '/')) {
9702 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9703 return;
9705 SKIP(2);
9707 if (tag->prefix == NULL)
9708 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9709 else
9710 name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9713 * We should definitely be at the ending "S? '>'" part
9715 GROW;
9716 if (ctxt->instate == XML_PARSER_EOF)
9717 return;
9718 SKIP_BLANKS;
9719 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9720 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9721 } else
9722 NEXT1;
9725 * [ WFC: Element Type Match ]
9726 * The Name in an element's end-tag must match the element type in the
9727 * start-tag.
9730 if (name != (xmlChar*)1) {
9731 if (name == NULL) name = BAD_CAST "unparsable";
9732 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9733 "Opening and ending tag mismatch: %s line %d and %s\n",
9734 ctxt->name, tag->line, name);
9738 * SAX: End of Tag
9740 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9741 (!ctxt->disableSAX))
9742 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9743 tag->URI);
9745 spacePop(ctxt);
9746 if (tag->nsNr != 0)
9747 nsPop(ctxt, tag->nsNr);
9751 * xmlParseCDSect:
9752 * @ctxt: an XML parser context
9754 * Parse escaped pure raw content.
9756 * [18] CDSect ::= CDStart CData CDEnd
9758 * [19] CDStart ::= '<![CDATA['
9760 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9762 * [21] CDEnd ::= ']]>'
9764 void
9765 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9766 xmlChar *buf = NULL;
9767 int len = 0;
9768 int size = XML_PARSER_BUFFER_SIZE;
9769 int r, rl;
9770 int s, sl;
9771 int cur, l;
9772 int count = 0;
9773 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9774 XML_MAX_HUGE_LENGTH :
9775 XML_MAX_TEXT_LENGTH;
9777 /* Check 2.6.0 was NXT(0) not RAW */
9778 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9779 SKIP(9);
9780 } else
9781 return;
9783 ctxt->instate = XML_PARSER_CDATA_SECTION;
9784 r = CUR_CHAR(rl);
9785 if (!IS_CHAR(r)) {
9786 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9787 ctxt->instate = XML_PARSER_CONTENT;
9788 return;
9790 NEXTL(rl);
9791 s = CUR_CHAR(sl);
9792 if (!IS_CHAR(s)) {
9793 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9794 ctxt->instate = XML_PARSER_CONTENT;
9795 return;
9797 NEXTL(sl);
9798 cur = CUR_CHAR(l);
9799 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9800 if (buf == NULL) {
9801 xmlErrMemory(ctxt, NULL);
9802 return;
9804 while (IS_CHAR(cur) &&
9805 ((r != ']') || (s != ']') || (cur != '>'))) {
9806 if (len + 5 >= size) {
9807 xmlChar *tmp;
9809 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9810 if (tmp == NULL) {
9811 xmlFree(buf);
9812 xmlErrMemory(ctxt, NULL);
9813 return;
9815 buf = tmp;
9816 size *= 2;
9818 COPY_BUF(rl,buf,len,r);
9819 r = s;
9820 rl = sl;
9821 s = cur;
9822 sl = l;
9823 count++;
9824 if (count > 50) {
9825 SHRINK;
9826 GROW;
9827 if (ctxt->instate == XML_PARSER_EOF) {
9828 xmlFree(buf);
9829 return;
9831 count = 0;
9833 NEXTL(l);
9834 cur = CUR_CHAR(l);
9835 if (len > maxLength) {
9836 xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9837 "CData section too big found\n");
9838 xmlFree(buf);
9839 return;
9842 buf[len] = 0;
9843 ctxt->instate = XML_PARSER_CONTENT;
9844 if (cur != '>') {
9845 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9846 "CData section not finished\n%.50s\n", buf);
9847 xmlFree(buf);
9848 return;
9850 NEXTL(l);
9853 * OK the buffer is to be consumed as cdata.
9855 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9856 if (ctxt->sax->cdataBlock != NULL)
9857 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9858 else if (ctxt->sax->characters != NULL)
9859 ctxt->sax->characters(ctxt->userData, buf, len);
9861 xmlFree(buf);
9865 * xmlParseContentInternal:
9866 * @ctxt: an XML parser context
9868 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9869 * unexpected EOF to the caller.
9872 static void
9873 xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9874 int nameNr = ctxt->nameNr;
9876 GROW;
9877 while ((RAW != 0) &&
9878 (ctxt->instate != XML_PARSER_EOF)) {
9879 int id = ctxt->input->id;
9880 unsigned long cons = CUR_CONSUMED;
9881 const xmlChar *cur = ctxt->input->cur;
9884 * First case : a Processing Instruction.
9886 if ((*cur == '<') && (cur[1] == '?')) {
9887 xmlParsePI(ctxt);
9891 * Second case : a CDSection
9893 /* 2.6.0 test was *cur not RAW */
9894 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9895 xmlParseCDSect(ctxt);
9899 * Third case : a comment
9901 else if ((*cur == '<') && (NXT(1) == '!') &&
9902 (NXT(2) == '-') && (NXT(3) == '-')) {
9903 xmlParseComment(ctxt);
9904 ctxt->instate = XML_PARSER_CONTENT;
9908 * Fourth case : a sub-element.
9910 else if (*cur == '<') {
9911 if (NXT(1) == '/') {
9912 if (ctxt->nameNr <= nameNr)
9913 break;
9914 xmlParseElementEnd(ctxt);
9915 } else {
9916 xmlParseElementStart(ctxt);
9921 * Fifth case : a reference. If if has not been resolved,
9922 * parsing returns it's Name, create the node
9925 else if (*cur == '&') {
9926 xmlParseReference(ctxt);
9930 * Last case, text. Note that References are handled directly.
9932 else {
9933 xmlParseCharData(ctxt, 0);
9936 GROW;
9937 SHRINK;
9939 if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
9940 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9941 "detected an error in element content\n");
9942 xmlHaltParser(ctxt);
9943 break;
9949 * xmlParseContent:
9950 * @ctxt: an XML parser context
9952 * Parse a content sequence. Stops at EOF or '</'.
9954 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9957 void
9958 xmlParseContent(xmlParserCtxtPtr ctxt) {
9959 int nameNr = ctxt->nameNr;
9961 xmlParseContentInternal(ctxt);
9963 if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9964 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9965 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9966 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9967 "Premature end of data in tag %s line %d\n",
9968 name, line, NULL);
9973 * xmlParseElement:
9974 * @ctxt: an XML parser context
9976 * parse an XML element
9978 * [39] element ::= EmptyElemTag | STag content ETag
9980 * [ WFC: Element Type Match ]
9981 * The Name in an element's end-tag must match the element type in the
9982 * start-tag.
9986 void
9987 xmlParseElement(xmlParserCtxtPtr ctxt) {
9988 if (xmlParseElementStart(ctxt) != 0)
9989 return;
9991 xmlParseContentInternal(ctxt);
9992 if (ctxt->instate == XML_PARSER_EOF)
9993 return;
9995 if (CUR == 0) {
9996 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9997 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9998 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9999 "Premature end of data in tag %s line %d\n",
10000 name, line, NULL);
10001 return;
10004 xmlParseElementEnd(ctxt);
10008 * xmlParseElementStart:
10009 * @ctxt: an XML parser context
10011 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10012 * opening tag was parsed, 1 if an empty element was parsed.
10014 static int
10015 xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10016 const xmlChar *name;
10017 const xmlChar *prefix = NULL;
10018 const xmlChar *URI = NULL;
10019 xmlParserNodeInfo node_info;
10020 int line, tlen = 0;
10021 xmlNodePtr ret;
10022 int nsNr = ctxt->nsNr;
10024 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10025 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10026 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10027 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10028 xmlParserMaxDepth);
10029 xmlHaltParser(ctxt);
10030 return(-1);
10033 /* Capture start position */
10034 if (ctxt->record_info) {
10035 node_info.begin_pos = ctxt->input->consumed +
10036 (CUR_PTR - ctxt->input->base);
10037 node_info.begin_line = ctxt->input->line;
10040 if (ctxt->spaceNr == 0)
10041 spacePush(ctxt, -1);
10042 else if (*ctxt->space == -2)
10043 spacePush(ctxt, -1);
10044 else
10045 spacePush(ctxt, *ctxt->space);
10047 line = ctxt->input->line;
10048 #ifdef LIBXML_SAX1_ENABLED
10049 if (ctxt->sax2)
10050 #endif /* LIBXML_SAX1_ENABLED */
10051 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10052 #ifdef LIBXML_SAX1_ENABLED
10053 else
10054 name = xmlParseStartTag(ctxt);
10055 #endif /* LIBXML_SAX1_ENABLED */
10056 if (ctxt->instate == XML_PARSER_EOF)
10057 return(-1);
10058 if (name == NULL) {
10059 spacePop(ctxt);
10060 return(-1);
10062 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10063 ret = ctxt->node;
10065 #ifdef LIBXML_VALID_ENABLED
10067 * [ VC: Root Element Type ]
10068 * The Name in the document type declaration must match the element
10069 * type of the root element.
10071 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10072 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10073 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10074 #endif /* LIBXML_VALID_ENABLED */
10077 * Check for an Empty Element.
10079 if ((RAW == '/') && (NXT(1) == '>')) {
10080 SKIP(2);
10081 if (ctxt->sax2) {
10082 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10083 (!ctxt->disableSAX))
10084 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10085 #ifdef LIBXML_SAX1_ENABLED
10086 } else {
10087 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10088 (!ctxt->disableSAX))
10089 ctxt->sax->endElement(ctxt->userData, name);
10090 #endif /* LIBXML_SAX1_ENABLED */
10092 namePop(ctxt);
10093 spacePop(ctxt);
10094 if (nsNr != ctxt->nsNr)
10095 nsPop(ctxt, ctxt->nsNr - nsNr);
10096 if ( ret != NULL && ctxt->record_info ) {
10097 node_info.end_pos = ctxt->input->consumed +
10098 (CUR_PTR - ctxt->input->base);
10099 node_info.end_line = ctxt->input->line;
10100 node_info.node = ret;
10101 xmlParserAddNodeInfo(ctxt, &node_info);
10103 return(1);
10105 if (RAW == '>') {
10106 NEXT1;
10107 } else {
10108 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10109 "Couldn't find end of Start Tag %s line %d\n",
10110 name, line, NULL);
10113 * end of parsing of this node.
10115 nodePop(ctxt);
10116 namePop(ctxt);
10117 spacePop(ctxt);
10118 if (nsNr != ctxt->nsNr)
10119 nsPop(ctxt, ctxt->nsNr - nsNr);
10122 * Capture end position and add node
10124 if ( ret != NULL && ctxt->record_info ) {
10125 node_info.end_pos = ctxt->input->consumed +
10126 (CUR_PTR - ctxt->input->base);
10127 node_info.end_line = ctxt->input->line;
10128 node_info.node = ret;
10129 xmlParserAddNodeInfo(ctxt, &node_info);
10131 return(-1);
10134 return(0);
10138 * xmlParseElementEnd:
10139 * @ctxt: an XML parser context
10141 * Parse the end of an XML element.
10143 static void
10144 xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10145 xmlParserNodeInfo node_info;
10146 xmlNodePtr ret = ctxt->node;
10148 if (ctxt->nameNr <= 0)
10149 return;
10152 * parse the end of tag: '</' should be here.
10154 if (ctxt->sax2) {
10155 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10156 namePop(ctxt);
10158 #ifdef LIBXML_SAX1_ENABLED
10159 else
10160 xmlParseEndTag1(ctxt, 0);
10161 #endif /* LIBXML_SAX1_ENABLED */
10164 * Capture end position and add node
10166 if ( ret != NULL && ctxt->record_info ) {
10167 node_info.end_pos = ctxt->input->consumed +
10168 (CUR_PTR - ctxt->input->base);
10169 node_info.end_line = ctxt->input->line;
10170 node_info.node = ret;
10171 xmlParserAddNodeInfo(ctxt, &node_info);
10176 * xmlParseVersionNum:
10177 * @ctxt: an XML parser context
10179 * parse the XML version value.
10181 * [26] VersionNum ::= '1.' [0-9]+
10183 * In practice allow [0-9].[0-9]+ at that level
10185 * Returns the string giving the XML version number, or NULL
10187 xmlChar *
10188 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10189 xmlChar *buf = NULL;
10190 int len = 0;
10191 int size = 10;
10192 xmlChar cur;
10194 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10195 if (buf == NULL) {
10196 xmlErrMemory(ctxt, NULL);
10197 return(NULL);
10199 cur = CUR;
10200 if (!((cur >= '0') && (cur <= '9'))) {
10201 xmlFree(buf);
10202 return(NULL);
10204 buf[len++] = cur;
10205 NEXT;
10206 cur=CUR;
10207 if (cur != '.') {
10208 xmlFree(buf);
10209 return(NULL);
10211 buf[len++] = cur;
10212 NEXT;
10213 cur=CUR;
10214 while ((cur >= '0') && (cur <= '9')) {
10215 if (len + 1 >= size) {
10216 xmlChar *tmp;
10218 size *= 2;
10219 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10220 if (tmp == NULL) {
10221 xmlFree(buf);
10222 xmlErrMemory(ctxt, NULL);
10223 return(NULL);
10225 buf = tmp;
10227 buf[len++] = cur;
10228 NEXT;
10229 cur=CUR;
10231 buf[len] = 0;
10232 return(buf);
10236 * xmlParseVersionInfo:
10237 * @ctxt: an XML parser context
10239 * parse the XML version.
10241 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10243 * [25] Eq ::= S? '=' S?
10245 * Returns the version string, e.g. "1.0"
10248 xmlChar *
10249 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10250 xmlChar *version = NULL;
10252 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10253 SKIP(7);
10254 SKIP_BLANKS;
10255 if (RAW != '=') {
10256 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10257 return(NULL);
10259 NEXT;
10260 SKIP_BLANKS;
10261 if (RAW == '"') {
10262 NEXT;
10263 version = xmlParseVersionNum(ctxt);
10264 if (RAW != '"') {
10265 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10266 } else
10267 NEXT;
10268 } else if (RAW == '\''){
10269 NEXT;
10270 version = xmlParseVersionNum(ctxt);
10271 if (RAW != '\'') {
10272 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10273 } else
10274 NEXT;
10275 } else {
10276 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10279 return(version);
10283 * xmlParseEncName:
10284 * @ctxt: an XML parser context
10286 * parse the XML encoding name
10288 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10290 * Returns the encoding name value or NULL
10292 xmlChar *
10293 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10294 xmlChar *buf = NULL;
10295 int len = 0;
10296 int size = 10;
10297 xmlChar cur;
10299 cur = CUR;
10300 if (((cur >= 'a') && (cur <= 'z')) ||
10301 ((cur >= 'A') && (cur <= 'Z'))) {
10302 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10303 if (buf == NULL) {
10304 xmlErrMemory(ctxt, NULL);
10305 return(NULL);
10308 buf[len++] = cur;
10309 NEXT;
10310 cur = CUR;
10311 while (((cur >= 'a') && (cur <= 'z')) ||
10312 ((cur >= 'A') && (cur <= 'Z')) ||
10313 ((cur >= '0') && (cur <= '9')) ||
10314 (cur == '.') || (cur == '_') ||
10315 (cur == '-')) {
10316 if (len + 1 >= size) {
10317 xmlChar *tmp;
10319 size *= 2;
10320 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10321 if (tmp == NULL) {
10322 xmlErrMemory(ctxt, NULL);
10323 xmlFree(buf);
10324 return(NULL);
10326 buf = tmp;
10328 buf[len++] = cur;
10329 NEXT;
10330 cur = CUR;
10331 if (cur == 0) {
10332 SHRINK;
10333 GROW;
10334 cur = CUR;
10337 buf[len] = 0;
10338 } else {
10339 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10341 return(buf);
10345 * xmlParseEncodingDecl:
10346 * @ctxt: an XML parser context
10348 * parse the XML encoding declaration
10350 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10352 * this setups the conversion filters.
10354 * Returns the encoding value or NULL
10357 const xmlChar *
10358 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10359 xmlChar *encoding = NULL;
10361 SKIP_BLANKS;
10362 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10363 SKIP(8);
10364 SKIP_BLANKS;
10365 if (RAW != '=') {
10366 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10367 return(NULL);
10369 NEXT;
10370 SKIP_BLANKS;
10371 if (RAW == '"') {
10372 NEXT;
10373 encoding = xmlParseEncName(ctxt);
10374 if (RAW != '"') {
10375 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10376 xmlFree((xmlChar *) encoding);
10377 return(NULL);
10378 } else
10379 NEXT;
10380 } else if (RAW == '\''){
10381 NEXT;
10382 encoding = xmlParseEncName(ctxt);
10383 if (RAW != '\'') {
10384 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10385 xmlFree((xmlChar *) encoding);
10386 return(NULL);
10387 } else
10388 NEXT;
10389 } else {
10390 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10394 * Non standard parsing, allowing the user to ignore encoding
10396 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10397 xmlFree((xmlChar *) encoding);
10398 return(NULL);
10402 * UTF-16 encoding switch has already taken place at this stage,
10403 * more over the little-endian/big-endian selection is already done
10405 if ((encoding != NULL) &&
10406 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10407 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10409 * If no encoding was passed to the parser, that we are
10410 * using UTF-16 and no decoder is present i.e. the
10411 * document is apparently UTF-8 compatible, then raise an
10412 * encoding mismatch fatal error
10414 if ((ctxt->encoding == NULL) &&
10415 (ctxt->input->buf != NULL) &&
10416 (ctxt->input->buf->encoder == NULL)) {
10417 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10418 "Document labelled UTF-16 but has UTF-8 content\n");
10420 if (ctxt->encoding != NULL)
10421 xmlFree((xmlChar *) ctxt->encoding);
10422 ctxt->encoding = encoding;
10425 * UTF-8 encoding is handled natively
10427 else if ((encoding != NULL) &&
10428 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10429 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10430 if (ctxt->encoding != NULL)
10431 xmlFree((xmlChar *) ctxt->encoding);
10432 ctxt->encoding = encoding;
10434 else if (encoding != NULL) {
10435 xmlCharEncodingHandlerPtr handler;
10437 if (ctxt->input->encoding != NULL)
10438 xmlFree((xmlChar *) ctxt->input->encoding);
10439 ctxt->input->encoding = encoding;
10441 handler = xmlFindCharEncodingHandler((const char *) encoding);
10442 if (handler != NULL) {
10443 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10444 /* failed to convert */
10445 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10446 return(NULL);
10448 } else {
10449 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10450 "Unsupported encoding %s\n", encoding);
10451 return(NULL);
10455 return(encoding);
10459 * xmlParseSDDecl:
10460 * @ctxt: an XML parser context
10462 * parse the XML standalone declaration
10464 * [32] SDDecl ::= S 'standalone' Eq
10465 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10467 * [ VC: Standalone Document Declaration ]
10468 * TODO The standalone document declaration must have the value "no"
10469 * if any external markup declarations contain declarations of:
10470 * - attributes with default values, if elements to which these
10471 * attributes apply appear in the document without specifications
10472 * of values for these attributes, or
10473 * - entities (other than amp, lt, gt, apos, quot), if references
10474 * to those entities appear in the document, or
10475 * - attributes with values subject to normalization, where the
10476 * attribute appears in the document with a value which will change
10477 * as a result of normalization, or
10478 * - element types with element content, if white space occurs directly
10479 * within any instance of those types.
10481 * Returns:
10482 * 1 if standalone="yes"
10483 * 0 if standalone="no"
10484 * -2 if standalone attribute is missing or invalid
10485 * (A standalone value of -2 means that the XML declaration was found,
10486 * but no value was specified for the standalone attribute).
10490 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10491 int standalone = -2;
10493 SKIP_BLANKS;
10494 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10495 SKIP(10);
10496 SKIP_BLANKS;
10497 if (RAW != '=') {
10498 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10499 return(standalone);
10501 NEXT;
10502 SKIP_BLANKS;
10503 if (RAW == '\''){
10504 NEXT;
10505 if ((RAW == 'n') && (NXT(1) == 'o')) {
10506 standalone = 0;
10507 SKIP(2);
10508 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10509 (NXT(2) == 's')) {
10510 standalone = 1;
10511 SKIP(3);
10512 } else {
10513 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10515 if (RAW != '\'') {
10516 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10517 } else
10518 NEXT;
10519 } else if (RAW == '"'){
10520 NEXT;
10521 if ((RAW == 'n') && (NXT(1) == 'o')) {
10522 standalone = 0;
10523 SKIP(2);
10524 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10525 (NXT(2) == 's')) {
10526 standalone = 1;
10527 SKIP(3);
10528 } else {
10529 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10531 if (RAW != '"') {
10532 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10533 } else
10534 NEXT;
10535 } else {
10536 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10539 return(standalone);
10543 * xmlParseXMLDecl:
10544 * @ctxt: an XML parser context
10546 * parse an XML declaration header
10548 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10551 void
10552 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10553 xmlChar *version;
10556 * This value for standalone indicates that the document has an
10557 * XML declaration but it does not have a standalone attribute.
10558 * It will be overwritten later if a standalone attribute is found.
10560 ctxt->input->standalone = -2;
10563 * We know that '<?xml' is here.
10565 SKIP(5);
10567 if (!IS_BLANK_CH(RAW)) {
10568 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10569 "Blank needed after '<?xml'\n");
10571 SKIP_BLANKS;
10574 * We must have the VersionInfo here.
10576 version = xmlParseVersionInfo(ctxt);
10577 if (version == NULL) {
10578 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10579 } else {
10580 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10582 * Changed here for XML-1.0 5th edition
10584 if (ctxt->options & XML_PARSE_OLD10) {
10585 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10586 "Unsupported version '%s'\n",
10587 version);
10588 } else {
10589 if ((version[0] == '1') && ((version[1] == '.'))) {
10590 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10591 "Unsupported version '%s'\n",
10592 version, NULL);
10593 } else {
10594 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10595 "Unsupported version '%s'\n",
10596 version);
10600 if (ctxt->version != NULL)
10601 xmlFree((void *) ctxt->version);
10602 ctxt->version = version;
10606 * We may have the encoding declaration
10608 if (!IS_BLANK_CH(RAW)) {
10609 if ((RAW == '?') && (NXT(1) == '>')) {
10610 SKIP(2);
10611 return;
10613 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10615 xmlParseEncodingDecl(ctxt);
10616 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10617 (ctxt->instate == XML_PARSER_EOF)) {
10619 * The XML REC instructs us to stop parsing right here
10621 return;
10625 * We may have the standalone status.
10627 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10628 if ((RAW == '?') && (NXT(1) == '>')) {
10629 SKIP(2);
10630 return;
10632 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10636 * We can grow the input buffer freely at that point
10638 GROW;
10640 SKIP_BLANKS;
10641 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10643 SKIP_BLANKS;
10644 if ((RAW == '?') && (NXT(1) == '>')) {
10645 SKIP(2);
10646 } else if (RAW == '>') {
10647 /* Deprecated old WD ... */
10648 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10649 NEXT;
10650 } else {
10651 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10652 MOVETO_ENDTAG(CUR_PTR);
10653 NEXT;
10658 * xmlParseMisc:
10659 * @ctxt: an XML parser context
10661 * parse an XML Misc* optional field.
10663 * [27] Misc ::= Comment | PI | S
10666 void
10667 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10668 while (ctxt->instate != XML_PARSER_EOF) {
10669 SKIP_BLANKS;
10670 GROW;
10671 if ((RAW == '<') && (NXT(1) == '?')) {
10672 xmlParsePI(ctxt);
10673 } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10674 xmlParseComment(ctxt);
10675 } else {
10676 break;
10682 * xmlParseDocument:
10683 * @ctxt: an XML parser context
10685 * parse an XML document (and build a tree if using the standard SAX
10686 * interface).
10688 * [1] document ::= prolog element Misc*
10690 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10692 * Returns 0, -1 in case of error. the parser context is augmented
10693 * as a result of the parsing.
10697 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10698 xmlChar start[4];
10699 xmlCharEncoding enc;
10701 xmlInitParser();
10703 if ((ctxt == NULL) || (ctxt->input == NULL))
10704 return(-1);
10706 GROW;
10709 * SAX: detecting the level.
10711 xmlDetectSAX2(ctxt);
10714 * SAX: beginning of the document processing.
10716 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10717 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10718 if (ctxt->instate == XML_PARSER_EOF)
10719 return(-1);
10721 if ((ctxt->encoding == NULL) &&
10722 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10724 * Get the 4 first bytes and decode the charset
10725 * if enc != XML_CHAR_ENCODING_NONE
10726 * plug some encoding conversion routines.
10728 start[0] = RAW;
10729 start[1] = NXT(1);
10730 start[2] = NXT(2);
10731 start[3] = NXT(3);
10732 enc = xmlDetectCharEncoding(&start[0], 4);
10733 if (enc != XML_CHAR_ENCODING_NONE) {
10734 xmlSwitchEncoding(ctxt, enc);
10739 if (CUR == 0) {
10740 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10741 return(-1);
10745 * Check for the XMLDecl in the Prolog.
10746 * do not GROW here to avoid the detected encoder to decode more
10747 * than just the first line, unless the amount of data is really
10748 * too small to hold "<?xml version="1.0" encoding="foo"
10750 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10751 GROW;
10753 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10756 * Note that we will switch encoding on the fly.
10758 xmlParseXMLDecl(ctxt);
10759 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10760 (ctxt->instate == XML_PARSER_EOF)) {
10762 * The XML REC instructs us to stop parsing right here
10764 return(-1);
10766 ctxt->standalone = ctxt->input->standalone;
10767 SKIP_BLANKS;
10768 } else {
10769 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10771 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10772 ctxt->sax->startDocument(ctxt->userData);
10773 if (ctxt->instate == XML_PARSER_EOF)
10774 return(-1);
10775 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10776 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10777 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10781 * The Misc part of the Prolog
10783 xmlParseMisc(ctxt);
10786 * Then possibly doc type declaration(s) and more Misc
10787 * (doctypedecl Misc*)?
10789 GROW;
10790 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10792 ctxt->inSubset = 1;
10793 xmlParseDocTypeDecl(ctxt);
10794 if (RAW == '[') {
10795 ctxt->instate = XML_PARSER_DTD;
10796 xmlParseInternalSubset(ctxt);
10797 if (ctxt->instate == XML_PARSER_EOF)
10798 return(-1);
10802 * Create and update the external subset.
10804 ctxt->inSubset = 2;
10805 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10806 (!ctxt->disableSAX))
10807 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10808 ctxt->extSubSystem, ctxt->extSubURI);
10809 if (ctxt->instate == XML_PARSER_EOF)
10810 return(-1);
10811 ctxt->inSubset = 0;
10813 xmlCleanSpecialAttr(ctxt);
10815 ctxt->instate = XML_PARSER_PROLOG;
10816 xmlParseMisc(ctxt);
10820 * Time to start parsing the tree itself
10822 GROW;
10823 if (RAW != '<') {
10824 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10825 "Start tag expected, '<' not found\n");
10826 } else {
10827 ctxt->instate = XML_PARSER_CONTENT;
10828 xmlParseElement(ctxt);
10829 ctxt->instate = XML_PARSER_EPILOG;
10833 * The Misc part at the end
10835 xmlParseMisc(ctxt);
10837 if (RAW != 0) {
10838 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10840 ctxt->instate = XML_PARSER_EOF;
10844 * SAX: end of the document processing.
10846 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10847 ctxt->sax->endDocument(ctxt->userData);
10850 * Remove locally kept entity definitions if the tree was not built
10852 if ((ctxt->myDoc != NULL) &&
10853 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10854 xmlFreeDoc(ctxt->myDoc);
10855 ctxt->myDoc = NULL;
10858 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10859 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10860 if (ctxt->valid)
10861 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10862 if (ctxt->nsWellFormed)
10863 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10864 if (ctxt->options & XML_PARSE_OLD10)
10865 ctxt->myDoc->properties |= XML_DOC_OLD10;
10867 if (! ctxt->wellFormed) {
10868 ctxt->valid = 0;
10869 return(-1);
10871 return(0);
10875 * xmlParseExtParsedEnt:
10876 * @ctxt: an XML parser context
10878 * parse a general parsed entity
10879 * An external general parsed entity is well-formed if it matches the
10880 * production labeled extParsedEnt.
10882 * [78] extParsedEnt ::= TextDecl? content
10884 * Returns 0, -1 in case of error. the parser context is augmented
10885 * as a result of the parsing.
10889 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10890 xmlChar start[4];
10891 xmlCharEncoding enc;
10893 if ((ctxt == NULL) || (ctxt->input == NULL))
10894 return(-1);
10896 xmlDetectSAX2(ctxt);
10898 GROW;
10901 * SAX: beginning of the document processing.
10903 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10904 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10907 * Get the 4 first bytes and decode the charset
10908 * if enc != XML_CHAR_ENCODING_NONE
10909 * plug some encoding conversion routines.
10911 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10912 start[0] = RAW;
10913 start[1] = NXT(1);
10914 start[2] = NXT(2);
10915 start[3] = NXT(3);
10916 enc = xmlDetectCharEncoding(start, 4);
10917 if (enc != XML_CHAR_ENCODING_NONE) {
10918 xmlSwitchEncoding(ctxt, enc);
10923 if (CUR == 0) {
10924 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10928 * Check for the XMLDecl in the Prolog.
10930 GROW;
10931 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10934 * Note that we will switch encoding on the fly.
10936 xmlParseXMLDecl(ctxt);
10937 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10939 * The XML REC instructs us to stop parsing right here
10941 return(-1);
10943 SKIP_BLANKS;
10944 } else {
10945 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10947 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10948 ctxt->sax->startDocument(ctxt->userData);
10949 if (ctxt->instate == XML_PARSER_EOF)
10950 return(-1);
10953 * Doing validity checking on chunk doesn't make sense
10955 ctxt->instate = XML_PARSER_CONTENT;
10956 ctxt->validate = 0;
10957 ctxt->loadsubset = 0;
10958 ctxt->depth = 0;
10960 xmlParseContent(ctxt);
10961 if (ctxt->instate == XML_PARSER_EOF)
10962 return(-1);
10964 if ((RAW == '<') && (NXT(1) == '/')) {
10965 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10966 } else if (RAW != 0) {
10967 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10971 * SAX: end of the document processing.
10973 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10974 ctxt->sax->endDocument(ctxt->userData);
10976 if (! ctxt->wellFormed) return(-1);
10977 return(0);
10980 #ifdef LIBXML_PUSH_ENABLED
10981 /************************************************************************
10983 * Progressive parsing interfaces *
10985 ************************************************************************/
10988 * xmlParseLookupSequence:
10989 * @ctxt: an XML parser context
10990 * @first: the first char to lookup
10991 * @next: the next char to lookup or zero
10992 * @third: the next char to lookup or zero
10994 * Try to find if a sequence (first, next, third) or just (first next) or
10995 * (first) is available in the input stream.
10996 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10997 * to avoid rescanning sequences of bytes, it DOES change the state of the
10998 * parser, do not use liberally.
11000 * Returns the index to the current parsing point if the full sequence
11001 * is available, -1 otherwise.
11003 static int
11004 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11005 xmlChar next, xmlChar third) {
11006 int base, len;
11007 xmlParserInputPtr in;
11008 const xmlChar *buf;
11010 in = ctxt->input;
11011 if (in == NULL) return(-1);
11012 base = in->cur - in->base;
11013 if (base < 0) return(-1);
11014 if (ctxt->checkIndex > base)
11015 base = ctxt->checkIndex;
11016 if (in->buf == NULL) {
11017 buf = in->base;
11018 len = in->length;
11019 } else {
11020 buf = xmlBufContent(in->buf->buffer);
11021 len = xmlBufUse(in->buf->buffer);
11023 /* take into account the sequence length */
11024 if (third) len -= 2;
11025 else if (next) len --;
11026 for (;base < len;base++) {
11027 if (buf[base] == first) {
11028 if (third != 0) {
11029 if ((buf[base + 1] != next) ||
11030 (buf[base + 2] != third)) continue;
11031 } else if (next != 0) {
11032 if (buf[base + 1] != next) continue;
11034 ctxt->checkIndex = 0;
11035 #ifdef DEBUG_PUSH
11036 if (next == 0)
11037 xmlGenericError(xmlGenericErrorContext,
11038 "PP: lookup '%c' found at %d\n",
11039 first, base);
11040 else if (third == 0)
11041 xmlGenericError(xmlGenericErrorContext,
11042 "PP: lookup '%c%c' found at %d\n",
11043 first, next, base);
11044 else
11045 xmlGenericError(xmlGenericErrorContext,
11046 "PP: lookup '%c%c%c' found at %d\n",
11047 first, next, third, base);
11048 #endif
11049 return(base - (in->cur - in->base));
11052 ctxt->checkIndex = base;
11053 #ifdef DEBUG_PUSH
11054 if (next == 0)
11055 xmlGenericError(xmlGenericErrorContext,
11056 "PP: lookup '%c' failed\n", first);
11057 else if (third == 0)
11058 xmlGenericError(xmlGenericErrorContext,
11059 "PP: lookup '%c%c' failed\n", first, next);
11060 else
11061 xmlGenericError(xmlGenericErrorContext,
11062 "PP: lookup '%c%c%c' failed\n", first, next, third);
11063 #endif
11064 return(-1);
11068 * xmlParseGetLasts:
11069 * @ctxt: an XML parser context
11070 * @lastlt: pointer to store the last '<' from the input
11071 * @lastgt: pointer to store the last '>' from the input
11073 * Lookup the last < and > in the current chunk
11075 static void
11076 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11077 const xmlChar **lastgt) {
11078 const xmlChar *tmp;
11080 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11081 xmlGenericError(xmlGenericErrorContext,
11082 "Internal error: xmlParseGetLasts\n");
11083 return;
11085 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11086 tmp = ctxt->input->end;
11087 tmp--;
11088 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11089 if (tmp < ctxt->input->base) {
11090 *lastlt = NULL;
11091 *lastgt = NULL;
11092 } else {
11093 *lastlt = tmp;
11094 tmp++;
11095 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11096 if (*tmp == '\'') {
11097 tmp++;
11098 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11099 if (tmp < ctxt->input->end) tmp++;
11100 } else if (*tmp == '"') {
11101 tmp++;
11102 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11103 if (tmp < ctxt->input->end) tmp++;
11104 } else
11105 tmp++;
11107 if (tmp < ctxt->input->end)
11108 *lastgt = tmp;
11109 else {
11110 tmp = *lastlt;
11111 tmp--;
11112 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11113 if (tmp >= ctxt->input->base)
11114 *lastgt = tmp;
11115 else
11116 *lastgt = NULL;
11119 } else {
11120 *lastlt = NULL;
11121 *lastgt = NULL;
11125 * xmlCheckCdataPush:
11126 * @cur: pointer to the block of characters
11127 * @len: length of the block in bytes
11128 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11130 * Check that the block of characters is okay as SCdata content [20]
11132 * Returns the number of bytes to pass if okay, a negative index where an
11133 * UTF-8 error occurred otherwise
11135 static int
11136 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11137 int ix;
11138 unsigned char c;
11139 int codepoint;
11141 if ((utf == NULL) || (len <= 0))
11142 return(0);
11144 for (ix = 0; ix < len;) { /* string is 0-terminated */
11145 c = utf[ix];
11146 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11147 if (c >= 0x20)
11148 ix++;
11149 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11150 ix++;
11151 else
11152 return(-ix);
11153 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11154 if (ix + 2 > len) return(complete ? -ix : ix);
11155 if ((utf[ix+1] & 0xc0 ) != 0x80)
11156 return(-ix);
11157 codepoint = (utf[ix] & 0x1f) << 6;
11158 codepoint |= utf[ix+1] & 0x3f;
11159 if (!xmlIsCharQ(codepoint))
11160 return(-ix);
11161 ix += 2;
11162 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11163 if (ix + 3 > len) return(complete ? -ix : ix);
11164 if (((utf[ix+1] & 0xc0) != 0x80) ||
11165 ((utf[ix+2] & 0xc0) != 0x80))
11166 return(-ix);
11167 codepoint = (utf[ix] & 0xf) << 12;
11168 codepoint |= (utf[ix+1] & 0x3f) << 6;
11169 codepoint |= utf[ix+2] & 0x3f;
11170 if (!xmlIsCharQ(codepoint))
11171 return(-ix);
11172 ix += 3;
11173 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11174 if (ix + 4 > len) return(complete ? -ix : ix);
11175 if (((utf[ix+1] & 0xc0) != 0x80) ||
11176 ((utf[ix+2] & 0xc0) != 0x80) ||
11177 ((utf[ix+3] & 0xc0) != 0x80))
11178 return(-ix);
11179 codepoint = (utf[ix] & 0x7) << 18;
11180 codepoint |= (utf[ix+1] & 0x3f) << 12;
11181 codepoint |= (utf[ix+2] & 0x3f) << 6;
11182 codepoint |= utf[ix+3] & 0x3f;
11183 if (!xmlIsCharQ(codepoint))
11184 return(-ix);
11185 ix += 4;
11186 } else /* unknown encoding */
11187 return(-ix);
11189 return(ix);
11193 * xmlParseTryOrFinish:
11194 * @ctxt: an XML parser context
11195 * @terminate: last chunk indicator
11197 * Try to progress on parsing
11199 * Returns zero if no parsing was possible
11201 static int
11202 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11203 int ret = 0;
11204 int avail, tlen;
11205 xmlChar cur, next;
11206 const xmlChar *lastlt, *lastgt;
11208 if (ctxt->input == NULL)
11209 return(0);
11211 #ifdef DEBUG_PUSH
11212 switch (ctxt->instate) {
11213 case XML_PARSER_EOF:
11214 xmlGenericError(xmlGenericErrorContext,
11215 "PP: try EOF\n"); break;
11216 case XML_PARSER_START:
11217 xmlGenericError(xmlGenericErrorContext,
11218 "PP: try START\n"); break;
11219 case XML_PARSER_MISC:
11220 xmlGenericError(xmlGenericErrorContext,
11221 "PP: try MISC\n");break;
11222 case XML_PARSER_COMMENT:
11223 xmlGenericError(xmlGenericErrorContext,
11224 "PP: try COMMENT\n");break;
11225 case XML_PARSER_PROLOG:
11226 xmlGenericError(xmlGenericErrorContext,
11227 "PP: try PROLOG\n");break;
11228 case XML_PARSER_START_TAG:
11229 xmlGenericError(xmlGenericErrorContext,
11230 "PP: try START_TAG\n");break;
11231 case XML_PARSER_CONTENT:
11232 xmlGenericError(xmlGenericErrorContext,
11233 "PP: try CONTENT\n");break;
11234 case XML_PARSER_CDATA_SECTION:
11235 xmlGenericError(xmlGenericErrorContext,
11236 "PP: try CDATA_SECTION\n");break;
11237 case XML_PARSER_END_TAG:
11238 xmlGenericError(xmlGenericErrorContext,
11239 "PP: try END_TAG\n");break;
11240 case XML_PARSER_ENTITY_DECL:
11241 xmlGenericError(xmlGenericErrorContext,
11242 "PP: try ENTITY_DECL\n");break;
11243 case XML_PARSER_ENTITY_VALUE:
11244 xmlGenericError(xmlGenericErrorContext,
11245 "PP: try ENTITY_VALUE\n");break;
11246 case XML_PARSER_ATTRIBUTE_VALUE:
11247 xmlGenericError(xmlGenericErrorContext,
11248 "PP: try ATTRIBUTE_VALUE\n");break;
11249 case XML_PARSER_DTD:
11250 xmlGenericError(xmlGenericErrorContext,
11251 "PP: try DTD\n");break;
11252 case XML_PARSER_EPILOG:
11253 xmlGenericError(xmlGenericErrorContext,
11254 "PP: try EPILOG\n");break;
11255 case XML_PARSER_PI:
11256 xmlGenericError(xmlGenericErrorContext,
11257 "PP: try PI\n");break;
11258 case XML_PARSER_IGNORE:
11259 xmlGenericError(xmlGenericErrorContext,
11260 "PP: try IGNORE\n");break;
11262 #endif
11264 if ((ctxt->input != NULL) &&
11265 (ctxt->input->cur - ctxt->input->base > 4096)) {
11266 xmlSHRINK(ctxt);
11267 ctxt->checkIndex = 0;
11269 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11271 while (ctxt->instate != XML_PARSER_EOF) {
11272 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11273 return(0);
11275 if (ctxt->input == NULL) break;
11276 if (ctxt->input->buf == NULL)
11277 avail = ctxt->input->length -
11278 (ctxt->input->cur - ctxt->input->base);
11279 else {
11281 * If we are operating on converted input, try to flush
11282 * remaining chars to avoid them stalling in the non-converted
11283 * buffer. But do not do this in document start where
11284 * encoding="..." may not have been read and we work on a
11285 * guessed encoding.
11287 if ((ctxt->instate != XML_PARSER_START) &&
11288 (ctxt->input->buf->raw != NULL) &&
11289 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11290 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11291 ctxt->input);
11292 size_t current = ctxt->input->cur - ctxt->input->base;
11294 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11295 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11296 base, current);
11298 avail = xmlBufUse(ctxt->input->buf->buffer) -
11299 (ctxt->input->cur - ctxt->input->base);
11301 if (avail < 1)
11302 goto done;
11303 switch (ctxt->instate) {
11304 case XML_PARSER_EOF:
11306 * Document parsing is done !
11308 goto done;
11309 case XML_PARSER_START:
11310 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11311 xmlChar start[4];
11312 xmlCharEncoding enc;
11315 * Very first chars read from the document flow.
11317 if (avail < 4)
11318 goto done;
11321 * Get the 4 first bytes and decode the charset
11322 * if enc != XML_CHAR_ENCODING_NONE
11323 * plug some encoding conversion routines,
11324 * else xmlSwitchEncoding will set to (default)
11325 * UTF8.
11327 start[0] = RAW;
11328 start[1] = NXT(1);
11329 start[2] = NXT(2);
11330 start[3] = NXT(3);
11331 enc = xmlDetectCharEncoding(start, 4);
11332 xmlSwitchEncoding(ctxt, enc);
11333 break;
11336 if (avail < 2)
11337 goto done;
11338 cur = ctxt->input->cur[0];
11339 next = ctxt->input->cur[1];
11340 if (cur == 0) {
11341 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11342 ctxt->sax->setDocumentLocator(ctxt->userData,
11343 &xmlDefaultSAXLocator);
11344 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11345 xmlHaltParser(ctxt);
11346 #ifdef DEBUG_PUSH
11347 xmlGenericError(xmlGenericErrorContext,
11348 "PP: entering EOF\n");
11349 #endif
11350 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11351 ctxt->sax->endDocument(ctxt->userData);
11352 goto done;
11354 if ((cur == '<') && (next == '?')) {
11355 /* PI or XML decl */
11356 if (avail < 5) return(ret);
11357 if ((!terminate) &&
11358 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11359 return(ret);
11360 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11361 ctxt->sax->setDocumentLocator(ctxt->userData,
11362 &xmlDefaultSAXLocator);
11363 if ((ctxt->input->cur[2] == 'x') &&
11364 (ctxt->input->cur[3] == 'm') &&
11365 (ctxt->input->cur[4] == 'l') &&
11366 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11367 ret += 5;
11368 #ifdef DEBUG_PUSH
11369 xmlGenericError(xmlGenericErrorContext,
11370 "PP: Parsing XML Decl\n");
11371 #endif
11372 xmlParseXMLDecl(ctxt);
11373 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11375 * The XML REC instructs us to stop parsing right
11376 * here
11378 xmlHaltParser(ctxt);
11379 return(0);
11381 ctxt->standalone = ctxt->input->standalone;
11382 if ((ctxt->encoding == NULL) &&
11383 (ctxt->input->encoding != NULL))
11384 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11385 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11386 (!ctxt->disableSAX))
11387 ctxt->sax->startDocument(ctxt->userData);
11388 ctxt->instate = XML_PARSER_MISC;
11389 #ifdef DEBUG_PUSH
11390 xmlGenericError(xmlGenericErrorContext,
11391 "PP: entering MISC\n");
11392 #endif
11393 } else {
11394 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11395 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11396 (!ctxt->disableSAX))
11397 ctxt->sax->startDocument(ctxt->userData);
11398 ctxt->instate = XML_PARSER_MISC;
11399 #ifdef DEBUG_PUSH
11400 xmlGenericError(xmlGenericErrorContext,
11401 "PP: entering MISC\n");
11402 #endif
11404 } else {
11405 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11406 ctxt->sax->setDocumentLocator(ctxt->userData,
11407 &xmlDefaultSAXLocator);
11408 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11409 if (ctxt->version == NULL) {
11410 xmlErrMemory(ctxt, NULL);
11411 break;
11413 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11414 (!ctxt->disableSAX))
11415 ctxt->sax->startDocument(ctxt->userData);
11416 ctxt->instate = XML_PARSER_MISC;
11417 #ifdef DEBUG_PUSH
11418 xmlGenericError(xmlGenericErrorContext,
11419 "PP: entering MISC\n");
11420 #endif
11422 break;
11423 case XML_PARSER_START_TAG: {
11424 const xmlChar *name;
11425 const xmlChar *prefix = NULL;
11426 const xmlChar *URI = NULL;
11427 int line = ctxt->input->line;
11428 int nsNr = ctxt->nsNr;
11430 if ((avail < 2) && (ctxt->inputNr == 1))
11431 goto done;
11432 cur = ctxt->input->cur[0];
11433 if (cur != '<') {
11434 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11435 xmlHaltParser(ctxt);
11436 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11437 ctxt->sax->endDocument(ctxt->userData);
11438 goto done;
11440 if (!terminate) {
11441 if (ctxt->progressive) {
11442 /* > can be found unescaped in attribute values */
11443 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11444 goto done;
11445 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11446 goto done;
11449 if (ctxt->spaceNr == 0)
11450 spacePush(ctxt, -1);
11451 else if (*ctxt->space == -2)
11452 spacePush(ctxt, -1);
11453 else
11454 spacePush(ctxt, *ctxt->space);
11455 #ifdef LIBXML_SAX1_ENABLED
11456 if (ctxt->sax2)
11457 #endif /* LIBXML_SAX1_ENABLED */
11458 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11459 #ifdef LIBXML_SAX1_ENABLED
11460 else
11461 name = xmlParseStartTag(ctxt);
11462 #endif /* LIBXML_SAX1_ENABLED */
11463 if (ctxt->instate == XML_PARSER_EOF)
11464 goto done;
11465 if (name == NULL) {
11466 spacePop(ctxt);
11467 xmlHaltParser(ctxt);
11468 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11469 ctxt->sax->endDocument(ctxt->userData);
11470 goto done;
11472 #ifdef LIBXML_VALID_ENABLED
11474 * [ VC: Root Element Type ]
11475 * The Name in the document type declaration must match
11476 * the element type of the root element.
11478 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11479 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11480 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11481 #endif /* LIBXML_VALID_ENABLED */
11484 * Check for an Empty Element.
11486 if ((RAW == '/') && (NXT(1) == '>')) {
11487 SKIP(2);
11489 if (ctxt->sax2) {
11490 if ((ctxt->sax != NULL) &&
11491 (ctxt->sax->endElementNs != NULL) &&
11492 (!ctxt->disableSAX))
11493 ctxt->sax->endElementNs(ctxt->userData, name,
11494 prefix, URI);
11495 if (ctxt->nsNr - nsNr > 0)
11496 nsPop(ctxt, ctxt->nsNr - nsNr);
11497 #ifdef LIBXML_SAX1_ENABLED
11498 } else {
11499 if ((ctxt->sax != NULL) &&
11500 (ctxt->sax->endElement != NULL) &&
11501 (!ctxt->disableSAX))
11502 ctxt->sax->endElement(ctxt->userData, name);
11503 #endif /* LIBXML_SAX1_ENABLED */
11505 if (ctxt->instate == XML_PARSER_EOF)
11506 goto done;
11507 spacePop(ctxt);
11508 if (ctxt->nameNr == 0) {
11509 ctxt->instate = XML_PARSER_EPILOG;
11510 } else {
11511 ctxt->instate = XML_PARSER_CONTENT;
11513 ctxt->progressive = 1;
11514 break;
11516 if (RAW == '>') {
11517 NEXT;
11518 } else {
11519 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11520 "Couldn't find end of Start Tag %s\n",
11521 name);
11522 nodePop(ctxt);
11523 spacePop(ctxt);
11525 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11527 ctxt->instate = XML_PARSER_CONTENT;
11528 ctxt->progressive = 1;
11529 break;
11531 case XML_PARSER_CONTENT: {
11532 int id;
11533 unsigned long cons;
11534 if ((avail < 2) && (ctxt->inputNr == 1))
11535 goto done;
11536 cur = ctxt->input->cur[0];
11537 next = ctxt->input->cur[1];
11539 id = ctxt->input->id;
11540 cons = CUR_CONSUMED;
11541 if ((cur == '<') && (next == '/')) {
11542 ctxt->instate = XML_PARSER_END_TAG;
11543 break;
11544 } else if ((cur == '<') && (next == '?')) {
11545 if ((!terminate) &&
11546 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11547 ctxt->progressive = XML_PARSER_PI;
11548 goto done;
11550 xmlParsePI(ctxt);
11551 ctxt->instate = XML_PARSER_CONTENT;
11552 ctxt->progressive = 1;
11553 } else if ((cur == '<') && (next != '!')) {
11554 ctxt->instate = XML_PARSER_START_TAG;
11555 break;
11556 } else if ((cur == '<') && (next == '!') &&
11557 (ctxt->input->cur[2] == '-') &&
11558 (ctxt->input->cur[3] == '-')) {
11559 int term;
11561 if (avail < 4)
11562 goto done;
11563 ctxt->input->cur += 4;
11564 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11565 ctxt->input->cur -= 4;
11566 if ((!terminate) && (term < 0)) {
11567 ctxt->progressive = XML_PARSER_COMMENT;
11568 goto done;
11570 xmlParseComment(ctxt);
11571 ctxt->instate = XML_PARSER_CONTENT;
11572 ctxt->progressive = 1;
11573 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11574 (ctxt->input->cur[2] == '[') &&
11575 (ctxt->input->cur[3] == 'C') &&
11576 (ctxt->input->cur[4] == 'D') &&
11577 (ctxt->input->cur[5] == 'A') &&
11578 (ctxt->input->cur[6] == 'T') &&
11579 (ctxt->input->cur[7] == 'A') &&
11580 (ctxt->input->cur[8] == '[')) {
11581 SKIP(9);
11582 ctxt->instate = XML_PARSER_CDATA_SECTION;
11583 break;
11584 } else if ((cur == '<') && (next == '!') &&
11585 (avail < 9)) {
11586 goto done;
11587 } else if (cur == '&') {
11588 if ((!terminate) &&
11589 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11590 goto done;
11591 xmlParseReference(ctxt);
11592 } else {
11593 /* TODO Avoid the extra copy, handle directly !!! */
11595 * Goal of the following test is:
11596 * - minimize calls to the SAX 'character' callback
11597 * when they are mergeable
11598 * - handle an problem for isBlank when we only parse
11599 * a sequence of blank chars and the next one is
11600 * not available to check against '<' presence.
11601 * - tries to homogenize the differences in SAX
11602 * callbacks between the push and pull versions
11603 * of the parser.
11605 if ((ctxt->inputNr == 1) &&
11606 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11607 if (!terminate) {
11608 if (ctxt->progressive) {
11609 if ((lastlt == NULL) ||
11610 (ctxt->input->cur > lastlt))
11611 goto done;
11612 } else if (xmlParseLookupSequence(ctxt,
11613 '<', 0, 0) < 0) {
11614 goto done;
11618 ctxt->checkIndex = 0;
11619 xmlParseCharData(ctxt, 0);
11621 if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
11622 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11623 "detected an error in element content\n");
11624 xmlHaltParser(ctxt);
11625 break;
11627 break;
11629 case XML_PARSER_END_TAG:
11630 if (avail < 2)
11631 goto done;
11632 if (!terminate) {
11633 if (ctxt->progressive) {
11634 /* > can be found unescaped in attribute values */
11635 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11636 goto done;
11637 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11638 goto done;
11641 if (ctxt->sax2) {
11642 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11643 nameNsPop(ctxt);
11645 #ifdef LIBXML_SAX1_ENABLED
11646 else
11647 xmlParseEndTag1(ctxt, 0);
11648 #endif /* LIBXML_SAX1_ENABLED */
11649 if (ctxt->instate == XML_PARSER_EOF) {
11650 /* Nothing */
11651 } else if (ctxt->nameNr == 0) {
11652 ctxt->instate = XML_PARSER_EPILOG;
11653 } else {
11654 ctxt->instate = XML_PARSER_CONTENT;
11656 break;
11657 case XML_PARSER_CDATA_SECTION: {
11659 * The Push mode need to have the SAX callback for
11660 * cdataBlock merge back contiguous callbacks.
11662 int base;
11664 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11665 if (base < 0) {
11666 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11667 int tmp;
11669 tmp = xmlCheckCdataPush(ctxt->input->cur,
11670 XML_PARSER_BIG_BUFFER_SIZE, 0);
11671 if (tmp < 0) {
11672 tmp = -tmp;
11673 ctxt->input->cur += tmp;
11674 goto encoding_error;
11676 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11677 if (ctxt->sax->cdataBlock != NULL)
11678 ctxt->sax->cdataBlock(ctxt->userData,
11679 ctxt->input->cur, tmp);
11680 else if (ctxt->sax->characters != NULL)
11681 ctxt->sax->characters(ctxt->userData,
11682 ctxt->input->cur, tmp);
11684 if (ctxt->instate == XML_PARSER_EOF)
11685 goto done;
11686 SKIPL(tmp);
11687 ctxt->checkIndex = 0;
11689 goto done;
11690 } else {
11691 int tmp;
11693 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11694 if ((tmp < 0) || (tmp != base)) {
11695 tmp = -tmp;
11696 ctxt->input->cur += tmp;
11697 goto encoding_error;
11699 if ((ctxt->sax != NULL) && (base == 0) &&
11700 (ctxt->sax->cdataBlock != NULL) &&
11701 (!ctxt->disableSAX)) {
11703 * Special case to provide identical behaviour
11704 * between pull and push parsers on enpty CDATA
11705 * sections
11707 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11708 (!strncmp((const char *)&ctxt->input->cur[-9],
11709 "<![CDATA[", 9)))
11710 ctxt->sax->cdataBlock(ctxt->userData,
11711 BAD_CAST "", 0);
11712 } else if ((ctxt->sax != NULL) && (base > 0) &&
11713 (!ctxt->disableSAX)) {
11714 if (ctxt->sax->cdataBlock != NULL)
11715 ctxt->sax->cdataBlock(ctxt->userData,
11716 ctxt->input->cur, base);
11717 else if (ctxt->sax->characters != NULL)
11718 ctxt->sax->characters(ctxt->userData,
11719 ctxt->input->cur, base);
11721 if (ctxt->instate == XML_PARSER_EOF)
11722 goto done;
11723 SKIPL(base + 3);
11724 ctxt->checkIndex = 0;
11725 ctxt->instate = XML_PARSER_CONTENT;
11726 #ifdef DEBUG_PUSH
11727 xmlGenericError(xmlGenericErrorContext,
11728 "PP: entering CONTENT\n");
11729 #endif
11731 break;
11733 case XML_PARSER_MISC:
11734 SKIP_BLANKS;
11735 if (ctxt->input->buf == NULL)
11736 avail = ctxt->input->length -
11737 (ctxt->input->cur - ctxt->input->base);
11738 else
11739 avail = xmlBufUse(ctxt->input->buf->buffer) -
11740 (ctxt->input->cur - ctxt->input->base);
11741 if (avail < 2)
11742 goto done;
11743 cur = ctxt->input->cur[0];
11744 next = ctxt->input->cur[1];
11745 if ((cur == '<') && (next == '?')) {
11746 if ((!terminate) &&
11747 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11748 ctxt->progressive = XML_PARSER_PI;
11749 goto done;
11751 #ifdef DEBUG_PUSH
11752 xmlGenericError(xmlGenericErrorContext,
11753 "PP: Parsing PI\n");
11754 #endif
11755 xmlParsePI(ctxt);
11756 if (ctxt->instate == XML_PARSER_EOF)
11757 goto done;
11758 ctxt->instate = XML_PARSER_MISC;
11759 ctxt->progressive = 1;
11760 ctxt->checkIndex = 0;
11761 } else if ((cur == '<') && (next == '!') &&
11762 (ctxt->input->cur[2] == '-') &&
11763 (ctxt->input->cur[3] == '-')) {
11764 if ((!terminate) &&
11765 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11766 ctxt->progressive = XML_PARSER_COMMENT;
11767 goto done;
11769 #ifdef DEBUG_PUSH
11770 xmlGenericError(xmlGenericErrorContext,
11771 "PP: Parsing Comment\n");
11772 #endif
11773 xmlParseComment(ctxt);
11774 if (ctxt->instate == XML_PARSER_EOF)
11775 goto done;
11776 ctxt->instate = XML_PARSER_MISC;
11777 ctxt->progressive = 1;
11778 ctxt->checkIndex = 0;
11779 } else if ((cur == '<') && (next == '!') &&
11780 (ctxt->input->cur[2] == 'D') &&
11781 (ctxt->input->cur[3] == 'O') &&
11782 (ctxt->input->cur[4] == 'C') &&
11783 (ctxt->input->cur[5] == 'T') &&
11784 (ctxt->input->cur[6] == 'Y') &&
11785 (ctxt->input->cur[7] == 'P') &&
11786 (ctxt->input->cur[8] == 'E')) {
11787 if ((!terminate) &&
11788 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11789 ctxt->progressive = XML_PARSER_DTD;
11790 goto done;
11792 #ifdef DEBUG_PUSH
11793 xmlGenericError(xmlGenericErrorContext,
11794 "PP: Parsing internal subset\n");
11795 #endif
11796 ctxt->inSubset = 1;
11797 ctxt->progressive = 0;
11798 ctxt->checkIndex = 0;
11799 xmlParseDocTypeDecl(ctxt);
11800 if (ctxt->instate == XML_PARSER_EOF)
11801 goto done;
11802 if (RAW == '[') {
11803 ctxt->instate = XML_PARSER_DTD;
11804 #ifdef DEBUG_PUSH
11805 xmlGenericError(xmlGenericErrorContext,
11806 "PP: entering DTD\n");
11807 #endif
11808 } else {
11810 * Create and update the external subset.
11812 ctxt->inSubset = 2;
11813 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11814 (ctxt->sax->externalSubset != NULL))
11815 ctxt->sax->externalSubset(ctxt->userData,
11816 ctxt->intSubName, ctxt->extSubSystem,
11817 ctxt->extSubURI);
11818 ctxt->inSubset = 0;
11819 xmlCleanSpecialAttr(ctxt);
11820 ctxt->instate = XML_PARSER_PROLOG;
11821 #ifdef DEBUG_PUSH
11822 xmlGenericError(xmlGenericErrorContext,
11823 "PP: entering PROLOG\n");
11824 #endif
11826 } else if ((cur == '<') && (next == '!') &&
11827 (avail < 9)) {
11828 goto done;
11829 } else {
11830 ctxt->instate = XML_PARSER_START_TAG;
11831 ctxt->progressive = XML_PARSER_START_TAG;
11832 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11833 #ifdef DEBUG_PUSH
11834 xmlGenericError(xmlGenericErrorContext,
11835 "PP: entering START_TAG\n");
11836 #endif
11838 break;
11839 case XML_PARSER_PROLOG:
11840 SKIP_BLANKS;
11841 if (ctxt->input->buf == NULL)
11842 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11843 else
11844 avail = xmlBufUse(ctxt->input->buf->buffer) -
11845 (ctxt->input->cur - ctxt->input->base);
11846 if (avail < 2)
11847 goto done;
11848 cur = ctxt->input->cur[0];
11849 next = ctxt->input->cur[1];
11850 if ((cur == '<') && (next == '?')) {
11851 if ((!terminate) &&
11852 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11853 ctxt->progressive = XML_PARSER_PI;
11854 goto done;
11856 #ifdef DEBUG_PUSH
11857 xmlGenericError(xmlGenericErrorContext,
11858 "PP: Parsing PI\n");
11859 #endif
11860 xmlParsePI(ctxt);
11861 if (ctxt->instate == XML_PARSER_EOF)
11862 goto done;
11863 ctxt->instate = XML_PARSER_PROLOG;
11864 ctxt->progressive = 1;
11865 } else if ((cur == '<') && (next == '!') &&
11866 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11867 if ((!terminate) &&
11868 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11869 ctxt->progressive = XML_PARSER_COMMENT;
11870 goto done;
11872 #ifdef DEBUG_PUSH
11873 xmlGenericError(xmlGenericErrorContext,
11874 "PP: Parsing Comment\n");
11875 #endif
11876 xmlParseComment(ctxt);
11877 if (ctxt->instate == XML_PARSER_EOF)
11878 goto done;
11879 ctxt->instate = XML_PARSER_PROLOG;
11880 ctxt->progressive = 1;
11881 } else if ((cur == '<') && (next == '!') &&
11882 (avail < 4)) {
11883 goto done;
11884 } else {
11885 ctxt->instate = XML_PARSER_START_TAG;
11886 if (ctxt->progressive == 0)
11887 ctxt->progressive = XML_PARSER_START_TAG;
11888 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11889 #ifdef DEBUG_PUSH
11890 xmlGenericError(xmlGenericErrorContext,
11891 "PP: entering START_TAG\n");
11892 #endif
11894 break;
11895 case XML_PARSER_EPILOG:
11896 SKIP_BLANKS;
11897 if (ctxt->input->buf == NULL)
11898 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11899 else
11900 avail = xmlBufUse(ctxt->input->buf->buffer) -
11901 (ctxt->input->cur - ctxt->input->base);
11902 if (avail < 2)
11903 goto done;
11904 cur = ctxt->input->cur[0];
11905 next = ctxt->input->cur[1];
11906 if ((cur == '<') && (next == '?')) {
11907 if ((!terminate) &&
11908 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11909 ctxt->progressive = XML_PARSER_PI;
11910 goto done;
11912 #ifdef DEBUG_PUSH
11913 xmlGenericError(xmlGenericErrorContext,
11914 "PP: Parsing PI\n");
11915 #endif
11916 xmlParsePI(ctxt);
11917 if (ctxt->instate == XML_PARSER_EOF)
11918 goto done;
11919 ctxt->instate = XML_PARSER_EPILOG;
11920 ctxt->progressive = 1;
11921 } else if ((cur == '<') && (next == '!') &&
11922 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11923 if ((!terminate) &&
11924 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11925 ctxt->progressive = XML_PARSER_COMMENT;
11926 goto done;
11928 #ifdef DEBUG_PUSH
11929 xmlGenericError(xmlGenericErrorContext,
11930 "PP: Parsing Comment\n");
11931 #endif
11932 xmlParseComment(ctxt);
11933 if (ctxt->instate == XML_PARSER_EOF)
11934 goto done;
11935 ctxt->instate = XML_PARSER_EPILOG;
11936 ctxt->progressive = 1;
11937 } else if ((cur == '<') && (next == '!') &&
11938 (avail < 4)) {
11939 goto done;
11940 } else {
11941 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11942 xmlHaltParser(ctxt);
11943 #ifdef DEBUG_PUSH
11944 xmlGenericError(xmlGenericErrorContext,
11945 "PP: entering EOF\n");
11946 #endif
11947 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11948 ctxt->sax->endDocument(ctxt->userData);
11949 goto done;
11951 break;
11952 case XML_PARSER_DTD: {
11954 * Sorry but progressive parsing of the internal subset
11955 * is not expected to be supported. We first check that
11956 * the full content of the internal subset is available and
11957 * the parsing is launched only at that point.
11958 * Internal subset ends up with "']' S? '>'" in an unescaped
11959 * section and not in a ']]>' sequence which are conditional
11960 * sections (whoever argued to keep that crap in XML deserve
11961 * a place in hell !).
11963 int base, i;
11964 xmlChar *buf;
11965 xmlChar quote = 0;
11966 size_t use;
11968 base = ctxt->input->cur - ctxt->input->base;
11969 if (base < 0) return(0);
11970 if (ctxt->checkIndex > base)
11971 base = ctxt->checkIndex;
11972 buf = xmlBufContent(ctxt->input->buf->buffer);
11973 use = xmlBufUse(ctxt->input->buf->buffer);
11974 for (;(unsigned int) base < use; base++) {
11975 if (quote != 0) {
11976 if (buf[base] == quote)
11977 quote = 0;
11978 continue;
11980 if ((quote == 0) && (buf[base] == '<')) {
11981 int found = 0;
11982 /* special handling of comments */
11983 if (((unsigned int) base + 4 < use) &&
11984 (buf[base + 1] == '!') &&
11985 (buf[base + 2] == '-') &&
11986 (buf[base + 3] == '-')) {
11987 for (;(unsigned int) base + 3 < use; base++) {
11988 if ((buf[base] == '-') &&
11989 (buf[base + 1] == '-') &&
11990 (buf[base + 2] == '>')) {
11991 found = 1;
11992 base += 2;
11993 break;
11996 if (!found) {
11997 #if 0
11998 fprintf(stderr, "unfinished comment\n");
11999 #endif
12000 break; /* for */
12002 continue;
12005 if (buf[base] == '"') {
12006 quote = '"';
12007 continue;
12009 if (buf[base] == '\'') {
12010 quote = '\'';
12011 continue;
12013 if (buf[base] == ']') {
12014 #if 0
12015 fprintf(stderr, "%c%c%c%c: ", buf[base],
12016 buf[base + 1], buf[base + 2], buf[base + 3]);
12017 #endif
12018 if ((unsigned int) base +1 >= use)
12019 break;
12020 if (buf[base + 1] == ']') {
12021 /* conditional crap, skip both ']' ! */
12022 base++;
12023 continue;
12025 for (i = 1; (unsigned int) base + i < use; i++) {
12026 if (buf[base + i] == '>') {
12027 #if 0
12028 fprintf(stderr, "found\n");
12029 #endif
12030 goto found_end_int_subset;
12032 if (!IS_BLANK_CH(buf[base + i])) {
12033 #if 0
12034 fprintf(stderr, "not found\n");
12035 #endif
12036 goto not_end_of_int_subset;
12039 #if 0
12040 fprintf(stderr, "end of stream\n");
12041 #endif
12042 break;
12045 not_end_of_int_subset:
12046 continue; /* for */
12049 * We didn't found the end of the Internal subset
12051 if (quote == 0)
12052 ctxt->checkIndex = base;
12053 else
12054 ctxt->checkIndex = 0;
12055 #ifdef DEBUG_PUSH
12056 if (next == 0)
12057 xmlGenericError(xmlGenericErrorContext,
12058 "PP: lookup of int subset end filed\n");
12059 #endif
12060 goto done;
12062 found_end_int_subset:
12063 ctxt->checkIndex = 0;
12064 xmlParseInternalSubset(ctxt);
12065 if (ctxt->instate == XML_PARSER_EOF)
12066 goto done;
12067 ctxt->inSubset = 2;
12068 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12069 (ctxt->sax->externalSubset != NULL))
12070 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12071 ctxt->extSubSystem, ctxt->extSubURI);
12072 ctxt->inSubset = 0;
12073 xmlCleanSpecialAttr(ctxt);
12074 if (ctxt->instate == XML_PARSER_EOF)
12075 goto done;
12076 ctxt->instate = XML_PARSER_PROLOG;
12077 ctxt->checkIndex = 0;
12078 #ifdef DEBUG_PUSH
12079 xmlGenericError(xmlGenericErrorContext,
12080 "PP: entering PROLOG\n");
12081 #endif
12082 break;
12084 case XML_PARSER_COMMENT:
12085 xmlGenericError(xmlGenericErrorContext,
12086 "PP: internal error, state == COMMENT\n");
12087 ctxt->instate = XML_PARSER_CONTENT;
12088 #ifdef DEBUG_PUSH
12089 xmlGenericError(xmlGenericErrorContext,
12090 "PP: entering CONTENT\n");
12091 #endif
12092 break;
12093 case XML_PARSER_IGNORE:
12094 xmlGenericError(xmlGenericErrorContext,
12095 "PP: internal error, state == IGNORE");
12096 ctxt->instate = XML_PARSER_DTD;
12097 #ifdef DEBUG_PUSH
12098 xmlGenericError(xmlGenericErrorContext,
12099 "PP: entering DTD\n");
12100 #endif
12101 break;
12102 case XML_PARSER_PI:
12103 xmlGenericError(xmlGenericErrorContext,
12104 "PP: internal error, state == PI\n");
12105 ctxt->instate = XML_PARSER_CONTENT;
12106 #ifdef DEBUG_PUSH
12107 xmlGenericError(xmlGenericErrorContext,
12108 "PP: entering CONTENT\n");
12109 #endif
12110 break;
12111 case XML_PARSER_ENTITY_DECL:
12112 xmlGenericError(xmlGenericErrorContext,
12113 "PP: internal error, state == ENTITY_DECL\n");
12114 ctxt->instate = XML_PARSER_DTD;
12115 #ifdef DEBUG_PUSH
12116 xmlGenericError(xmlGenericErrorContext,
12117 "PP: entering DTD\n");
12118 #endif
12119 break;
12120 case XML_PARSER_ENTITY_VALUE:
12121 xmlGenericError(xmlGenericErrorContext,
12122 "PP: internal error, state == ENTITY_VALUE\n");
12123 ctxt->instate = XML_PARSER_CONTENT;
12124 #ifdef DEBUG_PUSH
12125 xmlGenericError(xmlGenericErrorContext,
12126 "PP: entering DTD\n");
12127 #endif
12128 break;
12129 case XML_PARSER_ATTRIBUTE_VALUE:
12130 xmlGenericError(xmlGenericErrorContext,
12131 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12132 ctxt->instate = XML_PARSER_START_TAG;
12133 #ifdef DEBUG_PUSH
12134 xmlGenericError(xmlGenericErrorContext,
12135 "PP: entering START_TAG\n");
12136 #endif
12137 break;
12138 case XML_PARSER_SYSTEM_LITERAL:
12139 xmlGenericError(xmlGenericErrorContext,
12140 "PP: internal error, state == SYSTEM_LITERAL\n");
12141 ctxt->instate = XML_PARSER_START_TAG;
12142 #ifdef DEBUG_PUSH
12143 xmlGenericError(xmlGenericErrorContext,
12144 "PP: entering START_TAG\n");
12145 #endif
12146 break;
12147 case XML_PARSER_PUBLIC_LITERAL:
12148 xmlGenericError(xmlGenericErrorContext,
12149 "PP: internal error, state == PUBLIC_LITERAL\n");
12150 ctxt->instate = XML_PARSER_START_TAG;
12151 #ifdef DEBUG_PUSH
12152 xmlGenericError(xmlGenericErrorContext,
12153 "PP: entering START_TAG\n");
12154 #endif
12155 break;
12158 done:
12159 #ifdef DEBUG_PUSH
12160 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12161 #endif
12162 return(ret);
12163 encoding_error:
12165 char buffer[150];
12167 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12168 ctxt->input->cur[0], ctxt->input->cur[1],
12169 ctxt->input->cur[2], ctxt->input->cur[3]);
12170 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12171 "Input is not proper UTF-8, indicate encoding !\n%s",
12172 BAD_CAST buffer, NULL);
12174 return(0);
12178 * xmlParseCheckTransition:
12179 * @ctxt: an XML parser context
12180 * @chunk: a char array
12181 * @size: the size in byte of the chunk
12183 * Check depending on the current parser state if the chunk given must be
12184 * processed immediately or one need more data to advance on parsing.
12186 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12188 static int
12189 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12190 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12191 return(-1);
12192 if (ctxt->instate == XML_PARSER_START_TAG) {
12193 if (memchr(chunk, '>', size) != NULL)
12194 return(1);
12195 return(0);
12197 if (ctxt->progressive == XML_PARSER_COMMENT) {
12198 if (memchr(chunk, '>', size) != NULL)
12199 return(1);
12200 return(0);
12202 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12203 if (memchr(chunk, '>', size) != NULL)
12204 return(1);
12205 return(0);
12207 if (ctxt->progressive == XML_PARSER_PI) {
12208 if (memchr(chunk, '>', size) != NULL)
12209 return(1);
12210 return(0);
12212 if (ctxt->instate == XML_PARSER_END_TAG) {
12213 if (memchr(chunk, '>', size) != NULL)
12214 return(1);
12215 return(0);
12217 if ((ctxt->progressive == XML_PARSER_DTD) ||
12218 (ctxt->instate == XML_PARSER_DTD)) {
12219 if (memchr(chunk, '>', size) != NULL)
12220 return(1);
12221 return(0);
12223 return(1);
12227 * xmlParseChunk:
12228 * @ctxt: an XML parser context
12229 * @chunk: an char array
12230 * @size: the size in byte of the chunk
12231 * @terminate: last chunk indicator
12233 * Parse a Chunk of memory
12235 * Returns zero if no error, the xmlParserErrors otherwise.
12238 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12239 int terminate) {
12240 int end_in_lf = 0;
12241 int remain = 0;
12242 size_t old_avail = 0;
12243 size_t avail = 0;
12245 if (ctxt == NULL)
12246 return(XML_ERR_INTERNAL_ERROR);
12247 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12248 return(ctxt->errNo);
12249 if (ctxt->instate == XML_PARSER_EOF)
12250 return(-1);
12251 if (ctxt->instate == XML_PARSER_START)
12252 xmlDetectSAX2(ctxt);
12253 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12254 (chunk[size - 1] == '\r')) {
12255 end_in_lf = 1;
12256 size--;
12259 xmldecl_done:
12261 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12262 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12263 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12264 size_t cur = ctxt->input->cur - ctxt->input->base;
12265 int res;
12267 old_avail = xmlBufUse(ctxt->input->buf->buffer);
12269 * Specific handling if we autodetected an encoding, we should not
12270 * push more than the first line ... which depend on the encoding
12271 * And only push the rest once the final encoding was detected
12273 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12274 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12275 unsigned int len = 45;
12277 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12278 BAD_CAST "UTF-16")) ||
12279 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12280 BAD_CAST "UTF16")))
12281 len = 90;
12282 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12283 BAD_CAST "UCS-4")) ||
12284 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12285 BAD_CAST "UCS4")))
12286 len = 180;
12288 if (ctxt->input->buf->rawconsumed < len)
12289 len -= ctxt->input->buf->rawconsumed;
12292 * Change size for reading the initial declaration only
12293 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12294 * will blindly copy extra bytes from memory.
12296 if ((unsigned int) size > len) {
12297 remain = size - len;
12298 size = len;
12299 } else {
12300 remain = 0;
12303 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12304 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12305 if (res < 0) {
12306 ctxt->errNo = XML_PARSER_EOF;
12307 xmlHaltParser(ctxt);
12308 return (XML_PARSER_EOF);
12310 #ifdef DEBUG_PUSH
12311 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12312 #endif
12314 } else if (ctxt->instate != XML_PARSER_EOF) {
12315 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12316 xmlParserInputBufferPtr in = ctxt->input->buf;
12317 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12318 (in->raw != NULL)) {
12319 int nbchars;
12320 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12321 size_t current = ctxt->input->cur - ctxt->input->base;
12323 nbchars = xmlCharEncInput(in, terminate);
12324 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12325 if (nbchars < 0) {
12326 /* TODO 2.6.0 */
12327 xmlGenericError(xmlGenericErrorContext,
12328 "xmlParseChunk: encoder error\n");
12329 xmlHaltParser(ctxt);
12330 return(XML_ERR_INVALID_ENCODING);
12335 if (remain != 0) {
12336 xmlParseTryOrFinish(ctxt, 0);
12337 } else {
12338 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12339 avail = xmlBufUse(ctxt->input->buf->buffer);
12341 * Depending on the current state it may not be such
12342 * a good idea to try parsing if there is nothing in the chunk
12343 * which would be worth doing a parser state transition and we
12344 * need to wait for more data
12346 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12347 (old_avail == 0) || (avail == 0) ||
12348 (xmlParseCheckTransition(ctxt,
12349 (const char *)&ctxt->input->base[old_avail],
12350 avail - old_avail)))
12351 xmlParseTryOrFinish(ctxt, terminate);
12353 if (ctxt->instate == XML_PARSER_EOF)
12354 return(ctxt->errNo);
12356 if ((ctxt->input != NULL) &&
12357 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12358 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12359 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12360 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12361 xmlHaltParser(ctxt);
12363 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12364 return(ctxt->errNo);
12366 if (remain != 0) {
12367 chunk += size;
12368 size = remain;
12369 remain = 0;
12370 goto xmldecl_done;
12372 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12373 (ctxt->input->buf != NULL)) {
12374 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12375 ctxt->input);
12376 size_t current = ctxt->input->cur - ctxt->input->base;
12378 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12380 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12381 base, current);
12383 if (terminate) {
12385 * Check for termination
12387 int cur_avail = 0;
12389 if (ctxt->input != NULL) {
12390 if (ctxt->input->buf == NULL)
12391 cur_avail = ctxt->input->length -
12392 (ctxt->input->cur - ctxt->input->base);
12393 else
12394 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12395 (ctxt->input->cur - ctxt->input->base);
12398 if ((ctxt->instate != XML_PARSER_EOF) &&
12399 (ctxt->instate != XML_PARSER_EPILOG)) {
12400 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12402 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12403 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12405 if (ctxt->instate != XML_PARSER_EOF) {
12406 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12407 ctxt->sax->endDocument(ctxt->userData);
12409 ctxt->instate = XML_PARSER_EOF;
12411 if (ctxt->wellFormed == 0)
12412 return((xmlParserErrors) ctxt->errNo);
12413 else
12414 return(0);
12417 /************************************************************************
12419 * I/O front end functions to the parser *
12421 ************************************************************************/
12424 * xmlCreatePushParserCtxt:
12425 * @sax: a SAX handler
12426 * @user_data: The user data returned on SAX callbacks
12427 * @chunk: a pointer to an array of chars
12428 * @size: number of chars in the array
12429 * @filename: an optional file name or URI
12431 * Create a parser context for using the XML parser in push mode.
12432 * If @buffer and @size are non-NULL, the data is used to detect
12433 * the encoding. The remaining characters will be parsed so they
12434 * don't need to be fed in again through xmlParseChunk.
12435 * To allow content encoding detection, @size should be >= 4
12436 * The value of @filename is used for fetching external entities
12437 * and error/warning reports.
12439 * Returns the new parser context or NULL
12442 xmlParserCtxtPtr
12443 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12444 const char *chunk, int size, const char *filename) {
12445 xmlParserCtxtPtr ctxt;
12446 xmlParserInputPtr inputStream;
12447 xmlParserInputBufferPtr buf;
12448 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12451 * plug some encoding conversion routines
12453 if ((chunk != NULL) && (size >= 4))
12454 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12456 buf = xmlAllocParserInputBuffer(enc);
12457 if (buf == NULL) return(NULL);
12459 ctxt = xmlNewParserCtxt();
12460 if (ctxt == NULL) {
12461 xmlErrMemory(NULL, "creating parser: out of memory\n");
12462 xmlFreeParserInputBuffer(buf);
12463 return(NULL);
12465 ctxt->dictNames = 1;
12466 if (sax != NULL) {
12467 #ifdef LIBXML_SAX1_ENABLED
12468 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12469 #endif /* LIBXML_SAX1_ENABLED */
12470 xmlFree(ctxt->sax);
12471 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12472 if (ctxt->sax == NULL) {
12473 xmlErrMemory(ctxt, NULL);
12474 xmlFreeParserInputBuffer(buf);
12475 xmlFreeParserCtxt(ctxt);
12476 return(NULL);
12478 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12479 if (sax->initialized == XML_SAX2_MAGIC)
12480 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12481 else
12482 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12483 if (user_data != NULL)
12484 ctxt->userData = user_data;
12486 if (filename == NULL) {
12487 ctxt->directory = NULL;
12488 } else {
12489 ctxt->directory = xmlParserGetDirectory(filename);
12492 inputStream = xmlNewInputStream(ctxt);
12493 if (inputStream == NULL) {
12494 xmlFreeParserCtxt(ctxt);
12495 xmlFreeParserInputBuffer(buf);
12496 return(NULL);
12499 if (filename == NULL)
12500 inputStream->filename = NULL;
12501 else {
12502 inputStream->filename = (char *)
12503 xmlCanonicPath((const xmlChar *) filename);
12504 if (inputStream->filename == NULL) {
12505 xmlFreeParserCtxt(ctxt);
12506 xmlFreeParserInputBuffer(buf);
12507 return(NULL);
12510 inputStream->buf = buf;
12511 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12512 inputPush(ctxt, inputStream);
12515 * If the caller didn't provide an initial 'chunk' for determining
12516 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12517 * that it can be automatically determined later
12519 if ((size == 0) || (chunk == NULL)) {
12520 ctxt->charset = XML_CHAR_ENCODING_NONE;
12521 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12522 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12523 size_t cur = ctxt->input->cur - ctxt->input->base;
12525 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12527 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12528 #ifdef DEBUG_PUSH
12529 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12530 #endif
12533 if (enc != XML_CHAR_ENCODING_NONE) {
12534 xmlSwitchEncoding(ctxt, enc);
12537 return(ctxt);
12539 #endif /* LIBXML_PUSH_ENABLED */
12542 * xmlHaltParser:
12543 * @ctxt: an XML parser context
12545 * Blocks further parser processing don't override error
12546 * for internal use
12548 static void
12549 xmlHaltParser(xmlParserCtxtPtr ctxt) {
12550 if (ctxt == NULL)
12551 return;
12552 ctxt->instate = XML_PARSER_EOF;
12553 ctxt->disableSAX = 1;
12554 while (ctxt->inputNr > 1)
12555 xmlFreeInputStream(inputPop(ctxt));
12556 if (ctxt->input != NULL) {
12558 * in case there was a specific allocation deallocate before
12559 * overriding base
12561 if (ctxt->input->free != NULL) {
12562 ctxt->input->free((xmlChar *) ctxt->input->base);
12563 ctxt->input->free = NULL;
12565 if (ctxt->input->buf != NULL) {
12566 xmlFreeParserInputBuffer(ctxt->input->buf);
12567 ctxt->input->buf = NULL;
12569 ctxt->input->cur = BAD_CAST"";
12570 ctxt->input->length = 0;
12571 ctxt->input->base = ctxt->input->cur;
12572 ctxt->input->end = ctxt->input->cur;
12577 * xmlStopParser:
12578 * @ctxt: an XML parser context
12580 * Blocks further parser processing
12582 void
12583 xmlStopParser(xmlParserCtxtPtr ctxt) {
12584 if (ctxt == NULL)
12585 return;
12586 xmlHaltParser(ctxt);
12587 ctxt->errNo = XML_ERR_USER_STOP;
12591 * xmlCreateIOParserCtxt:
12592 * @sax: a SAX handler
12593 * @user_data: The user data returned on SAX callbacks
12594 * @ioread: an I/O read function
12595 * @ioclose: an I/O close function
12596 * @ioctx: an I/O handler
12597 * @enc: the charset encoding if known
12599 * Create a parser context for using the XML parser with an existing
12600 * I/O stream
12602 * Returns the new parser context or NULL
12604 xmlParserCtxtPtr
12605 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12606 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12607 void *ioctx, xmlCharEncoding enc) {
12608 xmlParserCtxtPtr ctxt;
12609 xmlParserInputPtr inputStream;
12610 xmlParserInputBufferPtr buf;
12612 if (ioread == NULL) return(NULL);
12614 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12615 if (buf == NULL) {
12616 if (ioclose != NULL)
12617 ioclose(ioctx);
12618 return (NULL);
12621 ctxt = xmlNewParserCtxt();
12622 if (ctxt == NULL) {
12623 xmlFreeParserInputBuffer(buf);
12624 return(NULL);
12626 if (sax != NULL) {
12627 #ifdef LIBXML_SAX1_ENABLED
12628 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12629 #endif /* LIBXML_SAX1_ENABLED */
12630 xmlFree(ctxt->sax);
12631 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12632 if (ctxt->sax == NULL) {
12633 xmlFreeParserInputBuffer(buf);
12634 xmlErrMemory(ctxt, NULL);
12635 xmlFreeParserCtxt(ctxt);
12636 return(NULL);
12638 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12639 if (sax->initialized == XML_SAX2_MAGIC)
12640 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12641 else
12642 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12643 if (user_data != NULL)
12644 ctxt->userData = user_data;
12647 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12648 if (inputStream == NULL) {
12649 xmlFreeParserCtxt(ctxt);
12650 return(NULL);
12652 inputPush(ctxt, inputStream);
12654 return(ctxt);
12657 #ifdef LIBXML_VALID_ENABLED
12658 /************************************************************************
12660 * Front ends when parsing a DTD *
12662 ************************************************************************/
12665 * xmlIOParseDTD:
12666 * @sax: the SAX handler block or NULL
12667 * @input: an Input Buffer
12668 * @enc: the charset encoding if known
12670 * Load and parse a DTD
12672 * Returns the resulting xmlDtdPtr or NULL in case of error.
12673 * @input will be freed by the function in any case.
12676 xmlDtdPtr
12677 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12678 xmlCharEncoding enc) {
12679 xmlDtdPtr ret = NULL;
12680 xmlParserCtxtPtr ctxt;
12681 xmlParserInputPtr pinput = NULL;
12682 xmlChar start[4];
12684 if (input == NULL)
12685 return(NULL);
12687 ctxt = xmlNewParserCtxt();
12688 if (ctxt == NULL) {
12689 xmlFreeParserInputBuffer(input);
12690 return(NULL);
12693 /* We are loading a DTD */
12694 ctxt->options |= XML_PARSE_DTDLOAD;
12697 * Set-up the SAX context
12699 if (sax != NULL) {
12700 if (ctxt->sax != NULL)
12701 xmlFree(ctxt->sax);
12702 ctxt->sax = sax;
12703 ctxt->userData = ctxt;
12705 xmlDetectSAX2(ctxt);
12708 * generate a parser input from the I/O handler
12711 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12712 if (pinput == NULL) {
12713 if (sax != NULL) ctxt->sax = NULL;
12714 xmlFreeParserInputBuffer(input);
12715 xmlFreeParserCtxt(ctxt);
12716 return(NULL);
12720 * plug some encoding conversion routines here.
12722 if (xmlPushInput(ctxt, pinput) < 0) {
12723 if (sax != NULL) ctxt->sax = NULL;
12724 xmlFreeParserCtxt(ctxt);
12725 return(NULL);
12727 if (enc != XML_CHAR_ENCODING_NONE) {
12728 xmlSwitchEncoding(ctxt, enc);
12731 pinput->filename = NULL;
12732 pinput->line = 1;
12733 pinput->col = 1;
12734 pinput->base = ctxt->input->cur;
12735 pinput->cur = ctxt->input->cur;
12736 pinput->free = NULL;
12739 * let's parse that entity knowing it's an external subset.
12741 ctxt->inSubset = 2;
12742 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12743 if (ctxt->myDoc == NULL) {
12744 xmlErrMemory(ctxt, "New Doc failed");
12745 return(NULL);
12747 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12748 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12749 BAD_CAST "none", BAD_CAST "none");
12751 if ((enc == XML_CHAR_ENCODING_NONE) &&
12752 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12754 * Get the 4 first bytes and decode the charset
12755 * if enc != XML_CHAR_ENCODING_NONE
12756 * plug some encoding conversion routines.
12758 start[0] = RAW;
12759 start[1] = NXT(1);
12760 start[2] = NXT(2);
12761 start[3] = NXT(3);
12762 enc = xmlDetectCharEncoding(start, 4);
12763 if (enc != XML_CHAR_ENCODING_NONE) {
12764 xmlSwitchEncoding(ctxt, enc);
12768 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12770 if (ctxt->myDoc != NULL) {
12771 if (ctxt->wellFormed) {
12772 ret = ctxt->myDoc->extSubset;
12773 ctxt->myDoc->extSubset = NULL;
12774 if (ret != NULL) {
12775 xmlNodePtr tmp;
12777 ret->doc = NULL;
12778 tmp = ret->children;
12779 while (tmp != NULL) {
12780 tmp->doc = NULL;
12781 tmp = tmp->next;
12784 } else {
12785 ret = NULL;
12787 xmlFreeDoc(ctxt->myDoc);
12788 ctxt->myDoc = NULL;
12790 if (sax != NULL) ctxt->sax = NULL;
12791 xmlFreeParserCtxt(ctxt);
12793 return(ret);
12797 * xmlSAXParseDTD:
12798 * @sax: the SAX handler block
12799 * @ExternalID: a NAME* containing the External ID of the DTD
12800 * @SystemID: a NAME* containing the URL to the DTD
12802 * Load and parse an external subset.
12804 * Returns the resulting xmlDtdPtr or NULL in case of error.
12807 xmlDtdPtr
12808 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12809 const xmlChar *SystemID) {
12810 xmlDtdPtr ret = NULL;
12811 xmlParserCtxtPtr ctxt;
12812 xmlParserInputPtr input = NULL;
12813 xmlCharEncoding enc;
12814 xmlChar* systemIdCanonic;
12816 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12818 ctxt = xmlNewParserCtxt();
12819 if (ctxt == NULL) {
12820 return(NULL);
12823 /* We are loading a DTD */
12824 ctxt->options |= XML_PARSE_DTDLOAD;
12827 * Set-up the SAX context
12829 if (sax != NULL) {
12830 if (ctxt->sax != NULL)
12831 xmlFree(ctxt->sax);
12832 ctxt->sax = sax;
12833 ctxt->userData = ctxt;
12837 * Canonicalise the system ID
12839 systemIdCanonic = xmlCanonicPath(SystemID);
12840 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12841 xmlFreeParserCtxt(ctxt);
12842 return(NULL);
12846 * Ask the Entity resolver to load the damn thing
12849 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12850 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12851 systemIdCanonic);
12852 if (input == NULL) {
12853 if (sax != NULL) ctxt->sax = NULL;
12854 xmlFreeParserCtxt(ctxt);
12855 if (systemIdCanonic != NULL)
12856 xmlFree(systemIdCanonic);
12857 return(NULL);
12861 * plug some encoding conversion routines here.
12863 if (xmlPushInput(ctxt, input) < 0) {
12864 if (sax != NULL) ctxt->sax = NULL;
12865 xmlFreeParserCtxt(ctxt);
12866 if (systemIdCanonic != NULL)
12867 xmlFree(systemIdCanonic);
12868 return(NULL);
12870 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12871 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12872 xmlSwitchEncoding(ctxt, enc);
12875 if (input->filename == NULL)
12876 input->filename = (char *) systemIdCanonic;
12877 else
12878 xmlFree(systemIdCanonic);
12879 input->line = 1;
12880 input->col = 1;
12881 input->base = ctxt->input->cur;
12882 input->cur = ctxt->input->cur;
12883 input->free = NULL;
12886 * let's parse that entity knowing it's an external subset.
12888 ctxt->inSubset = 2;
12889 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12890 if (ctxt->myDoc == NULL) {
12891 xmlErrMemory(ctxt, "New Doc failed");
12892 if (sax != NULL) ctxt->sax = NULL;
12893 xmlFreeParserCtxt(ctxt);
12894 return(NULL);
12896 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12897 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12898 ExternalID, SystemID);
12899 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12901 if (ctxt->myDoc != NULL) {
12902 if (ctxt->wellFormed) {
12903 ret = ctxt->myDoc->extSubset;
12904 ctxt->myDoc->extSubset = NULL;
12905 if (ret != NULL) {
12906 xmlNodePtr tmp;
12908 ret->doc = NULL;
12909 tmp = ret->children;
12910 while (tmp != NULL) {
12911 tmp->doc = NULL;
12912 tmp = tmp->next;
12915 } else {
12916 ret = NULL;
12918 xmlFreeDoc(ctxt->myDoc);
12919 ctxt->myDoc = NULL;
12921 if (sax != NULL) ctxt->sax = NULL;
12922 xmlFreeParserCtxt(ctxt);
12924 return(ret);
12929 * xmlParseDTD:
12930 * @ExternalID: a NAME* containing the External ID of the DTD
12931 * @SystemID: a NAME* containing the URL to the DTD
12933 * Load and parse an external subset.
12935 * Returns the resulting xmlDtdPtr or NULL in case of error.
12938 xmlDtdPtr
12939 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12940 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12942 #endif /* LIBXML_VALID_ENABLED */
12944 /************************************************************************
12946 * Front ends when parsing an Entity *
12948 ************************************************************************/
12951 * xmlParseCtxtExternalEntity:
12952 * @ctx: the existing parsing context
12953 * @URL: the URL for the entity to load
12954 * @ID: the System ID for the entity to load
12955 * @lst: the return value for the set of parsed nodes
12957 * Parse an external general entity within an existing parsing context
12958 * An external general parsed entity is well-formed if it matches the
12959 * production labeled extParsedEnt.
12961 * [78] extParsedEnt ::= TextDecl? content
12963 * Returns 0 if the entity is well formed, -1 in case of args problem and
12964 * the parser error code otherwise
12968 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12969 const xmlChar *ID, xmlNodePtr *lst) {
12970 void *userData;
12972 if (ctx == NULL) return(-1);
12974 * If the user provided their own SAX callbacks, then reuse the
12975 * userData callback field, otherwise the expected setup in a
12976 * DOM builder is to have userData == ctxt
12978 if (ctx->userData == ctx)
12979 userData = NULL;
12980 else
12981 userData = ctx->userData;
12982 return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12983 userData, ctx->depth + 1,
12984 URL, ID, lst);
12988 * xmlParseExternalEntityPrivate:
12989 * @doc: the document the chunk pertains to
12990 * @oldctxt: the previous parser context if available
12991 * @sax: the SAX handler block (possibly NULL)
12992 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12993 * @depth: Used for loop detection, use 0
12994 * @URL: the URL for the entity to load
12995 * @ID: the System ID for the entity to load
12996 * @list: the return value for the set of parsed nodes
12998 * Private version of xmlParseExternalEntity()
13000 * Returns 0 if the entity is well formed, -1 in case of args problem and
13001 * the parser error code otherwise
13004 static xmlParserErrors
13005 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13006 xmlSAXHandlerPtr sax,
13007 void *user_data, int depth, const xmlChar *URL,
13008 const xmlChar *ID, xmlNodePtr *list) {
13009 xmlParserCtxtPtr ctxt;
13010 xmlDocPtr newDoc;
13011 xmlNodePtr newRoot;
13012 xmlSAXHandlerPtr oldsax = NULL;
13013 xmlParserErrors ret = XML_ERR_OK;
13014 xmlChar start[4];
13015 xmlCharEncoding enc;
13017 if (((depth > 40) &&
13018 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13019 (depth > 1024)) {
13020 return(XML_ERR_ENTITY_LOOP);
13023 if (list != NULL)
13024 *list = NULL;
13025 if ((URL == NULL) && (ID == NULL))
13026 return(XML_ERR_INTERNAL_ERROR);
13027 if (doc == NULL)
13028 return(XML_ERR_INTERNAL_ERROR);
13031 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13032 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13033 ctxt->userData = ctxt;
13034 if (sax != NULL) {
13035 oldsax = ctxt->sax;
13036 ctxt->sax = sax;
13037 if (user_data != NULL)
13038 ctxt->userData = user_data;
13040 xmlDetectSAX2(ctxt);
13041 newDoc = xmlNewDoc(BAD_CAST "1.0");
13042 if (newDoc == NULL) {
13043 xmlFreeParserCtxt(ctxt);
13044 return(XML_ERR_INTERNAL_ERROR);
13046 newDoc->properties = XML_DOC_INTERNAL;
13047 if (doc) {
13048 newDoc->intSubset = doc->intSubset;
13049 newDoc->extSubset = doc->extSubset;
13050 if (doc->dict) {
13051 newDoc->dict = doc->dict;
13052 xmlDictReference(newDoc->dict);
13054 if (doc->URL != NULL) {
13055 newDoc->URL = xmlStrdup(doc->URL);
13058 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13059 if (newRoot == NULL) {
13060 if (sax != NULL)
13061 ctxt->sax = oldsax;
13062 xmlFreeParserCtxt(ctxt);
13063 newDoc->intSubset = NULL;
13064 newDoc->extSubset = NULL;
13065 xmlFreeDoc(newDoc);
13066 return(XML_ERR_INTERNAL_ERROR);
13068 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13069 nodePush(ctxt, newDoc->children);
13070 if (doc == NULL) {
13071 ctxt->myDoc = newDoc;
13072 } else {
13073 ctxt->myDoc = doc;
13074 newRoot->doc = doc;
13078 * Get the 4 first bytes and decode the charset
13079 * if enc != XML_CHAR_ENCODING_NONE
13080 * plug some encoding conversion routines.
13082 GROW;
13083 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13084 start[0] = RAW;
13085 start[1] = NXT(1);
13086 start[2] = NXT(2);
13087 start[3] = NXT(3);
13088 enc = xmlDetectCharEncoding(start, 4);
13089 if (enc != XML_CHAR_ENCODING_NONE) {
13090 xmlSwitchEncoding(ctxt, enc);
13095 * Parse a possible text declaration first
13097 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13098 xmlParseTextDecl(ctxt);
13100 * An XML-1.0 document can't reference an entity not XML-1.0
13102 if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13103 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13104 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13105 "Version mismatch between document and entity\n");
13109 ctxt->instate = XML_PARSER_CONTENT;
13110 ctxt->depth = depth;
13111 if (oldctxt != NULL) {
13112 ctxt->_private = oldctxt->_private;
13113 ctxt->loadsubset = oldctxt->loadsubset;
13114 ctxt->validate = oldctxt->validate;
13115 ctxt->valid = oldctxt->valid;
13116 ctxt->replaceEntities = oldctxt->replaceEntities;
13117 if (oldctxt->validate) {
13118 ctxt->vctxt.error = oldctxt->vctxt.error;
13119 ctxt->vctxt.warning = oldctxt->vctxt.warning;
13120 ctxt->vctxt.userData = oldctxt->vctxt.userData;
13122 ctxt->external = oldctxt->external;
13123 if (ctxt->dict) xmlDictFree(ctxt->dict);
13124 ctxt->dict = oldctxt->dict;
13125 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13126 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13127 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13128 ctxt->dictNames = oldctxt->dictNames;
13129 ctxt->attsDefault = oldctxt->attsDefault;
13130 ctxt->attsSpecial = oldctxt->attsSpecial;
13131 ctxt->linenumbers = oldctxt->linenumbers;
13132 ctxt->record_info = oldctxt->record_info;
13133 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13134 ctxt->node_seq.length = oldctxt->node_seq.length;
13135 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13136 } else {
13138 * Doing validity checking on chunk without context
13139 * doesn't make sense
13141 ctxt->_private = NULL;
13142 ctxt->validate = 0;
13143 ctxt->external = 2;
13144 ctxt->loadsubset = 0;
13147 xmlParseContent(ctxt);
13149 if ((RAW == '<') && (NXT(1) == '/')) {
13150 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13151 } else if (RAW != 0) {
13152 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13154 if (ctxt->node != newDoc->children) {
13155 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13158 if (!ctxt->wellFormed) {
13159 if (ctxt->errNo == 0)
13160 ret = XML_ERR_INTERNAL_ERROR;
13161 else
13162 ret = (xmlParserErrors)ctxt->errNo;
13163 } else {
13164 if (list != NULL) {
13165 xmlNodePtr cur;
13168 * Return the newly created nodeset after unlinking it from
13169 * they pseudo parent.
13171 cur = newDoc->children->children;
13172 *list = cur;
13173 while (cur != NULL) {
13174 cur->parent = NULL;
13175 cur = cur->next;
13177 newDoc->children->children = NULL;
13179 ret = XML_ERR_OK;
13183 * Record in the parent context the number of entities replacement
13184 * done when parsing that reference.
13186 if (oldctxt != NULL)
13187 oldctxt->nbentities += ctxt->nbentities;
13190 * Also record the size of the entity parsed
13192 if (ctxt->input != NULL && oldctxt != NULL) {
13193 oldctxt->sizeentities += ctxt->input->consumed;
13194 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13197 * And record the last error if any
13199 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13200 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13202 if (sax != NULL)
13203 ctxt->sax = oldsax;
13204 if (oldctxt != NULL) {
13205 ctxt->dict = NULL;
13206 ctxt->attsDefault = NULL;
13207 ctxt->attsSpecial = NULL;
13208 oldctxt->validate = ctxt->validate;
13209 oldctxt->valid = ctxt->valid;
13210 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13211 oldctxt->node_seq.length = ctxt->node_seq.length;
13212 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13214 ctxt->node_seq.maximum = 0;
13215 ctxt->node_seq.length = 0;
13216 ctxt->node_seq.buffer = NULL;
13217 xmlFreeParserCtxt(ctxt);
13218 newDoc->intSubset = NULL;
13219 newDoc->extSubset = NULL;
13220 xmlFreeDoc(newDoc);
13222 return(ret);
13225 #ifdef LIBXML_SAX1_ENABLED
13227 * xmlParseExternalEntity:
13228 * @doc: the document the chunk pertains to
13229 * @sax: the SAX handler block (possibly NULL)
13230 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13231 * @depth: Used for loop detection, use 0
13232 * @URL: the URL for the entity to load
13233 * @ID: the System ID for the entity to load
13234 * @lst: the return value for the set of parsed nodes
13236 * Parse an external general entity
13237 * An external general parsed entity is well-formed if it matches the
13238 * production labeled extParsedEnt.
13240 * [78] extParsedEnt ::= TextDecl? content
13242 * Returns 0 if the entity is well formed, -1 in case of args problem and
13243 * the parser error code otherwise
13247 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13248 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13249 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13250 ID, lst));
13254 * xmlParseBalancedChunkMemory:
13255 * @doc: the document the chunk pertains to (must not be NULL)
13256 * @sax: the SAX handler block (possibly NULL)
13257 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13258 * @depth: Used for loop detection, use 0
13259 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13260 * @lst: the return value for the set of parsed nodes
13262 * Parse a well-balanced chunk of an XML document
13263 * called by the parser
13264 * The allowed sequence for the Well Balanced Chunk is the one defined by
13265 * the content production in the XML grammar:
13267 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13269 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13270 * the parser error code otherwise
13274 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13275 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13276 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13277 depth, string, lst, 0 );
13279 #endif /* LIBXML_SAX1_ENABLED */
13282 * xmlParseBalancedChunkMemoryInternal:
13283 * @oldctxt: the existing parsing context
13284 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13285 * @user_data: the user data field for the parser context
13286 * @lst: the return value for the set of parsed nodes
13289 * Parse a well-balanced chunk of an XML document
13290 * called by the parser
13291 * The allowed sequence for the Well Balanced Chunk is the one defined by
13292 * the content production in the XML grammar:
13294 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13296 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13297 * error code otherwise
13299 * In case recover is set to 1, the nodelist will not be empty even if
13300 * the parsed chunk is not well balanced.
13302 static xmlParserErrors
13303 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13304 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13305 xmlParserCtxtPtr ctxt;
13306 xmlDocPtr newDoc = NULL;
13307 xmlNodePtr newRoot;
13308 xmlSAXHandlerPtr oldsax = NULL;
13309 xmlNodePtr content = NULL;
13310 xmlNodePtr last = NULL;
13311 int size;
13312 xmlParserErrors ret = XML_ERR_OK;
13313 #ifdef SAX2
13314 int i;
13315 #endif
13317 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13318 (oldctxt->depth > 1024)) {
13319 return(XML_ERR_ENTITY_LOOP);
13323 if (lst != NULL)
13324 *lst = NULL;
13325 if (string == NULL)
13326 return(XML_ERR_INTERNAL_ERROR);
13328 size = xmlStrlen(string);
13330 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13331 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13332 if (user_data != NULL)
13333 ctxt->userData = user_data;
13334 else
13335 ctxt->userData = ctxt;
13336 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13337 ctxt->dict = oldctxt->dict;
13338 ctxt->input_id = oldctxt->input_id + 1;
13339 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13340 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13341 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13343 #ifdef SAX2
13344 /* propagate namespaces down the entity */
13345 for (i = 0;i < oldctxt->nsNr;i += 2) {
13346 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13348 #endif
13350 oldsax = ctxt->sax;
13351 ctxt->sax = oldctxt->sax;
13352 xmlDetectSAX2(ctxt);
13353 ctxt->replaceEntities = oldctxt->replaceEntities;
13354 ctxt->options = oldctxt->options;
13356 ctxt->_private = oldctxt->_private;
13357 if (oldctxt->myDoc == NULL) {
13358 newDoc = xmlNewDoc(BAD_CAST "1.0");
13359 if (newDoc == NULL) {
13360 ctxt->sax = oldsax;
13361 ctxt->dict = NULL;
13362 xmlFreeParserCtxt(ctxt);
13363 return(XML_ERR_INTERNAL_ERROR);
13365 newDoc->properties = XML_DOC_INTERNAL;
13366 newDoc->dict = ctxt->dict;
13367 xmlDictReference(newDoc->dict);
13368 ctxt->myDoc = newDoc;
13369 } else {
13370 ctxt->myDoc = oldctxt->myDoc;
13371 content = ctxt->myDoc->children;
13372 last = ctxt->myDoc->last;
13374 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13375 if (newRoot == NULL) {
13376 ctxt->sax = oldsax;
13377 ctxt->dict = NULL;
13378 xmlFreeParserCtxt(ctxt);
13379 if (newDoc != NULL) {
13380 xmlFreeDoc(newDoc);
13382 return(XML_ERR_INTERNAL_ERROR);
13384 ctxt->myDoc->children = NULL;
13385 ctxt->myDoc->last = NULL;
13386 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13387 nodePush(ctxt, ctxt->myDoc->children);
13388 ctxt->instate = XML_PARSER_CONTENT;
13389 ctxt->depth = oldctxt->depth + 1;
13391 ctxt->validate = 0;
13392 ctxt->loadsubset = oldctxt->loadsubset;
13393 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13395 * ID/IDREF registration will be done in xmlValidateElement below
13397 ctxt->loadsubset |= XML_SKIP_IDS;
13399 ctxt->dictNames = oldctxt->dictNames;
13400 ctxt->attsDefault = oldctxt->attsDefault;
13401 ctxt->attsSpecial = oldctxt->attsSpecial;
13403 xmlParseContent(ctxt);
13404 if ((RAW == '<') && (NXT(1) == '/')) {
13405 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13406 } else if (RAW != 0) {
13407 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13409 if (ctxt->node != ctxt->myDoc->children) {
13410 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13413 if (!ctxt->wellFormed) {
13414 if (ctxt->errNo == 0)
13415 ret = XML_ERR_INTERNAL_ERROR;
13416 else
13417 ret = (xmlParserErrors)ctxt->errNo;
13418 } else {
13419 ret = XML_ERR_OK;
13422 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13423 xmlNodePtr cur;
13426 * Return the newly created nodeset after unlinking it from
13427 * they pseudo parent.
13429 cur = ctxt->myDoc->children->children;
13430 *lst = cur;
13431 while (cur != NULL) {
13432 #ifdef LIBXML_VALID_ENABLED
13433 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13434 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13435 (cur->type == XML_ELEMENT_NODE)) {
13436 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13437 oldctxt->myDoc, cur);
13439 #endif /* LIBXML_VALID_ENABLED */
13440 cur->parent = NULL;
13441 cur = cur->next;
13443 ctxt->myDoc->children->children = NULL;
13445 if (ctxt->myDoc != NULL) {
13446 xmlFreeNode(ctxt->myDoc->children);
13447 ctxt->myDoc->children = content;
13448 ctxt->myDoc->last = last;
13452 * Record in the parent context the number of entities replacement
13453 * done when parsing that reference.
13455 if (oldctxt != NULL)
13456 oldctxt->nbentities += ctxt->nbentities;
13459 * Also record the last error if any
13461 if (ctxt->lastError.code != XML_ERR_OK)
13462 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13464 ctxt->sax = oldsax;
13465 ctxt->dict = NULL;
13466 ctxt->attsDefault = NULL;
13467 ctxt->attsSpecial = NULL;
13468 xmlFreeParserCtxt(ctxt);
13469 if (newDoc != NULL) {
13470 xmlFreeDoc(newDoc);
13473 return(ret);
13477 * xmlParseInNodeContext:
13478 * @node: the context node
13479 * @data: the input string
13480 * @datalen: the input string length in bytes
13481 * @options: a combination of xmlParserOption
13482 * @lst: the return value for the set of parsed nodes
13484 * Parse a well-balanced chunk of an XML document
13485 * within the context (DTD, namespaces, etc ...) of the given node.
13487 * The allowed sequence for the data is a Well Balanced Chunk defined by
13488 * the content production in the XML grammar:
13490 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13492 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13493 * error code otherwise
13495 xmlParserErrors
13496 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13497 int options, xmlNodePtr *lst) {
13498 #ifdef SAX2
13499 xmlParserCtxtPtr ctxt;
13500 xmlDocPtr doc = NULL;
13501 xmlNodePtr fake, cur;
13502 int nsnr = 0;
13504 xmlParserErrors ret = XML_ERR_OK;
13507 * check all input parameters, grab the document
13509 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13510 return(XML_ERR_INTERNAL_ERROR);
13511 switch (node->type) {
13512 case XML_ELEMENT_NODE:
13513 case XML_ATTRIBUTE_NODE:
13514 case XML_TEXT_NODE:
13515 case XML_CDATA_SECTION_NODE:
13516 case XML_ENTITY_REF_NODE:
13517 case XML_PI_NODE:
13518 case XML_COMMENT_NODE:
13519 case XML_DOCUMENT_NODE:
13520 case XML_HTML_DOCUMENT_NODE:
13521 break;
13522 default:
13523 return(XML_ERR_INTERNAL_ERROR);
13526 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13527 (node->type != XML_DOCUMENT_NODE) &&
13528 (node->type != XML_HTML_DOCUMENT_NODE))
13529 node = node->parent;
13530 if (node == NULL)
13531 return(XML_ERR_INTERNAL_ERROR);
13532 if (node->type == XML_ELEMENT_NODE)
13533 doc = node->doc;
13534 else
13535 doc = (xmlDocPtr) node;
13536 if (doc == NULL)
13537 return(XML_ERR_INTERNAL_ERROR);
13540 * allocate a context and set-up everything not related to the
13541 * node position in the tree
13543 if (doc->type == XML_DOCUMENT_NODE)
13544 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13545 #ifdef LIBXML_HTML_ENABLED
13546 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13547 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13549 * When parsing in context, it makes no sense to add implied
13550 * elements like html/body/etc...
13552 options |= HTML_PARSE_NOIMPLIED;
13554 #endif
13555 else
13556 return(XML_ERR_INTERNAL_ERROR);
13558 if (ctxt == NULL)
13559 return(XML_ERR_NO_MEMORY);
13562 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13563 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13564 * we must wait until the last moment to free the original one.
13566 if (doc->dict != NULL) {
13567 if (ctxt->dict != NULL)
13568 xmlDictFree(ctxt->dict);
13569 ctxt->dict = doc->dict;
13570 } else
13571 options |= XML_PARSE_NODICT;
13573 if (doc->encoding != NULL) {
13574 xmlCharEncodingHandlerPtr hdlr;
13576 if (ctxt->encoding != NULL)
13577 xmlFree((xmlChar *) ctxt->encoding);
13578 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13580 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13581 if (hdlr != NULL) {
13582 xmlSwitchToEncoding(ctxt, hdlr);
13583 } else {
13584 return(XML_ERR_UNSUPPORTED_ENCODING);
13588 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13589 xmlDetectSAX2(ctxt);
13590 ctxt->myDoc = doc;
13591 /* parsing in context, i.e. as within existing content */
13592 ctxt->input_id = 2;
13593 ctxt->instate = XML_PARSER_CONTENT;
13595 fake = xmlNewDocComment(node->doc, NULL);
13596 if (fake == NULL) {
13597 xmlFreeParserCtxt(ctxt);
13598 return(XML_ERR_NO_MEMORY);
13600 xmlAddChild(node, fake);
13602 if (node->type == XML_ELEMENT_NODE) {
13603 nodePush(ctxt, node);
13605 * initialize the SAX2 namespaces stack
13607 cur = node;
13608 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13609 xmlNsPtr ns = cur->nsDef;
13610 const xmlChar *iprefix, *ihref;
13612 while (ns != NULL) {
13613 if (ctxt->dict) {
13614 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13615 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13616 } else {
13617 iprefix = ns->prefix;
13618 ihref = ns->href;
13621 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13622 nsPush(ctxt, iprefix, ihref);
13623 nsnr++;
13625 ns = ns->next;
13627 cur = cur->parent;
13631 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13633 * ID/IDREF registration will be done in xmlValidateElement below
13635 ctxt->loadsubset |= XML_SKIP_IDS;
13638 #ifdef LIBXML_HTML_ENABLED
13639 if (doc->type == XML_HTML_DOCUMENT_NODE)
13640 __htmlParseContent(ctxt);
13641 else
13642 #endif
13643 xmlParseContent(ctxt);
13645 nsPop(ctxt, nsnr);
13646 if ((RAW == '<') && (NXT(1) == '/')) {
13647 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13648 } else if (RAW != 0) {
13649 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13651 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13652 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13653 ctxt->wellFormed = 0;
13656 if (!ctxt->wellFormed) {
13657 if (ctxt->errNo == 0)
13658 ret = XML_ERR_INTERNAL_ERROR;
13659 else
13660 ret = (xmlParserErrors)ctxt->errNo;
13661 } else {
13662 ret = XML_ERR_OK;
13666 * Return the newly created nodeset after unlinking it from
13667 * the pseudo sibling.
13670 cur = fake->next;
13671 fake->next = NULL;
13672 node->last = fake;
13674 if (cur != NULL) {
13675 cur->prev = NULL;
13678 *lst = cur;
13680 while (cur != NULL) {
13681 cur->parent = NULL;
13682 cur = cur->next;
13685 xmlUnlinkNode(fake);
13686 xmlFreeNode(fake);
13689 if (ret != XML_ERR_OK) {
13690 xmlFreeNodeList(*lst);
13691 *lst = NULL;
13694 if (doc->dict != NULL)
13695 ctxt->dict = NULL;
13696 xmlFreeParserCtxt(ctxt);
13698 return(ret);
13699 #else /* !SAX2 */
13700 return(XML_ERR_INTERNAL_ERROR);
13701 #endif
13704 #ifdef LIBXML_SAX1_ENABLED
13706 * xmlParseBalancedChunkMemoryRecover:
13707 * @doc: the document the chunk pertains to (must not be NULL)
13708 * @sax: the SAX handler block (possibly NULL)
13709 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13710 * @depth: Used for loop detection, use 0
13711 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13712 * @lst: the return value for the set of parsed nodes
13713 * @recover: return nodes even if the data is broken (use 0)
13716 * Parse a well-balanced chunk of an XML document
13717 * called by the parser
13718 * The allowed sequence for the Well Balanced Chunk is the one defined by
13719 * the content production in the XML grammar:
13721 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13723 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13724 * the parser error code otherwise
13726 * In case recover is set to 1, the nodelist will not be empty even if
13727 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13728 * some extent.
13731 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13732 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13733 int recover) {
13734 xmlParserCtxtPtr ctxt;
13735 xmlDocPtr newDoc;
13736 xmlSAXHandlerPtr oldsax = NULL;
13737 xmlNodePtr content, newRoot;
13738 int size;
13739 int ret = 0;
13741 if (depth > 40) {
13742 return(XML_ERR_ENTITY_LOOP);
13746 if (lst != NULL)
13747 *lst = NULL;
13748 if (string == NULL)
13749 return(-1);
13751 size = xmlStrlen(string);
13753 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13754 if (ctxt == NULL) return(-1);
13755 ctxt->userData = ctxt;
13756 if (sax != NULL) {
13757 oldsax = ctxt->sax;
13758 ctxt->sax = sax;
13759 if (user_data != NULL)
13760 ctxt->userData = user_data;
13762 newDoc = xmlNewDoc(BAD_CAST "1.0");
13763 if (newDoc == NULL) {
13764 xmlFreeParserCtxt(ctxt);
13765 return(-1);
13767 newDoc->properties = XML_DOC_INTERNAL;
13768 if ((doc != NULL) && (doc->dict != NULL)) {
13769 xmlDictFree(ctxt->dict);
13770 ctxt->dict = doc->dict;
13771 xmlDictReference(ctxt->dict);
13772 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13773 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13774 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13775 ctxt->dictNames = 1;
13776 } else {
13777 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13779 /* doc == NULL is only supported for historic reasons */
13780 if (doc != NULL) {
13781 newDoc->intSubset = doc->intSubset;
13782 newDoc->extSubset = doc->extSubset;
13784 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13785 if (newRoot == NULL) {
13786 if (sax != NULL)
13787 ctxt->sax = oldsax;
13788 xmlFreeParserCtxt(ctxt);
13789 newDoc->intSubset = NULL;
13790 newDoc->extSubset = NULL;
13791 xmlFreeDoc(newDoc);
13792 return(-1);
13794 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13795 nodePush(ctxt, newRoot);
13796 /* doc == NULL is only supported for historic reasons */
13797 if (doc == NULL) {
13798 ctxt->myDoc = newDoc;
13799 } else {
13800 ctxt->myDoc = newDoc;
13801 newDoc->children->doc = doc;
13802 /* Ensure that doc has XML spec namespace */
13803 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13804 newDoc->oldNs = doc->oldNs;
13806 ctxt->instate = XML_PARSER_CONTENT;
13807 ctxt->input_id = 2;
13808 ctxt->depth = depth;
13811 * Doing validity checking on chunk doesn't make sense
13813 ctxt->validate = 0;
13814 ctxt->loadsubset = 0;
13815 xmlDetectSAX2(ctxt);
13817 if ( doc != NULL ){
13818 content = doc->children;
13819 doc->children = NULL;
13820 xmlParseContent(ctxt);
13821 doc->children = content;
13823 else {
13824 xmlParseContent(ctxt);
13826 if ((RAW == '<') && (NXT(1) == '/')) {
13827 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13828 } else if (RAW != 0) {
13829 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13831 if (ctxt->node != newDoc->children) {
13832 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13835 if (!ctxt->wellFormed) {
13836 if (ctxt->errNo == 0)
13837 ret = 1;
13838 else
13839 ret = ctxt->errNo;
13840 } else {
13841 ret = 0;
13844 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13845 xmlNodePtr cur;
13848 * Return the newly created nodeset after unlinking it from
13849 * they pseudo parent.
13851 cur = newDoc->children->children;
13852 *lst = cur;
13853 while (cur != NULL) {
13854 xmlSetTreeDoc(cur, doc);
13855 cur->parent = NULL;
13856 cur = cur->next;
13858 newDoc->children->children = NULL;
13861 if (sax != NULL)
13862 ctxt->sax = oldsax;
13863 xmlFreeParserCtxt(ctxt);
13864 newDoc->intSubset = NULL;
13865 newDoc->extSubset = NULL;
13866 /* This leaks the namespace list if doc == NULL */
13867 newDoc->oldNs = NULL;
13868 xmlFreeDoc(newDoc);
13870 return(ret);
13874 * xmlSAXParseEntity:
13875 * @sax: the SAX handler block
13876 * @filename: the filename
13878 * parse an XML external entity out of context and build a tree.
13879 * It use the given SAX function block to handle the parsing callback.
13880 * If sax is NULL, fallback to the default DOM tree building routines.
13882 * [78] extParsedEnt ::= TextDecl? content
13884 * This correspond to a "Well Balanced" chunk
13886 * Returns the resulting document tree
13889 xmlDocPtr
13890 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13891 xmlDocPtr ret;
13892 xmlParserCtxtPtr ctxt;
13894 ctxt = xmlCreateFileParserCtxt(filename);
13895 if (ctxt == NULL) {
13896 return(NULL);
13898 if (sax != NULL) {
13899 if (ctxt->sax != NULL)
13900 xmlFree(ctxt->sax);
13901 ctxt->sax = sax;
13902 ctxt->userData = NULL;
13905 xmlParseExtParsedEnt(ctxt);
13907 if (ctxt->wellFormed)
13908 ret = ctxt->myDoc;
13909 else {
13910 ret = NULL;
13911 xmlFreeDoc(ctxt->myDoc);
13912 ctxt->myDoc = NULL;
13914 if (sax != NULL)
13915 ctxt->sax = NULL;
13916 xmlFreeParserCtxt(ctxt);
13918 return(ret);
13922 * xmlParseEntity:
13923 * @filename: the filename
13925 * parse an XML external entity out of context and build a tree.
13927 * [78] extParsedEnt ::= TextDecl? content
13929 * This correspond to a "Well Balanced" chunk
13931 * Returns the resulting document tree
13934 xmlDocPtr
13935 xmlParseEntity(const char *filename) {
13936 return(xmlSAXParseEntity(NULL, filename));
13938 #endif /* LIBXML_SAX1_ENABLED */
13941 * xmlCreateEntityParserCtxtInternal:
13942 * @URL: the entity URL
13943 * @ID: the entity PUBLIC ID
13944 * @base: a possible base for the target URI
13945 * @pctx: parser context used to set options on new context
13947 * Create a parser context for an external entity
13948 * Automatic support for ZLIB/Compress compressed document is provided
13949 * by default if found at compile-time.
13951 * Returns the new parser context or NULL
13953 static xmlParserCtxtPtr
13954 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13955 const xmlChar *base, xmlParserCtxtPtr pctx) {
13956 xmlParserCtxtPtr ctxt;
13957 xmlParserInputPtr inputStream;
13958 char *directory = NULL;
13959 xmlChar *uri;
13961 ctxt = xmlNewParserCtxt();
13962 if (ctxt == NULL) {
13963 return(NULL);
13966 if (pctx != NULL) {
13967 ctxt->options = pctx->options;
13968 ctxt->_private = pctx->_private;
13970 * this is a subparser of pctx, so the input_id should be
13971 * incremented to distinguish from main entity
13973 ctxt->input_id = pctx->input_id + 1;
13976 /* Don't read from stdin. */
13977 if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13978 URL = BAD_CAST "./-";
13980 uri = xmlBuildURI(URL, base);
13982 if (uri == NULL) {
13983 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13984 if (inputStream == NULL) {
13985 xmlFreeParserCtxt(ctxt);
13986 return(NULL);
13989 inputPush(ctxt, inputStream);
13991 if ((ctxt->directory == NULL) && (directory == NULL))
13992 directory = xmlParserGetDirectory((char *)URL);
13993 if ((ctxt->directory == NULL) && (directory != NULL))
13994 ctxt->directory = directory;
13995 } else {
13996 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13997 if (inputStream == NULL) {
13998 xmlFree(uri);
13999 xmlFreeParserCtxt(ctxt);
14000 return(NULL);
14003 inputPush(ctxt, inputStream);
14005 if ((ctxt->directory == NULL) && (directory == NULL))
14006 directory = xmlParserGetDirectory((char *)uri);
14007 if ((ctxt->directory == NULL) && (directory != NULL))
14008 ctxt->directory = directory;
14009 xmlFree(uri);
14011 return(ctxt);
14015 * xmlCreateEntityParserCtxt:
14016 * @URL: the entity URL
14017 * @ID: the entity PUBLIC ID
14018 * @base: a possible base for the target URI
14020 * Create a parser context for an external entity
14021 * Automatic support for ZLIB/Compress compressed document is provided
14022 * by default if found at compile-time.
14024 * Returns the new parser context or NULL
14026 xmlParserCtxtPtr
14027 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14028 const xmlChar *base) {
14029 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14033 /************************************************************************
14035 * Front ends when parsing from a file *
14037 ************************************************************************/
14040 * xmlCreateURLParserCtxt:
14041 * @filename: the filename or URL
14042 * @options: a combination of xmlParserOption
14044 * Create a parser context for a file or URL content.
14045 * Automatic support for ZLIB/Compress compressed document is provided
14046 * by default if found at compile-time and for file accesses
14048 * Returns the new parser context or NULL
14050 xmlParserCtxtPtr
14051 xmlCreateURLParserCtxt(const char *filename, int options)
14053 xmlParserCtxtPtr ctxt;
14054 xmlParserInputPtr inputStream;
14055 char *directory = NULL;
14057 ctxt = xmlNewParserCtxt();
14058 if (ctxt == NULL) {
14059 xmlErrMemory(NULL, "cannot allocate parser context");
14060 return(NULL);
14063 if (options)
14064 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14065 ctxt->linenumbers = 1;
14067 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14068 if (inputStream == NULL) {
14069 xmlFreeParserCtxt(ctxt);
14070 return(NULL);
14073 inputPush(ctxt, inputStream);
14074 if ((ctxt->directory == NULL) && (directory == NULL))
14075 directory = xmlParserGetDirectory(filename);
14076 if ((ctxt->directory == NULL) && (directory != NULL))
14077 ctxt->directory = directory;
14079 return(ctxt);
14083 * xmlCreateFileParserCtxt:
14084 * @filename: the filename
14086 * Create a parser context for a file content.
14087 * Automatic support for ZLIB/Compress compressed document is provided
14088 * by default if found at compile-time.
14090 * Returns the new parser context or NULL
14092 xmlParserCtxtPtr
14093 xmlCreateFileParserCtxt(const char *filename)
14095 return(xmlCreateURLParserCtxt(filename, 0));
14098 #ifdef LIBXML_SAX1_ENABLED
14100 * xmlSAXParseFileWithData:
14101 * @sax: the SAX handler block
14102 * @filename: the filename
14103 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14104 * documents
14105 * @data: the userdata
14107 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14108 * compressed document is provided by default if found at compile-time.
14109 * It use the given SAX function block to handle the parsing callback.
14110 * If sax is NULL, fallback to the default DOM tree building routines.
14112 * User data (void *) is stored within the parser context in the
14113 * context's _private member, so it is available nearly everywhere in libxml
14115 * Returns the resulting document tree
14118 xmlDocPtr
14119 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14120 int recovery, void *data) {
14121 xmlDocPtr ret;
14122 xmlParserCtxtPtr ctxt;
14124 xmlInitParser();
14126 ctxt = xmlCreateFileParserCtxt(filename);
14127 if (ctxt == NULL) {
14128 return(NULL);
14130 if (sax != NULL) {
14131 if (ctxt->sax != NULL)
14132 xmlFree(ctxt->sax);
14133 ctxt->sax = sax;
14135 xmlDetectSAX2(ctxt);
14136 if (data!=NULL) {
14137 ctxt->_private = data;
14140 if (ctxt->directory == NULL)
14141 ctxt->directory = xmlParserGetDirectory(filename);
14143 ctxt->recovery = recovery;
14145 xmlParseDocument(ctxt);
14147 if ((ctxt->wellFormed) || recovery) {
14148 ret = ctxt->myDoc;
14149 if ((ret != NULL) && (ctxt->input->buf != NULL)) {
14150 if (ctxt->input->buf->compressed > 0)
14151 ret->compression = 9;
14152 else
14153 ret->compression = ctxt->input->buf->compressed;
14156 else {
14157 ret = NULL;
14158 xmlFreeDoc(ctxt->myDoc);
14159 ctxt->myDoc = NULL;
14161 if (sax != NULL)
14162 ctxt->sax = NULL;
14163 xmlFreeParserCtxt(ctxt);
14165 return(ret);
14169 * xmlSAXParseFile:
14170 * @sax: the SAX handler block
14171 * @filename: the filename
14172 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14173 * documents
14175 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14176 * compressed document is provided by default if found at compile-time.
14177 * It use the given SAX function block to handle the parsing callback.
14178 * If sax is NULL, fallback to the default DOM tree building routines.
14180 * Returns the resulting document tree
14183 xmlDocPtr
14184 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14185 int recovery) {
14186 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14190 * xmlRecoverDoc:
14191 * @cur: a pointer to an array of xmlChar
14193 * parse an XML in-memory document and build a tree.
14194 * In the case the document is not Well Formed, a attempt to build a
14195 * tree is tried anyway
14197 * Returns the resulting document tree or NULL in case of failure
14200 xmlDocPtr
14201 xmlRecoverDoc(const xmlChar *cur) {
14202 return(xmlSAXParseDoc(NULL, cur, 1));
14206 * xmlParseFile:
14207 * @filename: the filename
14209 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14210 * compressed document is provided by default if found at compile-time.
14212 * Returns the resulting document tree if the file was wellformed,
14213 * NULL otherwise.
14216 xmlDocPtr
14217 xmlParseFile(const char *filename) {
14218 return(xmlSAXParseFile(NULL, filename, 0));
14222 * xmlRecoverFile:
14223 * @filename: the filename
14225 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14226 * compressed document is provided by default if found at compile-time.
14227 * In the case the document is not Well Formed, it attempts to build
14228 * a tree anyway
14230 * Returns the resulting document tree or NULL in case of failure
14233 xmlDocPtr
14234 xmlRecoverFile(const char *filename) {
14235 return(xmlSAXParseFile(NULL, filename, 1));
14240 * xmlSetupParserForBuffer:
14241 * @ctxt: an XML parser context
14242 * @buffer: a xmlChar * buffer
14243 * @filename: a file name
14245 * Setup the parser context to parse a new buffer; Clears any prior
14246 * contents from the parser context. The buffer parameter must not be
14247 * NULL, but the filename parameter can be
14249 void
14250 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14251 const char* filename)
14253 xmlParserInputPtr input;
14255 if ((ctxt == NULL) || (buffer == NULL))
14256 return;
14258 input = xmlNewInputStream(ctxt);
14259 if (input == NULL) {
14260 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14261 xmlClearParserCtxt(ctxt);
14262 return;
14265 xmlClearParserCtxt(ctxt);
14266 if (filename != NULL)
14267 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14268 input->base = buffer;
14269 input->cur = buffer;
14270 input->end = &buffer[xmlStrlen(buffer)];
14271 inputPush(ctxt, input);
14275 * xmlSAXUserParseFile:
14276 * @sax: a SAX handler
14277 * @user_data: The user data returned on SAX callbacks
14278 * @filename: a file name
14280 * parse an XML file and call the given SAX handler routines.
14281 * Automatic support for ZLIB/Compress compressed document is provided
14283 * Returns 0 in case of success or a error number otherwise
14286 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14287 const char *filename) {
14288 int ret = 0;
14289 xmlParserCtxtPtr ctxt;
14291 ctxt = xmlCreateFileParserCtxt(filename);
14292 if (ctxt == NULL) return -1;
14293 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14294 xmlFree(ctxt->sax);
14295 ctxt->sax = sax;
14296 xmlDetectSAX2(ctxt);
14298 if (user_data != NULL)
14299 ctxt->userData = user_data;
14301 xmlParseDocument(ctxt);
14303 if (ctxt->wellFormed)
14304 ret = 0;
14305 else {
14306 if (ctxt->errNo != 0)
14307 ret = ctxt->errNo;
14308 else
14309 ret = -1;
14311 if (sax != NULL)
14312 ctxt->sax = NULL;
14313 if (ctxt->myDoc != NULL) {
14314 xmlFreeDoc(ctxt->myDoc);
14315 ctxt->myDoc = NULL;
14317 xmlFreeParserCtxt(ctxt);
14319 return ret;
14321 #endif /* LIBXML_SAX1_ENABLED */
14323 /************************************************************************
14325 * Front ends when parsing from memory *
14327 ************************************************************************/
14330 * xmlCreateMemoryParserCtxt:
14331 * @buffer: a pointer to a char array
14332 * @size: the size of the array
14334 * Create a parser context for an XML in-memory document.
14336 * Returns the new parser context or NULL
14338 xmlParserCtxtPtr
14339 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14340 xmlParserCtxtPtr ctxt;
14341 xmlParserInputPtr input;
14342 xmlParserInputBufferPtr buf;
14344 if (buffer == NULL)
14345 return(NULL);
14346 if (size <= 0)
14347 return(NULL);
14349 ctxt = xmlNewParserCtxt();
14350 if (ctxt == NULL)
14351 return(NULL);
14353 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14354 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14355 if (buf == NULL) {
14356 xmlFreeParserCtxt(ctxt);
14357 return(NULL);
14360 input = xmlNewInputStream(ctxt);
14361 if (input == NULL) {
14362 xmlFreeParserInputBuffer(buf);
14363 xmlFreeParserCtxt(ctxt);
14364 return(NULL);
14367 input->filename = NULL;
14368 input->buf = buf;
14369 xmlBufResetInput(input->buf->buffer, input);
14371 inputPush(ctxt, input);
14372 return(ctxt);
14375 #ifdef LIBXML_SAX1_ENABLED
14377 * xmlSAXParseMemoryWithData:
14378 * @sax: the SAX handler block
14379 * @buffer: an pointer to a char array
14380 * @size: the size of the array
14381 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14382 * documents
14383 * @data: the userdata
14385 * parse an XML in-memory block and use the given SAX function block
14386 * to handle the parsing callback. If sax is NULL, fallback to the default
14387 * DOM tree building routines.
14389 * User data (void *) is stored within the parser context in the
14390 * context's _private member, so it is available nearly everywhere in libxml
14392 * Returns the resulting document tree
14395 xmlDocPtr
14396 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14397 int size, int recovery, void *data) {
14398 xmlDocPtr ret;
14399 xmlParserCtxtPtr ctxt;
14401 xmlInitParser();
14403 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14404 if (ctxt == NULL) return(NULL);
14405 if (sax != NULL) {
14406 if (ctxt->sax != NULL)
14407 xmlFree(ctxt->sax);
14408 ctxt->sax = sax;
14410 xmlDetectSAX2(ctxt);
14411 if (data!=NULL) {
14412 ctxt->_private=data;
14415 ctxt->recovery = recovery;
14417 xmlParseDocument(ctxt);
14419 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14420 else {
14421 ret = NULL;
14422 xmlFreeDoc(ctxt->myDoc);
14423 ctxt->myDoc = NULL;
14425 if (sax != NULL)
14426 ctxt->sax = NULL;
14427 xmlFreeParserCtxt(ctxt);
14429 return(ret);
14433 * xmlSAXParseMemory:
14434 * @sax: the SAX handler block
14435 * @buffer: an pointer to a char array
14436 * @size: the size of the array
14437 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14438 * documents
14440 * parse an XML in-memory block and use the given SAX function block
14441 * to handle the parsing callback. If sax is NULL, fallback to the default
14442 * DOM tree building routines.
14444 * Returns the resulting document tree
14446 xmlDocPtr
14447 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14448 int size, int recovery) {
14449 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14453 * xmlParseMemory:
14454 * @buffer: an pointer to a char array
14455 * @size: the size of the array
14457 * parse an XML in-memory block and build a tree.
14459 * Returns the resulting document tree
14462 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14463 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14467 * xmlRecoverMemory:
14468 * @buffer: an pointer to a char array
14469 * @size: the size of the array
14471 * parse an XML in-memory block and build a tree.
14472 * In the case the document is not Well Formed, an attempt to
14473 * build a tree is tried anyway
14475 * Returns the resulting document tree or NULL in case of error
14478 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14479 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14483 * xmlSAXUserParseMemory:
14484 * @sax: a SAX handler
14485 * @user_data: The user data returned on SAX callbacks
14486 * @buffer: an in-memory XML document input
14487 * @size: the length of the XML document in bytes
14489 * A better SAX parsing routine.
14490 * parse an XML in-memory buffer and call the given SAX handler routines.
14492 * Returns 0 in case of success or a error number otherwise
14494 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14495 const char *buffer, int size) {
14496 int ret = 0;
14497 xmlParserCtxtPtr ctxt;
14499 xmlInitParser();
14501 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14502 if (ctxt == NULL) return -1;
14503 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14504 xmlFree(ctxt->sax);
14505 ctxt->sax = sax;
14506 xmlDetectSAX2(ctxt);
14508 if (user_data != NULL)
14509 ctxt->userData = user_data;
14511 xmlParseDocument(ctxt);
14513 if (ctxt->wellFormed)
14514 ret = 0;
14515 else {
14516 if (ctxt->errNo != 0)
14517 ret = ctxt->errNo;
14518 else
14519 ret = -1;
14521 if (sax != NULL)
14522 ctxt->sax = NULL;
14523 if (ctxt->myDoc != NULL) {
14524 xmlFreeDoc(ctxt->myDoc);
14525 ctxt->myDoc = NULL;
14527 xmlFreeParserCtxt(ctxt);
14529 return ret;
14531 #endif /* LIBXML_SAX1_ENABLED */
14534 * xmlCreateDocParserCtxt:
14535 * @cur: a pointer to an array of xmlChar
14537 * Creates a parser context for an XML in-memory document.
14539 * Returns the new parser context or NULL
14541 xmlParserCtxtPtr
14542 xmlCreateDocParserCtxt(const xmlChar *cur) {
14543 int len;
14545 if (cur == NULL)
14546 return(NULL);
14547 len = xmlStrlen(cur);
14548 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14551 #ifdef LIBXML_SAX1_ENABLED
14553 * xmlSAXParseDoc:
14554 * @sax: the SAX handler block
14555 * @cur: a pointer to an array of xmlChar
14556 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14557 * documents
14559 * parse an XML in-memory document and build a tree.
14560 * It use the given SAX function block to handle the parsing callback.
14561 * If sax is NULL, fallback to the default DOM tree building routines.
14563 * Returns the resulting document tree
14566 xmlDocPtr
14567 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14568 xmlDocPtr ret;
14569 xmlParserCtxtPtr ctxt;
14570 xmlSAXHandlerPtr oldsax = NULL;
14572 if (cur == NULL) return(NULL);
14575 ctxt = xmlCreateDocParserCtxt(cur);
14576 if (ctxt == NULL) return(NULL);
14577 if (sax != NULL) {
14578 oldsax = ctxt->sax;
14579 ctxt->sax = sax;
14580 ctxt->userData = NULL;
14582 xmlDetectSAX2(ctxt);
14584 xmlParseDocument(ctxt);
14585 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14586 else {
14587 ret = NULL;
14588 xmlFreeDoc(ctxt->myDoc);
14589 ctxt->myDoc = NULL;
14591 if (sax != NULL)
14592 ctxt->sax = oldsax;
14593 xmlFreeParserCtxt(ctxt);
14595 return(ret);
14599 * xmlParseDoc:
14600 * @cur: a pointer to an array of xmlChar
14602 * parse an XML in-memory document and build a tree.
14604 * Returns the resulting document tree
14607 xmlDocPtr
14608 xmlParseDoc(const xmlChar *cur) {
14609 return(xmlSAXParseDoc(NULL, cur, 0));
14611 #endif /* LIBXML_SAX1_ENABLED */
14613 #ifdef LIBXML_LEGACY_ENABLED
14614 /************************************************************************
14616 * Specific function to keep track of entities references *
14617 * and used by the XSLT debugger *
14619 ************************************************************************/
14621 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14624 * xmlAddEntityReference:
14625 * @ent : A valid entity
14626 * @firstNode : A valid first node for children of entity
14627 * @lastNode : A valid last node of children entity
14629 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14631 static void
14632 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14633 xmlNodePtr lastNode)
14635 if (xmlEntityRefFunc != NULL) {
14636 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14642 * xmlSetEntityReferenceFunc:
14643 * @func: A valid function
14645 * Set the function to call call back when a xml reference has been made
14647 void
14648 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14650 xmlEntityRefFunc = func;
14652 #endif /* LIBXML_LEGACY_ENABLED */
14654 /************************************************************************
14656 * Miscellaneous *
14658 ************************************************************************/
14660 #ifdef LIBXML_XPATH_ENABLED
14661 #include <libxml/xpath.h>
14662 #endif
14664 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14665 static int xmlParserInitialized = 0;
14668 * xmlInitParser:
14670 * Initialization function for the XML parser.
14671 * This is not reentrant. Call once before processing in case of
14672 * use in multithreaded programs.
14675 void
14676 xmlInitParser(void) {
14677 if (xmlParserInitialized != 0)
14678 return;
14680 #if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14681 if (xmlFree == free)
14682 atexit(xmlCleanupParser);
14683 #endif
14685 #ifdef LIBXML_THREAD_ENABLED
14686 __xmlGlobalInitMutexLock();
14687 if (xmlParserInitialized == 0) {
14688 #endif
14689 xmlInitThreads();
14690 xmlInitGlobals();
14691 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14692 (xmlGenericError == NULL))
14693 initGenericErrorDefaultFunc(NULL);
14694 xmlInitMemory();
14695 xmlInitializeDict();
14696 xmlInitCharEncodingHandlers();
14697 xmlDefaultSAXHandlerInit();
14698 xmlRegisterDefaultInputCallbacks();
14699 #ifdef LIBXML_OUTPUT_ENABLED
14700 xmlRegisterDefaultOutputCallbacks();
14701 #endif /* LIBXML_OUTPUT_ENABLED */
14702 #ifdef LIBXML_HTML_ENABLED
14703 htmlInitAutoClose();
14704 htmlDefaultSAXHandlerInit();
14705 #endif
14706 #ifdef LIBXML_XPATH_ENABLED
14707 xmlXPathInit();
14708 #endif
14709 xmlParserInitialized = 1;
14710 #ifdef LIBXML_THREAD_ENABLED
14712 __xmlGlobalInitMutexUnlock();
14713 #endif
14717 * xmlCleanupParser:
14719 * This function name is somewhat misleading. It does not clean up
14720 * parser state, it cleans up memory allocated by the library itself.
14721 * It is a cleanup function for the XML library. It tries to reclaim all
14722 * related global memory allocated for the library processing.
14723 * It doesn't deallocate any document related memory. One should
14724 * call xmlCleanupParser() only when the process has finished using
14725 * the library and all XML/HTML documents built with it.
14726 * See also xmlInitParser() which has the opposite function of preparing
14727 * the library for operations.
14729 * WARNING: if your application is multithreaded or has plugin support
14730 * calling this may crash the application if another thread or
14731 * a plugin is still using libxml2. It's sometimes very hard to
14732 * guess if libxml2 is in use in the application, some libraries
14733 * or plugins may use it without notice. In case of doubt abstain
14734 * from calling this function or do it just before calling exit()
14735 * to avoid leak reports from valgrind !
14738 void
14739 xmlCleanupParser(void) {
14740 if (!xmlParserInitialized)
14741 return;
14743 xmlCleanupCharEncodingHandlers();
14744 #ifdef LIBXML_CATALOG_ENABLED
14745 xmlCatalogCleanup();
14746 #endif
14747 xmlDictCleanup();
14748 xmlCleanupInputCallbacks();
14749 #ifdef LIBXML_OUTPUT_ENABLED
14750 xmlCleanupOutputCallbacks();
14751 #endif
14752 #ifdef LIBXML_SCHEMAS_ENABLED
14753 xmlSchemaCleanupTypes();
14754 xmlRelaxNGCleanupTypes();
14755 #endif
14756 xmlCleanupGlobals();
14757 xmlCleanupThreads(); /* must be last if called not from the main thread */
14758 xmlCleanupMemory();
14759 xmlParserInitialized = 0;
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * Destructor hook that runs the library cleanup, but only while the
 * default deallocator is still installed.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void)
{
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free) {
        return;
    }

    xmlCleanupParser();
}
#endif
14776 /************************************************************************
14778 * New set (2.6.0) of simpler and more flexible APIs *
14780 ************************************************************************/
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and a NULL "dict" means the
 * string is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro acts
 * as a single statement and can be used safely in an unbraced
 * if/else; invoke it with a trailing semicolon.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
14795 * xmlCtxtReset:
14796 * @ctxt: an XML parser context
14798 * Reset a parser context
14800 void
14801 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14803 xmlParserInputPtr input;
14804 xmlDictPtr dict;
14806 if (ctxt == NULL)
14807 return;
14809 dict = ctxt->dict;
14811 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14812 xmlFreeInputStream(input);
14814 ctxt->inputNr = 0;
14815 ctxt->input = NULL;
14817 ctxt->spaceNr = 0;
14818 if (ctxt->spaceTab != NULL) {
14819 ctxt->spaceTab[0] = -1;
14820 ctxt->space = &ctxt->spaceTab[0];
14821 } else {
14822 ctxt->space = NULL;
14826 ctxt->nodeNr = 0;
14827 ctxt->node = NULL;
14829 ctxt->nameNr = 0;
14830 ctxt->name = NULL;
14832 ctxt->nsNr = 0;
14834 DICT_FREE(ctxt->version);
14835 ctxt->version = NULL;
14836 DICT_FREE(ctxt->encoding);
14837 ctxt->encoding = NULL;
14838 DICT_FREE(ctxt->directory);
14839 ctxt->directory = NULL;
14840 DICT_FREE(ctxt->extSubURI);
14841 ctxt->extSubURI = NULL;
14842 DICT_FREE(ctxt->extSubSystem);
14843 ctxt->extSubSystem = NULL;
14844 if (ctxt->myDoc != NULL)
14845 xmlFreeDoc(ctxt->myDoc);
14846 ctxt->myDoc = NULL;
14848 ctxt->standalone = -1;
14849 ctxt->hasExternalSubset = 0;
14850 ctxt->hasPErefs = 0;
14851 ctxt->html = 0;
14852 ctxt->external = 0;
14853 ctxt->instate = XML_PARSER_START;
14854 ctxt->token = 0;
14856 ctxt->wellFormed = 1;
14857 ctxt->nsWellFormed = 1;
14858 ctxt->disableSAX = 0;
14859 ctxt->valid = 1;
14860 #if 0
14861 ctxt->vctxt.userData = ctxt;
14862 ctxt->vctxt.error = xmlParserValidityError;
14863 ctxt->vctxt.warning = xmlParserValidityWarning;
14864 #endif
14865 ctxt->record_info = 0;
14866 ctxt->checkIndex = 0;
14867 ctxt->inSubset = 0;
14868 ctxt->errNo = XML_ERR_OK;
14869 ctxt->depth = 0;
14870 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14871 ctxt->catalogs = NULL;
14872 ctxt->nbentities = 0;
14873 ctxt->sizeentities = 0;
14874 ctxt->sizeentcopy = 0;
14875 xmlInitNodeInfoSeq(&ctxt->node_seq);
14877 if (ctxt->attsDefault != NULL) {
14878 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14879 ctxt->attsDefault = NULL;
14881 if (ctxt->attsSpecial != NULL) {
14882 xmlHashFree(ctxt->attsSpecial, NULL);
14883 ctxt->attsSpecial = NULL;
14886 #ifdef LIBXML_CATALOG_ENABLED
14887 if (ctxt->catalogs != NULL)
14888 xmlCatalogFreeLocal(ctxt->catalogs);
14889 #endif
14890 if (ctxt->lastError.code != XML_ERR_OK)
14891 xmlResetError(&ctxt->lastError);
14895 * xmlCtxtResetPush:
14896 * @ctxt: an XML parser context
14897 * @chunk: a pointer to an array of chars
14898 * @size: number of chars in the array
14899 * @filename: an optional file name or URI
14900 * @encoding: the document encoding, or NULL
14902 * Reset a push parser context
14904 * Returns 0 in case of success and 1 in case of error
14907 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14908 int size, const char *filename, const char *encoding)
14910 xmlParserInputPtr inputStream;
14911 xmlParserInputBufferPtr buf;
14912 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14914 if (ctxt == NULL)
14915 return(1);
14917 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14918 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14920 buf = xmlAllocParserInputBuffer(enc);
14921 if (buf == NULL)
14922 return(1);
14924 if (ctxt == NULL) {
14925 xmlFreeParserInputBuffer(buf);
14926 return(1);
14929 xmlCtxtReset(ctxt);
14931 if (filename == NULL) {
14932 ctxt->directory = NULL;
14933 } else {
14934 ctxt->directory = xmlParserGetDirectory(filename);
14937 inputStream = xmlNewInputStream(ctxt);
14938 if (inputStream == NULL) {
14939 xmlFreeParserInputBuffer(buf);
14940 return(1);
14943 if (filename == NULL)
14944 inputStream->filename = NULL;
14945 else
14946 inputStream->filename = (char *)
14947 xmlCanonicPath((const xmlChar *) filename);
14948 inputStream->buf = buf;
14949 xmlBufResetInput(buf->buffer, inputStream);
14951 inputPush(ctxt, inputStream);
14953 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14954 (ctxt->input->buf != NULL)) {
14955 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14956 size_t cur = ctxt->input->cur - ctxt->input->base;
14958 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14960 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14961 #ifdef DEBUG_PUSH
14962 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14963 #endif
14966 if (encoding != NULL) {
14967 xmlCharEncodingHandlerPtr hdlr;
14969 if (ctxt->encoding != NULL)
14970 xmlFree((xmlChar *) ctxt->encoding);
14971 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14973 hdlr = xmlFindCharEncodingHandler(encoding);
14974 if (hdlr != NULL) {
14975 xmlSwitchToEncoding(ctxt, hdlr);
14976 } else {
14977 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14978 "Unsupported encoding %s\n", BAD_CAST encoding);
14980 } else if (enc != XML_CHAR_ENCODING_NONE) {
14981 xmlSwitchEncoding(ctxt, enc);
14984 return(0);
14989 * xmlCtxtUseOptionsInternal:
14990 * @ctxt: an XML parser context
14991 * @options: a combination of xmlParserOption
14992 * @encoding: the user provided encoding to use
14994 * Applies the options to the parser context
14996 * Returns 0 in case of success, the set of unknown or unimplemented options
14997 * in case of error.
14999 static int
15000 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15002 if (ctxt == NULL)
15003 return(-1);
15004 if (encoding != NULL) {
15005 if (ctxt->encoding != NULL)
15006 xmlFree((xmlChar *) ctxt->encoding);
15007 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15009 if (options & XML_PARSE_RECOVER) {
15010 ctxt->recovery = 1;
15011 options -= XML_PARSE_RECOVER;
15012 ctxt->options |= XML_PARSE_RECOVER;
15013 } else
15014 ctxt->recovery = 0;
15015 if (options & XML_PARSE_DTDLOAD) {
15016 ctxt->loadsubset = XML_DETECT_IDS;
15017 options -= XML_PARSE_DTDLOAD;
15018 ctxt->options |= XML_PARSE_DTDLOAD;
15019 } else
15020 ctxt->loadsubset = 0;
15021 if (options & XML_PARSE_DTDATTR) {
15022 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15023 options -= XML_PARSE_DTDATTR;
15024 ctxt->options |= XML_PARSE_DTDATTR;
15026 if (options & XML_PARSE_NOENT) {
15027 ctxt->replaceEntities = 1;
15028 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15029 options -= XML_PARSE_NOENT;
15030 ctxt->options |= XML_PARSE_NOENT;
15031 } else
15032 ctxt->replaceEntities = 0;
15033 if (options & XML_PARSE_PEDANTIC) {
15034 ctxt->pedantic = 1;
15035 options -= XML_PARSE_PEDANTIC;
15036 ctxt->options |= XML_PARSE_PEDANTIC;
15037 } else
15038 ctxt->pedantic = 0;
15039 if (options & XML_PARSE_NOBLANKS) {
15040 ctxt->keepBlanks = 0;
15041 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15042 options -= XML_PARSE_NOBLANKS;
15043 ctxt->options |= XML_PARSE_NOBLANKS;
15044 } else
15045 ctxt->keepBlanks = 1;
15046 if (options & XML_PARSE_DTDVALID) {
15047 ctxt->validate = 1;
15048 if (options & XML_PARSE_NOWARNING)
15049 ctxt->vctxt.warning = NULL;
15050 if (options & XML_PARSE_NOERROR)
15051 ctxt->vctxt.error = NULL;
15052 options -= XML_PARSE_DTDVALID;
15053 ctxt->options |= XML_PARSE_DTDVALID;
15054 } else
15055 ctxt->validate = 0;
15056 if (options & XML_PARSE_NOWARNING) {
15057 ctxt->sax->warning = NULL;
15058 options -= XML_PARSE_NOWARNING;
15060 if (options & XML_PARSE_NOERROR) {
15061 ctxt->sax->error = NULL;
15062 ctxt->sax->fatalError = NULL;
15063 options -= XML_PARSE_NOERROR;
15065 #ifdef LIBXML_SAX1_ENABLED
15066 if (options & XML_PARSE_SAX1) {
15067 ctxt->sax->startElement = xmlSAX2StartElement;
15068 ctxt->sax->endElement = xmlSAX2EndElement;
15069 ctxt->sax->startElementNs = NULL;
15070 ctxt->sax->endElementNs = NULL;
15071 ctxt->sax->initialized = 1;
15072 options -= XML_PARSE_SAX1;
15073 ctxt->options |= XML_PARSE_SAX1;
15075 #endif /* LIBXML_SAX1_ENABLED */
15076 if (options & XML_PARSE_NODICT) {
15077 ctxt->dictNames = 0;
15078 options -= XML_PARSE_NODICT;
15079 ctxt->options |= XML_PARSE_NODICT;
15080 } else {
15081 ctxt->dictNames = 1;
15083 if (options & XML_PARSE_NOCDATA) {
15084 ctxt->sax->cdataBlock = NULL;
15085 options -= XML_PARSE_NOCDATA;
15086 ctxt->options |= XML_PARSE_NOCDATA;
15088 if (options & XML_PARSE_NSCLEAN) {
15089 ctxt->options |= XML_PARSE_NSCLEAN;
15090 options -= XML_PARSE_NSCLEAN;
15092 if (options & XML_PARSE_NONET) {
15093 ctxt->options |= XML_PARSE_NONET;
15094 options -= XML_PARSE_NONET;
15096 if (options & XML_PARSE_COMPACT) {
15097 ctxt->options |= XML_PARSE_COMPACT;
15098 options -= XML_PARSE_COMPACT;
15100 if (options & XML_PARSE_OLD10) {
15101 ctxt->options |= XML_PARSE_OLD10;
15102 options -= XML_PARSE_OLD10;
15104 if (options & XML_PARSE_NOBASEFIX) {
15105 ctxt->options |= XML_PARSE_NOBASEFIX;
15106 options -= XML_PARSE_NOBASEFIX;
15108 if (options & XML_PARSE_HUGE) {
15109 ctxt->options |= XML_PARSE_HUGE;
15110 options -= XML_PARSE_HUGE;
15111 if (ctxt->dict != NULL)
15112 xmlDictSetLimit(ctxt->dict, 0);
15114 if (options & XML_PARSE_OLDSAX) {
15115 ctxt->options |= XML_PARSE_OLDSAX;
15116 options -= XML_PARSE_OLDSAX;
15118 if (options & XML_PARSE_IGNORE_ENC) {
15119 ctxt->options |= XML_PARSE_IGNORE_ENC;
15120 options -= XML_PARSE_IGNORE_ENC;
15122 if (options & XML_PARSE_BIG_LINES) {
15123 ctxt->options |= XML_PARSE_BIG_LINES;
15124 options -= XML_PARSE_BIG_LINES;
15126 ctxt->linenumbers = 1;
15127 return (options);
15131 * xmlCtxtUseOptions:
15132 * @ctxt: an XML parser context
15133 * @options: a combination of xmlParserOption
15135 * Applies the options to the parser context
15137 * Returns 0 in case of success, the set of unknown or unimplemented options
15138 * in case of error.
15141 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15143 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15147 * xmlDoRead:
15148 * @ctxt: an XML parser context
15149 * @URL: the base URL to use for the document
15150 * @encoding: the document encoding, or NULL
15151 * @options: a combination of xmlParserOption
15152 * @reuse: keep the context for reuse
15154 * Common front-end for the xmlRead functions
15156 * Returns the resulting document tree or NULL
15158 static xmlDocPtr
15159 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15160 int options, int reuse)
15162 xmlDocPtr ret;
15164 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15165 if (encoding != NULL) {
15166 xmlCharEncodingHandlerPtr hdlr;
15168 hdlr = xmlFindCharEncodingHandler(encoding);
15169 if (hdlr != NULL)
15170 xmlSwitchToEncoding(ctxt, hdlr);
15172 if ((URL != NULL) && (ctxt->input != NULL) &&
15173 (ctxt->input->filename == NULL))
15174 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15175 xmlParseDocument(ctxt);
15176 if ((ctxt->wellFormed) || ctxt->recovery)
15177 ret = ctxt->myDoc;
15178 else {
15179 ret = NULL;
15180 if (ctxt->myDoc != NULL) {
15181 xmlFreeDoc(ctxt->myDoc);
15184 ctxt->myDoc = NULL;
15185 if (!reuse) {
15186 xmlFreeParserCtxt(ctxt);
15189 return (ret);
15193 * xmlReadDoc:
15194 * @cur: a pointer to a zero terminated string
15195 * @URL: the base URL to use for the document
15196 * @encoding: the document encoding, or NULL
15197 * @options: a combination of xmlParserOption
15199 * parse an XML in-memory document and build a tree.
15201 * Returns the resulting document tree
15203 xmlDocPtr
15204 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15206 xmlParserCtxtPtr ctxt;
15208 if (cur == NULL)
15209 return (NULL);
15210 xmlInitParser();
15212 ctxt = xmlCreateDocParserCtxt(cur);
15213 if (ctxt == NULL)
15214 return (NULL);
15215 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15219 * xmlReadFile:
15220 * @filename: a file or URL
15221 * @encoding: the document encoding, or NULL
15222 * @options: a combination of xmlParserOption
15224 * parse an XML file from the filesystem or the network.
15226 * Returns the resulting document tree
15228 xmlDocPtr
15229 xmlReadFile(const char *filename, const char *encoding, int options)
15231 xmlParserCtxtPtr ctxt;
15233 xmlInitParser();
15234 ctxt = xmlCreateURLParserCtxt(filename, options);
15235 if (ctxt == NULL)
15236 return (NULL);
15237 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15241 * xmlReadMemory:
15242 * @buffer: a pointer to a char array
15243 * @size: the size of the array
15244 * @URL: the base URL to use for the document
15245 * @encoding: the document encoding, or NULL
15246 * @options: a combination of xmlParserOption
15248 * parse an XML in-memory document and build a tree.
15250 * Returns the resulting document tree
15252 xmlDocPtr
15253 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15255 xmlParserCtxtPtr ctxt;
15257 xmlInitParser();
15258 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15259 if (ctxt == NULL)
15260 return (NULL);
15261 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15265 * xmlReadFd:
15266 * @fd: an open file descriptor
15267 * @URL: the base URL to use for the document
15268 * @encoding: the document encoding, or NULL
15269 * @options: a combination of xmlParserOption
15271 * parse an XML from a file descriptor and build a tree.
15272 * NOTE that the file descriptor will not be closed when the
15273 * reader is closed or reset.
15275 * Returns the resulting document tree
15277 xmlDocPtr
15278 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15280 xmlParserCtxtPtr ctxt;
15281 xmlParserInputBufferPtr input;
15282 xmlParserInputPtr stream;
15284 if (fd < 0)
15285 return (NULL);
15286 xmlInitParser();
15288 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15289 if (input == NULL)
15290 return (NULL);
15291 input->closecallback = NULL;
15292 ctxt = xmlNewParserCtxt();
15293 if (ctxt == NULL) {
15294 xmlFreeParserInputBuffer(input);
15295 return (NULL);
15297 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15298 if (stream == NULL) {
15299 xmlFreeParserInputBuffer(input);
15300 xmlFreeParserCtxt(ctxt);
15301 return (NULL);
15303 inputPush(ctxt, stream);
15304 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15308 * xmlReadIO:
15309 * @ioread: an I/O read function
15310 * @ioclose: an I/O close function
15311 * @ioctx: an I/O handler
15312 * @URL: the base URL to use for the document
15313 * @encoding: the document encoding, or NULL
15314 * @options: a combination of xmlParserOption
15316 * parse an XML document from I/O functions and source and build a tree.
15318 * Returns the resulting document tree
15320 xmlDocPtr
15321 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15322 void *ioctx, const char *URL, const char *encoding, int options)
15324 xmlParserCtxtPtr ctxt;
15325 xmlParserInputBufferPtr input;
15326 xmlParserInputPtr stream;
15328 if (ioread == NULL)
15329 return (NULL);
15330 xmlInitParser();
15332 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15333 XML_CHAR_ENCODING_NONE);
15334 if (input == NULL) {
15335 if (ioclose != NULL)
15336 ioclose(ioctx);
15337 return (NULL);
15339 ctxt = xmlNewParserCtxt();
15340 if (ctxt == NULL) {
15341 xmlFreeParserInputBuffer(input);
15342 return (NULL);
15344 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15345 if (stream == NULL) {
15346 xmlFreeParserInputBuffer(input);
15347 xmlFreeParserCtxt(ctxt);
15348 return (NULL);
15350 inputPush(ctxt, stream);
15351 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15355 * xmlCtxtReadDoc:
15356 * @ctxt: an XML parser context
15357 * @cur: a pointer to a zero terminated string
15358 * @URL: the base URL to use for the document
15359 * @encoding: the document encoding, or NULL
15360 * @options: a combination of xmlParserOption
15362 * parse an XML in-memory document and build a tree.
15363 * This reuses the existing @ctxt parser context
15365 * Returns the resulting document tree
15367 xmlDocPtr
15368 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15369 const char *URL, const char *encoding, int options)
15371 if (cur == NULL)
15372 return (NULL);
15373 return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15374 encoding, options));
15378 * xmlCtxtReadFile:
15379 * @ctxt: an XML parser context
15380 * @filename: a file or URL
15381 * @encoding: the document encoding, or NULL
15382 * @options: a combination of xmlParserOption
15384 * parse an XML file from the filesystem or the network.
15385 * This reuses the existing @ctxt parser context
15387 * Returns the resulting document tree
15389 xmlDocPtr
15390 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15391 const char *encoding, int options)
15393 xmlParserInputPtr stream;
15395 if (filename == NULL)
15396 return (NULL);
15397 if (ctxt == NULL)
15398 return (NULL);
15399 xmlInitParser();
15401 xmlCtxtReset(ctxt);
15403 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15404 if (stream == NULL) {
15405 return (NULL);
15407 inputPush(ctxt, stream);
15408 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15412 * xmlCtxtReadMemory:
15413 * @ctxt: an XML parser context
15414 * @buffer: a pointer to a char array
15415 * @size: the size of the array
15416 * @URL: the base URL to use for the document
15417 * @encoding: the document encoding, or NULL
15418 * @options: a combination of xmlParserOption
15420 * parse an XML in-memory document and build a tree.
15421 * This reuses the existing @ctxt parser context
15423 * Returns the resulting document tree
15425 xmlDocPtr
15426 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15427 const char *URL, const char *encoding, int options)
15429 xmlParserInputBufferPtr input;
15430 xmlParserInputPtr stream;
15432 if (ctxt == NULL)
15433 return (NULL);
15434 if (buffer == NULL)
15435 return (NULL);
15436 xmlInitParser();
15438 xmlCtxtReset(ctxt);
15440 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15441 if (input == NULL) {
15442 return(NULL);
15445 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15446 if (stream == NULL) {
15447 xmlFreeParserInputBuffer(input);
15448 return(NULL);
15451 inputPush(ctxt, stream);
15452 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15456 * xmlCtxtReadFd:
15457 * @ctxt: an XML parser context
15458 * @fd: an open file descriptor
15459 * @URL: the base URL to use for the document
15460 * @encoding: the document encoding, or NULL
15461 * @options: a combination of xmlParserOption
15463 * parse an XML from a file descriptor and build a tree.
15464 * This reuses the existing @ctxt parser context
15465 * NOTE that the file descriptor will not be closed when the
15466 * reader is closed or reset.
15468 * Returns the resulting document tree
15470 xmlDocPtr
15471 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15472 const char *URL, const char *encoding, int options)
15474 xmlParserInputBufferPtr input;
15475 xmlParserInputPtr stream;
15477 if (fd < 0)
15478 return (NULL);
15479 if (ctxt == NULL)
15480 return (NULL);
15481 xmlInitParser();
15483 xmlCtxtReset(ctxt);
15486 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15487 if (input == NULL)
15488 return (NULL);
15489 input->closecallback = NULL;
15490 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15491 if (stream == NULL) {
15492 xmlFreeParserInputBuffer(input);
15493 return (NULL);
15495 inputPush(ctxt, stream);
15496 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15500 * xmlCtxtReadIO:
15501 * @ctxt: an XML parser context
15502 * @ioread: an I/O read function
15503 * @ioclose: an I/O close function
15504 * @ioctx: an I/O handler
15505 * @URL: the base URL to use for the document
15506 * @encoding: the document encoding, or NULL
15507 * @options: a combination of xmlParserOption
15509 * parse an XML document from I/O functions and source and build a tree.
15510 * This reuses the existing @ctxt parser context
15512 * Returns the resulting document tree
15514 xmlDocPtr
15515 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15516 xmlInputCloseCallback ioclose, void *ioctx,
15517 const char *URL,
15518 const char *encoding, int options)
15520 xmlParserInputBufferPtr input;
15521 xmlParserInputPtr stream;
15523 if (ioread == NULL)
15524 return (NULL);
15525 if (ctxt == NULL)
15526 return (NULL);
15527 xmlInitParser();
15529 xmlCtxtReset(ctxt);
15531 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15532 XML_CHAR_ENCODING_NONE);
15533 if (input == NULL) {
15534 if (ioclose != NULL)
15535 ioclose(ioctx);
15536 return (NULL);
15538 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15539 if (stream == NULL) {
15540 xmlFreeParserInputBuffer(input);
15541 return (NULL);
15543 inputPush(ctxt, stream);
15544 return (xmlDoRead(ctxt, URL, encoding, options, 1));