2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
28 * See Copyright for the status of this software.
33 /* To avoid EBCDIC trouble when parsing on zOS */
35 #pragma convert("ISO8859-1")
42 #define XML_DIR_SEP '\\'
44 #define XML_DIR_SEP '/'
54 #include <libxml/xmlmemory.h>
55 #include <libxml/threads.h>
56 #include <libxml/globals.h>
57 #include <libxml/tree.h>
58 #include <libxml/parser.h>
59 #include <libxml/parserInternals.h>
60 #include <libxml/valid.h>
61 #include <libxml/entities.h>
62 #include <libxml/xmlerror.h>
63 #include <libxml/encoding.h>
64 #include <libxml/xmlIO.h>
65 #include <libxml/uri.h>
66 #ifdef LIBXML_CATALOG_ENABLED
67 #include <libxml/catalog.h>
69 #ifdef LIBXML_SCHEMAS_ENABLED
70 #include <libxml/xmlschemastypes.h>
71 #include <libxml/relaxng.h>
78 const xmlChar
*prefix
;
85 xmlFatalErr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
, const char *info
);
87 static xmlParserCtxtPtr
88 xmlCreateEntityParserCtxtInternal(const xmlChar
*URL
, const xmlChar
*ID
,
89 const xmlChar
*base
, xmlParserCtxtPtr pctx
);
91 static void xmlHaltParser(xmlParserCtxtPtr ctxt
);
94 xmlParseElementStart(xmlParserCtxtPtr ctxt
);
97 xmlParseElementEnd(xmlParserCtxtPtr ctxt
);
99 /************************************************************************
101 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
103 ************************************************************************/
105 #define XML_MAX_HUGE_LENGTH 1000000000
107 #define XML_PARSER_BIG_ENTITY 1000
108 #define XML_PARSER_LOT_ENTITY 5000
111 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
112 * replacement over the size in byte of the input indicates that you have
113 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
114 * replacements per byte of input.
116 #define XML_PARSER_NON_LINEAR 10
119 * xmlParserEntityCheck
121 * Function to check non-linear entity expansion behaviour
122 * This is here to detect and stop exponential (non-linear) entity expansion
123 * This is not a limitation of the parser but a safety
124 * boundary feature. It can be disabled with the XML_PARSE_HUGE
128 xmlParserEntityCheck(xmlParserCtxtPtr ctxt
, size_t size
,
129 xmlEntityPtr ent
, size_t replacement
)
134 if ((ctxt
== NULL
) || (ctxt
->options
& XML_PARSE_HUGE
))
136 if (ctxt
->lastError
.code
== XML_ERR_ENTITY_LOOP
)
140 * This may look absurd but is needed to detect
143 if ((ent
!= NULL
) && (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) &&
144 (ent
->content
!= NULL
) && (ent
->checked
== 0) &&
145 (ctxt
->errNo
!= XML_ERR_ENTITY_LOOP
)) {
146 unsigned long oldnbent
= ctxt
->nbentities
, diff
;
152 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
,
153 XML_SUBSTITUTE_REF
, 0, 0, 0);
155 if ((rep
== NULL
) || (ctxt
->errNo
== XML_ERR_ENTITY_LOOP
)) {
159 diff
= ctxt
->nbentities
- oldnbent
+ 1;
160 if (diff
> INT_MAX
/ 2)
162 ent
->checked
= diff
* 2;
164 if (xmlStrchr(rep
, '<'))
172 * Prevent entity exponential check, not just replacement while
174 * The check is potentially costly so do that only once in a thousand
176 if ((ctxt
->instate
== XML_PARSER_DTD
) && (ctxt
->nbentities
> 10000) &&
177 (ctxt
->nbentities
% 1024 == 0)) {
178 for (i
= 0;i
< ctxt
->inputNr
;i
++) {
179 consumed
+= ctxt
->inputTab
[i
]->consumed
+
180 (ctxt
->inputTab
[i
]->cur
- ctxt
->inputTab
[i
]->base
);
182 if (ctxt
->nbentities
> consumed
* XML_PARSER_NON_LINEAR
) {
183 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
184 ctxt
->instate
= XML_PARSER_EOF
;
192 if (replacement
!= 0) {
193 if (replacement
< XML_MAX_TEXT_LENGTH
)
197 * If the volume of entity copy reaches 10 times the
198 * amount of parsed data and over the large text threshold
199 * then that's very likely to be an abuse.
201 if (ctxt
->input
!= NULL
) {
202 consumed
= ctxt
->input
->consumed
+
203 (ctxt
->input
->cur
- ctxt
->input
->base
);
205 consumed
+= ctxt
->sizeentities
;
207 if (replacement
< XML_PARSER_NON_LINEAR
* consumed
)
209 } else if (size
!= 0) {
211 * Do the check based on the replacement size of the entity
213 if (size
< XML_PARSER_BIG_ENTITY
)
217 * A limit on the amount of text data reasonably used
219 if (ctxt
->input
!= NULL
) {
220 consumed
= ctxt
->input
->consumed
+
221 (ctxt
->input
->cur
- ctxt
->input
->base
);
223 consumed
+= ctxt
->sizeentities
;
225 if ((size
< XML_PARSER_NON_LINEAR
* consumed
) &&
226 (ctxt
->nbentities
* 3 < XML_PARSER_NON_LINEAR
* consumed
))
228 } else if (ent
!= NULL
) {
230 * use the number of parsed entities in the replacement
232 size
= ent
->checked
/ 2;
235 * The amount of data parsed counting entities size only once
237 if (ctxt
->input
!= NULL
) {
238 consumed
= ctxt
->input
->consumed
+
239 (ctxt
->input
->cur
- ctxt
->input
->base
);
241 consumed
+= ctxt
->sizeentities
;
244 * Check the density of entities for the amount of data
245 * knowing an entity reference will take at least 3 bytes
247 if (size
* 3 < consumed
* XML_PARSER_NON_LINEAR
)
251 * strange we got no data for checking
253 if (((ctxt
->lastError
.code
!= XML_ERR_UNDECLARED_ENTITY
) &&
254 (ctxt
->lastError
.code
!= XML_WAR_UNDECLARED_ENTITY
)) ||
255 (ctxt
->nbentities
<= 10000))
258 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
265 * arbitrary depth limit for the XML documents that we allow to
266 * process. This is not a limitation of the parser but a safety
267 * boundary feature. It can be disabled with the XML_PARSE_HUGE
270 unsigned int xmlParserMaxDepth
= 256;
275 #define XML_PARSER_BIG_BUFFER_SIZE 300
276 #define XML_PARSER_BUFFER_SIZE 100
277 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
280 * XML_PARSER_CHUNK_SIZE
282 * When calling GROW that's the minimal amount of data
283 * the parser expects to have received. It is not a hard
284 * limit but an optimization when reading strings like Names.
285 * It is not strictly needed as long as the input's available characters
286 * are followed by 0, which should be provided by the I/O level.
288 #define XML_PARSER_CHUNK_SIZE 100
291 * List of XML prefixed PI allowed by W3C specs
294 static const char* const xmlW3CPIs
[] = {
301 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
302 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt
,
303 const xmlChar
**str
);
305 static xmlParserErrors
306 xmlParseExternalEntityPrivate(xmlDocPtr doc
, xmlParserCtxtPtr oldctxt
,
307 xmlSAXHandlerPtr sax
,
308 void *user_data
, int depth
, const xmlChar
*URL
,
309 const xmlChar
*ID
, xmlNodePtr
*list
);
312 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt
, int options
,
313 const char *encoding
);
314 #ifdef LIBXML_LEGACY_ENABLED
316 xmlAddEntityReference(xmlEntityPtr ent
, xmlNodePtr firstNode
,
317 xmlNodePtr lastNode
);
318 #endif /* LIBXML_LEGACY_ENABLED */
320 static xmlParserErrors
321 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt
,
322 const xmlChar
*string
, void *user_data
, xmlNodePtr
*lst
);
325 xmlLoadEntityContent(xmlParserCtxtPtr ctxt
, xmlEntityPtr entity
);
327 /************************************************************************
329 * Some factorized error routines *
331 ************************************************************************/
334 * xmlErrAttributeDup:
335 * @ctxt: an XML parser context
336 * @prefix: the attribute prefix
337 * @localname: the attribute localname
339 * Handle a redefinition of attribute error
342 xmlErrAttributeDup(xmlParserCtxtPtr ctxt
, const xmlChar
* prefix
,
343 const xmlChar
* localname
)
345 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
346 (ctxt
->instate
== XML_PARSER_EOF
))
349 ctxt
->errNo
= XML_ERR_ATTRIBUTE_REDEFINED
;
352 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
,
353 XML_ERR_ATTRIBUTE_REDEFINED
, XML_ERR_FATAL
, NULL
, 0,
354 (const char *) localname
, NULL
, NULL
, 0, 0,
355 "Attribute %s redefined\n", localname
);
357 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
,
358 XML_ERR_ATTRIBUTE_REDEFINED
, XML_ERR_FATAL
, NULL
, 0,
359 (const char *) prefix
, (const char *) localname
,
360 NULL
, 0, 0, "Attribute %s:%s redefined\n", prefix
,
363 ctxt
->wellFormed
= 0;
364 if (ctxt
->recovery
== 0)
365 ctxt
->disableSAX
= 1;
371 * @ctxt: an XML parser context
372 * @error: the error number
373 * @extra: extra information string
375 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378 xmlFatalErr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
, const char *info
)
382 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
383 (ctxt
->instate
== XML_PARSER_EOF
))
386 case XML_ERR_INVALID_HEX_CHARREF
:
387 errmsg
= "CharRef: invalid hexadecimal value";
389 case XML_ERR_INVALID_DEC_CHARREF
:
390 errmsg
= "CharRef: invalid decimal value";
392 case XML_ERR_INVALID_CHARREF
:
393 errmsg
= "CharRef: invalid value";
395 case XML_ERR_INTERNAL_ERROR
:
396 errmsg
= "internal error";
398 case XML_ERR_PEREF_AT_EOF
:
399 errmsg
= "PEReference at end of document";
401 case XML_ERR_PEREF_IN_PROLOG
:
402 errmsg
= "PEReference in prolog";
404 case XML_ERR_PEREF_IN_EPILOG
:
405 errmsg
= "PEReference in epilog";
407 case XML_ERR_PEREF_NO_NAME
:
408 errmsg
= "PEReference: no name";
410 case XML_ERR_PEREF_SEMICOL_MISSING
:
411 errmsg
= "PEReference: expecting ';'";
413 case XML_ERR_ENTITY_LOOP
:
414 errmsg
= "Detected an entity reference loop";
416 case XML_ERR_ENTITY_NOT_STARTED
:
417 errmsg
= "EntityValue: \" or ' expected";
419 case XML_ERR_ENTITY_PE_INTERNAL
:
420 errmsg
= "PEReferences forbidden in internal subset";
422 case XML_ERR_ENTITY_NOT_FINISHED
:
423 errmsg
= "EntityValue: \" or ' expected";
425 case XML_ERR_ATTRIBUTE_NOT_STARTED
:
426 errmsg
= "AttValue: \" or ' expected";
428 case XML_ERR_LT_IN_ATTRIBUTE
:
429 errmsg
= "Unescaped '<' not allowed in attributes values";
431 case XML_ERR_LITERAL_NOT_STARTED
:
432 errmsg
= "SystemLiteral \" or ' expected";
434 case XML_ERR_LITERAL_NOT_FINISHED
:
435 errmsg
= "Unfinished System or Public ID \" or ' expected";
437 case XML_ERR_MISPLACED_CDATA_END
:
438 errmsg
= "Sequence ']]>' not allowed in content";
440 case XML_ERR_URI_REQUIRED
:
441 errmsg
= "SYSTEM or PUBLIC, the URI is missing";
443 case XML_ERR_PUBID_REQUIRED
:
444 errmsg
= "PUBLIC, the Public Identifier is missing";
446 case XML_ERR_HYPHEN_IN_COMMENT
:
447 errmsg
= "Comment must not contain '--' (double-hyphen)";
449 case XML_ERR_PI_NOT_STARTED
:
450 errmsg
= "xmlParsePI : no target name";
452 case XML_ERR_RESERVED_XML_NAME
:
453 errmsg
= "Invalid PI name";
455 case XML_ERR_NOTATION_NOT_STARTED
:
456 errmsg
= "NOTATION: Name expected here";
458 case XML_ERR_NOTATION_NOT_FINISHED
:
459 errmsg
= "'>' required to close NOTATION declaration";
461 case XML_ERR_VALUE_REQUIRED
:
462 errmsg
= "Entity value required";
464 case XML_ERR_URI_FRAGMENT
:
465 errmsg
= "Fragment not allowed";
467 case XML_ERR_ATTLIST_NOT_STARTED
:
468 errmsg
= "'(' required to start ATTLIST enumeration";
470 case XML_ERR_NMTOKEN_REQUIRED
:
471 errmsg
= "NmToken expected in ATTLIST enumeration";
473 case XML_ERR_ATTLIST_NOT_FINISHED
:
474 errmsg
= "')' required to finish ATTLIST enumeration";
476 case XML_ERR_MIXED_NOT_STARTED
:
477 errmsg
= "MixedContentDecl : '|' or ')*' expected";
479 case XML_ERR_PCDATA_REQUIRED
:
480 errmsg
= "MixedContentDecl : '#PCDATA' expected";
482 case XML_ERR_ELEMCONTENT_NOT_STARTED
:
483 errmsg
= "ContentDecl : Name or '(' expected";
485 case XML_ERR_ELEMCONTENT_NOT_FINISHED
:
486 errmsg
= "ContentDecl : ',' '|' or ')' expected";
488 case XML_ERR_PEREF_IN_INT_SUBSET
:
490 "PEReference: forbidden within markup decl in internal subset";
492 case XML_ERR_GT_REQUIRED
:
493 errmsg
= "expected '>'";
495 case XML_ERR_CONDSEC_INVALID
:
496 errmsg
= "XML conditional section '[' expected";
498 case XML_ERR_EXT_SUBSET_NOT_FINISHED
:
499 errmsg
= "Content error in the external subset";
501 case XML_ERR_CONDSEC_INVALID_KEYWORD
:
503 "conditional section INCLUDE or IGNORE keyword expected";
505 case XML_ERR_CONDSEC_NOT_FINISHED
:
506 errmsg
= "XML conditional section not closed";
508 case XML_ERR_XMLDECL_NOT_STARTED
:
509 errmsg
= "Text declaration '<?xml' required";
511 case XML_ERR_XMLDECL_NOT_FINISHED
:
512 errmsg
= "parsing XML declaration: '?>' expected";
514 case XML_ERR_EXT_ENTITY_STANDALONE
:
515 errmsg
= "external parsed entities cannot be standalone";
517 case XML_ERR_ENTITYREF_SEMICOL_MISSING
:
518 errmsg
= "EntityRef: expecting ';'";
520 case XML_ERR_DOCTYPE_NOT_FINISHED
:
521 errmsg
= "DOCTYPE improperly terminated";
523 case XML_ERR_LTSLASH_REQUIRED
:
524 errmsg
= "EndTag: '</' not found";
526 case XML_ERR_EQUAL_REQUIRED
:
527 errmsg
= "expected '='";
529 case XML_ERR_STRING_NOT_CLOSED
:
530 errmsg
= "String not closed expecting \" or '";
532 case XML_ERR_STRING_NOT_STARTED
:
533 errmsg
= "String not started expecting ' or \"";
535 case XML_ERR_ENCODING_NAME
:
536 errmsg
= "Invalid XML encoding name";
538 case XML_ERR_STANDALONE_VALUE
:
539 errmsg
= "standalone accepts only 'yes' or 'no'";
541 case XML_ERR_DOCUMENT_EMPTY
:
542 errmsg
= "Document is empty";
544 case XML_ERR_DOCUMENT_END
:
545 errmsg
= "Extra content at the end of the document";
547 case XML_ERR_NOT_WELL_BALANCED
:
548 errmsg
= "chunk is not well balanced";
550 case XML_ERR_EXTRA_CONTENT
:
551 errmsg
= "extra content at the end of well balanced chunk";
553 case XML_ERR_VERSION_MISSING
:
554 errmsg
= "Malformed declaration expecting version";
556 case XML_ERR_NAME_TOO_LONG
:
557 errmsg
= "Name too long";
565 errmsg
= "Unregistered error message";
570 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
, error
,
571 XML_ERR_FATAL
, NULL
, 0, info
, NULL
, NULL
, 0, 0, "%s\n",
574 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
, error
,
575 XML_ERR_FATAL
, NULL
, 0, info
, NULL
, NULL
, 0, 0, "%s: %s\n",
579 ctxt
->wellFormed
= 0;
580 if (ctxt
->recovery
== 0)
581 ctxt
->disableSAX
= 1;
587 * @ctxt: an XML parser context
588 * @error: the error number
589 * @msg: the error message
591 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
593 static void LIBXML_ATTR_FORMAT(3,0)
594 xmlFatalErrMsg(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
597 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
598 (ctxt
->instate
== XML_PARSER_EOF
))
602 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
, error
,
603 XML_ERR_FATAL
, NULL
, 0, NULL
, NULL
, NULL
, 0, 0, "%s", msg
);
605 ctxt
->wellFormed
= 0;
606 if (ctxt
->recovery
== 0)
607 ctxt
->disableSAX
= 1;
613 * @ctxt: an XML parser context
614 * @error: the error number
615 * @msg: the error message
621 static void LIBXML_ATTR_FORMAT(3,0)
622 xmlWarningMsg(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
623 const char *msg
, const xmlChar
*str1
, const xmlChar
*str2
)
625 xmlStructuredErrorFunc schannel
= NULL
;
627 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
628 (ctxt
->instate
== XML_PARSER_EOF
))
630 if ((ctxt
!= NULL
) && (ctxt
->sax
!= NULL
) &&
631 (ctxt
->sax
->initialized
== XML_SAX2_MAGIC
))
632 schannel
= ctxt
->sax
->serror
;
634 __xmlRaiseError(schannel
,
635 (ctxt
->sax
) ? ctxt
->sax
->warning
: NULL
,
637 ctxt
, NULL
, XML_FROM_PARSER
, error
,
638 XML_ERR_WARNING
, NULL
, 0,
639 (const char *) str1
, (const char *) str2
, NULL
, 0, 0,
640 msg
, (const char *) str1
, (const char *) str2
);
642 __xmlRaiseError(schannel
, NULL
, NULL
,
643 ctxt
, NULL
, XML_FROM_PARSER
, error
,
644 XML_ERR_WARNING
, NULL
, 0,
645 (const char *) str1
, (const char *) str2
, NULL
, 0, 0,
646 msg
, (const char *) str1
, (const char *) str2
);
652 * @ctxt: an XML parser context
653 * @error: the error number
654 * @msg: the error message
657 * Handle a validity error.
659 static void LIBXML_ATTR_FORMAT(3,0)
660 xmlValidityError(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
661 const char *msg
, const xmlChar
*str1
, const xmlChar
*str2
)
663 xmlStructuredErrorFunc schannel
= NULL
;
665 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
666 (ctxt
->instate
== XML_PARSER_EOF
))
670 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->initialized
== XML_SAX2_MAGIC
))
671 schannel
= ctxt
->sax
->serror
;
674 __xmlRaiseError(schannel
,
675 ctxt
->vctxt
.error
, ctxt
->vctxt
.userData
,
676 ctxt
, NULL
, XML_FROM_DTD
, error
,
677 XML_ERR_ERROR
, NULL
, 0, (const char *) str1
,
678 (const char *) str2
, NULL
, 0, 0,
679 msg
, (const char *) str1
, (const char *) str2
);
682 __xmlRaiseError(schannel
, NULL
, NULL
,
683 ctxt
, NULL
, XML_FROM_DTD
, error
,
684 XML_ERR_ERROR
, NULL
, 0, (const char *) str1
,
685 (const char *) str2
, NULL
, 0, 0,
686 msg
, (const char *) str1
, (const char *) str2
);
692 * @ctxt: an XML parser context
693 * @error: the error number
694 * @msg: the error message
695 * @val: an integer value
697 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
699 static void LIBXML_ATTR_FORMAT(3,0)
700 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
701 const char *msg
, int val
)
703 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
704 (ctxt
->instate
== XML_PARSER_EOF
))
708 __xmlRaiseError(NULL
, NULL
, NULL
,
709 ctxt
, NULL
, XML_FROM_PARSER
, error
, XML_ERR_FATAL
,
710 NULL
, 0, NULL
, NULL
, NULL
, val
, 0, msg
, val
);
712 ctxt
->wellFormed
= 0;
713 if (ctxt
->recovery
== 0)
714 ctxt
->disableSAX
= 1;
719 * xmlFatalErrMsgStrIntStr:
720 * @ctxt: an XML parser context
721 * @error: the error number
722 * @msg: the error message
723 * @str1: a string info
724 * @val: an integer value
725 * @str2: a string info
727 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
729 static void LIBXML_ATTR_FORMAT(3,0)
730 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
731 const char *msg
, const xmlChar
*str1
, int val
,
734 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
735 (ctxt
->instate
== XML_PARSER_EOF
))
739 __xmlRaiseError(NULL
, NULL
, NULL
,
740 ctxt
, NULL
, XML_FROM_PARSER
, error
, XML_ERR_FATAL
,
741 NULL
, 0, (const char *) str1
, (const char *) str2
,
742 NULL
, val
, 0, msg
, str1
, val
, str2
);
744 ctxt
->wellFormed
= 0;
745 if (ctxt
->recovery
== 0)
746 ctxt
->disableSAX
= 1;
752 * @ctxt: an XML parser context
753 * @error: the error number
754 * @msg: the error message
755 * @val: a string value
757 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
759 static void LIBXML_ATTR_FORMAT(3,0)
760 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
761 const char *msg
, const xmlChar
* val
)
763 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
764 (ctxt
->instate
== XML_PARSER_EOF
))
768 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
,
769 XML_FROM_PARSER
, error
, XML_ERR_FATAL
,
770 NULL
, 0, (const char *) val
, NULL
, NULL
, 0, 0, msg
,
773 ctxt
->wellFormed
= 0;
774 if (ctxt
->recovery
== 0)
775 ctxt
->disableSAX
= 1;
781 * @ctxt: an XML parser context
782 * @error: the error number
783 * @msg: the error message
784 * @val: a string value
786 * Handle a non fatal parser error
788 static void LIBXML_ATTR_FORMAT(3,0)
789 xmlErrMsgStr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
790 const char *msg
, const xmlChar
* val
)
792 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
793 (ctxt
->instate
== XML_PARSER_EOF
))
797 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
,
798 XML_FROM_PARSER
, error
, XML_ERR_ERROR
,
799 NULL
, 0, (const char *) val
, NULL
, NULL
, 0, 0, msg
,
805 * @ctxt: an XML parser context
806 * @error: the error number
808 * @info1: extra information string
809 * @info2: extra information string
811 * Handle a namespace error, reported at error (non-fatal) level
813 static void LIBXML_ATTR_FORMAT(3,0)
814 xmlNsErr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
816 const xmlChar
* info1
, const xmlChar
* info2
,
817 const xmlChar
* info3
)
819 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
820 (ctxt
->instate
== XML_PARSER_EOF
))
824 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_NAMESPACE
, error
,
825 XML_ERR_ERROR
, NULL
, 0, (const char *) info1
,
826 (const char *) info2
, (const char *) info3
, 0, 0, msg
,
827 info1
, info2
, info3
);
829 ctxt
->nsWellFormed
= 0;
834 * @ctxt: an XML parser context
835 * @error: the error number
837 * @info1: extra information string
838 * @info2: extra information string
840 * Handle a namespace warning error
842 static void LIBXML_ATTR_FORMAT(3,0)
843 xmlNsWarn(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
845 const xmlChar
* info1
, const xmlChar
* info2
,
846 const xmlChar
* info3
)
848 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
849 (ctxt
->instate
== XML_PARSER_EOF
))
851 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_NAMESPACE
, error
,
852 XML_ERR_WARNING
, NULL
, 0, (const char *) info1
,
853 (const char *) info2
, (const char *) info3
, 0, 0, msg
,
854 info1
, info2
, info3
);
857 /************************************************************************
859 * Library wide options *
861 ************************************************************************/
865 * @feature: the feature to be examined
867 * Examines if the library has been compiled with a given feature.
869 * Returns a non-zero value if the feature exists, otherwise zero.
870 * Returns zero (0) if the feature does not exist or an
871 * unknown feature is requested, non-zero otherwise.
874 xmlHasFeature(xmlFeature feature
)
877 case XML_WITH_THREAD
:
878 #ifdef LIBXML_THREAD_ENABLED
884 #ifdef LIBXML_TREE_ENABLED
889 case XML_WITH_OUTPUT
:
890 #ifdef LIBXML_OUTPUT_ENABLED
896 #ifdef LIBXML_PUSH_ENABLED
901 case XML_WITH_READER
:
902 #ifdef LIBXML_READER_ENABLED
907 case XML_WITH_PATTERN
:
908 #ifdef LIBXML_PATTERN_ENABLED
913 case XML_WITH_WRITER
:
914 #ifdef LIBXML_WRITER_ENABLED
920 #ifdef LIBXML_SAX1_ENABLED
926 #ifdef LIBXML_FTP_ENABLED
932 #ifdef LIBXML_HTTP_ENABLED
938 #ifdef LIBXML_VALID_ENABLED
944 #ifdef LIBXML_HTML_ENABLED
949 case XML_WITH_LEGACY
:
950 #ifdef LIBXML_LEGACY_ENABLED
956 #ifdef LIBXML_C14N_ENABLED
961 case XML_WITH_CATALOG
:
962 #ifdef LIBXML_CATALOG_ENABLED
968 #ifdef LIBXML_XPATH_ENABLED
974 #ifdef LIBXML_XPTR_ENABLED
979 case XML_WITH_XINCLUDE
:
980 #ifdef LIBXML_XINCLUDE_ENABLED
986 #ifdef LIBXML_ICONV_ENABLED
991 case XML_WITH_ISO8859X
:
992 #ifdef LIBXML_ISO8859X_ENABLED
997 case XML_WITH_UNICODE
:
998 #ifdef LIBXML_UNICODE_ENABLED
1003 case XML_WITH_REGEXP
:
1004 #ifdef LIBXML_REGEXP_ENABLED
1009 case XML_WITH_AUTOMATA
:
1010 #ifdef LIBXML_AUTOMATA_ENABLED
1016 #ifdef LIBXML_EXPR_ENABLED
1021 case XML_WITH_SCHEMAS
:
1022 #ifdef LIBXML_SCHEMAS_ENABLED
1027 case XML_WITH_SCHEMATRON
:
1028 #ifdef LIBXML_SCHEMATRON_ENABLED
1033 case XML_WITH_MODULES
:
1034 #ifdef LIBXML_MODULES_ENABLED
1039 case XML_WITH_DEBUG
:
1040 #ifdef LIBXML_DEBUG_ENABLED
1045 case XML_WITH_DEBUG_MEM
:
1046 #ifdef DEBUG_MEMORY_LOCATION
1051 case XML_WITH_DEBUG_RUN
:
1052 #ifdef LIBXML_DEBUG_RUNTIME
1058 #ifdef LIBXML_ZLIB_ENABLED
1064 #ifdef LIBXML_LZMA_ENABLED
1070 #ifdef LIBXML_ICU_ENABLED
1081 /************************************************************************
1083 * SAX2 defaulted attributes handling *
1085 ************************************************************************/
1089 * @ctxt: an XML parser context
1091 * Do the SAX2 detection and specific initialization
1094 xmlDetectSAX2(xmlParserCtxtPtr ctxt
) {
1095 xmlSAXHandlerPtr sax
;
1097 /* Avoid unused variable warning if features are disabled. */
1100 if (ctxt
== NULL
) return;
1102 #ifdef LIBXML_SAX1_ENABLED
1103 if ((sax
) && (sax
->initialized
== XML_SAX2_MAGIC
) &&
1104 ((sax
->startElementNs
!= NULL
) ||
1105 (sax
->endElementNs
!= NULL
) ||
1106 ((sax
->startElement
== NULL
) && (sax
->endElement
== NULL
))))
1110 #endif /* LIBXML_SAX1_ENABLED */
1112 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
1113 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
1114 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
1115 if ((ctxt
->str_xml
==NULL
) || (ctxt
->str_xmlns
==NULL
) ||
1116 (ctxt
->str_xml_ns
== NULL
)) {
1117 xmlErrMemory(ctxt
, NULL
);
1121 typedef struct _xmlDefAttrs xmlDefAttrs
;
1122 typedef xmlDefAttrs
*xmlDefAttrsPtr
;
1123 struct _xmlDefAttrs
{
1124 int nbAttrs
; /* number of defaulted attributes on that element */
1125 int maxAttrs
; /* the size of the array */
1126 #if __STDC_VERSION__ >= 199901L
1127 /* Using a C99 flexible array member avoids UBSan errors. */
1128 const xmlChar
*values
[]; /* array of localname/prefix/values/external */
1130 const xmlChar
*values
[5];
1135 * xmlAttrNormalizeSpace:
1136 * @src: the source string
1137 * @dst: the target string
1139 * Normalize the space in non CDATA attribute values:
1140 * If the attribute type is not CDATA, then the XML processor MUST further
1141 * process the normalized attribute value by discarding any leading and
1142 * trailing space (#x20) characters, and by replacing sequences of space
1143 * (#x20) characters by a single space (#x20) character.
1144 * Note that the size of dst needs to be at least the size of src, and if one doesn't need
1145 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1146 * passing src as dst is just fine.
1148 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1152 xmlAttrNormalizeSpace(const xmlChar
*src
, xmlChar
*dst
)
1154 if ((src
== NULL
) || (dst
== NULL
))
1157 while (*src
== 0x20) src
++;
1160 while (*src
== 0x20) src
++;
1174 * xmlAttrNormalizeSpace2:
1175 * @src: the source string
1177 * Normalize the space in non CDATA attribute values, a slightly more complex
1178 * front end to avoid allocation problems when running on attribute values
1179 * coming from the input.
1181 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1184 static const xmlChar
*
1185 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt
, xmlChar
*src
, int *len
)
1188 int remove_head
= 0;
1189 int need_realloc
= 0;
1192 if ((ctxt
== NULL
) || (src
== NULL
) || (len
== NULL
))
1199 while (*cur
== 0x20) {
1206 if ((*cur
== 0x20) || (*cur
== 0)) {
1216 ret
= xmlStrndup(src
+ remove_head
, i
- remove_head
+ 1);
1218 xmlErrMemory(ctxt
, NULL
);
1221 xmlAttrNormalizeSpace(ret
, ret
);
1222 *len
= (int) strlen((const char *)ret
);
1224 } else if (remove_head
) {
1225 *len
-= remove_head
;
1226 memmove(src
, src
+ remove_head
, 1 + *len
);
1234 * @ctxt: an XML parser context
1235 * @fullname: the element fullname
1236 * @fullattr: the attribute fullname
1237 * @value: the attribute value
1239 * Add a defaulted attribute for an element
1242 xmlAddDefAttrs(xmlParserCtxtPtr ctxt
,
1243 const xmlChar
*fullname
,
1244 const xmlChar
*fullattr
,
1245 const xmlChar
*value
) {
1246 xmlDefAttrsPtr defaults
;
1248 const xmlChar
*name
;
1249 const xmlChar
*prefix
;
1252 * Allows to detect attribute redefinitions
1254 if (ctxt
->attsSpecial
!= NULL
) {
1255 if (xmlHashLookup2(ctxt
->attsSpecial
, fullname
, fullattr
) != NULL
)
1259 if (ctxt
->attsDefault
== NULL
) {
1260 ctxt
->attsDefault
= xmlHashCreateDict(10, ctxt
->dict
);
1261 if (ctxt
->attsDefault
== NULL
)
1266 * split the element name into prefix:localname , the string found
1267 * are within the DTD and then not associated to namespace names.
1269 name
= xmlSplitQName3(fullname
, &len
);
1271 name
= xmlDictLookup(ctxt
->dict
, fullname
, -1);
1274 name
= xmlDictLookup(ctxt
->dict
, name
, -1);
1275 prefix
= xmlDictLookup(ctxt
->dict
, fullname
, len
);
1279 * make sure there is some storage
1281 defaults
= xmlHashLookup2(ctxt
->attsDefault
, name
, prefix
);
1282 if (defaults
== NULL
) {
1283 defaults
= (xmlDefAttrsPtr
) xmlMalloc(sizeof(xmlDefAttrs
) +
1284 (4 * 5) * sizeof(const xmlChar
*));
1285 if (defaults
== NULL
)
1287 defaults
->nbAttrs
= 0;
1288 defaults
->maxAttrs
= 4;
1289 if (xmlHashUpdateEntry2(ctxt
->attsDefault
, name
, prefix
,
1290 defaults
, NULL
) < 0) {
1294 } else if (defaults
->nbAttrs
>= defaults
->maxAttrs
) {
1295 xmlDefAttrsPtr temp
;
1297 temp
= (xmlDefAttrsPtr
) xmlRealloc(defaults
, sizeof(xmlDefAttrs
) +
1298 (2 * defaults
->maxAttrs
* 5) * sizeof(const xmlChar
*));
1302 defaults
->maxAttrs
*= 2;
1303 if (xmlHashUpdateEntry2(ctxt
->attsDefault
, name
, prefix
,
1304 defaults
, NULL
) < 0) {
1311 * Split the element name into prefix:localname , the string found
1312 * are within the DTD and then not associated to namespace names.
1314 name
= xmlSplitQName3(fullattr
, &len
);
1316 name
= xmlDictLookup(ctxt
->dict
, fullattr
, -1);
1319 name
= xmlDictLookup(ctxt
->dict
, name
, -1);
1320 prefix
= xmlDictLookup(ctxt
->dict
, fullattr
, len
);
1323 defaults
->values
[5 * defaults
->nbAttrs
] = name
;
1324 defaults
->values
[5 * defaults
->nbAttrs
+ 1] = prefix
;
1325 /* intern the string and precompute the end */
1326 len
= xmlStrlen(value
);
1327 value
= xmlDictLookup(ctxt
->dict
, value
, len
);
1328 defaults
->values
[5 * defaults
->nbAttrs
+ 2] = value
;
1329 defaults
->values
[5 * defaults
->nbAttrs
+ 3] = value
+ len
;
1331 defaults
->values
[5 * defaults
->nbAttrs
+ 4] = BAD_CAST
"external";
1333 defaults
->values
[5 * defaults
->nbAttrs
+ 4] = NULL
;
1334 defaults
->nbAttrs
++;
1339 xmlErrMemory(ctxt
, NULL
);
1344 * xmlAddSpecialAttr:
1345 * @ctxt: an XML parser context
1346 * @fullname: the element fullname
1347 * @fullattr: the attribute fullname
1348 * @type: the attribute type
1350 * Register this attribute type
1353 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt
,
1354 const xmlChar
*fullname
,
1355 const xmlChar
*fullattr
,
1358 if (ctxt
->attsSpecial
== NULL
) {
1359 ctxt
->attsSpecial
= xmlHashCreateDict(10, ctxt
->dict
);
1360 if (ctxt
->attsSpecial
== NULL
)
1364 if (xmlHashLookup2(ctxt
->attsSpecial
, fullname
, fullattr
) != NULL
)
1367 xmlHashAddEntry2(ctxt
->attsSpecial
, fullname
, fullattr
,
1368 (void *) (ptrdiff_t) type
);
1372 xmlErrMemory(ctxt
, NULL
);
1377 * xmlCleanSpecialAttrCallback:
1379 * Removes CDATA attributes from the special attribute table
1382 xmlCleanSpecialAttrCallback(void *payload
, void *data
,
1383 const xmlChar
*fullname
, const xmlChar
*fullattr
,
1384 const xmlChar
*unused ATTRIBUTE_UNUSED
) {
1385 xmlParserCtxtPtr ctxt
= (xmlParserCtxtPtr
) data
;
1387 if (((ptrdiff_t) payload
) == XML_ATTRIBUTE_CDATA
) {
1388 xmlHashRemoveEntry2(ctxt
->attsSpecial
, fullname
, fullattr
, NULL
);
1393 * xmlCleanSpecialAttr:
1394 * @ctxt: an XML parser context
1396 * Trim the list of attributes defined to remove all those of type
1397 * CDATA as they are not special. This call should be done when finishing
1398 * to parse the DTD and before starting to parse the document root.
1401 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt
)
1403 if (ctxt
->attsSpecial
== NULL
)
1406 xmlHashScanFull(ctxt
->attsSpecial
, xmlCleanSpecialAttrCallback
, ctxt
);
1408 if (xmlHashSize(ctxt
->attsSpecial
) == 0) {
1409 xmlHashFree(ctxt
->attsSpecial
, NULL
);
1410 ctxt
->attsSpecial
= NULL
;
1416 * xmlCheckLanguageID:
1417 * @lang: pointer to the string value
1419 * Checks that the value conforms to the LanguageID production:
1421 * NOTE: this is somewhat deprecated, those productions were removed from
1422 * the XML Second edition.
1424 * [33] LanguageID ::= Langcode ('-' Subcode)*
1425 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1426 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1427 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1428 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1429 * [38] Subcode ::= ([a-z] | [A-Z])+
1431 * The current REC reference the successors of RFC 1766, currently 5646
1433 * http://www.rfc-editor.org/rfc/rfc5646.txt
1434 * langtag = language
1440 * language = 2*3ALPHA ; shortest ISO 639 code
1441 * ["-" extlang] ; sometimes followed by
1442 * ; extended language subtags
1443 * / 4ALPHA ; or reserved for future use
1444 * / 5*8ALPHA ; or registered language subtag
1446 * extlang = 3ALPHA ; selected ISO 639 codes
1447 * *2("-" 3ALPHA) ; permanently reserved
1449 * script = 4ALPHA ; ISO 15924 code
1451 * region = 2ALPHA ; ISO 3166-1 code
1452 * / 3DIGIT ; UN M.49 code
1454 * variant = 5*8alphanum ; registered variants
1455 * / (DIGIT 3alphanum)
1457 * extension = singleton 1*("-" (2*8alphanum))
1459 * ; Single alphanumerics
1460 * ; "x" reserved for private use
1461 * singleton = DIGIT ; 0 - 9
1467 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1468 * The parser below doesn't try to cope with extension or privateuse
1469 * that could be added but that's not interoperable anyway
1471 * Returns 1 if correct 0 otherwise
1474 xmlCheckLanguageID(const xmlChar
* lang
)
1476 const xmlChar
*cur
= lang
, *nxt
;
1480 if (((cur
[0] == 'i') && (cur
[1] == '-')) ||
1481 ((cur
[0] == 'I') && (cur
[1] == '-')) ||
1482 ((cur
[0] == 'x') && (cur
[1] == '-')) ||
1483 ((cur
[0] == 'X') && (cur
[1] == '-'))) {
1485 * Still allow IANA code and user code which were coming
1486 * from the previous version of the XML-1.0 specification
1487 * it's deprecated but we should not fail
1490 while (((cur
[0] >= 'A') && (cur
[0] <= 'Z')) ||
1491 ((cur
[0] >= 'a') && (cur
[0] <= 'z')))
1493 return(cur
[0] == 0);
1496 while (((nxt
[0] >= 'A') && (nxt
[0] <= 'Z')) ||
1497 ((nxt
[0] >= 'a') && (nxt
[0] <= 'z')))
1499 if (nxt
- cur
>= 4) {
1503 if ((nxt
- cur
> 8) || (nxt
[0] != 0))
1509 /* we got an ISO 639 code */
1517 /* now we can have extlang or script or region or variant */
1518 if ((nxt
[0] >= '0') && (nxt
[0] <= '9'))
1521 while (((nxt
[0] >= 'A') && (nxt
[0] <= 'Z')) ||
1522 ((nxt
[0] >= 'a') && (nxt
[0] <= 'z')))
1528 if ((nxt
- cur
>= 5) && (nxt
- cur
<= 8))
1532 /* we parsed an extlang */
1540 /* now we can have script or region or variant */
1541 if ((nxt
[0] >= '0') && (nxt
[0] <= '9'))
1544 while (((nxt
[0] >= 'A') && (nxt
[0] <= 'Z')) ||
1545 ((nxt
[0] >= 'a') && (nxt
[0] <= 'z')))
1549 if ((nxt
- cur
>= 5) && (nxt
- cur
<= 8))
1553 /* we parsed a script */
1562 /* now we can have region or variant */
1563 if ((nxt
[0] >= '0') && (nxt
[0] <= '9'))
1566 while (((nxt
[0] >= 'A') && (nxt
[0] <= 'Z')) ||
1567 ((nxt
[0] >= 'a') && (nxt
[0] <= 'z')))
1570 if ((nxt
- cur
>= 5) && (nxt
- cur
<= 8))
1574 /* we parsed a region */
1583 /* now we can just have a variant */
1584 while (((nxt
[0] >= 'A') && (nxt
[0] <= 'Z')) ||
1585 ((nxt
[0] >= 'a') && (nxt
[0] <= 'z')))
1588 if ((nxt
- cur
< 5) || (nxt
- cur
> 8))
1591 /* we parsed a variant */
1597 /* extensions and private use subtags not checked */
1601 if (((nxt
[1] >= '0') && (nxt
[1] <= '9')) &&
1602 ((nxt
[2] >= '0') && (nxt
[2] <= '9'))) {
1609 /************************************************************************
1611 * Parser stacks related functions and macros *
1613 ************************************************************************/
1615 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt
,
1616 const xmlChar
** str
);
1621 * @ctxt: an XML parser context
1622 * @prefix: the namespace prefix or NULL
1623 * @URL: the namespace name
1625 * Pushes a new parser namespace on top of the ns stack
1627 * Returns -1 in case of error, -2 if the namespace should be discarded
1628 * and the index in the stack otherwise.
1631 nsPush(xmlParserCtxtPtr ctxt
, const xmlChar
*prefix
, const xmlChar
*URL
)
1633 if (ctxt
->options
& XML_PARSE_NSCLEAN
) {
1635 for (i
= ctxt
->nsNr
- 2;i
>= 0;i
-= 2) {
1636 if (ctxt
->nsTab
[i
] == prefix
) {
1638 if (ctxt
->nsTab
[i
+ 1] == URL
)
1640 /* out of scope keep it */
1645 if ((ctxt
->nsMax
== 0) || (ctxt
->nsTab
== NULL
)) {
1648 ctxt
->nsTab
= (const xmlChar
**)
1649 xmlMalloc(ctxt
->nsMax
* sizeof(xmlChar
*));
1650 if (ctxt
->nsTab
== NULL
) {
1651 xmlErrMemory(ctxt
, NULL
);
1655 } else if (ctxt
->nsNr
>= ctxt
->nsMax
) {
1656 const xmlChar
** tmp
;
1658 tmp
= (const xmlChar
**) xmlRealloc((char *) ctxt
->nsTab
,
1659 ctxt
->nsMax
* sizeof(ctxt
->nsTab
[0]));
1661 xmlErrMemory(ctxt
, NULL
);
1667 ctxt
->nsTab
[ctxt
->nsNr
++] = prefix
;
1668 ctxt
->nsTab
[ctxt
->nsNr
++] = URL
;
1669 return (ctxt
->nsNr
);
1673 * @ctxt: an XML parser context
1674 * @nr: the number to pop
1676 * Pops the top @nr parser prefix/namespace from the ns stack
1678 * Returns the number of namespaces removed
1681 nsPop(xmlParserCtxtPtr ctxt
, int nr
)
1685 if (ctxt
->nsTab
== NULL
) return(0);
1686 if (ctxt
->nsNr
< nr
) {
1687 xmlGenericError(xmlGenericErrorContext
, "Pbm popping %d NS\n", nr
);
1690 if (ctxt
->nsNr
<= 0)
1693 for (i
= 0;i
< nr
;i
++) {
1695 ctxt
->nsTab
[ctxt
->nsNr
] = NULL
;
1702 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt
, int nr
) {
1703 const xmlChar
**atts
;
1707 if (ctxt
->atts
== NULL
) {
1708 maxatts
= 55; /* allow for 10 attrs by default */
1709 atts
= (const xmlChar
**)
1710 xmlMalloc(maxatts
* sizeof(xmlChar
*));
1711 if (atts
== NULL
) goto mem_error
;
1713 attallocs
= (int *) xmlMalloc((maxatts
/ 5) * sizeof(int));
1714 if (attallocs
== NULL
) goto mem_error
;
1715 ctxt
->attallocs
= attallocs
;
1716 ctxt
->maxatts
= maxatts
;
1717 } else if (nr
+ 5 > ctxt
->maxatts
) {
1718 maxatts
= (nr
+ 5) * 2;
1719 atts
= (const xmlChar
**) xmlRealloc((void *) ctxt
->atts
,
1720 maxatts
* sizeof(const xmlChar
*));
1721 if (atts
== NULL
) goto mem_error
;
1723 attallocs
= (int *) xmlRealloc((void *) ctxt
->attallocs
,
1724 (maxatts
/ 5) * sizeof(int));
1725 if (attallocs
== NULL
) goto mem_error
;
1726 ctxt
->attallocs
= attallocs
;
1727 ctxt
->maxatts
= maxatts
;
1729 return(ctxt
->maxatts
);
1731 xmlErrMemory(ctxt
, NULL
);
1737 * @ctxt: an XML parser context
1738 * @value: the parser input
1740 * Pushes a new parser input on top of the input stack
1742 * Returns -1 in case of error, the index in the stack otherwise
1745 inputPush(xmlParserCtxtPtr ctxt
, xmlParserInputPtr value
)
1747 if ((ctxt
== NULL
) || (value
== NULL
))
1749 if (ctxt
->inputNr
>= ctxt
->inputMax
) {
1750 ctxt
->inputMax
*= 2;
1752 (xmlParserInputPtr
*) xmlRealloc(ctxt
->inputTab
,
1754 sizeof(ctxt
->inputTab
[0]));
1755 if (ctxt
->inputTab
== NULL
) {
1756 xmlErrMemory(ctxt
, NULL
);
1757 ctxt
->inputMax
/= 2;
1761 ctxt
->inputTab
[ctxt
->inputNr
] = value
;
1762 ctxt
->input
= value
;
1763 return (ctxt
->inputNr
++);
1767 * @ctxt: an XML parser context
1769 * Pops the top parser input from the input stack
1771 * Returns the input just removed
1774 inputPop(xmlParserCtxtPtr ctxt
)
1776 xmlParserInputPtr ret
;
1780 if (ctxt
->inputNr
<= 0)
1783 if (ctxt
->inputNr
> 0)
1784 ctxt
->input
= ctxt
->inputTab
[ctxt
->inputNr
- 1];
1787 ret
= ctxt
->inputTab
[ctxt
->inputNr
];
1788 ctxt
->inputTab
[ctxt
->inputNr
] = NULL
;
1793 * @ctxt: an XML parser context
1794 * @value: the element node
1796 * Pushes a new element node on top of the node stack
1798 * Returns -1 in case of error, the index in the stack otherwise
1801 nodePush(xmlParserCtxtPtr ctxt
, xmlNodePtr value
)
1803 if (ctxt
== NULL
) return(0);
1804 if (ctxt
->nodeNr
>= ctxt
->nodeMax
) {
1807 tmp
= (xmlNodePtr
*) xmlRealloc(ctxt
->nodeTab
,
1809 sizeof(ctxt
->nodeTab
[0]));
1811 xmlErrMemory(ctxt
, NULL
);
1814 ctxt
->nodeTab
= tmp
;
1817 if ((((unsigned int) ctxt
->nodeNr
) > xmlParserMaxDepth
) &&
1818 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
1819 xmlFatalErrMsgInt(ctxt
, XML_ERR_INTERNAL_ERROR
,
1820 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1822 xmlHaltParser(ctxt
);
1825 ctxt
->nodeTab
[ctxt
->nodeNr
] = value
;
1827 return (ctxt
->nodeNr
++);
1832 * @ctxt: an XML parser context
1834 * Pops the top element node from the node stack
1836 * Returns the node just removed
1839 nodePop(xmlParserCtxtPtr ctxt
)
1843 if (ctxt
== NULL
) return(NULL
);
1844 if (ctxt
->nodeNr
<= 0)
1847 if (ctxt
->nodeNr
> 0)
1848 ctxt
->node
= ctxt
->nodeTab
[ctxt
->nodeNr
- 1];
1851 ret
= ctxt
->nodeTab
[ctxt
->nodeNr
];
1852 ctxt
->nodeTab
[ctxt
->nodeNr
] = NULL
;
1858 * @ctxt: an XML parser context
1859 * @value: the element name
1860 * @prefix: the element prefix
1861 * @URI: the element namespace name
1862 * @line: the current line number for error messages
1863 * @nsNr: the number of namespaces pushed on the namespace table
1865 * Pushes a new element name/prefix/URL on top of the name stack
1867 * Returns -1 in case of error, the index in the stack otherwise
1870 nameNsPush(xmlParserCtxtPtr ctxt
, const xmlChar
* value
,
1871 const xmlChar
*prefix
, const xmlChar
*URI
, int line
, int nsNr
)
1875 if (ctxt
->nameNr
>= ctxt
->nameMax
) {
1876 const xmlChar
* *tmp
;
1879 tmp
= (const xmlChar
* *) xmlRealloc((xmlChar
* *)ctxt
->nameTab
,
1881 sizeof(ctxt
->nameTab
[0]));
1886 ctxt
->nameTab
= tmp
;
1887 tmp2
= (xmlStartTag
*) xmlRealloc((void * *)ctxt
->pushTab
,
1889 sizeof(ctxt
->pushTab
[0]));
1894 ctxt
->pushTab
= tmp2
;
1895 } else if (ctxt
->pushTab
== NULL
) {
1896 ctxt
->pushTab
= (xmlStartTag
*) xmlMalloc(ctxt
->nameMax
*
1897 sizeof(ctxt
->pushTab
[0]));
1898 if (ctxt
->pushTab
== NULL
)
1901 ctxt
->nameTab
[ctxt
->nameNr
] = value
;
1903 tag
= &ctxt
->pushTab
[ctxt
->nameNr
];
1904 tag
->prefix
= prefix
;
1908 return (ctxt
->nameNr
++);
1910 xmlErrMemory(ctxt
, NULL
);
1913 #ifdef LIBXML_PUSH_ENABLED
1916 * @ctxt: an XML parser context
1918 * Pops the top element/prefix/URI name from the name stack
1920 * Returns the name just removed
1922 static const xmlChar
*
1923 nameNsPop(xmlParserCtxtPtr ctxt
)
1927 if (ctxt
->nameNr
<= 0)
1930 if (ctxt
->nameNr
> 0)
1931 ctxt
->name
= ctxt
->nameTab
[ctxt
->nameNr
- 1];
1934 ret
= ctxt
->nameTab
[ctxt
->nameNr
];
1935 ctxt
->nameTab
[ctxt
->nameNr
] = NULL
;
1938 #endif /* LIBXML_PUSH_ENABLED */
1942 * @ctxt: an XML parser context
1943 * @value: the element name
1945 * Pushes a new element name on top of the name stack
1947 * Returns -1 in case of error, the index in the stack otherwise
1950 namePush(xmlParserCtxtPtr ctxt
, const xmlChar
* value
)
1952 if (ctxt
== NULL
) return (-1);
1954 if (ctxt
->nameNr
>= ctxt
->nameMax
) {
1955 const xmlChar
* *tmp
;
1956 tmp
= (const xmlChar
* *) xmlRealloc((xmlChar
* *)ctxt
->nameTab
,
1958 sizeof(ctxt
->nameTab
[0]));
1962 ctxt
->nameTab
= tmp
;
1965 ctxt
->nameTab
[ctxt
->nameNr
] = value
;
1967 return (ctxt
->nameNr
++);
1969 xmlErrMemory(ctxt
, NULL
);
1974 * @ctxt: an XML parser context
1976 * Pops the top element name from the name stack
1978 * Returns the name just removed
1981 namePop(xmlParserCtxtPtr ctxt
)
1985 if ((ctxt
== NULL
) || (ctxt
->nameNr
<= 0))
1988 if (ctxt
->nameNr
> 0)
1989 ctxt
->name
= ctxt
->nameTab
[ctxt
->nameNr
- 1];
1992 ret
= ctxt
->nameTab
[ctxt
->nameNr
];
1993 ctxt
->nameTab
[ctxt
->nameNr
] = NULL
;
1997 static int spacePush(xmlParserCtxtPtr ctxt
, int val
) {
1998 if (ctxt
->spaceNr
>= ctxt
->spaceMax
) {
2001 ctxt
->spaceMax
*= 2;
2002 tmp
= (int *) xmlRealloc(ctxt
->spaceTab
,
2003 ctxt
->spaceMax
* sizeof(ctxt
->spaceTab
[0]));
2005 xmlErrMemory(ctxt
, NULL
);
2009 ctxt
->spaceTab
= tmp
;
2011 ctxt
->spaceTab
[ctxt
->spaceNr
] = val
;
2012 ctxt
->space
= &ctxt
->spaceTab
[ctxt
->spaceNr
];
2013 return(ctxt
->spaceNr
++);
2016 static int spacePop(xmlParserCtxtPtr ctxt
) {
2018 if (ctxt
->spaceNr
<= 0) return(0);
2020 if (ctxt
->spaceNr
> 0)
2021 ctxt
->space
= &ctxt
->spaceTab
[ctxt
->spaceNr
- 1];
2023 ctxt
->space
= &ctxt
->spaceTab
[0];
2024 ret
= ctxt
->spaceTab
[ctxt
->spaceNr
];
2025 ctxt
->spaceTab
[ctxt
->spaceNr
] = -1;
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. They all expect a variable named ctxt in scope.
 *
 * Dirty macros, i.e. one often need to make assumption on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/* CMPn(s, c1..cn): true when the first n bytes of s are c1..cn */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/* note: val is evaluated twice */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/* expands to a bare if statement ending in ';' */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);
2111 static void xmlSHRINK (xmlParserCtxtPtr ctxt
) {
2112 xmlParserInputShrink(ctxt
->input
);
2113 if (*ctxt
->input
->cur
== 0)
2114 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
2117 #define GROW if ((ctxt->progressive == 0) && \
2118 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2121 static void xmlGROW (xmlParserCtxtPtr ctxt
) {
2122 ptrdiff_t curEnd
= ctxt
->input
->end
- ctxt
->input
->cur
;
2123 ptrdiff_t curBase
= ctxt
->input
->cur
- ctxt
->input
->base
;
2125 if (((curEnd
> XML_MAX_LOOKUP_LIMIT
) ||
2126 (curBase
> XML_MAX_LOOKUP_LIMIT
)) &&
2127 ((ctxt
->input
->buf
) &&
2128 (ctxt
->input
->buf
->readcallback
!= xmlInputReadCallbackNop
)) &&
2129 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
2130 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
, "Huge input lookup");
2131 xmlHaltParser(ctxt
);
2134 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
2135 if ((ctxt
->input
->cur
> ctxt
->input
->end
) ||
2136 (ctxt
->input
->cur
< ctxt
->input
->base
)) {
2137 xmlHaltParser(ctxt
);
2138 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
, "cur index out of bound");
2141 if ((ctxt
->input
->cur
!= NULL
) && (*ctxt
->input
->cur
== 0))
2142 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/* skip one non-newline ASCII char; expands to a braced block */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/* skip the current character, which is l xmlChars long */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * append char v to buffer b at index i; l is its encoded length, any
 * value other than 1 takes the multi-byte encoder. Expands to a bare
 * if/else without trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))

#define CUR_CONSUMED \
    (ctxt->input->consumed + (ctxt->input->cur - ctxt->input->base))
2174 * xmlSkipBlankChars:
2175 * @ctxt: the XML parser context
2177 * skip all blanks character found at that point in the input streams.
2178 * It pops up finished entities in the process if allowable at that point.
2180 * Returns the number of space chars skipped
2184 xmlSkipBlankChars(xmlParserCtxtPtr ctxt
) {
2188 * It's Okay to use CUR/NEXT here since all the blanks are on
2191 if (((ctxt
->inputNr
== 1) && (ctxt
->instate
!= XML_PARSER_DTD
)) ||
2192 (ctxt
->instate
== XML_PARSER_START
)) {
2195 * if we are in the document content, go really fast
2197 cur
= ctxt
->input
->cur
;
2198 while (IS_BLANK_CH(*cur
)) {
2200 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
2208 ctxt
->input
->cur
= cur
;
2209 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
2210 cur
= ctxt
->input
->cur
;
2213 ctxt
->input
->cur
= cur
;
2215 int expandPE
= ((ctxt
->external
!= 0) || (ctxt
->inputNr
!= 1));
2218 if (IS_BLANK_CH(CUR
)) { /* CHECKED tstblanks.xml */
2220 } else if (CUR
== '%') {
2222 * Need to handle support of entities branching here
2224 if ((expandPE
== 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2226 xmlParsePEReference(ctxt
);
2227 } else if (CUR
== 0) {
2228 if (ctxt
->inputNr
<= 1)
2236 * Also increase the counter when entering or exiting a PERef.
2237 * The spec says: "When a parameter-entity reference is recognized
2238 * in the DTD and included, its replacement text MUST be enlarged
2239 * by the attachment of one leading and one following space (#x20)
2249 /************************************************************************
2251 * Commodity functions to handle entities *
2253 ************************************************************************/
2257 * @ctxt: an XML parser context
2259 * xmlPopInput: the current input pointed by ctxt->input came to an end
2260 * pop it and return the next char.
2262 * Returns the current xmlChar in the parser context
2265 xmlPopInput(xmlParserCtxtPtr ctxt
) {
2266 if ((ctxt
== NULL
) || (ctxt
->inputNr
<= 1)) return(0);
2267 if (xmlParserDebugEntities
)
2268 xmlGenericError(xmlGenericErrorContext
,
2269 "Popping input %d\n", ctxt
->inputNr
);
2270 if ((ctxt
->inputNr
> 1) && (ctxt
->inSubset
== 0) &&
2271 (ctxt
->instate
!= XML_PARSER_EOF
))
2272 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
2273 "Unfinished entity outside the DTD");
2274 xmlFreeInputStream(inputPop(ctxt
));
2275 if (*ctxt
->input
->cur
== 0)
2276 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
2282 * @ctxt: an XML parser context
2283 * @input: an XML parser input fragment (entity, XML fragment ...).
2285 * xmlPushInput: switch to a new input stream which is stacked on top
2286 * of the previous one(s).
2287 * Returns -1 in case of error or the index in the input stack
2290 xmlPushInput(xmlParserCtxtPtr ctxt
, xmlParserInputPtr input
) {
2292 if (input
== NULL
) return(-1);
2294 if (xmlParserDebugEntities
) {
2295 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
2296 xmlGenericError(xmlGenericErrorContext
,
2297 "%s(%d): ", ctxt
->input
->filename
,
2299 xmlGenericError(xmlGenericErrorContext
,
2300 "Pushing input %d : %.30s\n", ctxt
->inputNr
+1, input
->cur
);
2302 if (((ctxt
->inputNr
> 40) && ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
2303 (ctxt
->inputNr
> 1024)) {
2304 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
2305 while (ctxt
->inputNr
> 1)
2306 xmlFreeInputStream(inputPop(ctxt
));
2309 ret
= inputPush(ctxt
, input
);
2310 if (ctxt
->instate
== XML_PARSER_EOF
)
2318 * @ctxt: an XML parser context
2320 * parse Reference declarations
2322 * [66] CharRef ::= '&#' [0-9]+ ';' |
2323 * '&#x' [0-9a-fA-F]+ ';'
2325 * [ WFC: Legal Character ]
2326 * Characters referred to using character references must match the
2327 * production for Char.
2329 * Returns the value parsed (as an int), 0 in case of error
2332 xmlParseCharRef(xmlParserCtxtPtr ctxt
) {
2337 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2339 if ((RAW
== '&') && (NXT(1) == '#') &&
2343 while (RAW
!= ';') { /* loop blocked by count */
2347 if (ctxt
->instate
== XML_PARSER_EOF
)
2350 if ((RAW
>= '0') && (RAW
<= '9'))
2351 val
= val
* 16 + (CUR
- '0');
2352 else if ((RAW
>= 'a') && (RAW
<= 'f') && (count
< 20))
2353 val
= val
* 16 + (CUR
- 'a') + 10;
2354 else if ((RAW
>= 'A') && (RAW
<= 'F') && (count
< 20))
2355 val
= val
* 16 + (CUR
- 'A') + 10;
2357 xmlFatalErr(ctxt
, XML_ERR_INVALID_HEX_CHARREF
, NULL
);
2368 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2372 } else if ((RAW
== '&') && (NXT(1) == '#')) {
2375 while (RAW
!= ';') { /* loop blocked by count */
2379 if (ctxt
->instate
== XML_PARSER_EOF
)
2382 if ((RAW
>= '0') && (RAW
<= '9'))
2383 val
= val
* 10 + (CUR
- '0');
2385 xmlFatalErr(ctxt
, XML_ERR_INVALID_DEC_CHARREF
, NULL
);
2396 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2401 xmlFatalErr(ctxt
, XML_ERR_INVALID_CHARREF
, NULL
);
2405 * [ WFC: Legal Character ]
2406 * Characters referred to using character references must match the
2407 * production for Char.
2409 if (val
>= 0x110000) {
2410 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
2411 "xmlParseCharRef: character reference out of bounds\n",
2413 } else if (IS_CHAR(val
)) {
2416 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
2417 "xmlParseCharRef: invalid xmlChar value %d\n",
2424 * xmlParseStringCharRef:
2425 * @ctxt: an XML parser context
2426 * @str: a pointer to an index in the string
2428 * parse Reference declarations, variant parsing from a string rather
2429 * than an an input flow.
2431 * [66] CharRef ::= '&#' [0-9]+ ';' |
2432 * '&#x' [0-9a-fA-F]+ ';'
2434 * [ WFC: Legal Character ]
2435 * Characters referred to using character references must match the
2436 * production for Char.
2438 * Returns the value parsed (as an int), 0 in case of error, str will be
2439 * updated to the current value of the index
2442 xmlParseStringCharRef(xmlParserCtxtPtr ctxt
, const xmlChar
**str
) {
2447 if ((str
== NULL
) || (*str
== NULL
)) return(0);
2450 if ((cur
== '&') && (ptr
[1] == '#') && (ptr
[2] == 'x')) {
2453 while (cur
!= ';') { /* Non input consuming loop */
2454 if ((cur
>= '0') && (cur
<= '9'))
2455 val
= val
* 16 + (cur
- '0');
2456 else if ((cur
>= 'a') && (cur
<= 'f'))
2457 val
= val
* 16 + (cur
- 'a') + 10;
2458 else if ((cur
>= 'A') && (cur
<= 'F'))
2459 val
= val
* 16 + (cur
- 'A') + 10;
2461 xmlFatalErr(ctxt
, XML_ERR_INVALID_HEX_CHARREF
, NULL
);
2473 } else if ((cur
== '&') && (ptr
[1] == '#')){
2476 while (cur
!= ';') { /* Non input consuming loops */
2477 if ((cur
>= '0') && (cur
<= '9'))
2478 val
= val
* 10 + (cur
- '0');
2480 xmlFatalErr(ctxt
, XML_ERR_INVALID_DEC_CHARREF
, NULL
);
2493 xmlFatalErr(ctxt
, XML_ERR_INVALID_CHARREF
, NULL
);
2499 * [ WFC: Legal Character ]
2500 * Characters referred to using character references must match the
2501 * production for Char.
2503 if (val
>= 0x110000) {
2504 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
2505 "xmlParseStringCharRef: character reference out of bounds\n",
2507 } else if (IS_CHAR(val
)) {
2510 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
2511 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2518 * xmlParserHandlePEReference:
2519 * @ctxt: the parser context
2521 * [69] PEReference ::= '%' Name ';'
2523 * [ WFC: No Recursion ]
2524 * A parsed entity must not contain a recursive
2525 * reference to itself, either directly or indirectly.
2527 * [ WFC: Entity Declared ]
2528 * In a document without any DTD, a document with only an internal DTD
2529 * subset which contains no parameter entity references, or a document
2530 * with "standalone='yes'", ... ... The declaration of a parameter
2531 * entity must precede any reference to it...
2533 * [ VC: Entity Declared ]
2534 * In a document with an external subset or external parameter entities
2535 * with "standalone='no'", ... ... The declaration of a parameter entity
2536 * must precede any reference to it...
2539 * Parameter-entity references may only appear in the DTD.
2540 * NOTE: misleading but this is handled.
2542 * A PEReference may have been detected in the current input stream
2543 * the handling is done accordingly to
2544 * http://www.w3.org/TR/REC-xml#entproc
2546 * - Included in literal in entity values
2547 * - Included as Parameter Entity reference within DTDs
2550 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt
) {
2551 switch(ctxt
->instate
) {
2552 case XML_PARSER_CDATA_SECTION
:
2554 case XML_PARSER_COMMENT
:
2556 case XML_PARSER_START_TAG
:
2558 case XML_PARSER_END_TAG
:
2560 case XML_PARSER_EOF
:
2561 xmlFatalErr(ctxt
, XML_ERR_PEREF_AT_EOF
, NULL
);
2563 case XML_PARSER_PROLOG
:
2564 case XML_PARSER_START
:
2565 case XML_PARSER_MISC
:
2566 xmlFatalErr(ctxt
, XML_ERR_PEREF_IN_PROLOG
, NULL
);
2568 case XML_PARSER_ENTITY_DECL
:
2569 case XML_PARSER_CONTENT
:
2570 case XML_PARSER_ATTRIBUTE_VALUE
:
2572 case XML_PARSER_SYSTEM_LITERAL
:
2573 case XML_PARSER_PUBLIC_LITERAL
:
2574 /* we just ignore it there */
2576 case XML_PARSER_EPILOG
:
2577 xmlFatalErr(ctxt
, XML_ERR_PEREF_IN_EPILOG
, NULL
);
2579 case XML_PARSER_ENTITY_VALUE
:
2581 * NOTE: in the case of entity values, we don't do the
2582 * substitution here since we need the literal
2583 * entity value to be able to save the internal
2584 * subset of the document.
2585 * This will be handled by xmlStringDecodeEntities
2588 case XML_PARSER_DTD
:
2590 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2591 * In the internal DTD subset, parameter-entity references
2592 * can occur only where markup declarations can occur, not
2593 * within markup declarations.
2594 * In that case this is handled in xmlParseMarkupDecl
2596 if ((ctxt
->external
== 0) && (ctxt
->inputNr
== 1))
2598 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2601 case XML_PARSER_IGNORE
:
2605 xmlParsePEReference(ctxt
);
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 * The new size is twice the old one plus n; a wrap-around of that
 * computation is treated as an allocation failure.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2624 * xmlStringLenDecodeEntities:
2625 * @ctxt: the parser context
2626 * @str: the input string
2627 * @len: the string length
2628 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2629 * @end: an end marker xmlChar, 0 if none
2630 * @end2: an end marker xmlChar, 0 if none
2631 * @end3: an end marker xmlChar, 0 if none
2633 * Takes a entity string content and process to do the adequate substitutions.
2635 * [67] Reference ::= EntityRef | CharRef
2637 * [69] PEReference ::= '%' Name ';'
2639 * Returns A newly allocated string with the substitution done. The caller
2640 * must deallocate it !
2643 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int len
,
2644 int what
, xmlChar end
, xmlChar end2
, xmlChar end3
) {
2645 xmlChar
*buffer
= NULL
;
2646 size_t buffer_size
= 0;
2649 xmlChar
*current
= NULL
;
2650 xmlChar
*rep
= NULL
;
2651 const xmlChar
*last
;
2655 if ((ctxt
== NULL
) || (str
== NULL
) || (len
< 0))
2659 if (((ctxt
->depth
> 40) &&
2660 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
2661 (ctxt
->depth
> 1024)) {
2662 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
2667 * allocate a translation buffer.
2669 buffer_size
= XML_PARSER_BIG_BUFFER_SIZE
;
2670 buffer
= (xmlChar
*) xmlMallocAtomic(buffer_size
);
2671 if (buffer
== NULL
) goto mem_error
;
2674 * OK loop until we reach one of the ending char or a size limit.
2675 * we are operating on already parsed values.
2678 c
= CUR_SCHAR(str
, l
);
2681 while ((c
!= 0) && (c
!= end
) && /* non input consuming loop */
2682 (c
!= end2
) && (c
!= end3
) &&
2683 (ctxt
->instate
!= XML_PARSER_EOF
)) {
2686 if ((c
== '&') && (str
[1] == '#')) {
2687 int val
= xmlParseStringCharRef(ctxt
, &str
);
2690 COPY_BUF(0,buffer
,nbchars
,val
);
2691 if (nbchars
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2692 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2694 } else if ((c
== '&') && (what
& XML_SUBSTITUTE_REF
)) {
2695 if (xmlParserDebugEntities
)
2696 xmlGenericError(xmlGenericErrorContext
,
2697 "String decoding Entity Reference: %.30s\n",
2699 ent
= xmlParseStringEntityRef(ctxt
, &str
);
2700 xmlParserEntityCheck(ctxt
, 0, ent
, 0);
2702 ctxt
->nbentities
+= ent
->checked
/ 2;
2703 if ((ent
!= NULL
) &&
2704 (ent
->etype
== XML_INTERNAL_PREDEFINED_ENTITY
)) {
2705 if (ent
->content
!= NULL
) {
2706 COPY_BUF(0,buffer
,nbchars
,ent
->content
[0]);
2707 if (nbchars
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2708 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2711 xmlFatalErrMsg(ctxt
, XML_ERR_INTERNAL_ERROR
,
2712 "predefined entity has no content\n");
2715 } else if ((ent
!= NULL
) && (ent
->content
!= NULL
)) {
2717 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
, what
,
2721 ent
->content
[0] = 0;
2726 while (*current
!= 0) { /* non input consuming loop */
2727 buffer
[nbchars
++] = *current
++;
2728 if (nbchars
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2729 if (xmlParserEntityCheck(ctxt
, nbchars
, ent
, 0))
2731 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2736 } else if (ent
!= NULL
) {
2737 int i
= xmlStrlen(ent
->name
);
2738 const xmlChar
*cur
= ent
->name
;
2740 buffer
[nbchars
++] = '&';
2741 if (nbchars
+ i
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2742 growBuffer(buffer
, i
+ XML_PARSER_BUFFER_SIZE
);
2745 buffer
[nbchars
++] = *cur
++;
2746 buffer
[nbchars
++] = ';';
2748 } else if (c
== '%' && (what
& XML_SUBSTITUTE_PEREF
)) {
2749 if (xmlParserDebugEntities
)
2750 xmlGenericError(xmlGenericErrorContext
,
2751 "String decoding PE Reference: %.30s\n", str
);
2752 ent
= xmlParseStringPEReference(ctxt
, &str
);
2753 xmlParserEntityCheck(ctxt
, 0, ent
, 0);
2755 ctxt
->nbentities
+= ent
->checked
/ 2;
2757 if (ent
->content
== NULL
) {
2759 * Note: external parsed entities will not be loaded,
2760 * it is not required for a non-validating parser to
2761 * complete external PEReferences coming from the
2764 if (((ctxt
->options
& XML_PARSE_NOENT
) != 0) ||
2765 ((ctxt
->options
& XML_PARSE_DTDVALID
) != 0) ||
2766 (ctxt
->validate
!= 0)) {
2767 xmlLoadEntityContent(ctxt
, ent
);
2769 xmlWarningMsg(ctxt
, XML_ERR_ENTITY_PROCESSING
,
2770 "not validating will not read content for PE entity %s\n",
2775 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
, what
,
2779 if (ent
->content
!= NULL
)
2780 ent
->content
[0] = 0;
2784 while (*current
!= 0) { /* non input consuming loop */
2785 buffer
[nbchars
++] = *current
++;
2786 if (nbchars
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2787 if (xmlParserEntityCheck(ctxt
, nbchars
, ent
, 0))
2789 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2796 COPY_BUF(l
,buffer
,nbchars
,c
);
2798 if (nbchars
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2799 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2803 c
= CUR_SCHAR(str
, l
);
2807 buffer
[nbchars
] = 0;
2811 xmlErrMemory(ctxt
, NULL
);
2821 * xmlStringDecodeEntities:
2822 * @ctxt: the parser context
2823 * @str: the input string
2824 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2825 * @end: an end marker xmlChar, 0 if none
2826 * @end2: an end marker xmlChar, 0 if none
2827 * @end3: an end marker xmlChar, 0 if none
2829 * Takes an entity string content and processes it to do the adequate substitutions.
2831 * [67] Reference ::= EntityRef | CharRef
2833 * [69] PEReference ::= '%' Name ';'
2835 * Returns A newly allocated string with the substitution done. The caller
2836 * must deallocate it !
2839 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int what
,
2840 xmlChar end
, xmlChar end2
, xmlChar end3
) {
2841 if ((ctxt
== NULL
) || (str
== NULL
)) return(NULL
);
2842 return(xmlStringLenDecodeEntities(ctxt
, str
, xmlStrlen(str
), what
,
2846 /************************************************************************
2848 * Commodity functions, cleanup needed ? *
2850 ************************************************************************/
2854 * @ctxt: an XML parser context
2856 * @len: the size of @str
2857 * @blank_chars: we know the chars are blanks
2859 * Is this a sequence of blank chars that one can ignore ?
2861 * Returns 1 if ignorable 0 otherwise.
2864 static int areBlanks(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int len
,
2867 xmlNodePtr lastChild
;
2870 * Don't spend time trying to differentiate them, the same callback is
2873 if (ctxt
->sax
->ignorableWhitespace
== ctxt
->sax
->characters
)
2877 * Check for xml:space value.
2879 if ((ctxt
->space
== NULL
) || (*(ctxt
->space
) == 1) ||
2880 (*(ctxt
->space
) == -2))
2884 * Check that the string is made of blanks
2886 if (blank_chars
== 0) {
2887 for (i
= 0;i
< len
;i
++)
2888 if (!(IS_BLANK_CH(str
[i
]))) return(0);
2892 * Look if the element is mixed content in the DTD if available
2894 if (ctxt
->node
== NULL
) return(0);
2895 if (ctxt
->myDoc
!= NULL
) {
2896 ret
= xmlIsMixedElement(ctxt
->myDoc
, ctxt
->node
->name
);
2897 if (ret
== 0) return(1);
2898 if (ret
== 1) return(0);
2902 * Otherwise, heuristic :-\
2904 if ((RAW
!= '<') && (RAW
!= 0xD)) return(0);
2905 if ((ctxt
->node
->children
== NULL
) &&
2906 (RAW
== '<') && (NXT(1) == '/')) return(0);
2908 lastChild
= xmlGetLastChild(ctxt
->node
);
2909 if (lastChild
== NULL
) {
2910 if ((ctxt
->node
->type
!= XML_ELEMENT_NODE
) &&
2911 (ctxt
->node
->content
!= NULL
)) return(0);
2912 } else if (xmlNodeIsText(lastChild
))
2914 else if ((ctxt
->node
->children
!= NULL
) &&
2915 (xmlNodeIsText(ctxt
->node
->children
)))
2920 /************************************************************************
2922 * Extra stuff for namespace support *
2923 * Relates to http://www.w3.org/TR/WD-xml-names *
2925 ************************************************************************/
2929 * @ctxt: an XML parser context
2930 * @name: the XML qualified name string to split
2931 * @prefix: a xmlChar **
2933 * parse a UTF-8 encoded XML qualified name string
2935 * [NS 5] QName ::= (Prefix ':')? LocalPart
2937 * [NS 6] Prefix ::= NCName
2939 * [NS 7] LocalPart ::= NCName
2941 * Returns the local part, and prefix is updated
2942 * to get the Prefix if any.
2946 xmlSplitQName(xmlParserCtxtPtr ctxt
, const xmlChar
*name
, xmlChar
**prefix
) {
2947 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
2948 xmlChar
*buffer
= NULL
;
2950 int max
= XML_MAX_NAMELEN
;
2951 xmlChar
*ret
= NULL
;
2952 const xmlChar
*cur
= name
;
2955 if (prefix
== NULL
) return(NULL
);
2958 if (cur
== NULL
) return(NULL
);
2960 #ifndef XML_XML_NAMESPACE
2961 /* xml: prefix is not really a namespace */
2962 if ((cur
[0] == 'x') && (cur
[1] == 'm') &&
2963 (cur
[2] == 'l') && (cur
[3] == ':'))
2964 return(xmlStrdup(name
));
2967 /* nasty but well-formed */
2969 return(xmlStrdup(name
));
2972 while ((c
!= 0) && (c
!= ':') && (len
< max
)) { /* tested bigname.xml */
2978 * Okay someone managed to make a huge name, so he's ready to pay
2979 * for the processing speed.
2983 buffer
= (xmlChar
*) xmlMallocAtomic(max
* sizeof(xmlChar
));
2984 if (buffer
== NULL
) {
2985 xmlErrMemory(ctxt
, NULL
);
2988 memcpy(buffer
, buf
, len
);
2989 while ((c
!= 0) && (c
!= ':')) { /* tested bigname.xml */
2990 if (len
+ 10 > max
) {
2994 tmp
= (xmlChar
*) xmlRealloc(buffer
,
2995 max
* sizeof(xmlChar
));
2998 xmlErrMemory(ctxt
, NULL
);
3009 if ((c
== ':') && (*cur
== 0)) {
3013 return(xmlStrdup(name
));
3017 ret
= xmlStrndup(buf
, len
);
3021 max
= XML_MAX_NAMELEN
;
3029 return(xmlStrndup(BAD_CAST
"", 0));
3034 * Check that the first character is proper to start
3037 if (!(((c
>= 0x61) && (c
<= 0x7A)) ||
3038 ((c
>= 0x41) && (c
<= 0x5A)) ||
3039 (c
== '_') || (c
== ':'))) {
3041 int first
= CUR_SCHAR(cur
, l
);
3043 if (!IS_LETTER(first
) && (first
!= '_')) {
3044 xmlFatalErrMsgStr(ctxt
, XML_NS_ERR_QNAME
,
3045 "Name %s is not XML Namespace compliant\n",
3051 while ((c
!= 0) && (len
< max
)) { /* tested bigname2.xml */
3057 * Okay someone managed to make a huge name, so he's ready to pay
3058 * for the processing speed.
3062 buffer
= (xmlChar
*) xmlMallocAtomic(max
* sizeof(xmlChar
));
3063 if (buffer
== NULL
) {
3064 xmlErrMemory(ctxt
, NULL
);
3067 memcpy(buffer
, buf
, len
);
3068 while (c
!= 0) { /* tested bigname2.xml */
3069 if (len
+ 10 > max
) {
3073 tmp
= (xmlChar
*) xmlRealloc(buffer
,
3074 max
* sizeof(xmlChar
));
3076 xmlErrMemory(ctxt
, NULL
);
3089 ret
= xmlStrndup(buf
, len
);
3098 /************************************************************************
3100 * The parser itself *
3101 * Relates to http://www.w3.org/TR/REC-xml *
3103 ************************************************************************/
3105 /************************************************************************
3107 * Routines to parse Name, NCName and NmToken *
3109 ************************************************************************/
3111 static unsigned long nbParseName
= 0;
3112 static unsigned long nbParseNmToken
= 0;
3113 static unsigned long nbParseNCName
= 0;
3114 static unsigned long nbParseNCNameComplex
= 0;
3115 static unsigned long nbParseNameComplex
= 0;
3116 static unsigned long nbParseStringName
= 0;
3120 * The two following functions are related to the change of accepted
3121 * characters for Name and NmToken in the Revision 5 of XML-1.0
3122 * They correspond to the modified production [4] and the new production [4a]
3123 * changes in that revision. Also note that the macros used for the
3124 * productions Letter, Digit, CombiningChar and Extender are not needed
3126 * We still keep compatibility to pre-revision5 parsing semantic if the
3127 * new XML_PARSE_OLD10 option is given to the parser.
3130 xmlIsNameStartChar(xmlParserCtxtPtr ctxt
, int c
) {
3131 if ((ctxt
->options
& XML_PARSE_OLD10
) == 0) {
3133 * Use the new checks of production [4] [4a] and [5] of the
3134 * Update 5 of XML-1.0
3136 if ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* accelerators */
3137 (((c
>= 'a') && (c
<= 'z')) ||
3138 ((c
>= 'A') && (c
<= 'Z')) ||
3139 (c
== '_') || (c
== ':') ||
3140 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3141 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3142 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3143 ((c
>= 0x370) && (c
<= 0x37D)) ||
3144 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3145 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3146 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3147 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3148 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3149 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3150 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3151 ((c
>= 0x10000) && (c
<= 0xEFFFF))))
3154 if (IS_LETTER(c
) || (c
== '_') || (c
== ':'))
3161 xmlIsNameChar(xmlParserCtxtPtr ctxt
, int c
) {
3162 if ((ctxt
->options
& XML_PARSE_OLD10
) == 0) {
3164 * Use the new checks of production [4] [4a] and [5] of the
3165 * Update 5 of XML-1.0
3167 if ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* accelerators */
3168 (((c
>= 'a') && (c
<= 'z')) ||
3169 ((c
>= 'A') && (c
<= 'Z')) ||
3170 ((c
>= '0') && (c
<= '9')) || /* !start */
3171 (c
== '_') || (c
== ':') ||
3172 (c
== '-') || (c
== '.') || (c
== 0xB7) || /* !start */
3173 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3174 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3175 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3176 ((c
>= 0x300) && (c
<= 0x36F)) || /* !start */
3177 ((c
>= 0x370) && (c
<= 0x37D)) ||
3178 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3179 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3180 ((c
>= 0x203F) && (c
<= 0x2040)) || /* !start */
3181 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3182 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3183 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3184 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3185 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3186 ((c
>= 0x10000) && (c
<= 0xEFFFF))))
3189 if ((IS_LETTER(c
)) || (IS_DIGIT(c
)) ||
3190 (c
== '.') || (c
== '-') ||
3191 (c
== '_') || (c
== ':') ||
3192 (IS_COMBINING(c
)) ||
3199 static xmlChar
* xmlParseAttValueInternal(xmlParserCtxtPtr ctxt
,
3200 int *len
, int *alloc
, int normalize
);
3202 static const xmlChar
*
3203 xmlParseNameComplex(xmlParserCtxtPtr ctxt
) {
3207 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3208 XML_MAX_TEXT_LENGTH
:
3209 XML_MAX_NAME_LENGTH
;
3212 nbParseNameComplex
++;
3216 * Handler for more complex cases
3219 if (ctxt
->instate
== XML_PARSER_EOF
)
3222 if ((ctxt
->options
& XML_PARSE_OLD10
) == 0) {
3224 * Use the new checks of production [4] [4a] and [5] of the
3225 * Update 5 of XML-1.0
3227 if ((c
== ' ') || (c
== '>') || (c
== '/') || /* accelerators */
3228 (!(((c
>= 'a') && (c
<= 'z')) ||
3229 ((c
>= 'A') && (c
<= 'Z')) ||
3230 (c
== '_') || (c
== ':') ||
3231 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3232 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3233 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3234 ((c
>= 0x370) && (c
<= 0x37D)) ||
3235 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3236 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3237 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3238 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3239 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3240 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3241 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3242 ((c
>= 0x10000) && (c
<= 0xEFFFF))))) {
3248 while ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* accelerators */
3249 (((c
>= 'a') && (c
<= 'z')) ||
3250 ((c
>= 'A') && (c
<= 'Z')) ||
3251 ((c
>= '0') && (c
<= '9')) || /* !start */
3252 (c
== '_') || (c
== ':') ||
3253 (c
== '-') || (c
== '.') || (c
== 0xB7) || /* !start */
3254 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3255 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3256 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3257 ((c
>= 0x300) && (c
<= 0x36F)) || /* !start */
3258 ((c
>= 0x370) && (c
<= 0x37D)) ||
3259 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3260 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3261 ((c
>= 0x203F) && (c
<= 0x2040)) || /* !start */
3262 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3263 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3264 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3265 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3266 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3267 ((c
>= 0x10000) && (c
<= 0xEFFFF))
3269 if (count
++ > XML_PARSER_CHUNK_SIZE
) {
3272 if (ctxt
->instate
== XML_PARSER_EOF
)
3275 if (len
<= INT_MAX
- l
)
3281 if ((c
== ' ') || (c
== '>') || (c
== '/') || /* accelerators */
3282 (!IS_LETTER(c
) && (c
!= '_') &&
3290 while ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* test bigname.xml */
3291 ((IS_LETTER(c
)) || (IS_DIGIT(c
)) ||
3292 (c
== '.') || (c
== '-') ||
3293 (c
== '_') || (c
== ':') ||
3294 (IS_COMBINING(c
)) ||
3295 (IS_EXTENDER(c
)))) {
3296 if (count
++ > XML_PARSER_CHUNK_SIZE
) {
3299 if (ctxt
->instate
== XML_PARSER_EOF
)
3302 if (len
<= INT_MAX
- l
)
3308 if (len
> maxLength
) {
3309 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "Name");
3312 if (ctxt
->input
->cur
- ctxt
->input
->base
< len
) {
3314 * There were a couple of bugs where PERefs led to a change
3315 * of the buffer. Check the buffer size to avoid passing an invalid
3316 * pointer to xmlDictLookup.
3318 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
3319 "unexpected change of input buffer");
3322 if ((*ctxt
->input
->cur
== '\n') && (ctxt
->input
->cur
[-1] == '\r'))
3323 return(xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
- (len
+ 1), len
));
3324 return(xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
- len
, len
));
3329 * @ctxt: an XML parser context
3331 * parse an XML name.
3333 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3334 * CombiningChar | Extender
3336 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3338 * [6] Names ::= Name (#x20 Name)*
3340 * Returns the Name parsed or NULL
3344 xmlParseName(xmlParserCtxtPtr ctxt
) {
3348 size_t maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3349 XML_MAX_TEXT_LENGTH
:
3350 XML_MAX_NAME_LENGTH
;
3359 * Accelerator for simple ASCII names
3361 in
= ctxt
->input
->cur
;
3362 if (((*in
>= 0x61) && (*in
<= 0x7A)) ||
3363 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3364 (*in
== '_') || (*in
== ':')) {
3366 while (((*in
>= 0x61) && (*in
<= 0x7A)) ||
3367 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3368 ((*in
>= 0x30) && (*in
<= 0x39)) ||
3369 (*in
== '_') || (*in
== '-') ||
3370 (*in
== ':') || (*in
== '.'))
3372 if ((*in
> 0) && (*in
< 0x80)) {
3373 count
= in
- ctxt
->input
->cur
;
3374 if (count
> maxLength
) {
3375 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "Name");
3378 ret
= xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
, count
);
3379 ctxt
->input
->cur
= in
;
3380 ctxt
->input
->col
+= count
;
3382 xmlErrMemory(ctxt
, NULL
);
3386 /* accelerator for special cases */
3387 return(xmlParseNameComplex(ctxt
));
3390 static const xmlChar
*
3391 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt
) {
3395 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3396 XML_MAX_TEXT_LENGTH
:
3397 XML_MAX_NAME_LENGTH
;
3398 size_t startPosition
= 0;
3401 nbParseNCNameComplex
++;
3405 * Handler for more complex cases
3408 startPosition
= CUR_PTR
- BASE_PTR
;
3410 if ((c
== ' ') || (c
== '>') || (c
== '/') || /* accelerators */
3411 (!xmlIsNameStartChar(ctxt
, c
) || (c
== ':'))) {
3415 while ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* test bigname.xml */
3416 (xmlIsNameChar(ctxt
, c
) && (c
!= ':'))) {
3417 if (count
++ > XML_PARSER_CHUNK_SIZE
) {
3420 if (ctxt
->instate
== XML_PARSER_EOF
)
3423 if (len
<= INT_MAX
- l
)
3430 * when shrinking to extend the buffer we really need to preserve
3431 * the part of the name we already parsed. Hence rolling back
3432 * by current length.
3434 ctxt
->input
->cur
-= l
;
3436 if (ctxt
->instate
== XML_PARSER_EOF
)
3438 ctxt
->input
->cur
+= l
;
3442 if (len
> maxLength
) {
3443 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NCName");
3446 return(xmlDictLookup(ctxt
->dict
, (BASE_PTR
+ startPosition
), len
));
3451 * @ctxt: an XML parser context
3452 * @len: length of the string parsed
3454 * parse an XML NCName (a Name that contains no colon).
3456 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3457 * CombiningChar | Extender
3459 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3461 * Returns the Name parsed or NULL
3464 static const xmlChar
*
3465 xmlParseNCName(xmlParserCtxtPtr ctxt
) {
3466 const xmlChar
*in
, *e
;
3469 size_t maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3470 XML_MAX_TEXT_LENGTH
:
3471 XML_MAX_NAME_LENGTH
;
3478 * Accelerator for simple ASCII names
3480 in
= ctxt
->input
->cur
;
3481 e
= ctxt
->input
->end
;
3482 if ((((*in
>= 0x61) && (*in
<= 0x7A)) ||
3483 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3484 (*in
== '_')) && (in
< e
)) {
3486 while ((((*in
>= 0x61) && (*in
<= 0x7A)) ||
3487 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3488 ((*in
>= 0x30) && (*in
<= 0x39)) ||
3489 (*in
== '_') || (*in
== '-') ||
3490 (*in
== '.')) && (in
< e
))
3494 if ((*in
> 0) && (*in
< 0x80)) {
3495 count
= in
- ctxt
->input
->cur
;
3496 if (count
> maxLength
) {
3497 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NCName");
3500 ret
= xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
, count
);
3501 ctxt
->input
->cur
= in
;
3502 ctxt
->input
->col
+= count
;
3504 xmlErrMemory(ctxt
, NULL
);
3510 return(xmlParseNCNameComplex(ctxt
));
3514 * xmlParseNameAndCompare:
3515 * @ctxt: an XML parser context
3517 * parse an XML name and compares for match
3518 * (specialized for endtag parsing)
3520 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3521 * and the name for mismatch
3524 static const xmlChar
*
3525 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt
, xmlChar
const *other
) {
3526 register const xmlChar
*cmp
= other
;
3527 register const xmlChar
*in
;
3531 if (ctxt
->instate
== XML_PARSER_EOF
)
3534 in
= ctxt
->input
->cur
;
3535 while (*in
!= 0 && *in
== *cmp
) {
3539 if (*cmp
== 0 && (*in
== '>' || IS_BLANK_CH (*in
))) {
3541 ctxt
->input
->col
+= in
- ctxt
->input
->cur
;
3542 ctxt
->input
->cur
= in
;
3543 return (const xmlChar
*) 1;
3545 /* failure (or end of input buffer), check with full function */
3546 ret
= xmlParseName (ctxt
);
3547 /* strings coming from the dictionary direct compare possible */
3549 return (const xmlChar
*) 1;
3555 * xmlParseStringName:
3556 * @ctxt: an XML parser context
3557 * @str: a pointer to the string pointer (IN/OUT)
3559 * parse an XML name.
3561 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3562 * CombiningChar | Extender
3564 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3566 * [6] Names ::= Name (#x20 Name)*
3568 * Returns the Name parsed or NULL. The @str pointer
3569 * is updated to the current location in the string.
3573 xmlParseStringName(xmlParserCtxtPtr ctxt
, const xmlChar
** str
) {
3574 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
3575 const xmlChar
*cur
= *str
;
3578 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3579 XML_MAX_TEXT_LENGTH
:
3580 XML_MAX_NAME_LENGTH
;
3583 nbParseStringName
++;
3586 c
= CUR_SCHAR(cur
, l
);
3587 if (!xmlIsNameStartChar(ctxt
, c
)) {
3591 COPY_BUF(l
,buf
,len
,c
);
3593 c
= CUR_SCHAR(cur
, l
);
3594 while (xmlIsNameChar(ctxt
, c
)) {
3595 COPY_BUF(l
,buf
,len
,c
);
3597 c
= CUR_SCHAR(cur
, l
);
3598 if (len
>= XML_MAX_NAMELEN
) { /* test bigentname.xml */
3600 * Okay someone managed to make a huge name, so he's ready to pay
3601 * for the processing speed.
3606 buffer
= (xmlChar
*) xmlMallocAtomic(max
* sizeof(xmlChar
));
3607 if (buffer
== NULL
) {
3608 xmlErrMemory(ctxt
, NULL
);
3611 memcpy(buffer
, buf
, len
);
3612 while (xmlIsNameChar(ctxt
, c
)) {
3613 if (len
+ 10 > max
) {
3617 tmp
= (xmlChar
*) xmlRealloc(buffer
,
3618 max
* sizeof(xmlChar
));
3620 xmlErrMemory(ctxt
, NULL
);
3626 COPY_BUF(l
,buffer
,len
,c
);
3628 c
= CUR_SCHAR(cur
, l
);
3629 if (len
> maxLength
) {
3630 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NCName");
3640 if (len
> maxLength
) {
3641 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NCName");
3645 return(xmlStrndup(buf
, len
));
3650 * @ctxt: an XML parser context
3652 * parse an XML Nmtoken.
3654 * [7] Nmtoken ::= (NameChar)+
3656 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3658 * Returns the Nmtoken parsed or NULL
3662 xmlParseNmtoken(xmlParserCtxtPtr ctxt
) {
3663 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
3667 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3668 XML_MAX_TEXT_LENGTH
:
3669 XML_MAX_NAME_LENGTH
;
3676 if (ctxt
->instate
== XML_PARSER_EOF
)
3680 while (xmlIsNameChar(ctxt
, c
)) {
3681 if (count
++ > XML_PARSER_CHUNK_SIZE
) {
3685 COPY_BUF(l
,buf
,len
,c
);
3691 if (ctxt
->instate
== XML_PARSER_EOF
)
3695 if (len
>= XML_MAX_NAMELEN
) {
3697 * Okay someone managed to make a huge token, so he's ready to pay
3698 * for the processing speed.
3703 buffer
= (xmlChar
*) xmlMallocAtomic(max
* sizeof(xmlChar
));
3704 if (buffer
== NULL
) {
3705 xmlErrMemory(ctxt
, NULL
);
3708 memcpy(buffer
, buf
, len
);
3709 while (xmlIsNameChar(ctxt
, c
)) {
3710 if (count
++ > XML_PARSER_CHUNK_SIZE
) {
3713 if (ctxt
->instate
== XML_PARSER_EOF
) {
3718 if (len
+ 10 > max
) {
3722 tmp
= (xmlChar
*) xmlRealloc(buffer
,
3723 max
* sizeof(xmlChar
));
3725 xmlErrMemory(ctxt
, NULL
);
3731 COPY_BUF(l
,buffer
,len
,c
);
3734 if (len
> maxLength
) {
3735 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NmToken");
3746 if (len
> maxLength
) {
3747 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NmToken");
3750 return(xmlStrndup(buf
, len
));
3754 * xmlParseEntityValue:
3755 * @ctxt: an XML parser context
3756 * @orig: if non-NULL store a copy of the original entity value
3758 * parse a value for ENTITY declarations
3760 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3761 * "'" ([^%&'] | PEReference | Reference)* "'"
3763 * Returns the EntityValue parsed with reference substituted or NULL
3767 xmlParseEntityValue(xmlParserCtxtPtr ctxt
, xmlChar
**orig
) {
3768 xmlChar
*buf
= NULL
;
3770 int size
= XML_PARSER_BUFFER_SIZE
;
3772 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3773 XML_MAX_HUGE_LENGTH
:
3774 XML_MAX_TEXT_LENGTH
;
3776 xmlChar
*ret
= NULL
;
3777 const xmlChar
*cur
= NULL
;
3778 xmlParserInputPtr input
;
3780 if (RAW
== '"') stop
= '"';
3781 else if (RAW
== '\'') stop
= '\'';
3783 xmlFatalErr(ctxt
, XML_ERR_ENTITY_NOT_STARTED
, NULL
);
3786 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
3788 xmlErrMemory(ctxt
, NULL
);
3793 * The content of the entity definition is copied in a buffer.
3796 ctxt
->instate
= XML_PARSER_ENTITY_VALUE
;
3797 input
= ctxt
->input
;
3799 if (ctxt
->instate
== XML_PARSER_EOF
)
3804 * NOTE: 4.4.5 Included in Literal
3805 * When a parameter entity reference appears in a literal entity
3806 * value, ... a single or double quote character in the replacement
3807 * text is always treated as a normal data character and will not
3808 * terminate the literal.
3809 * In practice it means we stop the loop only when back at parsing
3810 * the initial entity and the quote is found
3812 while (((IS_CHAR(c
)) && ((c
!= stop
) || /* checked */
3813 (ctxt
->input
!= input
))) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
3814 if (len
+ 5 >= size
) {
3818 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
3820 xmlErrMemory(ctxt
, NULL
);
3825 COPY_BUF(l
,buf
,len
,c
);
3835 if (len
> maxLength
) {
3836 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_NOT_FINISHED
,
3837 "entity value too long\n");
3842 if (ctxt
->instate
== XML_PARSER_EOF
)
3845 xmlFatalErr(ctxt
, XML_ERR_ENTITY_NOT_FINISHED
, NULL
);
3851 * Raise problem w.r.t. '&' and '%' being used in non-entities
3852 * reference constructs. Note Charref will be handled in
3853 * xmlStringDecodeEntities()
3856 while (*cur
!= 0) { /* non input consuming */
3857 if ((*cur
== '%') || ((*cur
== '&') && (cur
[1] != '#'))) {
3863 name
= xmlParseStringName(ctxt
, &cur
);
3868 if ((nameOk
== 0) || (*cur
!= ';')) {
3869 xmlFatalErrMsgInt(ctxt
, XML_ERR_ENTITY_CHAR_ERROR
,
3870 "EntityValue: '%c' forbidden except for entities references\n",
3874 if ((tmp
== '%') && (ctxt
->inSubset
== 1) &&
3875 (ctxt
->inputNr
== 1)) {
3876 xmlFatalErr(ctxt
, XML_ERR_ENTITY_PE_INTERNAL
, NULL
);
3886 * Then PEReference entities are substituted.
3888 * NOTE: 4.4.7 Bypassed
3889 * When a general entity reference appears in the EntityValue in
3890 * an entity declaration, it is bypassed and left as is.
3891 * so XML_SUBSTITUTE_REF is not set here.
3894 ret
= xmlStringDecodeEntities(ctxt
, buf
, XML_SUBSTITUTE_PEREF
,
3909 * xmlParseAttValueComplex:
3910 * @ctxt: an XML parser context
3911 * @len: the resulting attribute len
3912 * @normalize: whether to apply the inner normalization
3914 * parse a value for an attribute, this is the fallback function
3915 * of xmlParseAttValue() when the attribute parsing requires handling
3916 * of non-ASCII characters, or normalization compaction.
3918 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3921 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt
, int *attlen
, int normalize
) {
3923 xmlChar
*buf
= NULL
;
3924 xmlChar
*rep
= NULL
;
3926 size_t buf_size
= 0;
3927 size_t maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3928 XML_MAX_HUGE_LENGTH
:
3929 XML_MAX_TEXT_LENGTH
;
3930 int c
, l
, in_space
= 0;
3931 xmlChar
*current
= NULL
;
3934 if (NXT(0) == '"') {
3935 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
3938 } else if (NXT(0) == '\'') {
3940 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
3943 xmlFatalErr(ctxt
, XML_ERR_ATTRIBUTE_NOT_STARTED
, NULL
);
3948 * allocate a translation buffer.
3950 buf_size
= XML_PARSER_BUFFER_SIZE
;
3951 buf
= (xmlChar
*) xmlMallocAtomic(buf_size
);
3952 if (buf
== NULL
) goto mem_error
;
3955 * OK loop until we reach one of the ending char or a size limit.
3958 while (((NXT(0) != limit
) && /* checked */
3959 (IS_CHAR(c
)) && (c
!= '<')) &&
3960 (ctxt
->instate
!= XML_PARSER_EOF
)) {
3963 if (NXT(1) == '#') {
3964 int val
= xmlParseCharRef(ctxt
);
3967 if (ctxt
->replaceEntities
) {
3968 if (len
+ 10 > buf_size
) {
3969 growBuffer(buf
, 10);
3974 * The reparsing will be done in xmlStringGetNodeList()
3975 * called by the attribute() function in SAX.c
3977 if (len
+ 10 > buf_size
) {
3978 growBuffer(buf
, 10);
3986 } else if (val
!= 0) {
3987 if (len
+ 10 > buf_size
) {
3988 growBuffer(buf
, 10);
3990 len
+= xmlCopyChar(0, &buf
[len
], val
);
3993 ent
= xmlParseEntityRef(ctxt
);
3996 ctxt
->nbentities
+= ent
->owner
;
3997 if ((ent
!= NULL
) &&
3998 (ent
->etype
== XML_INTERNAL_PREDEFINED_ENTITY
)) {
3999 if (len
+ 10 > buf_size
) {
4000 growBuffer(buf
, 10);
4002 if ((ctxt
->replaceEntities
== 0) &&
4003 (ent
->content
[0] == '&')) {
4010 buf
[len
++] = ent
->content
[0];
4012 } else if ((ent
!= NULL
) &&
4013 (ctxt
->replaceEntities
!= 0)) {
4014 if (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) {
4016 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
,
4022 while (*current
!= 0) { /* non input consuming */
4023 if ((*current
== 0xD) || (*current
== 0xA) ||
4024 (*current
== 0x9)) {
4028 buf
[len
++] = *current
++;
4029 if (len
+ 10 > buf_size
) {
4030 growBuffer(buf
, 10);
4037 if (len
+ 10 > buf_size
) {
4038 growBuffer(buf
, 10);
4040 if (ent
->content
!= NULL
)
4041 buf
[len
++] = ent
->content
[0];
4043 } else if (ent
!= NULL
) {
4044 int i
= xmlStrlen(ent
->name
);
4045 const xmlChar
*cur
= ent
->name
;
4048 * This may look absurd but is needed to detect
4051 if ((ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) &&
4052 (ent
->content
!= NULL
) && (ent
->checked
== 0)) {
4053 unsigned long oldnbent
= ctxt
->nbentities
, diff
;
4056 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
,
4057 XML_SUBSTITUTE_REF
, 0, 0, 0);
4060 diff
= ctxt
->nbentities
- oldnbent
+ 1;
4061 if (diff
> INT_MAX
/ 2)
4063 ent
->checked
= diff
* 2;
4065 if (xmlStrchr(rep
, '<'))
4070 ent
->content
[0] = 0;
4075 * Just output the reference
4078 while (len
+ i
+ 10 > buf_size
) {
4079 growBuffer(buf
, i
+ 10);
4082 buf
[len
++] = *cur
++;
4087 if ((c
== 0x20) || (c
== 0xD) || (c
== 0xA) || (c
== 0x9)) {
4088 if ((len
!= 0) || (!normalize
)) {
4089 if ((!normalize
) || (!in_space
)) {
4090 COPY_BUF(l
,buf
,len
,0x20);
4091 while (len
+ 10 > buf_size
) {
4092 growBuffer(buf
, 10);
4099 COPY_BUF(l
,buf
,len
,c
);
4100 if (len
+ 10 > buf_size
) {
4101 growBuffer(buf
, 10);
4108 if (len
> maxLength
) {
4109 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
4110 "AttValue length too long\n");
4114 if (ctxt
->instate
== XML_PARSER_EOF
)
4117 if ((in_space
) && (normalize
)) {
4118 while ((len
> 0) && (buf
[len
- 1] == 0x20)) len
--;
4122 xmlFatalErr(ctxt
, XML_ERR_LT_IN_ATTRIBUTE
, NULL
);
4123 } else if (RAW
!= limit
) {
4124 if ((c
!= 0) && (!IS_CHAR(c
))) {
4125 xmlFatalErrMsg(ctxt
, XML_ERR_INVALID_CHAR
,
4126 "invalid character in attribute value\n");
4128 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
4129 "AttValue: ' expected\n");
4134 if (attlen
!= NULL
) *attlen
= (int) len
;
4138 xmlErrMemory(ctxt
, NULL
);
4149 * @ctxt: an XML parser context
4151 * parse a value for an attribute
4152 * Note: the parser won't do substitution of entities here, this
4153 * will be handled later in xmlStringGetNodeList
4155 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4156 * "'" ([^<&'] | Reference)* "'"
4158 * 3.3.3 Attribute-Value Normalization:
4159 * Before the value of an attribute is passed to the application or
4160 * checked for validity, the XML processor must normalize it as follows:
4161 * - a character reference is processed by appending the referenced
4162 * character to the attribute value
4163 * - an entity reference is processed by recursively processing the
4164 * replacement text of the entity
4165 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4166 * appending #x20 to the normalized value, except that only a single
4167 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4168 * parsed entity or the literal entity value of an internal parsed entity
4169 * - other characters are processed by appending them to the normalized value
4170 * If the declared value is not CDATA, then the XML processor must further
4171 * process the normalized attribute value by discarding any leading and
4172 * trailing space (#x20) characters, and by replacing sequences of space
4173 * (#x20) characters by a single space (#x20) character.
4174 * All attributes for which no declaration has been read should be treated
4175 * by a non-validating parser as if declared CDATA.
4177 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4182 xmlParseAttValue(xmlParserCtxtPtr ctxt
) {
4183 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
)) return(NULL
);
4184 return(xmlParseAttValueInternal(ctxt
, NULL
, NULL
, 0));
4188 * xmlParseSystemLiteral:
4189 * @ctxt: an XML parser context
4191 * parse an XML Literal
4193 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4195 * Returns the SystemLiteral parsed or NULL
4199 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt
) {
4200 xmlChar
*buf
= NULL
;
4202 int size
= XML_PARSER_BUFFER_SIZE
;
4204 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
4205 XML_MAX_TEXT_LENGTH
:
4206 XML_MAX_NAME_LENGTH
;
4208 int state
= ctxt
->instate
;
4215 } else if (RAW
== '\'') {
4219 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_STARTED
, NULL
);
4223 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
4225 xmlErrMemory(ctxt
, NULL
);
4228 ctxt
->instate
= XML_PARSER_SYSTEM_LITERAL
;
4230 while ((IS_CHAR(cur
)) && (cur
!= stop
)) { /* checked */
4231 if (len
+ 5 >= size
) {
4235 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
4238 xmlErrMemory(ctxt
, NULL
);
4239 ctxt
->instate
= (xmlParserInputState
) state
;
4249 if (ctxt
->instate
== XML_PARSER_EOF
) {
4254 COPY_BUF(l
,buf
,len
,cur
);
4262 if (len
> maxLength
) {
4263 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "SystemLiteral");
4265 ctxt
->instate
= (xmlParserInputState
) state
;
4270 ctxt
->instate
= (xmlParserInputState
) state
;
4271 if (!IS_CHAR(cur
)) {
4272 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_FINISHED
, NULL
);
4280 * xmlParsePubidLiteral:
4281 * @ctxt: an XML parser context
4283 * parse an XML public literal
4285 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4287 * Returns the PubidLiteral parsed or NULL.
4291 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt
) {
4292 xmlChar
*buf
= NULL
;
4294 int size
= XML_PARSER_BUFFER_SIZE
;
4295 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
4296 XML_MAX_TEXT_LENGTH
:
4297 XML_MAX_NAME_LENGTH
;
4301 xmlParserInputState oldstate
= ctxt
->instate
;
4307 } else if (RAW
== '\'') {
4311 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_STARTED
, NULL
);
4314 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
4316 xmlErrMemory(ctxt
, NULL
);
4319 ctxt
->instate
= XML_PARSER_PUBLIC_LITERAL
;
4321 while ((IS_PUBIDCHAR_CH(cur
)) && (cur
!= stop
)) { /* checked */
4322 if (len
+ 1 >= size
) {
4326 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
4328 xmlErrMemory(ctxt
, NULL
);
4340 if (ctxt
->instate
== XML_PARSER_EOF
) {
4352 if (len
> maxLength
) {
4353 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "Public ID");
4360 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_FINISHED
, NULL
);
4364 ctxt
->instate
= oldstate
;
4368 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt
, int cdata
);
4371 * used for the test in the inner loop of the char data testing
4373 static const unsigned char test_char_data
[256] = {
4374 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4375 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4376 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4377 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4378 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4379 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4380 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4381 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4382 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4383 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4384 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4385 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4386 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4387 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4388 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4389 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4390 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4391 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4392 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4393 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4394 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4395 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4396 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4397 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4398 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4399 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4400 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4401 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4402 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4403 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4404 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4405 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4410 * @ctxt: an XML parser context
4411 * @cdata: int indicating whether we are within a CDATA section
4413 * parse a CharData section.
4414 * if we are within a CDATA section ']]>' marks an end of section.
4416 * The right angle bracket (>) may be represented using the string ">",
4417 * and must, for compatibility, be escaped using ">" or a character
4418 * reference when it appears in the string "]]>" in content, when that
4419 * string is not marking the end of a CDATA section.
4421 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4425 xmlParseCharData(xmlParserCtxtPtr ctxt
, int cdata
) {
4428 int line
= ctxt
->input
->line
;
4429 int col
= ctxt
->input
->col
;
4435 * Accelerated common case where input don't need to be
4436 * modified before passing it to the handler.
4439 in
= ctxt
->input
->cur
;
4442 while (*in
== 0x20) { in
++; ctxt
->input
->col
++; }
4445 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4447 } while (*in
== 0xA);
4448 goto get_more_space
;
4451 nbchar
= in
- ctxt
->input
->cur
;
4453 const xmlChar
*tmp
= ctxt
->input
->cur
;
4454 ctxt
->input
->cur
= in
;
4456 if ((ctxt
->sax
!= NULL
) &&
4457 (ctxt
->sax
->ignorableWhitespace
!=
4458 ctxt
->sax
->characters
)) {
4459 if (areBlanks(ctxt
, tmp
, nbchar
, 1)) {
4460 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4461 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
4464 if (ctxt
->sax
->characters
!= NULL
)
4465 ctxt
->sax
->characters(ctxt
->userData
,
4467 if (*ctxt
->space
== -1)
4470 } else if ((ctxt
->sax
!= NULL
) &&
4471 (ctxt
->sax
->characters
!= NULL
)) {
4472 ctxt
->sax
->characters(ctxt
->userData
,
4480 ccol
= ctxt
->input
->col
;
4481 while (test_char_data
[*in
]) {
4485 ctxt
->input
->col
= ccol
;
4488 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4490 } while (*in
== 0xA);
4494 if ((in
[1] == ']') && (in
[2] == '>')) {
4495 xmlFatalErr(ctxt
, XML_ERR_MISPLACED_CDATA_END
, NULL
);
4496 ctxt
->input
->cur
= in
+ 1;
4503 nbchar
= in
- ctxt
->input
->cur
;
4505 if ((ctxt
->sax
!= NULL
) &&
4506 (ctxt
->sax
->ignorableWhitespace
!=
4507 ctxt
->sax
->characters
) &&
4508 (IS_BLANK_CH(*ctxt
->input
->cur
))) {
4509 const xmlChar
*tmp
= ctxt
->input
->cur
;
4510 ctxt
->input
->cur
= in
;
4512 if (areBlanks(ctxt
, tmp
, nbchar
, 0)) {
4513 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4514 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
4517 if (ctxt
->sax
->characters
!= NULL
)
4518 ctxt
->sax
->characters(ctxt
->userData
,
4520 if (*ctxt
->space
== -1)
4523 line
= ctxt
->input
->line
;
4524 col
= ctxt
->input
->col
;
4525 } else if (ctxt
->sax
!= NULL
) {
4526 if (ctxt
->sax
->characters
!= NULL
)
4527 ctxt
->sax
->characters(ctxt
->userData
,
4528 ctxt
->input
->cur
, nbchar
);
4529 line
= ctxt
->input
->line
;
4530 col
= ctxt
->input
->col
;
4532 /* something really bad happened in the SAX callback */
4533 if (ctxt
->instate
!= XML_PARSER_CONTENT
)
4536 ctxt
->input
->cur
= in
;
4540 ctxt
->input
->cur
= in
;
4542 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4543 continue; /* while */
4555 if (ctxt
->instate
== XML_PARSER_EOF
)
4557 in
= ctxt
->input
->cur
;
4558 } while (((*in
>= 0x20) && (*in
<= 0x7F)) || (*in
== 0x09) || (*in
== 0x0a));
4561 ctxt
->input
->line
= line
;
4562 ctxt
->input
->col
= col
;
4563 xmlParseCharDataComplex(ctxt
, cdata
);
4567 * xmlParseCharDataComplex:
4568 * @ctxt: an XML parser context
4569 * @cdata: int indicating whether we are within a CDATA section
4571 * parse a CharData section.this is the fallback function
4572 * of xmlParseCharData() when the parsing requires handling
4573 * of non-ASCII characters.
4576 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt
, int cdata
) {
4577 xmlChar buf
[XML_PARSER_BIG_BUFFER_SIZE
+ 5];
4585 while ((cur
!= '<') && /* checked */
4587 (IS_CHAR(cur
))) /* test also done in xmlCurrentChar() */ {
4588 if ((cur
== ']') && (NXT(1) == ']') &&
4592 xmlFatalErr(ctxt
, XML_ERR_MISPLACED_CDATA_END
, NULL
);
4595 COPY_BUF(l
,buf
,nbchar
,cur
);
4596 /* move current position before possible calling of ctxt->sax->characters */
4599 if (nbchar
>= XML_PARSER_BIG_BUFFER_SIZE
) {
4603 * OK the segment is to be consumed as chars.
4605 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
4606 if (areBlanks(ctxt
, buf
, nbchar
, 0)) {
4607 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4608 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
4611 if (ctxt
->sax
->characters
!= NULL
)
4612 ctxt
->sax
->characters(ctxt
->userData
, buf
, nbchar
);
4613 if ((ctxt
->sax
->characters
!=
4614 ctxt
->sax
->ignorableWhitespace
) &&
4615 (*ctxt
->space
== -1))
4620 /* something really bad happened in the SAX callback */
4621 if (ctxt
->instate
!= XML_PARSER_CONTENT
)
4629 if (ctxt
->instate
== XML_PARSER_EOF
)
4636 * OK the segment is to be consumed as chars.
4638 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
4639 if (areBlanks(ctxt
, buf
, nbchar
, 0)) {
4640 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4641 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
, buf
, nbchar
);
4643 if (ctxt
->sax
->characters
!= NULL
)
4644 ctxt
->sax
->characters(ctxt
->userData
, buf
, nbchar
);
4645 if ((ctxt
->sax
->characters
!= ctxt
->sax
->ignorableWhitespace
) &&
4646 (*ctxt
->space
== -1))
4651 if ((cur
!= 0) && (!IS_CHAR(cur
))) {
4652 /* Generate the error and skip the offending character */
4653 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4654 "PCDATA invalid Char value %d\n",
4661 * xmlParseExternalID:
4662 * @ctxt: an XML parser context
4663 * @publicID: a xmlChar** receiving PubidLiteral
4664 * @strict: indicate whether we should restrict parsing to only
4665 * production [75], see NOTE below
4667 * Parse an External ID or a Public ID
4669 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4670 * 'PUBLIC' S PubidLiteral S SystemLiteral
4672 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4673 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4675 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4677 * Returns the function returns SystemLiteral and in the second
4678 * case publicID receives PubidLiteral, is strict is off
4679 * it is possible to return NULL and have publicID set.
4683 xmlParseExternalID(xmlParserCtxtPtr ctxt
, xmlChar
**publicID
, int strict
) {
4684 xmlChar
*URI
= NULL
;
4689 if (CMP6(CUR_PTR
, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4691 if (SKIP_BLANKS
== 0) {
4692 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4693 "Space required after 'SYSTEM'\n");
4695 URI
= xmlParseSystemLiteral(ctxt
);
4697 xmlFatalErr(ctxt
, XML_ERR_URI_REQUIRED
, NULL
);
4699 } else if (CMP6(CUR_PTR
, 'P', 'U', 'B', 'L', 'I', 'C')) {
4701 if (SKIP_BLANKS
== 0) {
4702 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4703 "Space required after 'PUBLIC'\n");
4705 *publicID
= xmlParsePubidLiteral(ctxt
);
4706 if (*publicID
== NULL
) {
4707 xmlFatalErr(ctxt
, XML_ERR_PUBID_REQUIRED
, NULL
);
4711 * We don't handle [83] so "S SystemLiteral" is required.
4713 if (SKIP_BLANKS
== 0) {
4714 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4715 "Space required after the Public Identifier\n");
4719 * We handle [83] so we return immediately, if
4720 * "S SystemLiteral" is not detected. We skip blanks if no
4721 * system literal was found, but this is harmless since we must
4722 * be at the end of a NotationDecl.
4724 if (SKIP_BLANKS
== 0) return(NULL
);
4725 if ((CUR
!= '\'') && (CUR
!= '"')) return(NULL
);
4727 URI
= xmlParseSystemLiteral(ctxt
);
4729 xmlFatalErr(ctxt
, XML_ERR_URI_REQUIRED
, NULL
);
4736 * xmlParseCommentComplex:
4737 * @ctxt: an XML parser context
4738 * @buf: the already parsed part of the buffer
4739 * @len: number of bytes in the buffer
4740 * @size: allocated size of the buffer
4742 * Skip an XML (SGML) comment <!-- .... -->
4743 * The spec says that "For compatibility, the string "--" (double-hyphen)
4744 * must not occur within comments. "
4745 * This is the slow routine in case the accelerator for ascii didn't work
4747 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4750 xmlParseCommentComplex(xmlParserCtxtPtr ctxt
, xmlChar
*buf
,
4751 size_t len
, size_t size
) {
4756 size_t maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
4757 XML_MAX_HUGE_LENGTH
:
4758 XML_MAX_TEXT_LENGTH
;
4761 inputid
= ctxt
->input
->id
;
4765 size
= XML_PARSER_BUFFER_SIZE
;
4766 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
4768 xmlErrMemory(ctxt
, NULL
);
4772 GROW
; /* Assure there's enough input data */
4775 goto not_terminated
;
4777 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4778 "xmlParseComment: invalid xmlChar value %d\n",
4786 goto not_terminated
;
4788 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4789 "xmlParseComment: invalid xmlChar value %d\n",
4797 goto not_terminated
;
4798 while (IS_CHAR(cur
) && /* checked */
4800 (r
!= '-') || (q
!= '-'))) {
4801 if ((r
== '-') && (q
== '-')) {
4802 xmlFatalErr(ctxt
, XML_ERR_HYPHEN_IN_COMMENT
, NULL
);
4804 if (len
+ 5 >= size
) {
4808 new_size
= size
* 2;
4809 new_buf
= (xmlChar
*) xmlRealloc(buf
, new_size
);
4810 if (new_buf
== NULL
) {
4812 xmlErrMemory(ctxt
, NULL
);
4818 COPY_BUF(ql
,buf
,len
,q
);
4829 if (ctxt
->instate
== XML_PARSER_EOF
) {
4842 if (len
> maxLength
) {
4843 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4844 "Comment too big found", NULL
);
4851 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4852 "Comment not terminated \n<!--%.50s\n", buf
);
4853 } else if (!IS_CHAR(cur
)) {
4854 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4855 "xmlParseComment: invalid xmlChar value %d\n",
4858 if (inputid
!= ctxt
->input
->id
) {
4859 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
4860 "Comment doesn't start and stop in the same"
4864 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->comment
!= NULL
) &&
4865 (!ctxt
->disableSAX
))
4866 ctxt
->sax
->comment(ctxt
->userData
, buf
);
4871 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4872 "Comment not terminated\n", NULL
);
4879 * @ctxt: an XML parser context
4881 * Skip an XML (SGML) comment <!-- .... -->
4882 * The spec says that "For compatibility, the string "--" (double-hyphen)
4883 * must not occur within comments. "
4885 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4888 xmlParseComment(xmlParserCtxtPtr ctxt
) {
4889 xmlChar
*buf
= NULL
;
4890 size_t size
= XML_PARSER_BUFFER_SIZE
;
4892 size_t maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
4893 XML_MAX_HUGE_LENGTH
:
4894 XML_MAX_TEXT_LENGTH
;
4895 xmlParserInputState state
;
4902 * Check that there is a comment right here.
4904 if ((RAW
!= '<') || (NXT(1) != '!') ||
4905 (NXT(2) != '-') || (NXT(3) != '-')) return;
4906 state
= ctxt
->instate
;
4907 ctxt
->instate
= XML_PARSER_COMMENT
;
4908 inputid
= ctxt
->input
->id
;
4914 * Accelerated common case where input don't need to be
4915 * modified before passing it to the handler.
4917 in
= ctxt
->input
->cur
;
4921 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4923 } while (*in
== 0xA);
4926 ccol
= ctxt
->input
->col
;
4927 while (((*in
> '-') && (*in
<= 0x7F)) ||
4928 ((*in
>= 0x20) && (*in
< '-')) ||
4933 ctxt
->input
->col
= ccol
;
4936 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4938 } while (*in
== 0xA);
4941 nbchar
= in
- ctxt
->input
->cur
;
4943 * save current set of data
4946 if ((ctxt
->sax
!= NULL
) &&
4947 (ctxt
->sax
->comment
!= NULL
)) {
4949 if ((*in
== '-') && (in
[1] == '-'))
4952 size
= XML_PARSER_BUFFER_SIZE
+ nbchar
;
4953 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
4955 xmlErrMemory(ctxt
, NULL
);
4956 ctxt
->instate
= state
;
4960 } else if (len
+ nbchar
+ 1 >= size
) {
4962 size
+= len
+ nbchar
+ XML_PARSER_BUFFER_SIZE
;
4963 new_buf
= (xmlChar
*) xmlRealloc(buf
,
4964 size
* sizeof(xmlChar
));
4965 if (new_buf
== NULL
) {
4967 xmlErrMemory(ctxt
, NULL
);
4968 ctxt
->instate
= state
;
4973 memcpy(&buf
[len
], ctxt
->input
->cur
, nbchar
);
4978 if (len
> maxLength
) {
4979 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4980 "Comment too big found", NULL
);
4984 ctxt
->input
->cur
= in
;
4987 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4992 ctxt
->input
->cur
= in
;
4994 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
5001 if (ctxt
->instate
== XML_PARSER_EOF
) {
5005 in
= ctxt
->input
->cur
;
5009 if (ctxt
->input
->id
!= inputid
) {
5010 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5011 "comment doesn't start and stop in the"
5015 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->comment
!= NULL
) &&
5016 (!ctxt
->disableSAX
)) {
5018 ctxt
->sax
->comment(ctxt
->userData
, buf
);
5020 ctxt
->sax
->comment(ctxt
->userData
, BAD_CAST
"");
5024 if (ctxt
->instate
!= XML_PARSER_EOF
)
5025 ctxt
->instate
= state
;
5029 xmlFatalErrMsgStr(ctxt
, XML_ERR_HYPHEN_IN_COMMENT
,
5030 "Double hyphen within comment: "
5034 xmlFatalErrMsgStr(ctxt
, XML_ERR_HYPHEN_IN_COMMENT
,
5035 "Double hyphen within comment\n", NULL
);
5036 if (ctxt
->instate
== XML_PARSER_EOF
) {
5047 } while (((*in
>= 0x20) && (*in
<= 0x7F)) || (*in
== 0x09) || (*in
== 0x0a));
5048 xmlParseCommentComplex(ctxt
, buf
, len
, size
);
5049 ctxt
->instate
= state
;
5056 * @ctxt: an XML parser context
5058 * parse the name of a PI
5060 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5062 * Returns the PITarget name or NULL
5066 xmlParsePITarget(xmlParserCtxtPtr ctxt
) {
5067 const xmlChar
*name
;
5069 name
= xmlParseName(ctxt
);
5070 if ((name
!= NULL
) &&
5071 ((name
[0] == 'x') || (name
[0] == 'X')) &&
5072 ((name
[1] == 'm') || (name
[1] == 'M')) &&
5073 ((name
[2] == 'l') || (name
[2] == 'L'))) {
5075 if ((name
[0] == 'x') && (name
[1] == 'm') &&
5076 (name
[2] == 'l') && (name
[3] == 0)) {
5077 xmlFatalErrMsg(ctxt
, XML_ERR_RESERVED_XML_NAME
,
5078 "XML declaration allowed only at the start of the document\n");
5080 } else if (name
[3] == 0) {
5081 xmlFatalErr(ctxt
, XML_ERR_RESERVED_XML_NAME
, NULL
);
5085 if (xmlW3CPIs
[i
] == NULL
) break;
5086 if (xmlStrEqual(name
, (const xmlChar
*)xmlW3CPIs
[i
]))
5089 xmlWarningMsg(ctxt
, XML_ERR_RESERVED_XML_NAME
,
5090 "xmlParsePITarget: invalid name prefix 'xml'\n",
5093 if ((name
!= NULL
) && (xmlStrchr(name
, ':') != NULL
)) {
5094 xmlNsErr(ctxt
, XML_NS_ERR_COLON
,
5095 "colons are forbidden from PI names '%s'\n", name
, NULL
, NULL
);
5100 #ifdef LIBXML_CATALOG_ENABLED
5102 * xmlParseCatalogPI:
5103 * @ctxt: an XML parser context
5104 * @catalog: the PI value string
5106 * parse an XML Catalog Processing Instruction.
5108 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5110 * Occurs only if allowed by the user and if happening in the Misc
5111 * part of the document before any doctype information
5112 * This will add the given catalog to the parsing context in order
5113 * to be used if there is a resolution need further down in the document
5117 xmlParseCatalogPI(xmlParserCtxtPtr ctxt
, const xmlChar
*catalog
) {
5118 xmlChar
*URL
= NULL
;
5119 const xmlChar
*tmp
, *base
;
5123 while (IS_BLANK_CH(*tmp
)) tmp
++;
5124 if (xmlStrncmp(tmp
, BAD_CAST
"catalog", 7))
5127 while (IS_BLANK_CH(*tmp
)) tmp
++;
5132 while (IS_BLANK_CH(*tmp
)) tmp
++;
5134 if ((marker
!= '\'') && (marker
!= '"'))
5138 while ((*tmp
!= 0) && (*tmp
!= marker
)) tmp
++;
5141 URL
= xmlStrndup(base
, tmp
- base
);
5143 while (IS_BLANK_CH(*tmp
)) tmp
++;
5148 ctxt
->catalogs
= xmlCatalogAddLocal(ctxt
->catalogs
, URL
);
5154 xmlWarningMsg(ctxt
, XML_WAR_CATALOG_PI
,
5155 "Catalog PI syntax error: %s\n",
5164 * @ctxt: an XML parser context
5166 * parse an XML Processing Instruction.
5168 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5170 * The processing is transferred to SAX once parsed.
5174 xmlParsePI(xmlParserCtxtPtr ctxt
) {
5175 xmlChar
*buf
= NULL
;
5177 size_t size
= XML_PARSER_BUFFER_SIZE
;
5178 size_t maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
5179 XML_MAX_HUGE_LENGTH
:
5180 XML_MAX_TEXT_LENGTH
;
5182 const xmlChar
*target
;
5183 xmlParserInputState state
;
5186 if ((RAW
== '<') && (NXT(1) == '?')) {
5187 int inputid
= ctxt
->input
->id
;
5188 state
= ctxt
->instate
;
5189 ctxt
->instate
= XML_PARSER_PI
;
5191 * this is a Processing Instruction.
5197 * Parse the target name and check for special support like
5200 target
= xmlParsePITarget(ctxt
);
5201 if (target
!= NULL
) {
5202 if ((RAW
== '?') && (NXT(1) == '>')) {
5203 if (inputid
!= ctxt
->input
->id
) {
5204 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5205 "PI declaration doesn't start and stop in"
5206 " the same entity\n");
5213 if ((ctxt
->sax
) && (!ctxt
->disableSAX
) &&
5214 (ctxt
->sax
->processingInstruction
!= NULL
))
5215 ctxt
->sax
->processingInstruction(ctxt
->userData
,
5217 if (ctxt
->instate
!= XML_PARSER_EOF
)
5218 ctxt
->instate
= state
;
5221 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
5223 xmlErrMemory(ctxt
, NULL
);
5224 ctxt
->instate
= state
;
5227 if (SKIP_BLANKS
== 0) {
5228 xmlFatalErrMsgStr(ctxt
, XML_ERR_SPACE_REQUIRED
,
5229 "ParsePI: PI %s space expected\n", target
);
5232 while (IS_CHAR(cur
) && /* checked */
5233 ((cur
!= '?') || (NXT(1) != '>'))) {
5234 if (len
+ 5 >= size
) {
5236 size_t new_size
= size
* 2;
5237 tmp
= (xmlChar
*) xmlRealloc(buf
, new_size
);
5239 xmlErrMemory(ctxt
, NULL
);
5241 ctxt
->instate
= state
;
5251 if (ctxt
->instate
== XML_PARSER_EOF
) {
5257 COPY_BUF(l
,buf
,len
,cur
);
5265 if (len
> maxLength
) {
5266 xmlFatalErrMsgStr(ctxt
, XML_ERR_PI_NOT_FINISHED
,
5267 "PI %s too big found", target
);
5269 ctxt
->instate
= state
;
5275 xmlFatalErrMsgStr(ctxt
, XML_ERR_PI_NOT_FINISHED
,
5276 "ParsePI: PI %s never end ...\n", target
);
5278 if (inputid
!= ctxt
->input
->id
) {
5279 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5280 "PI declaration doesn't start and stop in"
5281 " the same entity\n");
5285 #ifdef LIBXML_CATALOG_ENABLED
5286 if (((state
== XML_PARSER_MISC
) ||
5287 (state
== XML_PARSER_START
)) &&
5288 (xmlStrEqual(target
, XML_CATALOG_PI
))) {
5289 xmlCatalogAllow allow
= xmlCatalogGetDefaults();
5290 if ((allow
== XML_CATA_ALLOW_DOCUMENT
) ||
5291 (allow
== XML_CATA_ALLOW_ALL
))
5292 xmlParseCatalogPI(ctxt
, buf
);
5300 if ((ctxt
->sax
) && (!ctxt
->disableSAX
) &&
5301 (ctxt
->sax
->processingInstruction
!= NULL
))
5302 ctxt
->sax
->processingInstruction(ctxt
->userData
,
5307 xmlFatalErr(ctxt
, XML_ERR_PI_NOT_STARTED
, NULL
);
5309 if (ctxt
->instate
!= XML_PARSER_EOF
)
5310 ctxt
->instate
= state
;
5315 * xmlParseNotationDecl:
5316 * @ctxt: an XML parser context
5318 * parse a notation declaration
5320 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5322 * Hence there is actually 3 choices:
5323 * 'PUBLIC' S PubidLiteral
5324 * 'PUBLIC' S PubidLiteral S SystemLiteral
5325 * and 'SYSTEM' S SystemLiteral
5327 * See the NOTE on xmlParseExternalID().
5331 xmlParseNotationDecl(xmlParserCtxtPtr ctxt
) {
5332 const xmlChar
*name
;
5336 if (CMP10(CUR_PTR
, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5337 int inputid
= ctxt
->input
->id
;
5340 if (SKIP_BLANKS
== 0) {
5341 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5342 "Space required after '<!NOTATION'\n");
5346 name
= xmlParseName(ctxt
);
5348 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_STARTED
, NULL
);
5351 if (xmlStrchr(name
, ':') != NULL
) {
5352 xmlNsErr(ctxt
, XML_NS_ERR_COLON
,
5353 "colons are forbidden from notation names '%s'\n",
5356 if (SKIP_BLANKS
== 0) {
5357 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5358 "Space required after the NOTATION name'\n");
5365 Systemid
= xmlParseExternalID(ctxt
, &Pubid
, 0);
5369 if (inputid
!= ctxt
->input
->id
) {
5370 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5371 "Notation declaration doesn't start and stop"
5372 " in the same entity\n");
5375 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
5376 (ctxt
->sax
->notationDecl
!= NULL
))
5377 ctxt
->sax
->notationDecl(ctxt
->userData
, name
, Pubid
, Systemid
);
5379 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_FINISHED
, NULL
);
5381 if (Systemid
!= NULL
) xmlFree(Systemid
);
5382 if (Pubid
!= NULL
) xmlFree(Pubid
);
5387 * xmlParseEntityDecl:
5388 * @ctxt: an XML parser context
5390 * parse <!ENTITY declarations
5392 * [70] EntityDecl ::= GEDecl | PEDecl
5394 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5396 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5398 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5400 * [74] PEDef ::= EntityValue | ExternalID
5402 * [76] NDataDecl ::= S 'NDATA' S Name
5404 * [ VC: Notation Declared ]
5405 * The Name must match the declared name of a notation.
5409 xmlParseEntityDecl(xmlParserCtxtPtr ctxt
) {
5410 const xmlChar
*name
= NULL
;
5411 xmlChar
*value
= NULL
;
5412 xmlChar
*URI
= NULL
, *literal
= NULL
;
5413 const xmlChar
*ndata
= NULL
;
5414 int isParameter
= 0;
5415 xmlChar
*orig
= NULL
;
5417 /* GROW; done in the caller */
5418 if (CMP8(CUR_PTR
, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5419 int inputid
= ctxt
->input
->id
;
5422 if (SKIP_BLANKS
== 0) {
5423 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5424 "Space required after '<!ENTITY'\n");
5429 if (SKIP_BLANKS
== 0) {
5430 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5431 "Space required after '%%'\n");
5436 name
= xmlParseName(ctxt
);
5438 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5439 "xmlParseEntityDecl: no name\n");
5442 if (xmlStrchr(name
, ':') != NULL
) {
5443 xmlNsErr(ctxt
, XML_NS_ERR_COLON
,
5444 "colons are forbidden from entities names '%s'\n",
5447 if (SKIP_BLANKS
== 0) {
5448 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5449 "Space required after the entity name\n");
5452 ctxt
->instate
= XML_PARSER_ENTITY_DECL
;
5454 * handle the various case of definitions...
5457 if ((RAW
== '"') || (RAW
== '\'')) {
5458 value
= xmlParseEntityValue(ctxt
, &orig
);
5460 if ((ctxt
->sax
!= NULL
) &&
5461 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
5462 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5463 XML_INTERNAL_PARAMETER_ENTITY
,
5467 URI
= xmlParseExternalID(ctxt
, &literal
, 1);
5468 if ((URI
== NULL
) && (literal
== NULL
)) {
5469 xmlFatalErr(ctxt
, XML_ERR_VALUE_REQUIRED
, NULL
);
5474 uri
= xmlParseURI((const char *) URI
);
5476 xmlErrMsgStr(ctxt
, XML_ERR_INVALID_URI
,
5477 "Invalid URI: %s\n", URI
);
5479 * This really ought to be a well formedness error
5480 * but the XML Core WG decided otherwise c.f. issue
5481 * E26 of the XML erratas.
5484 if (uri
->fragment
!= NULL
) {
5486 * Okay this is foolish to block those but not
5489 xmlFatalErr(ctxt
, XML_ERR_URI_FRAGMENT
, NULL
);
5491 if ((ctxt
->sax
!= NULL
) &&
5492 (!ctxt
->disableSAX
) &&
5493 (ctxt
->sax
->entityDecl
!= NULL
))
5494 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5495 XML_EXTERNAL_PARAMETER_ENTITY
,
5496 literal
, URI
, NULL
);
5503 if ((RAW
== '"') || (RAW
== '\'')) {
5504 value
= xmlParseEntityValue(ctxt
, &orig
);
5505 if ((ctxt
->sax
!= NULL
) &&
5506 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
5507 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5508 XML_INTERNAL_GENERAL_ENTITY
,
5511 * For expat compatibility in SAX mode.
5513 if ((ctxt
->myDoc
== NULL
) ||
5514 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
))) {
5515 if (ctxt
->myDoc
== NULL
) {
5516 ctxt
->myDoc
= xmlNewDoc(SAX_COMPAT_MODE
);
5517 if (ctxt
->myDoc
== NULL
) {
5518 xmlErrMemory(ctxt
, "New Doc failed");
5521 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
5523 if (ctxt
->myDoc
->intSubset
== NULL
)
5524 ctxt
->myDoc
->intSubset
= xmlNewDtd(ctxt
->myDoc
,
5525 BAD_CAST
"fake", NULL
, NULL
);
5527 xmlSAX2EntityDecl(ctxt
, name
, XML_INTERNAL_GENERAL_ENTITY
,
5531 URI
= xmlParseExternalID(ctxt
, &literal
, 1);
5532 if ((URI
== NULL
) && (literal
== NULL
)) {
5533 xmlFatalErr(ctxt
, XML_ERR_VALUE_REQUIRED
, NULL
);
5538 uri
= xmlParseURI((const char *)URI
);
5540 xmlErrMsgStr(ctxt
, XML_ERR_INVALID_URI
,
5541 "Invalid URI: %s\n", URI
);
5543 * This really ought to be a well formedness error
5544 * but the XML Core WG decided otherwise c.f. issue
5545 * E26 of the XML erratas.
5548 if (uri
->fragment
!= NULL
) {
5550 * Okay this is foolish to block those but not
5553 xmlFatalErr(ctxt
, XML_ERR_URI_FRAGMENT
, NULL
);
5558 if ((RAW
!= '>') && (SKIP_BLANKS
== 0)) {
5559 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5560 "Space required before 'NDATA'\n");
5562 if (CMP5(CUR_PTR
, 'N', 'D', 'A', 'T', 'A')) {
5564 if (SKIP_BLANKS
== 0) {
5565 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5566 "Space required after 'NDATA'\n");
5568 ndata
= xmlParseName(ctxt
);
5569 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
5570 (ctxt
->sax
->unparsedEntityDecl
!= NULL
))
5571 ctxt
->sax
->unparsedEntityDecl(ctxt
->userData
, name
,
5572 literal
, URI
, ndata
);
5574 if ((ctxt
->sax
!= NULL
) &&
5575 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
5576 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5577 XML_EXTERNAL_GENERAL_PARSED_ENTITY
,
5578 literal
, URI
, NULL
);
5580 * For expat compatibility in SAX mode.
5581 * assuming the entity replacement was asked for
5583 if ((ctxt
->replaceEntities
!= 0) &&
5584 ((ctxt
->myDoc
== NULL
) ||
5585 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
)))) {
5586 if (ctxt
->myDoc
== NULL
) {
5587 ctxt
->myDoc
= xmlNewDoc(SAX_COMPAT_MODE
);
5588 if (ctxt
->myDoc
== NULL
) {
5589 xmlErrMemory(ctxt
, "New Doc failed");
5592 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
5595 if (ctxt
->myDoc
->intSubset
== NULL
)
5596 ctxt
->myDoc
->intSubset
= xmlNewDtd(ctxt
->myDoc
,
5597 BAD_CAST
"fake", NULL
, NULL
);
5598 xmlSAX2EntityDecl(ctxt
, name
,
5599 XML_EXTERNAL_GENERAL_PARSED_ENTITY
,
5600 literal
, URI
, NULL
);
5605 if (ctxt
->instate
== XML_PARSER_EOF
)
5609 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_NOT_FINISHED
,
5610 "xmlParseEntityDecl: entity %s not terminated\n", name
);
5611 xmlHaltParser(ctxt
);
5613 if (inputid
!= ctxt
->input
->id
) {
5614 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5615 "Entity declaration doesn't start and stop in"
5616 " the same entity\n");
5622 * Ugly mechanism to save the raw entity value.
5624 xmlEntityPtr cur
= NULL
;
5627 if ((ctxt
->sax
!= NULL
) &&
5628 (ctxt
->sax
->getParameterEntity
!= NULL
))
5629 cur
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
5631 if ((ctxt
->sax
!= NULL
) &&
5632 (ctxt
->sax
->getEntity
!= NULL
))
5633 cur
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
5634 if ((cur
== NULL
) && (ctxt
->userData
==ctxt
)) {
5635 cur
= xmlSAX2GetEntity(ctxt
, name
);
5638 if ((cur
!= NULL
) && (cur
->orig
== NULL
)) {
5645 if (value
!= NULL
) xmlFree(value
);
5646 if (URI
!= NULL
) xmlFree(URI
);
5647 if (literal
!= NULL
) xmlFree(literal
);
5648 if (orig
!= NULL
) xmlFree(orig
);
5653 * xmlParseDefaultDecl:
5654 * @ctxt: an XML parser context
5655 * @value: Receive a possible fixed default value for the attribute
5657 * Parse an attribute default declaration
5659 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5661 * [ VC: Required Attribute ]
5662 * if the default declaration is the keyword #REQUIRED, then the
5663 * attribute must be specified for all elements of the type in the
5664 * attribute-list declaration.
5666 * [ VC: Attribute Default Legal ]
5667 * The declared default value must meet the lexical constraints of
5668 * the declared attribute type c.f. xmlValidateAttributeDecl()
5670 * [ VC: Fixed Attribute Default ]
5671 * if an attribute has a default value declared with the #FIXED
5672 * keyword, instances of that attribute must match the default value.
5674 * [ WFC: No < in Attribute Values ]
5675 * handled in xmlParseAttValue()
5677 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5678 * or XML_ATTRIBUTE_FIXED.
5682 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt
, xmlChar
**value
) {
5687 if (CMP9(CUR_PTR
, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5689 return(XML_ATTRIBUTE_REQUIRED
);
5691 if (CMP8(CUR_PTR
, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5693 return(XML_ATTRIBUTE_IMPLIED
);
5695 val
= XML_ATTRIBUTE_NONE
;
5696 if (CMP6(CUR_PTR
, '#', 'F', 'I', 'X', 'E', 'D')) {
5698 val
= XML_ATTRIBUTE_FIXED
;
5699 if (SKIP_BLANKS
== 0) {
5700 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5701 "Space required after '#FIXED'\n");
5704 ret
= xmlParseAttValue(ctxt
);
5705 ctxt
->instate
= XML_PARSER_DTD
;
5707 xmlFatalErrMsg(ctxt
, (xmlParserErrors
)ctxt
->errNo
,
5708 "Attribute default value declaration error\n");
5715 * xmlParseNotationType:
5716 * @ctxt: an XML parser context
5718 * parse an Notation attribute type.
5720 * Note: the leading 'NOTATION' S part has already being parsed...
5722 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5724 * [ VC: Notation Attributes ]
5725 * Values of this type must match one of the notation names included
5726 * in the declaration; all notation names in the declaration must be declared.
5728 * Returns: the notation attribute tree built while parsing
5732 xmlParseNotationType(xmlParserCtxtPtr ctxt
) {
5733 const xmlChar
*name
;
5734 xmlEnumerationPtr ret
= NULL
, last
= NULL
, cur
, tmp
;
5737 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_STARTED
, NULL
);
5744 name
= xmlParseName(ctxt
);
5746 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5747 "Name expected in NOTATION declaration\n");
5748 xmlFreeEnumeration(ret
);
5752 while (tmp
!= NULL
) {
5753 if (xmlStrEqual(name
, tmp
->name
)) {
5754 xmlValidityError(ctxt
, XML_DTD_DUP_TOKEN
,
5755 "standalone: attribute notation value token %s duplicated\n",
5757 if (!xmlDictOwns(ctxt
->dict
, name
))
5758 xmlFree((xmlChar
*) name
);
5764 cur
= xmlCreateEnumeration(name
);
5766 xmlFreeEnumeration(ret
);
5769 if (last
== NULL
) ret
= last
= cur
;
5776 } while (RAW
== '|');
5778 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_FINISHED
, NULL
);
5779 xmlFreeEnumeration(ret
);
5787 * xmlParseEnumerationType:
5788 * @ctxt: an XML parser context
5790 * parse an Enumeration attribute type.
5792 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5794 * [ VC: Enumeration ]
5795 * Values of this type must match one of the Nmtoken tokens in
5798 * Returns: the enumeration attribute tree built while parsing
5802 xmlParseEnumerationType(xmlParserCtxtPtr ctxt
) {
5804 xmlEnumerationPtr ret
= NULL
, last
= NULL
, cur
, tmp
;
5807 xmlFatalErr(ctxt
, XML_ERR_ATTLIST_NOT_STARTED
, NULL
);
5814 name
= xmlParseNmtoken(ctxt
);
5816 xmlFatalErr(ctxt
, XML_ERR_NMTOKEN_REQUIRED
, NULL
);
5820 while (tmp
!= NULL
) {
5821 if (xmlStrEqual(name
, tmp
->name
)) {
5822 xmlValidityError(ctxt
, XML_DTD_DUP_TOKEN
,
5823 "standalone: attribute enumeration value token %s duplicated\n",
5825 if (!xmlDictOwns(ctxt
->dict
, name
))
5832 cur
= xmlCreateEnumeration(name
);
5833 if (!xmlDictOwns(ctxt
->dict
, name
))
5836 xmlFreeEnumeration(ret
);
5839 if (last
== NULL
) ret
= last
= cur
;
5846 } while (RAW
== '|');
5848 xmlFatalErr(ctxt
, XML_ERR_ATTLIST_NOT_FINISHED
, NULL
);
5856 * xmlParseEnumeratedType:
5857 * @ctxt: an XML parser context
5858 * @tree: the enumeration tree built while parsing
5860 * parse an Enumerated attribute type.
5862 * [57] EnumeratedType ::= NotationType | Enumeration
5864 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5867 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5871 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt
, xmlEnumerationPtr
*tree
) {
5872 if (CMP8(CUR_PTR
, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5874 if (SKIP_BLANKS
== 0) {
5875 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5876 "Space required after 'NOTATION'\n");
5879 *tree
= xmlParseNotationType(ctxt
);
5880 if (*tree
== NULL
) return(0);
5881 return(XML_ATTRIBUTE_NOTATION
);
5883 *tree
= xmlParseEnumerationType(ctxt
);
5884 if (*tree
== NULL
) return(0);
5885 return(XML_ATTRIBUTE_ENUMERATION
);
5889 * xmlParseAttributeType:
5890 * @ctxt: an XML parser context
5891 * @tree: the enumeration tree built while parsing
5893 * parse the attribute type (AttType) of an attribute definition
5895 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5897 * [55] StringType ::= 'CDATA'
5899 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5900 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5902 * Validity constraints for attribute values syntax are checked in
5903 * xmlValidateAttributeValue()
5906 * Values of type ID must match the Name production. A name must not
5907 * appear more than once in an XML document as a value of this type;
5908 * i.e., ID values must uniquely identify the elements which bear them.
5910 * [ VC: One ID per Element Type ]
5911 * No element type may have more than one ID attribute specified.
5913 * [ VC: ID Attribute Default ]
5914 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5917 * Values of type IDREF must match the Name production, and values
5918 * of type IDREFS must match Names; each IDREF Name must match the value
5919 * of an ID attribute on some element in the XML document; i.e. IDREF
5920 * values must match the value of some ID attribute.
5922 * [ VC: Entity Name ]
5923 * Values of type ENTITY must match the Name production, values
5924 * of type ENTITIES must match Names; each Entity Name must match the
5925 * name of an unparsed entity declared in the DTD.
5927 * [ VC: Name Token ]
5928 * Values of type NMTOKEN must match the Nmtoken production; values
5929 * of type NMTOKENS must match Nmtokens.
5931 * Returns the attribute type
5934 xmlParseAttributeType(xmlParserCtxtPtr ctxt
, xmlEnumerationPtr
*tree
) {
5936 if (CMP5(CUR_PTR
, 'C', 'D', 'A', 'T', 'A')) {
5938 return(XML_ATTRIBUTE_CDATA
);
5939 } else if (CMP6(CUR_PTR
, 'I', 'D', 'R', 'E', 'F', 'S')) {
5941 return(XML_ATTRIBUTE_IDREFS
);
5942 } else if (CMP5(CUR_PTR
, 'I', 'D', 'R', 'E', 'F')) {
5944 return(XML_ATTRIBUTE_IDREF
);
5945 } else if ((RAW
== 'I') && (NXT(1) == 'D')) {
5947 return(XML_ATTRIBUTE_ID
);
5948 } else if (CMP6(CUR_PTR
, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5950 return(XML_ATTRIBUTE_ENTITY
);
5951 } else if (CMP8(CUR_PTR
, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5953 return(XML_ATTRIBUTE_ENTITIES
);
5954 } else if (CMP8(CUR_PTR
, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5956 return(XML_ATTRIBUTE_NMTOKENS
);
5957 } else if (CMP7(CUR_PTR
, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5959 return(XML_ATTRIBUTE_NMTOKEN
);
5961 return(xmlParseEnumeratedType(ctxt
, tree
));
5965 * xmlParseAttributeListDecl:
5966 * @ctxt: an XML parser context
5968 * parse the Attribute list declaration for an element
5970 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5972 * [53] AttDef ::= S Name S AttType S DefaultDecl
5976 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt
) {
5977 const xmlChar
*elemName
;
5978 const xmlChar
*attrName
;
5979 xmlEnumerationPtr tree
;
5981 if (CMP9(CUR_PTR
, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5982 int inputid
= ctxt
->input
->id
;
5985 if (SKIP_BLANKS
== 0) {
5986 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5987 "Space required after '<!ATTLIST'\n");
5989 elemName
= xmlParseName(ctxt
);
5990 if (elemName
== NULL
) {
5991 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5992 "ATTLIST: no name for Element\n");
5997 while ((RAW
!= '>') && (ctxt
->instate
!= XML_PARSER_EOF
)) {
6000 xmlChar
*defaultValue
= NULL
;
6004 attrName
= xmlParseName(ctxt
);
6005 if (attrName
== NULL
) {
6006 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
6007 "ATTLIST: no name for Attribute\n");
6011 if (SKIP_BLANKS
== 0) {
6012 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6013 "Space required after the attribute name\n");
6017 type
= xmlParseAttributeType(ctxt
, &tree
);
6023 if (SKIP_BLANKS
== 0) {
6024 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6025 "Space required after the attribute type\n");
6027 xmlFreeEnumeration(tree
);
6031 def
= xmlParseDefaultDecl(ctxt
, &defaultValue
);
6033 if (defaultValue
!= NULL
)
6034 xmlFree(defaultValue
);
6036 xmlFreeEnumeration(tree
);
6039 if ((type
!= XML_ATTRIBUTE_CDATA
) && (defaultValue
!= NULL
))
6040 xmlAttrNormalizeSpace(defaultValue
, defaultValue
);
6044 if (SKIP_BLANKS
== 0) {
6045 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6046 "Space required after the attribute default value\n");
6047 if (defaultValue
!= NULL
)
6048 xmlFree(defaultValue
);
6050 xmlFreeEnumeration(tree
);
6054 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
6055 (ctxt
->sax
->attributeDecl
!= NULL
))
6056 ctxt
->sax
->attributeDecl(ctxt
->userData
, elemName
, attrName
,
6057 type
, def
, defaultValue
, tree
);
6058 else if (tree
!= NULL
)
6059 xmlFreeEnumeration(tree
);
6061 if ((ctxt
->sax2
) && (defaultValue
!= NULL
) &&
6062 (def
!= XML_ATTRIBUTE_IMPLIED
) &&
6063 (def
!= XML_ATTRIBUTE_REQUIRED
)) {
6064 xmlAddDefAttrs(ctxt
, elemName
, attrName
, defaultValue
);
6067 xmlAddSpecialAttr(ctxt
, elemName
, attrName
, type
);
6069 if (defaultValue
!= NULL
)
6070 xmlFree(defaultValue
);
6074 if (inputid
!= ctxt
->input
->id
) {
6075 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6076 "Attribute list declaration doesn't start and"
6077 " stop in the same entity\n");
6085 * xmlParseElementMixedContentDecl:
6086 * @ctxt: an XML parser context
6087 * @inputchk: the input used for the current entity, needed for boundary checks
6089 * parse the declaration for a Mixed Element content
6090 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6092 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6093 * '(' S? '#PCDATA' S? ')'
6095 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6097 * [ VC: No Duplicate Types ]
6098 * The same name must not appear more than once in a single
6099 * mixed-content declaration.
6101 * returns: the list of the xmlElementContentPtr describing the element choices
6103 xmlElementContentPtr
6104 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt
, int inputchk
) {
6105 xmlElementContentPtr ret
= NULL
, cur
= NULL
, n
;
6106 const xmlChar
*elem
= NULL
;
6109 if (CMP7(CUR_PTR
, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6114 if (ctxt
->input
->id
!= inputchk
) {
6115 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6116 "Element content declaration doesn't start and"
6117 " stop in the same entity\n");
6120 ret
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_PCDATA
);
6124 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6129 if ((RAW
== '(') || (RAW
== '|')) {
6130 ret
= cur
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_PCDATA
);
6131 if (ret
== NULL
) return(NULL
);
6133 while ((RAW
== '|') && (ctxt
->instate
!= XML_PARSER_EOF
)) {
6136 ret
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_OR
);
6138 xmlFreeDocElementContent(ctxt
->myDoc
, cur
);
6146 n
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_OR
);
6148 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6151 n
->c1
= xmlNewDocElementContent(ctxt
->myDoc
, elem
, XML_ELEMENT_CONTENT_ELEMENT
);
6160 elem
= xmlParseName(ctxt
);
6162 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
6163 "xmlParseElementMixedContentDecl : Name expected\n");
6164 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6170 if ((RAW
== ')') && (NXT(1) == '*')) {
6172 cur
->c2
= xmlNewDocElementContent(ctxt
->myDoc
, elem
,
6173 XML_ELEMENT_CONTENT_ELEMENT
);
6174 if (cur
->c2
!= NULL
)
6175 cur
->c2
->parent
= cur
;
6178 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6179 if (ctxt
->input
->id
!= inputchk
) {
6180 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6181 "Element content declaration doesn't start and"
6182 " stop in the same entity\n");
6186 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6187 xmlFatalErr(ctxt
, XML_ERR_MIXED_NOT_STARTED
, NULL
);
6192 xmlFatalErr(ctxt
, XML_ERR_PCDATA_REQUIRED
, NULL
);
6198 * xmlParseElementChildrenContentDeclPriv:
6199 * @ctxt: an XML parser context
6200 * @inputchk: the input used for the current entity, needed for boundary checks
6201 * @depth: the level of recursion
6203 * parse the declaration for an Element children content (choice or seq)
6204 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6207 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6209 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6211 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6213 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6215 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6216 * TODO Parameter-entity replacement text must be properly nested
6217 * with parenthesized groups. That is to say, if either of the
6218 * opening or closing parentheses in a choice, seq, or Mixed
6219 * construct is contained in the replacement text for a parameter
6220 * entity, both must be contained in the same replacement text. For
6221 * interoperability, if a parameter-entity reference appears in a
6222 * choice, seq, or Mixed construct, its replacement text should not
6223 * be empty, and neither the first nor last non-blank character of
6224 * the replacement text should be a connector (| or ,).
6226 * Returns the tree of xmlElementContentPtr describing the element
6229 static xmlElementContentPtr
6230 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt
, int inputchk
,
6232 xmlElementContentPtr ret
= NULL
, cur
= NULL
, last
= NULL
, op
= NULL
;
6233 const xmlChar
*elem
;
6236 if (((depth
> 128) && ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
6238 xmlFatalErrMsgInt(ctxt
, XML_ERR_ELEMCONTENT_NOT_FINISHED
,
6239 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6246 int inputid
= ctxt
->input
->id
;
6248 /* Recurse on first child */
6251 cur
= ret
= xmlParseElementChildrenContentDeclPriv(ctxt
, inputid
,
6258 elem
= xmlParseName(ctxt
);
6260 xmlFatalErr(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
, NULL
);
6263 cur
= ret
= xmlNewDocElementContent(ctxt
->myDoc
, elem
, XML_ELEMENT_CONTENT_ELEMENT
);
6265 xmlErrMemory(ctxt
, NULL
);
6270 cur
->ocur
= XML_ELEMENT_CONTENT_OPT
;
6272 } else if (RAW
== '*') {
6273 cur
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6275 } else if (RAW
== '+') {
6276 cur
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
6279 cur
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6285 while ((RAW
!= ')') && (ctxt
->instate
!= XML_PARSER_EOF
)) {
6287 * Each loop we parse one separator and one element.
6290 if (type
== 0) type
= CUR
;
6293 * Detect "Name | Name , Name" error
6295 else if (type
!= CUR
) {
6296 xmlFatalErrMsgInt(ctxt
, XML_ERR_SEPARATOR_REQUIRED
,
6297 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6299 if ((last
!= NULL
) && (last
!= ret
))
6300 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6302 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6307 op
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_SEQ
);
6309 if ((last
!= NULL
) && (last
!= ret
))
6310 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6311 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6329 } else if (RAW
== '|') {
6330 if (type
== 0) type
= CUR
;
6333 * Detect "Name , Name | Name" error
6335 else if (type
!= CUR
) {
6336 xmlFatalErrMsgInt(ctxt
, XML_ERR_SEPARATOR_REQUIRED
,
6337 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6339 if ((last
!= NULL
) && (last
!= ret
))
6340 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6342 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6347 op
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_OR
);
6349 if ((last
!= NULL
) && (last
!= ret
))
6350 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6352 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6371 xmlFatalErr(ctxt
, XML_ERR_ELEMCONTENT_NOT_FINISHED
, NULL
);
6372 if ((last
!= NULL
) && (last
!= ret
))
6373 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6375 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6382 int inputid
= ctxt
->input
->id
;
6383 /* Recurse on second child */
6386 last
= xmlParseElementChildrenContentDeclPriv(ctxt
, inputid
,
6390 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6395 elem
= xmlParseName(ctxt
);
6397 xmlFatalErr(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
, NULL
);
6399 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6402 last
= xmlNewDocElementContent(ctxt
->myDoc
, elem
, XML_ELEMENT_CONTENT_ELEMENT
);
6405 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6409 last
->ocur
= XML_ELEMENT_CONTENT_OPT
;
6411 } else if (RAW
== '*') {
6412 last
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6414 } else if (RAW
== '+') {
6415 last
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
6418 last
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6424 if ((cur
!= NULL
) && (last
!= NULL
)) {
6429 if (ctxt
->input
->id
!= inputchk
) {
6430 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6431 "Element content declaration doesn't start and stop in"
6432 " the same entity\n");
6437 if ((ret
->ocur
== XML_ELEMENT_CONTENT_PLUS
) ||
6438 (ret
->ocur
== XML_ELEMENT_CONTENT_MULT
))
6439 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6441 ret
->ocur
= XML_ELEMENT_CONTENT_OPT
;
6444 } else if (RAW
== '*') {
6446 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6449 * Some normalization:
6450 * (a | b* | c?)* == (a | b | c)*
6452 while ((cur
!= NULL
) && (cur
->type
== XML_ELEMENT_CONTENT_OR
)) {
6453 if ((cur
->c1
!= NULL
) &&
6454 ((cur
->c1
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6455 (cur
->c1
->ocur
== XML_ELEMENT_CONTENT_MULT
)))
6456 cur
->c1
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6457 if ((cur
->c2
!= NULL
) &&
6458 ((cur
->c2
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6459 (cur
->c2
->ocur
== XML_ELEMENT_CONTENT_MULT
)))
6460 cur
->c2
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6465 } else if (RAW
== '+') {
6469 if ((ret
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6470 (ret
->ocur
== XML_ELEMENT_CONTENT_MULT
))
6471 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6473 ret
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
6475 * Some normalization:
6476 * (a | b*)+ == (a | b)*
6477 * (a | b?)+ == (a | b)*
6479 while ((cur
!= NULL
) && (cur
->type
== XML_ELEMENT_CONTENT_OR
)) {
6480 if ((cur
->c1
!= NULL
) &&
6481 ((cur
->c1
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6482 (cur
->c1
->ocur
== XML_ELEMENT_CONTENT_MULT
))) {
6483 cur
->c1
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6486 if ((cur
->c2
!= NULL
) &&
6487 ((cur
->c2
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6488 (cur
->c2
->ocur
== XML_ELEMENT_CONTENT_MULT
))) {
6489 cur
->c2
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6495 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6503 * xmlParseElementChildrenContentDecl:
6504 * @ctxt: an XML parser context
6505 * @inputchk: the input used for the current entity, needed for boundary checks
6507 * parse the declaration for an Element children content (choice or seq)
6508 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6510 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6512 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6514 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6516 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6518 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6519 * TODO Parameter-entity replacement text must be properly nested
6520 * with parenthesized groups. That is to say, if either of the
6521 * opening or closing parentheses in a choice, seq, or Mixed
6522 * construct is contained in the replacement text for a parameter
6523 * entity, both must be contained in the same replacement text. For
6524 * interoperability, if a parameter-entity reference appears in a
6525 * choice, seq, or Mixed construct, its replacement text should not
6526 * be empty, and neither the first nor last non-blank character of
6527 * the replacement text should be a connector (| or ,).
6529 * Returns the tree of xmlElementContentPtr describing the element
6532 xmlElementContentPtr
6533 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt
, int inputchk
) {
6534 /* stub left for API/ABI compat */
6535 return(xmlParseElementChildrenContentDeclPriv(ctxt
, inputchk
, 1));
6539 * xmlParseElementContentDecl:
6540 * @ctxt: an XML parser context
6541 * @name: the name of the element being defined.
6542 * @result: the Element Content pointer will be stored here if any
6544 * parse the declaration for an Element content either Mixed or Children,
6545 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6547 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6549 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6553 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt
, const xmlChar
*name
,
6554 xmlElementContentPtr
*result
) {
6556 xmlElementContentPtr tree
= NULL
;
6557 int inputid
= ctxt
->input
->id
;
6563 xmlFatalErrMsgStr(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
,
6564 "xmlParseElementContentDecl : %s '(' expected\n", name
);
6569 if (ctxt
->instate
== XML_PARSER_EOF
)
6572 if (CMP7(CUR_PTR
, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6573 tree
= xmlParseElementMixedContentDecl(ctxt
, inputid
);
6574 res
= XML_ELEMENT_TYPE_MIXED
;
6576 tree
= xmlParseElementChildrenContentDeclPriv(ctxt
, inputid
, 1);
6577 res
= XML_ELEMENT_TYPE_ELEMENT
;
6585 * xmlParseElementDecl:
6586 * @ctxt: an XML parser context
6588 * parse an Element declaration.
6590 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6592 * [ VC: Unique Element Type Declaration ]
6593 * No element type may be declared more than once
6595 * Returns the type of the element, or -1 in case of error
6598 xmlParseElementDecl(xmlParserCtxtPtr ctxt
) {
6599 const xmlChar
*name
;
6601 xmlElementContentPtr content
= NULL
;
6603 /* GROW; done in the caller */
6604 if (CMP9(CUR_PTR
, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6605 int inputid
= ctxt
->input
->id
;
6608 if (SKIP_BLANKS
== 0) {
6609 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6610 "Space required after 'ELEMENT'\n");
6613 name
= xmlParseName(ctxt
);
6615 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
6616 "xmlParseElementDecl: no name for Element\n");
6619 if (SKIP_BLANKS
== 0) {
6620 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6621 "Space required after the element name\n");
6623 if (CMP5(CUR_PTR
, 'E', 'M', 'P', 'T', 'Y')) {
6626 * Element must always be empty.
6628 ret
= XML_ELEMENT_TYPE_EMPTY
;
6629 } else if ((RAW
== 'A') && (NXT(1) == 'N') &&
6633 * Element is a generic container.
6635 ret
= XML_ELEMENT_TYPE_ANY
;
6636 } else if (RAW
== '(') {
6637 ret
= xmlParseElementContentDecl(ctxt
, name
, &content
);
6640 * [ WFC: PEs in Internal Subset ] error handling.
6642 if ((RAW
== '%') && (ctxt
->external
== 0) &&
6643 (ctxt
->inputNr
== 1)) {
6644 xmlFatalErrMsg(ctxt
, XML_ERR_PEREF_IN_INT_SUBSET
,
6645 "PEReference: forbidden within markup decl in internal subset\n");
6647 xmlFatalErrMsg(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
,
6648 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6656 xmlFatalErr(ctxt
, XML_ERR_GT_REQUIRED
, NULL
);
6657 if (content
!= NULL
) {
6658 xmlFreeDocElementContent(ctxt
->myDoc
, content
);
6661 if (inputid
!= ctxt
->input
->id
) {
6662 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6663 "Element declaration doesn't start and stop in"
6664 " the same entity\n");
6668 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
6669 (ctxt
->sax
->elementDecl
!= NULL
)) {
6670 if (content
!= NULL
)
6671 content
->parent
= NULL
;
6672 ctxt
->sax
->elementDecl(ctxt
->userData
, name
, ret
,
6674 if ((content
!= NULL
) && (content
->parent
== NULL
)) {
6676 * this is a trick: if xmlAddElementDecl is called,
6677 * instead of copying the full tree it is plugged directly
6678 * if called from the parser. Avoid duplicating the
6679 * interfaces or change the API/ABI
6681 xmlFreeDocElementContent(ctxt
->myDoc
, content
);
6683 } else if (content
!= NULL
) {
6684 xmlFreeDocElementContent(ctxt
->myDoc
, content
);
6692 * xmlParseConditionalSections
6693 * @ctxt: an XML parser context
6695 * [61] conditionalSect ::= includeSect | ignoreSect
6696 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6697 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6698 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6699 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6703 xmlParseConditionalSections(xmlParserCtxtPtr ctxt
) {
6704 int *inputIds
= NULL
;
6705 size_t inputIdsSize
= 0;
6708 while (ctxt
->instate
!= XML_PARSER_EOF
) {
6709 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6710 int id
= ctxt
->input
->id
;
6715 if (CMP7(CUR_PTR
, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6719 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_INVALID
, NULL
);
6720 xmlHaltParser(ctxt
);
6723 if (ctxt
->input
->id
!= id
) {
6724 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6725 "All markup of the conditional section is"
6726 " not in the same entity\n");
6730 if (inputIdsSize
<= depth
) {
6733 inputIdsSize
= (inputIdsSize
== 0 ? 4 : inputIdsSize
* 2);
6734 tmp
= (int *) xmlRealloc(inputIds
,
6735 inputIdsSize
* sizeof(int));
6737 xmlErrMemory(ctxt
, NULL
);
6742 inputIds
[depth
] = id
;
6744 } else if (CMP6(CUR_PTR
, 'I', 'G', 'N', 'O', 'R', 'E')) {
6746 xmlParserInputState instate
;
6747 size_t ignoreDepth
= 0;
6752 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_INVALID
, NULL
);
6753 xmlHaltParser(ctxt
);
6756 if (ctxt
->input
->id
!= id
) {
6757 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6758 "All markup of the conditional section is"
6759 " not in the same entity\n");
6764 * Parse up to the end of the conditional section but disable
6765 * SAX event generating DTD building in the meantime
6767 state
= ctxt
->disableSAX
;
6768 instate
= ctxt
->instate
;
6769 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6770 ctxt
->instate
= XML_PARSER_IGNORE
;
6773 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6776 /* Check for integer overflow */
6777 if (ignoreDepth
== 0) {
6778 xmlErrMemory(ctxt
, NULL
);
6781 } else if ((RAW
== ']') && (NXT(1) == ']') &&
6783 if (ignoreDepth
== 0)
6792 ctxt
->disableSAX
= state
;
6793 ctxt
->instate
= instate
;
6796 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_NOT_FINISHED
, NULL
);
6799 if (ctxt
->input
->id
!= id
) {
6800 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6801 "All markup of the conditional section is"
6802 " not in the same entity\n");
6806 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_INVALID_KEYWORD
, NULL
);
6807 xmlHaltParser(ctxt
);
6810 } else if ((depth
> 0) &&
6811 (RAW
== ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6813 if (ctxt
->input
->id
!= inputIds
[depth
]) {
6814 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6815 "All markup of the conditional section is not"
6816 " in the same entity\n");
6820 int id
= ctxt
->input
->id
;
6821 unsigned long cons
= CUR_CONSUMED
;
6823 xmlParseMarkupDecl(ctxt
);
6825 if ((id
== ctxt
->input
->id
) && (cons
== CUR_CONSUMED
)) {
6826 xmlFatalErr(ctxt
, XML_ERR_EXT_SUBSET_NOT_FINISHED
, NULL
);
6827 xmlHaltParser(ctxt
);
6844 * xmlParseMarkupDecl:
6845 * @ctxt: an XML parser context
6847 * parse Markup declarations
6849 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6850 * NotationDecl | PI | Comment
6852 * [ VC: Proper Declaration/PE Nesting ]
6853 * Parameter-entity replacement text must be properly nested with
6854 * markup declarations. That is to say, if either the first character
6855 * or the last character of a markup declaration (markupdecl above) is
6856 * contained in the replacement text for a parameter-entity reference,
6857 * both must be contained in the same replacement text.
6859 * [ WFC: PEs in Internal Subset ]
6860 * In the internal DTD subset, parameter-entity references can occur
6861 * only where markup declarations can occur, not within markup declarations.
6862 * (This does not apply to references that occur in external parameter
6863 * entities or to the external subset.)
6866 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt
) {
6869 if (NXT(1) == '!') {
6873 xmlParseElementDecl(ctxt
);
6874 else if (NXT(3) == 'N')
6875 xmlParseEntityDecl(ctxt
);
6878 xmlParseAttributeListDecl(ctxt
);
6881 xmlParseNotationDecl(ctxt
);
6884 xmlParseComment(ctxt
);
6887 /* there is an error but it will be detected later */
6890 } else if (NXT(1) == '?') {
6896 * detect requirement to exit there and act accordingly
6897 * and avoid having instate overridden later on
6899 if (ctxt
->instate
== XML_PARSER_EOF
)
6902 ctxt
->instate
= XML_PARSER_DTD
;
6907 * @ctxt: an XML parser context
6909 * parse an XML declaration header for external entities
6911 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6915 xmlParseTextDecl(xmlParserCtxtPtr ctxt
) {
6917 const xmlChar
*encoding
;
6921 * We know that '<?xml' is here.
6923 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6926 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_STARTED
, NULL
);
6930 /* Avoid expansion of parameter entities when skipping blanks. */
6931 oldstate
= ctxt
->instate
;
6932 ctxt
->instate
= XML_PARSER_START
;
6934 if (SKIP_BLANKS
== 0) {
6935 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6936 "Space needed after '<?xml'\n");
6940 * We may have the VersionInfo here.
6942 version
= xmlParseVersionInfo(ctxt
);
6943 if (version
== NULL
)
6944 version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
6946 if (SKIP_BLANKS
== 0) {
6947 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6948 "Space needed here\n");
6951 ctxt
->input
->version
= version
;
6954 * We must have the encoding declaration
6956 encoding
= xmlParseEncodingDecl(ctxt
);
6957 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
6959 * The XML REC instructs us to stop parsing right here
6961 ctxt
->instate
= oldstate
;
6964 if ((encoding
== NULL
) && (ctxt
->errNo
== XML_ERR_OK
)) {
6965 xmlFatalErrMsg(ctxt
, XML_ERR_MISSING_ENCODING
,
6966 "Missing encoding in text declaration\n");
6970 if ((RAW
== '?') && (NXT(1) == '>')) {
6972 } else if (RAW
== '>') {
6973 /* Deprecated old WD ... */
6974 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
6977 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
6978 MOVETO_ENDTAG(CUR_PTR
);
6982 ctxt
->instate
= oldstate
;
6986 * xmlParseExternalSubset:
6987 * @ctxt: an XML parser context
6988 * @ExternalID: the external identifier
6989 * @SystemID: the system identifier (or URL)
6991 * parse Markup declarations from an external subset
6993 * [30] extSubset ::= textDecl? extSubsetDecl
6995 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6998 xmlParseExternalSubset(xmlParserCtxtPtr ctxt
, const xmlChar
*ExternalID
,
6999 const xmlChar
*SystemID
) {
7000 xmlDetectSAX2(ctxt
);
7003 if ((ctxt
->encoding
== NULL
) &&
7004 (ctxt
->input
->end
- ctxt
->input
->cur
>= 4)) {
7006 xmlCharEncoding enc
;
7012 enc
= xmlDetectCharEncoding(start
, 4);
7013 if (enc
!= XML_CHAR_ENCODING_NONE
)
7014 xmlSwitchEncoding(ctxt
, enc
);
7017 if (CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) {
7018 xmlParseTextDecl(ctxt
);
7019 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
7021 * The XML REC instructs us to stop parsing right here
7023 xmlHaltParser(ctxt
);
7027 if (ctxt
->myDoc
== NULL
) {
7028 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
7029 if (ctxt
->myDoc
== NULL
) {
7030 xmlErrMemory(ctxt
, "New Doc failed");
7033 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
7035 if ((ctxt
->myDoc
!= NULL
) && (ctxt
->myDoc
->intSubset
== NULL
))
7036 xmlCreateIntSubset(ctxt
->myDoc
, NULL
, ExternalID
, SystemID
);
7038 ctxt
->instate
= XML_PARSER_DTD
;
7041 while (((RAW
== '<') && (NXT(1) == '?')) ||
7042 ((RAW
== '<') && (NXT(1) == '!')) ||
7044 int id
= ctxt
->input
->id
;
7045 unsigned long cons
= CUR_CONSUMED
;
7048 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7049 xmlParseConditionalSections(ctxt
);
7051 xmlParseMarkupDecl(ctxt
);
7054 if ((id
== ctxt
->input
->id
) && (cons
== CUR_CONSUMED
)) {
7055 xmlFatalErr(ctxt
, XML_ERR_EXT_SUBSET_NOT_FINISHED
, NULL
);
7061 xmlFatalErr(ctxt
, XML_ERR_EXT_SUBSET_NOT_FINISHED
, NULL
);
7067 * xmlParseReference:
7068 * @ctxt: an XML parser context
7070 * parse and handle entity references in content. Depending on the SAX
7071 * interface, this may end up in a call to characters() if this is a
7072 * CharRef or a predefined entity, if there is no reference() callback,
7073 * or if the parser was asked to switch to that mode.
7075 * [67] Reference ::= EntityRef | CharRef
7078 xmlParseReference(xmlParserCtxtPtr ctxt
) {
7082 xmlNodePtr list
= NULL
;
7083 xmlParserErrors ret
= XML_ERR_OK
;
7090 * Simple case of a CharRef
7092 if (NXT(1) == '#') {
7096 int value
= xmlParseCharRef(ctxt
);
7100 if (ctxt
->charset
!= XML_CHAR_ENCODING_UTF8
) {
7102 * So we are using non-UTF-8 buffers
7103 * Check that the char fit on 8bits, if not
7104 * generate a CharRef.
7106 if (value
<= 0xFF) {
7109 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
7110 (!ctxt
->disableSAX
))
7111 ctxt
->sax
->characters(ctxt
->userData
, out
, 1);
7113 if ((hex
== 'x') || (hex
== 'X'))
7114 snprintf((char *)out
, sizeof(out
), "#x%X", value
);
7116 snprintf((char *)out
, sizeof(out
), "#%d", value
);
7117 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
7118 (!ctxt
->disableSAX
))
7119 ctxt
->sax
->reference(ctxt
->userData
, out
);
7123 * Just encode the value in UTF-8
7125 COPY_BUF(0 ,out
, i
, value
);
7127 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
7128 (!ctxt
->disableSAX
))
7129 ctxt
->sax
->characters(ctxt
->userData
, out
, i
);
7135 * We are seeing an entity reference
7137 ent
= xmlParseEntityRef(ctxt
);
7138 if (ent
== NULL
) return;
7139 if (!ctxt
->wellFormed
)
7141 was_checked
= ent
->checked
;
7143 /* special case of predefined entities */
7144 if ((ent
->name
== NULL
) ||
7145 (ent
->etype
== XML_INTERNAL_PREDEFINED_ENTITY
)) {
7147 if (val
== NULL
) return;
7149 * inline the entity.
7151 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
7152 (!ctxt
->disableSAX
))
7153 ctxt
->sax
->characters(ctxt
->userData
, val
, xmlStrlen(val
));
7158 * The first reference to the entity trigger a parsing phase
7159 * where the ent->children is filled with the result from
7161 * Note: external parsed entities will not be loaded, it is not
7162 * required for a non-validating parser, unless the parsing option
7163 * of validating, or substituting entities were given. Doing so is
7164 * far more secure as the parser will only process data coming from
7165 * the document entity by default.
7167 if (((ent
->checked
== 0) ||
7168 ((ent
->children
== NULL
) && (ctxt
->options
& XML_PARSE_NOENT
))) &&
7169 ((ent
->etype
!= XML_EXTERNAL_GENERAL_PARSED_ENTITY
) ||
7170 (ctxt
->options
& (XML_PARSE_NOENT
| XML_PARSE_DTDVALID
)))) {
7171 unsigned long oldnbent
= ctxt
->nbentities
, diff
;
7174 * This is a bit hackish but this seems the best
7175 * way to make sure both SAX and DOM entity support
7179 if (ctxt
->userData
== ctxt
)
7182 user_data
= ctxt
->userData
;
7185 * Check that this entity is well formed
7186 * 4.3.2: An internal general parsed entity is well-formed
7187 * if its replacement text matches the production labeled
7190 if (ent
->etype
== XML_INTERNAL_GENERAL_ENTITY
) {
7192 ret
= xmlParseBalancedChunkMemoryInternal(ctxt
, ent
->content
,
7196 } else if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
) {
7198 ret
= xmlParseExternalEntityPrivate(ctxt
->myDoc
, ctxt
, ctxt
->sax
,
7199 user_data
, ctxt
->depth
, ent
->URI
,
7200 ent
->ExternalID
, &list
);
7203 ret
= XML_ERR_ENTITY_PE_INTERNAL
;
7204 xmlErrMsgStr(ctxt
, XML_ERR_INTERNAL_ERROR
,
7205 "invalid entity type found\n", NULL
);
7209 * Store the number of entities needing parsing for this entity
7210 * content and perform the checks
7212 diff
= ctxt
->nbentities
- oldnbent
+ 1;
7213 if (diff
> INT_MAX
/ 2)
7215 ent
->checked
= diff
* 2;
7216 if ((ent
->content
!= NULL
) && (xmlStrchr(ent
->content
, '<')))
7218 if (ret
== XML_ERR_ENTITY_LOOP
) {
7219 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
7220 xmlHaltParser(ctxt
);
7221 xmlFreeNodeList(list
);
7224 if (xmlParserEntityCheck(ctxt
, 0, ent
, 0)) {
7225 xmlFreeNodeList(list
);
7229 if ((ret
== XML_ERR_OK
) && (list
!= NULL
)) {
7230 if (((ent
->etype
== XML_INTERNAL_GENERAL_ENTITY
) ||
7231 (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
))&&
7232 (ent
->children
== NULL
)) {
7233 ent
->children
= list
;
7235 * Prune it directly in the generated document
7236 * except for single text nodes.
7238 if ((ctxt
->replaceEntities
== 0) ||
7239 (ctxt
->parseMode
== XML_PARSE_READER
) ||
7240 ((list
->type
== XML_TEXT_NODE
) &&
7241 (list
->next
== NULL
))) {
7243 while (list
!= NULL
) {
7244 list
->parent
= (xmlNodePtr
) ent
;
7245 if (list
->doc
!= ent
->doc
)
7246 xmlSetTreeDoc(list
, ent
->doc
);
7247 if (list
->next
== NULL
)
7254 while (list
!= NULL
) {
7255 list
->parent
= (xmlNodePtr
) ctxt
->node
;
7256 list
->doc
= ctxt
->myDoc
;
7257 if (list
->next
== NULL
)
7261 list
= ent
->children
;
7262 #ifdef LIBXML_LEGACY_ENABLED
7263 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
7264 xmlAddEntityReference(ent
, list
, NULL
);
7265 #endif /* LIBXML_LEGACY_ENABLED */
7268 xmlFreeNodeList(list
);
7271 } else if ((ret
!= XML_ERR_OK
) &&
7272 (ret
!= XML_WAR_UNDECLARED_ENTITY
)) {
7273 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7274 "Entity '%s' failed to parse\n", ent
->name
);
7275 if (ent
->content
!= NULL
)
7276 ent
->content
[0] = 0;
7277 xmlParserEntityCheck(ctxt
, 0, ent
, 0);
7278 } else if (list
!= NULL
) {
7279 xmlFreeNodeList(list
);
7282 if (ent
->checked
== 0)
7285 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7287 } else if (ent
->checked
!= 1) {
7288 ctxt
->nbentities
+= ent
->checked
/ 2;
7292 * Now that the entity content has been gathered
7293 * provide it to the application, this can take different forms based
7294 * on the parsing modes.
7296 if (ent
->children
== NULL
) {
7298 * Probably running in SAX mode and the callbacks don't
7299 * build the entity content. So unless we already went
7300 * through parsing for first checking go through the entity
7301 * content to generate callbacks associated to the entity
7303 if (was_checked
!= 0) {
7306 * This is a bit hackish but this seems the best
7307 * way to make sure both SAX and DOM entity support
7310 if (ctxt
->userData
== ctxt
)
7313 user_data
= ctxt
->userData
;
7315 if (ent
->etype
== XML_INTERNAL_GENERAL_ENTITY
) {
7317 ret
= xmlParseBalancedChunkMemoryInternal(ctxt
,
7318 ent
->content
, user_data
, NULL
);
7320 } else if (ent
->etype
==
7321 XML_EXTERNAL_GENERAL_PARSED_ENTITY
) {
7323 ret
= xmlParseExternalEntityPrivate(ctxt
->myDoc
, ctxt
,
7324 ctxt
->sax
, user_data
, ctxt
->depth
,
7325 ent
->URI
, ent
->ExternalID
, NULL
);
7328 ret
= XML_ERR_ENTITY_PE_INTERNAL
;
7329 xmlErrMsgStr(ctxt
, XML_ERR_INTERNAL_ERROR
,
7330 "invalid entity type found\n", NULL
);
7332 if (ret
== XML_ERR_ENTITY_LOOP
) {
7333 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
7337 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
7338 (ctxt
->replaceEntities
== 0) && (!ctxt
->disableSAX
)) {
7340 * Entity reference callback comes second, it's somewhat
7341 * superfluous but kept for compatibility with historical behaviour
7343 ctxt
->sax
->reference(ctxt
->userData
, ent
->name
);
7349 * If we didn't get any children for the entity being built
7351 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
7352 (ctxt
->replaceEntities
== 0) && (!ctxt
->disableSAX
)) {
7356 ctxt
->sax
->reference(ctxt
->userData
, ent
->name
);
7360 if ((ctxt
->replaceEntities
) || (ent
->children
== NULL
)) {
7362 * There is a problem on the handling of _private for entities
7363 * (bug 155816): Should we copy the content of the field from
7364 * the entity (possibly overwriting some value set by the user
7365 * when a copy is created), should we leave it alone, or should
7366 * we try to take care of different situations? The problem
7367 * is exacerbated by the usage of this field by the xmlReader.
7368 * To fix this bug, we look at _private on the created node
7369 * and, if it's NULL, we copy in whatever was in the entity.
7370 * If it's not NULL we leave it alone. This is somewhat of a
7371 * hack - maybe we should have further tests to determine
7374 if ((ctxt
->node
!= NULL
) && (ent
->children
!= NULL
)) {
7376 * Seems we are generating the DOM content, do
7377 * a simple tree copy for all references except the first
7378 * In the first occurrence list contains the replacement.
7380 if (((list
== NULL
) && (ent
->owner
== 0)) ||
7381 (ctxt
->parseMode
== XML_PARSE_READER
)) {
7382 xmlNodePtr nw
= NULL
, cur
, firstChild
= NULL
;
7385 * We are copying here, make sure there is no abuse
7387 ctxt
->sizeentcopy
+= ent
->length
+ 5;
7388 if (xmlParserEntityCheck(ctxt
, 0, ent
, ctxt
->sizeentcopy
))
7392 * when operating on a reader, the entities definitions
7393 * are always owning the entities subtree.
7394 if (ctxt->parseMode == XML_PARSE_READER)
7398 cur
= ent
->children
;
7399 while (cur
!= NULL
) {
7400 nw
= xmlDocCopyNode(cur
, ctxt
->myDoc
, 1);
7402 if (nw
->_private
== NULL
)
7403 nw
->_private
= cur
->_private
;
7404 if (firstChild
== NULL
){
7407 nw
= xmlAddChild(ctxt
->node
, nw
);
7409 if (cur
== ent
->last
) {
7411 * needed to detect some strange empty
7412 * node cases in the reader tests
7414 if ((ctxt
->parseMode
== XML_PARSE_READER
) &&
7416 (nw
->type
== XML_ELEMENT_NODE
) &&
7417 (nw
->children
== NULL
))
7424 #ifdef LIBXML_LEGACY_ENABLED
7425 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
7426 xmlAddEntityReference(ent
, firstChild
, nw
);
7427 #endif /* LIBXML_LEGACY_ENABLED */
7428 } else if ((list
== NULL
) || (ctxt
->inputNr
> 0)) {
7429 xmlNodePtr nw
= NULL
, cur
, next
, last
,
7433 * We are copying here, make sure there is no abuse
7435 ctxt
->sizeentcopy
+= ent
->length
+ 5;
7436 if (xmlParserEntityCheck(ctxt
, 0, ent
, ctxt
->sizeentcopy
))
7440 * Copy the entity child list and make it the new
7441 * entity child list. The goal is to make sure any
7442 * ID or REF referenced will be the one from the
7443 * document content and not the entity copy.
7445 cur
= ent
->children
;
7446 ent
->children
= NULL
;
7449 while (cur
!= NULL
) {
7453 nw
= xmlDocCopyNode(cur
, ctxt
->myDoc
, 1);
7455 if (nw
->_private
== NULL
)
7456 nw
->_private
= cur
->_private
;
7457 if (firstChild
== NULL
){
7460 xmlAddChild((xmlNodePtr
) ent
, nw
);
7461 xmlAddChild(ctxt
->node
, cur
);
7467 if (ent
->owner
== 0)
7469 #ifdef LIBXML_LEGACY_ENABLED
7470 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
7471 xmlAddEntityReference(ent
, firstChild
, nw
);
7472 #endif /* LIBXML_LEGACY_ENABLED */
7474 const xmlChar
*nbktext
;
7477 * the name change is to avoid coalescing of the
7478 * node with a possible previous text one which
7479 * would make ent->children a dangling pointer
7481 nbktext
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"nbktext",
7483 if (ent
->children
->type
== XML_TEXT_NODE
)
7484 ent
->children
->name
= nbktext
;
7485 if ((ent
->last
!= ent
->children
) &&
7486 (ent
->last
->type
== XML_TEXT_NODE
))
7487 ent
->last
->name
= nbktext
;
7488 xmlAddChildList(ctxt
->node
, ent
->children
);
7492 * This is to avoid a nasty side effect, see
7493 * characters() in SAX.c
7503 * xmlParseEntityRef:
7504 * @ctxt: an XML parser context
7506 * parse an ENTITY reference
7508 * [68] EntityRef ::= '&' Name ';'
7510 * [ WFC: Entity Declared ]
7511 * In a document without any DTD, a document with only an internal DTD
7512 * subset which contains no parameter entity references, or a document
7513 * with "standalone='yes'", the Name given in the entity reference
7514 * must match that in an entity declaration, except that well-formed
7515 * documents need not declare any of the following entities: amp, lt,
7516 * gt, apos, quot. The declaration of a parameter entity must precede
7517 * any reference to it. Similarly, the declaration of a general entity
7518 * must precede any reference to it which appears in a default value in an
7519 * attribute-list declaration. Note that if entities are declared in the
7520 * external subset or in external parameter entities, a non-validating
7521 * processor is not obligated to read and process their declarations;
7522 * for such documents, the rule that an entity must be declared is a
7523 * well-formedness constraint only if standalone='yes'.
7525 * [ WFC: Parsed Entity ]
7526 * An entity reference must not contain the name of an unparsed entity
7528 * Returns the xmlEntityPtr if found, or NULL otherwise.
7531 xmlParseEntityRef(xmlParserCtxtPtr ctxt
) {
7532 const xmlChar
*name
;
7533 xmlEntityPtr ent
= NULL
;
7536 if (ctxt
->instate
== XML_PARSER_EOF
)
7542 name
= xmlParseName(ctxt
);
7544 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
7545 "xmlParseEntityRef: no name\n");
7549 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
7555 * Predefined entities override any extra definition
7557 if ((ctxt
->options
& XML_PARSE_OLDSAX
) == 0) {
7558 ent
= xmlGetPredefinedEntity(name
);
7564 * Increase the number of entity references parsed
7569 * Ask first SAX for entity resolution, otherwise try the
7570 * entities which may have been stored in the parser context.
7572 if (ctxt
->sax
!= NULL
) {
7573 if (ctxt
->sax
->getEntity
!= NULL
)
7574 ent
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
7575 if ((ctxt
->wellFormed
== 1 ) && (ent
== NULL
) &&
7576 (ctxt
->options
& XML_PARSE_OLDSAX
))
7577 ent
= xmlGetPredefinedEntity(name
);
7578 if ((ctxt
->wellFormed
== 1 ) && (ent
== NULL
) &&
7579 (ctxt
->userData
==ctxt
)) {
7580 ent
= xmlSAX2GetEntity(ctxt
, name
);
7583 if (ctxt
->instate
== XML_PARSER_EOF
)
7586 * [ WFC: Entity Declared ]
7587 * In a document without any DTD, a document with only an
7588 * internal DTD subset which contains no parameter entity
7589 * references, or a document with "standalone='yes'", the
7590 * Name given in the entity reference must match that in an
7591 * entity declaration, except that well-formed documents
7592 * need not declare any of the following entities: amp, lt,
7594 * The declaration of a parameter entity must precede any
7596 * Similarly, the declaration of a general entity must
7597 * precede any reference to it which appears in a default
7598 * value in an attribute-list declaration. Note that if
7599 * entities are declared in the external subset or in
7600 * external parameter entities, a non-validating processor
7601 * is not obligated to read and process their declarations;
7602 * for such documents, the rule that an entity must be
7603 * declared is a well-formedness constraint only if
7607 if ((ctxt
->standalone
== 1) ||
7608 ((ctxt
->hasExternalSubset
== 0) &&
7609 (ctxt
->hasPErefs
== 0))) {
7610 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7611 "Entity '%s' not defined\n", name
);
7613 xmlErrMsgStr(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7614 "Entity '%s' not defined\n", name
);
7615 if ((ctxt
->inSubset
== 0) &&
7616 (ctxt
->sax
!= NULL
) &&
7617 (ctxt
->sax
->reference
!= NULL
)) {
7618 ctxt
->sax
->reference(ctxt
->userData
, name
);
7621 xmlParserEntityCheck(ctxt
, 0, ent
, 0);
7626 * [ WFC: Parsed Entity ]
7627 * An entity reference must not contain the name of an
7630 else if (ent
->etype
== XML_EXTERNAL_GENERAL_UNPARSED_ENTITY
) {
7631 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNPARSED_ENTITY
,
7632 "Entity reference to unparsed entity %s\n", name
);
7636 * [ WFC: No External Entity References ]
7637 * Attribute values cannot contain direct or indirect
7638 * entity references to external entities.
7640 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7641 (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) {
7642 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_EXTERNAL
,
7643 "Attribute references external entity '%s'\n", name
);
7646 * [ WFC: No < in Attribute Values ]
7647 * The replacement text of any entity referred to directly or
7648 * indirectly in an attribute value (other than "<") must
7651 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7653 (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
)) {
7654 if (((ent
->checked
& 1) || (ent
->checked
== 0)) &&
7655 (ent
->content
!= NULL
) && (xmlStrchr(ent
->content
, '<'))) {
7656 xmlFatalErrMsgStr(ctxt
, XML_ERR_LT_IN_ATTRIBUTE
,
7657 "'<' in entity '%s' is not allowed in attributes values\n", name
);
7662 * Internal check, no parameter entities here ...
7665 switch (ent
->etype
) {
7666 case XML_INTERNAL_PARAMETER_ENTITY
:
7667 case XML_EXTERNAL_PARAMETER_ENTITY
:
7668 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_PARAMETER
,
7669 "Attempt to reference the parameter entity '%s'\n",
7678 * [ WFC: No Recursion ]
7679 * A parsed entity must not contain a recursive reference
7680 * to itself, either directly or indirectly.
7681 * Done somewhere else
7687 * xmlParseStringEntityRef:
7688 * @ctxt: an XML parser context
7689 * @str: a pointer to an index in the string
7691 * parse an ENTITY reference, but this version parses it from
7694 * [68] EntityRef ::= '&' Name ';'
7696 * [ WFC: Entity Declared ]
7697 * In a document without any DTD, a document with only an internal DTD
7698 * subset which contains no parameter entity references, or a document
7699 * with "standalone='yes'", the Name given in the entity reference
7700 * must match that in an entity declaration, except that well-formed
7701 * documents need not declare any of the following entities: amp, lt,
7702 * gt, apos, quot. The declaration of a parameter entity must precede
7703 * any reference to it. Similarly, the declaration of a general entity
7704 * must precede any reference to it which appears in a default value in an
7705 * attribute-list declaration. Note that if entities are declared in the
7706 * external subset or in external parameter entities, a non-validating
7707 * processor is not obligated to read and process their declarations;
7708 * for such documents, the rule that an entity must be declared is a
7709 * well-formedness constraint only if standalone='yes'.
7711 * [ WFC: Parsed Entity ]
7712 * An entity reference must not contain the name of an unparsed entity
7714 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7715 * is updated to the current location in the string.
7718 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt
, const xmlChar
** str
) {
7722 xmlEntityPtr ent
= NULL
;
7724 if ((str
== NULL
) || (*str
== NULL
))
7732 name
= xmlParseStringName(ctxt
, &ptr
);
7734 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
7735 "xmlParseStringEntityRef: no name\n");
7740 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
7749 * Predefined entities override any extra definition
7751 if ((ctxt
->options
& XML_PARSE_OLDSAX
) == 0) {
7752 ent
= xmlGetPredefinedEntity(name
);
7761 * Increase the number of entity references parsed
7766 * Ask first SAX for entity resolution, otherwise try the
7767 * entities which may have been stored in the parser context.
7769 if (ctxt
->sax
!= NULL
) {
7770 if (ctxt
->sax
->getEntity
!= NULL
)
7771 ent
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
7772 if ((ent
== NULL
) && (ctxt
->options
& XML_PARSE_OLDSAX
))
7773 ent
= xmlGetPredefinedEntity(name
);
7774 if ((ent
== NULL
) && (ctxt
->userData
==ctxt
)) {
7775 ent
= xmlSAX2GetEntity(ctxt
, name
);
7778 if (ctxt
->instate
== XML_PARSER_EOF
) {
7784 * [ WFC: Entity Declared ]
7785 * In a document without any DTD, a document with only an
7786 * internal DTD subset which contains no parameter entity
7787 * references, or a document with "standalone='yes'", the
7788 * Name given in the entity reference must match that in an
7789 * entity declaration, except that well-formed documents
7790 * need not declare any of the following entities: amp, lt,
7792 * The declaration of a parameter entity must precede any
7794 * Similarly, the declaration of a general entity must
7795 * precede any reference to it which appears in a default
7796 * value in an attribute-list declaration. Note that if
7797 * entities are declared in the external subset or in
7798 * external parameter entities, a non-validating processor
7799 * is not obligated to read and process their declarations;
7800 * for such documents, the rule that an entity must be
7801 * declared is a well-formedness constraint only if
7805 if ((ctxt
->standalone
== 1) ||
7806 ((ctxt
->hasExternalSubset
== 0) &&
7807 (ctxt
->hasPErefs
== 0))) {
7808 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7809 "Entity '%s' not defined\n", name
);
7811 xmlErrMsgStr(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7812 "Entity '%s' not defined\n",
7815 xmlParserEntityCheck(ctxt
, 0, ent
, 0);
7816 /* TODO ? check regressions ctxt->valid = 0; */
7820 * [ WFC: Parsed Entity ]
7821 * An entity reference must not contain the name of an
7824 else if (ent
->etype
== XML_EXTERNAL_GENERAL_UNPARSED_ENTITY
) {
7825 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNPARSED_ENTITY
,
7826 "Entity reference to unparsed entity %s\n", name
);
7830 * [ WFC: No External Entity References ]
7831 * Attribute values cannot contain direct or indirect
7832 * entity references to external entities.
7834 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7835 (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) {
7836 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_EXTERNAL
,
7837 "Attribute references external entity '%s'\n", name
);
7840 * [ WFC: No < in Attribute Values ]
7841 * The replacement text of any entity referred to directly or
7842 * indirectly in an attribute value (other than "<") must
7845 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7846 (ent
!= NULL
) && (ent
->content
!= NULL
) &&
7847 (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) &&
7848 (xmlStrchr(ent
->content
, '<'))) {
7849 xmlFatalErrMsgStr(ctxt
, XML_ERR_LT_IN_ATTRIBUTE
,
7850 "'<' in entity '%s' is not allowed in attributes values\n",
7855 * Internal check, no parameter entities here ...
7858 switch (ent
->etype
) {
7859 case XML_INTERNAL_PARAMETER_ENTITY
:
7860 case XML_EXTERNAL_PARAMETER_ENTITY
:
7861 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_PARAMETER
,
7862 "Attempt to reference the parameter entity '%s'\n",
7871 * [ WFC: No Recursion ]
7872 * A parsed entity must not contain a recursive reference
7873 * to itself, either directly or indirectly.
7874 * Done somewhere else
7883 * xmlParsePEReference:
7884 * @ctxt: an XML parser context
7886 * parse PEReference declarations
7887 * The entity content is handled directly by pushing its content as
7888 * a new input stream.
7890 * [69] PEReference ::= '%' Name ';'
7892 * [ WFC: No Recursion ]
7893 * A parsed entity must not contain a recursive
7894 * reference to itself, either directly or indirectly.
7896 * [ WFC: Entity Declared ]
7897 * In a document without any DTD, a document with only an internal DTD
7898 * subset which contains no parameter entity references, or a document
7899 * with "standalone='yes'", ... ... The declaration of a parameter
7900 * entity must precede any reference to it...
7902 * [ VC: Entity Declared ]
7903 * In a document with an external subset or external parameter entities
7904 * with "standalone='no'", ... ... The declaration of a parameter entity
7905 * must precede any reference to it...
7908 * Parameter-entity references may only appear in the DTD.
7909 * NOTE: misleading but this is handled.
7912 xmlParsePEReference(xmlParserCtxtPtr ctxt
)
7914 const xmlChar
*name
;
7915 xmlEntityPtr entity
= NULL
;
7916 xmlParserInputPtr input
;
7921 name
= xmlParseName(ctxt
);
7923 xmlFatalErrMsg(ctxt
, XML_ERR_PEREF_NO_NAME
, "PEReference: no name\n");
7926 if (xmlParserDebugEntities
)
7927 xmlGenericError(xmlGenericErrorContext
,
7928 "PEReference: %s\n", name
);
7930 xmlFatalErr(ctxt
, XML_ERR_PEREF_SEMICOL_MISSING
, NULL
);
7937 * Increase the number of entity references parsed
7942 * Request the entity from SAX
7944 if ((ctxt
->sax
!= NULL
) &&
7945 (ctxt
->sax
->getParameterEntity
!= NULL
))
7946 entity
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
7947 if (ctxt
->instate
== XML_PARSER_EOF
)
7949 if (entity
== NULL
) {
7951 * [ WFC: Entity Declared ]
7952 * In a document without any DTD, a document with only an
7953 * internal DTD subset which contains no parameter entity
7954 * references, or a document with "standalone='yes'", ...
7955 * ... The declaration of a parameter entity must precede
7956 * any reference to it...
7958 if ((ctxt
->standalone
== 1) ||
7959 ((ctxt
->hasExternalSubset
== 0) &&
7960 (ctxt
->hasPErefs
== 0))) {
7961 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7962 "PEReference: %%%s; not found\n",
7966 * [ VC: Entity Declared ]
7967 * In a document with an external subset or external
7968 * parameter entities with "standalone='no'", ...
7969 * ... The declaration of a parameter entity must
7970 * precede any reference to it...
7972 if ((ctxt
->validate
) && (ctxt
->vctxt
.error
!= NULL
)) {
7973 xmlValidityError(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7974 "PEReference: %%%s; not found\n",
7977 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7978 "PEReference: %%%s; not found\n",
7982 xmlParserEntityCheck(ctxt
, 0, NULL
, 0);
7985 * Internal checking in case the entity quest barfed
7987 if ((entity
->etype
!= XML_INTERNAL_PARAMETER_ENTITY
) &&
7988 (entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
)) {
7989 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7990 "Internal: %%%s; is not a parameter entity\n",
7994 xmlCharEncoding enc
;
7996 if (xmlParserEntityCheck(ctxt
, 0, entity
, 0))
7999 if ((entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
) &&
8000 ((ctxt
->options
& XML_PARSE_NOENT
) == 0) &&
8001 ((ctxt
->options
& XML_PARSE_DTDVALID
) == 0) &&
8002 ((ctxt
->options
& XML_PARSE_DTDLOAD
) == 0) &&
8003 ((ctxt
->options
& XML_PARSE_DTDATTR
) == 0) &&
8004 (ctxt
->replaceEntities
== 0) &&
8005 (ctxt
->validate
== 0))
8008 input
= xmlNewEntityInputStream(ctxt
, entity
);
8009 if (xmlPushInput(ctxt
, input
) < 0) {
8010 xmlFreeInputStream(input
);
8014 if (entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
) {
8016 * Get the 4 first bytes and decode the charset
8017 * if enc != XML_CHAR_ENCODING_NONE
8018 * plug some encoding conversion routines.
8019 * Note that, since we may have some non-UTF8
8020 * encoding (like UTF16, bug 135229), the 'length'
8021 * is not known, but we can calculate based upon
8022 * the amount of data in the buffer.
8025 if (ctxt
->instate
== XML_PARSER_EOF
)
8027 if ((ctxt
->input
->end
- ctxt
->input
->cur
)>=4) {
8032 enc
= xmlDetectCharEncoding(start
, 4);
8033 if (enc
!= XML_CHAR_ENCODING_NONE
) {
8034 xmlSwitchEncoding(ctxt
, enc
);
8038 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) &&
8039 (IS_BLANK_CH(NXT(5)))) {
8040 xmlParseTextDecl(ctxt
);
8045 ctxt
->hasPErefs
= 1;
8049 * xmlLoadEntityContent:
8050 * @ctxt: an XML parser context
8051 * @entity: an unloaded system entity
8053 * Load the original content of the given system entity from the
8054 * ExternalID/SystemID given. This is to be used for Included in Literal
8055 * http://www.w3.org/TR/REC-xml/#inliteral processing of entity references
8057 * Returns 0 in case of success and -1 in case of failure
8060 xmlLoadEntityContent(xmlParserCtxtPtr ctxt
, xmlEntityPtr entity
) {
8061 xmlParserInputPtr input
;
8066 if ((ctxt
== NULL
) || (entity
== NULL
) ||
8067 ((entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
) &&
8068 (entity
->etype
!= XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) ||
8069 (entity
->content
!= NULL
)) {
8070 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
8071 "xmlLoadEntityContent parameter error");
8075 if (xmlParserDebugEntities
)
8076 xmlGenericError(xmlGenericErrorContext
,
8077 "Reading %s entity content input\n", entity
->name
);
8079 buf
= xmlBufferCreate();
8081 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
8082 "xmlLoadEntityContent parameter error");
8085 xmlBufferSetAllocationScheme(buf
, XML_BUFFER_ALLOC_DOUBLEIT
);
8087 input
= xmlNewEntityInputStream(ctxt
, entity
);
8088 if (input
== NULL
) {
8089 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
8090 "xmlLoadEntityContent input error");
8096 * Push the entity as the current input, read char by char
8097 * saving to the buffer until the end of the entity or an error
8099 if (xmlPushInput(ctxt
, input
) < 0) {
8101 xmlFreeInputStream(input
);
8107 while ((ctxt
->input
== input
) && (ctxt
->input
->cur
< ctxt
->input
->end
) &&
8109 xmlBufferAdd(buf
, ctxt
->input
->cur
, l
);
8110 if (count
++ > XML_PARSER_CHUNK_SIZE
) {
8113 if (ctxt
->instate
== XML_PARSER_EOF
) {
8123 if (ctxt
->instate
== XML_PARSER_EOF
) {
8131 if ((ctxt
->input
== input
) && (ctxt
->input
->cur
>= ctxt
->input
->end
)) {
8133 } else if (!IS_CHAR(c
)) {
8134 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
8135 "xmlLoadEntityContent: invalid char value %d\n",
8140 entity
->content
= buf
->content
;
8141 buf
->content
= NULL
;
8148 * xmlParseStringPEReference:
8149 * @ctxt: an XML parser context
8150 * @str: a pointer to an index in the string
8152 * parse PEReference declarations
8154 * [69] PEReference ::= '%' Name ';'
8156 * [ WFC: No Recursion ]
8157 * A parsed entity must not contain a recursive
8158 * reference to itself, either directly or indirectly.
8160 * [ WFC: Entity Declared ]
8161 * In a document without any DTD, a document with only an internal DTD
8162 * subset which contains no parameter entity references, or a document
8163 * with "standalone='yes'", ... ... The declaration of a parameter
8164 * entity must precede any reference to it...
8166 * [ VC: Entity Declared ]
8167 * In a document with an external subset or external parameter entities
8168 * with "standalone='no'", ... ... The declaration of a parameter entity
8169 * must precede any reference to it...
8172 * Parameter-entity references may only appear in the DTD.
8173 * NOTE: misleading but this is handled.
8175 * Returns the string of the entity content.
8176 * str is updated to the current value of the index
8179 xmlParseStringPEReference(xmlParserCtxtPtr ctxt
, const xmlChar
**str
) {
8183 xmlEntityPtr entity
= NULL
;
8185 if ((str
== NULL
) || (*str
== NULL
)) return(NULL
);
8191 name
= xmlParseStringName(ctxt
, &ptr
);
8193 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8194 "xmlParseStringPEReference: no name\n");
8200 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
8208 * Increase the number of entity references parsed
8213 * Request the entity from SAX
8215 if ((ctxt
->sax
!= NULL
) &&
8216 (ctxt
->sax
->getParameterEntity
!= NULL
))
8217 entity
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
8218 if (ctxt
->instate
== XML_PARSER_EOF
) {
8223 if (entity
== NULL
) {
8225 * [ WFC: Entity Declared ]
8226 * In a document without any DTD, a document with only an
8227 * internal DTD subset which contains no parameter entity
8228 * references, or a document with "standalone='yes'", ...
8229 * ... The declaration of a parameter entity must precede
8230 * any reference to it...
8232 if ((ctxt
->standalone
== 1) ||
8233 ((ctxt
->hasExternalSubset
== 0) && (ctxt
->hasPErefs
== 0))) {
8234 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
8235 "PEReference: %%%s; not found\n", name
);
8238 * [ VC: Entity Declared ]
8239 * In a document with an external subset or external
8240 * parameter entities with "standalone='no'", ...
8241 * ... The declaration of a parameter entity must
8242 * precede any reference to it...
8244 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
8245 "PEReference: %%%s; not found\n",
8249 xmlParserEntityCheck(ctxt
, 0, NULL
, 0);
8252 * Internal checking in case the entity quest barfed
8254 if ((entity
->etype
!= XML_INTERNAL_PARAMETER_ENTITY
) &&
8255 (entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
)) {
8256 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
8257 "%%%s; is not a parameter entity\n",
8261 ctxt
->hasPErefs
= 1;
8268 * xmlParseDocTypeDecl:
8269 * @ctxt: an XML parser context
8271 * parse a DOCTYPE declaration
8273 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8274 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8276 * [ VC: Root Element Type ]
8277 * The Name in the document type declaration must match the element
8278 * type of the root element.
8282 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt
) {
8283 const xmlChar
*name
= NULL
;
8284 xmlChar
*ExternalID
= NULL
;
8285 xmlChar
*URI
= NULL
;
8288 * We know that '<!DOCTYPE' has been detected.
8295 * Parse the DOCTYPE name.
8297 name
= xmlParseName(ctxt
);
8299 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8300 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8302 ctxt
->intSubName
= name
;
8307 * Check for SystemID and ExternalID
8309 URI
= xmlParseExternalID(ctxt
, &ExternalID
, 1);
8311 if ((URI
!= NULL
) || (ExternalID
!= NULL
)) {
8312 ctxt
->hasExternalSubset
= 1;
8314 ctxt
->extSubURI
= URI
;
8315 ctxt
->extSubSystem
= ExternalID
;
8320 * Create and update the internal subset.
8322 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->internalSubset
!= NULL
) &&
8323 (!ctxt
->disableSAX
))
8324 ctxt
->sax
->internalSubset(ctxt
->userData
, name
, ExternalID
, URI
);
8325 if (ctxt
->instate
== XML_PARSER_EOF
)
8329 * Is there any internal subset declarations ?
8330 * they are handled separately in xmlParseInternalSubset()
8336 * We should be at the end of the DOCTYPE declaration.
8339 xmlFatalErr(ctxt
, XML_ERR_DOCTYPE_NOT_FINISHED
, NULL
);
8345 * xmlParseInternalSubset:
8346 * @ctxt: an XML parser context
8348 * parse the internal subset declaration
8350 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8354 xmlParseInternalSubset(xmlParserCtxtPtr ctxt
) {
8356 * Is there any DTD definition ?
8359 int baseInputNr
= ctxt
->inputNr
;
8360 ctxt
->instate
= XML_PARSER_DTD
;
8363 * Parse the succession of Markup declarations and
8365 * Subsequence (markupdecl | PEReference | S)*
8367 while (((RAW
!= ']') || (ctxt
->inputNr
> baseInputNr
)) &&
8368 (ctxt
->instate
!= XML_PARSER_EOF
)) {
8369 int id
= ctxt
->input
->id
;
8370 unsigned long cons
= CUR_CONSUMED
;
8373 xmlParseMarkupDecl(ctxt
);
8374 xmlParsePEReference(ctxt
);
8377 * Conditional sections are allowed from external entities included
8378 * by PE References in the internal subset.
8380 if ((ctxt
->inputNr
> 1) && (ctxt
->input
->filename
!= NULL
) &&
8381 (RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8382 xmlParseConditionalSections(ctxt
);
8385 if ((id
== ctxt
->input
->id
) && (cons
== CUR_CONSUMED
)) {
8386 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
8387 "xmlParseInternalSubset: error detected in Markup declaration\n");
8388 if (ctxt
->inputNr
> baseInputNr
)
8401 * We should be at the end of the DOCTYPE declaration.
8404 xmlFatalErr(ctxt
, XML_ERR_DOCTYPE_NOT_FINISHED
, NULL
);
8410 #ifdef LIBXML_SAX1_ENABLED
8412 * xmlParseAttribute:
8413 * @ctxt: an XML parser context
8414 * @value: a xmlChar ** used to store the value of the attribute
8416 * parse an attribute
8418 * [41] Attribute ::= Name Eq AttValue
8420 * [ WFC: No External Entity References ]
8421 * Attribute values cannot contain direct or indirect entity references
8422 * to external entities.
8424 * [ WFC: No < in Attribute Values ]
8425 * The replacement text of any entity referred to directly or indirectly in
8426 * an attribute value (other than "&lt;") must not contain a <.
8428 * [ VC: Attribute Value Type ]
8429 * The attribute must have been declared; the value must be of the type
8432 * [25] Eq ::= S? '=' S?
8436 * [NS 11] Attribute ::= QName Eq AttValue
8438 * Also the case QName == xmlns:??? is handled independently as a namespace
8441 * Returns the attribute name, and the value in *value.
8445 xmlParseAttribute(xmlParserCtxtPtr ctxt
, xmlChar
**value
) {
8446 const xmlChar
*name
;
8451 name
= xmlParseName(ctxt
);
8453 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8454 "error parsing attribute name\n");
8465 val
= xmlParseAttValue(ctxt
);
8466 ctxt
->instate
= XML_PARSER_CONTENT
;
8468 xmlFatalErrMsgStr(ctxt
, XML_ERR_ATTRIBUTE_WITHOUT_VALUE
,
8469 "Specification mandates value for attribute %s\n", name
);
8474 * Check that xml:lang conforms to the specification
8475 * No longer registered as an error, just generate a warning now
8476 * since this was deprecated in XML second edition
8478 if ((ctxt
->pedantic
) && (xmlStrEqual(name
, BAD_CAST
"xml:lang"))) {
8479 if (!xmlCheckLanguageID(val
)) {
8480 xmlWarningMsg(ctxt
, XML_WAR_LANG_VALUE
,
8481 "Malformed value for xml:lang : %s\n",
8487 * Check that xml:space conforms to the specification
8489 if (xmlStrEqual(name
, BAD_CAST
"xml:space")) {
8490 if (xmlStrEqual(val
, BAD_CAST
"default"))
8492 else if (xmlStrEqual(val
, BAD_CAST
"preserve"))
8495 xmlWarningMsg(ctxt
, XML_WAR_SPACE_VALUE
,
8496 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8507 * @ctxt: an XML parser context
8509 * parse a start of tag either for rule element or
8510 * EmptyElement. In both cases we don't parse the tag closing chars.
8512 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8514 * [ WFC: Unique Att Spec ]
8515 * No attribute name may appear more than once in the same start-tag or
8516 * empty-element tag.
8518 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8520 * [ WFC: Unique Att Spec ]
8521 * No attribute name may appear more than once in the same start-tag or
8522 * empty-element tag.
8526 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8528 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8530 * Returns the element name parsed
8534 xmlParseStartTag(xmlParserCtxtPtr ctxt
) {
8535 const xmlChar
*name
;
8536 const xmlChar
*attname
;
8538 const xmlChar
**atts
= ctxt
->atts
;
8540 int maxatts
= ctxt
->maxatts
;
8543 if (RAW
!= '<') return(NULL
);
8546 name
= xmlParseName(ctxt
);
8548 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8549 "xmlParseStartTag: invalid element name\n");
8554 * Now parse the attributes, it ends up with the ending
8561 while (((RAW
!= '>') &&
8562 ((RAW
!= '/') || (NXT(1) != '>')) &&
8563 (IS_BYTE_CHAR(RAW
))) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
8564 int id
= ctxt
->input
->id
;
8565 unsigned long cons
= CUR_CONSUMED
;
8567 attname
= xmlParseAttribute(ctxt
, &attvalue
);
8568 if ((attname
!= NULL
) && (attvalue
!= NULL
)) {
8570 * [ WFC: Unique Att Spec ]
8571 * No attribute name may appear more than once in the same
8572 * start-tag or empty-element tag.
8574 for (i
= 0; i
< nbatts
;i
+= 2) {
8575 if (xmlStrEqual(atts
[i
], attname
)) {
8576 xmlErrAttributeDup(ctxt
, NULL
, attname
);
8582 * Add the pair to atts
8585 maxatts
= 22; /* allow for 10 attrs by default */
8586 atts
= (const xmlChar
**)
8587 xmlMalloc(maxatts
* sizeof(xmlChar
*));
8589 xmlErrMemory(ctxt
, NULL
);
8590 if (attvalue
!= NULL
)
8595 ctxt
->maxatts
= maxatts
;
8596 } else if (nbatts
+ 4 > maxatts
) {
8600 n
= (const xmlChar
**) xmlRealloc((void *) atts
,
8601 maxatts
* sizeof(const xmlChar
*));
8603 xmlErrMemory(ctxt
, NULL
);
8604 if (attvalue
!= NULL
)
8610 ctxt
->maxatts
= maxatts
;
8612 atts
[nbatts
++] = attname
;
8613 atts
[nbatts
++] = attvalue
;
8614 atts
[nbatts
] = NULL
;
8615 atts
[nbatts
+ 1] = NULL
;
8617 if (attvalue
!= NULL
)
8624 if ((RAW
== '>') || (((RAW
== '/') && (NXT(1) == '>'))))
8626 if (SKIP_BLANKS
== 0) {
8627 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
8628 "attributes construct error\n");
8630 if ((cons
== CUR_CONSUMED
) && (id
== ctxt
->input
->id
) &&
8631 (attname
== NULL
) && (attvalue
== NULL
)) {
8632 xmlFatalErrMsg(ctxt
, XML_ERR_INTERNAL_ERROR
,
8633 "xmlParseStartTag: problem parsing attributes\n");
8641 * SAX: Start of Element !
8643 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->startElement
!= NULL
) &&
8644 (!ctxt
->disableSAX
)) {
8646 ctxt
->sax
->startElement(ctxt
->userData
, name
, atts
);
8648 ctxt
->sax
->startElement(ctxt
->userData
, name
, NULL
);
8652 /* Free only the content strings */
8653 for (i
= 1;i
< nbatts
;i
+=2)
8654 if (atts
[i
] != NULL
)
8655 xmlFree((xmlChar
*) atts
[i
]);
8662 * @ctxt: an XML parser context
8663 * @line: line of the start tag
8664 * @nsNr: number of namespaces on the start tag
8666 * parse an end of tag
8668 * [42] ETag ::= '</' Name S? '>'
8672 * [NS 9] ETag ::= '</' QName S? '>'
8676 xmlParseEndTag1(xmlParserCtxtPtr ctxt
, int line
) {
8677 const xmlChar
*name
;
8680 if ((RAW
!= '<') || (NXT(1) != '/')) {
8681 xmlFatalErrMsg(ctxt
, XML_ERR_LTSLASH_REQUIRED
,
8682 "xmlParseEndTag: '</' not found\n");
8687 name
= xmlParseNameAndCompare(ctxt
,ctxt
->name
);
8690 * We should definitely be at the ending "S? '>'" part
8694 if ((!IS_BYTE_CHAR(RAW
)) || (RAW
!= '>')) {
8695 xmlFatalErr(ctxt
, XML_ERR_GT_REQUIRED
, NULL
);
8700 * [ WFC: Element Type Match ]
8701 * The Name in an element's end-tag must match the element type in the
8705 if (name
!= (xmlChar
*)1) {
8706 if (name
== NULL
) name
= BAD_CAST
"unparsable";
8707 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NAME_MISMATCH
,
8708 "Opening and ending tag mismatch: %s line %d and %s\n",
8709 ctxt
->name
, line
, name
);
8715 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElement
!= NULL
) &&
8716 (!ctxt
->disableSAX
))
8717 ctxt
->sax
->endElement(ctxt
->userData
, ctxt
->name
);
8726 * @ctxt: an XML parser context
8728 * parse an end of tag
8730 * [42] ETag ::= '</' Name S? '>'
8734 * [NS 9] ETag ::= '</' QName S? '>'
8738 xmlParseEndTag(xmlParserCtxtPtr ctxt
) {
8739 xmlParseEndTag1(ctxt
, 0);
8741 #endif /* LIBXML_SAX1_ENABLED */
8743 /************************************************************************
8745 * SAX 2 specific operations *
8747 ************************************************************************/
8751 * @ctxt: an XML parser context
8752 * @prefix: the prefix to lookup
8754 * Lookup the namespace name for the @prefix (which can be NULL)
8755 * The prefix must come from the @ctxt->dict dictionary
8757 * Returns the namespace name or NULL if not bound
8759 static const xmlChar
*
8760 xmlGetNamespace(xmlParserCtxtPtr ctxt
, const xmlChar
*prefix
) {
8763 if (prefix
== ctxt
->str_xml
) return(ctxt
->str_xml_ns
);
8764 for (i
= ctxt
->nsNr
- 2;i
>= 0;i
-=2)
8765 if (ctxt
->nsTab
[i
] == prefix
) {
8766 if ((prefix
== NULL
) && (*ctxt
->nsTab
[i
+ 1] == 0))
8768 return(ctxt
->nsTab
[i
+ 1]);
8775 * @ctxt: an XML parser context
8776 * @prefix: pointer to store the prefix part
8778 * parse an XML Namespace QName
8780 * [6] QName ::= (Prefix ':')? LocalPart
8781 * [7] Prefix ::= NCName
8782 * [8] LocalPart ::= NCName
8784 * Returns the Name parsed or NULL
8787 static const xmlChar
*
8788 xmlParseQName(xmlParserCtxtPtr ctxt
, const xmlChar
**prefix
) {
8789 const xmlChar
*l
, *p
;
8793 l
= xmlParseNCName(ctxt
);
8796 l
= xmlParseName(ctxt
);
8798 xmlNsErr(ctxt
, XML_NS_ERR_QNAME
,
8799 "Failed to parse QName '%s'\n", l
, NULL
, NULL
);
8809 l
= xmlParseNCName(ctxt
);
8813 if (ctxt
->instate
== XML_PARSER_EOF
)
8815 xmlNsErr(ctxt
, XML_NS_ERR_QNAME
,
8816 "Failed to parse QName '%s:'\n", p
, NULL
, NULL
);
8817 l
= xmlParseNmtoken(ctxt
);
8819 if (ctxt
->instate
== XML_PARSER_EOF
)
8821 tmp
= xmlBuildQName(BAD_CAST
"", p
, NULL
, 0);
8823 tmp
= xmlBuildQName(l
, p
, NULL
, 0);
8826 p
= xmlDictLookup(ctxt
->dict
, tmp
, -1);
8827 if (tmp
!= NULL
) xmlFree(tmp
);
8834 xmlNsErr(ctxt
, XML_NS_ERR_QNAME
,
8835 "Failed to parse QName '%s:%s:'\n", p
, l
, NULL
);
8837 tmp
= (xmlChar
*) xmlParseName(ctxt
);
8839 tmp
= xmlBuildQName(tmp
, l
, NULL
, 0);
8840 l
= xmlDictLookup(ctxt
->dict
, tmp
, -1);
8841 if (tmp
!= NULL
) xmlFree(tmp
);
8845 if (ctxt
->instate
== XML_PARSER_EOF
)
8847 tmp
= xmlBuildQName(BAD_CAST
"", l
, NULL
, 0);
8848 l
= xmlDictLookup(ctxt
->dict
, tmp
, -1);
8849 if (tmp
!= NULL
) xmlFree(tmp
);
8860 * xmlParseQNameAndCompare:
8861 * @ctxt: an XML parser context
8862 * @name: the localname
8863 * @prefix: the prefix, if any.
8865 * parse an XML name and compares for match
8866 * (specialized for endtag parsing)
8868 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8869 * and the name for mismatch
8872 static const xmlChar
*
8873 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt
, xmlChar
const *name
,
8874 xmlChar
const *prefix
) {
8878 const xmlChar
*prefix2
;
8880 if (prefix
== NULL
) return(xmlParseNameAndCompare(ctxt
, name
));
8883 in
= ctxt
->input
->cur
;
8886 while (*in
!= 0 && *in
== *cmp
) {
8890 if ((*cmp
== 0) && (*in
== ':')) {
8893 while (*in
!= 0 && *in
== *cmp
) {
8897 if (*cmp
== 0 && (*in
== '>' || IS_BLANK_CH (*in
))) {
8899 ctxt
->input
->col
+= in
- ctxt
->input
->cur
;
8900 ctxt
->input
->cur
= in
;
8901 return((const xmlChar
*) 1);
8905 * all strings come from the dictionary, equality can be done directly
8907 ret
= xmlParseQName (ctxt
, &prefix2
);
8908 if ((ret
== name
) && (prefix
== prefix2
))
8909 return((const xmlChar
*) 1);
8914 * xmlParseAttValueInternal:
8915 * @ctxt: an XML parser context
8916 * @len: attribute len result
8917 * @alloc: whether the attribute was reallocated as a new string
8918 * @normalize: if 1 then further non-CDATA normalization must be done
8920 * parse a value for an attribute.
8921 * NOTE: if no normalization is needed, the routine will return pointers
8922 * directly from the data buffer.
8924 * 3.3.3 Attribute-Value Normalization:
8925 * Before the value of an attribute is passed to the application or
8926 * checked for validity, the XML processor must normalize it as follows:
8927 * - a character reference is processed by appending the referenced
8928 * character to the attribute value
8929 * - an entity reference is processed by recursively processing the
8930 * replacement text of the entity
8931 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8932 * appending #x20 to the normalized value, except that only a single
8933 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8934 * parsed entity or the literal entity value of an internal parsed entity
8935 * - other characters are processed by appending them to the normalized value
8936 * If the declared value is not CDATA, then the XML processor must further
8937 * process the normalized attribute value by discarding any leading and
8938 * trailing space (#x20) characters, and by replacing sequences of space
8939 * (#x20) characters by a single space (#x20) character.
8940 * All attributes for which no declaration has been read should be treated
8941 * by a non-validating parser as if declared CDATA.
8943 * Returns the AttValue parsed or NULL. The value has to be freed by the
8944 * caller if it was copied, this can be detected by val[*len] == 0.
8947 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8948 const xmlChar *oldbase = ctxt->input->base;\
8950 if (ctxt->instate == XML_PARSER_EOF)\
8952 if (oldbase != ctxt->input->base) {\
8953 ptrdiff_t delta = ctxt->input->base - oldbase;\
8954 start = start + delta;\
8957 end = ctxt->input->end;
8960 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt
, int *len
, int *alloc
,
8964 const xmlChar
*in
= NULL
, *start
, *end
, *last
;
8965 xmlChar
*ret
= NULL
;
8967 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
8968 XML_MAX_HUGE_LENGTH
:
8969 XML_MAX_TEXT_LENGTH
;
8972 in
= (xmlChar
*) CUR_PTR
;
8973 line
= ctxt
->input
->line
;
8974 col
= ctxt
->input
->col
;
8975 if (*in
!= '"' && *in
!= '\'') {
8976 xmlFatalErr(ctxt
, XML_ERR_ATTRIBUTE_NOT_STARTED
, NULL
);
8979 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
8982 * try to handle in this routine the most common case where no
8983 * allocation of a new string is required and where content is
8988 end
= ctxt
->input
->end
;
8991 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt
, in
, start
, end
)
8995 * Skip any leading spaces
8997 while ((in
< end
) && (*in
!= limit
) &&
8998 ((*in
== 0x20) || (*in
== 0x9) ||
8999 (*in
== 0xA) || (*in
== 0xD))) {
9008 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt
, in
, start
, end
)
9009 if ((in
- start
) > maxLength
) {
9010 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9011 "AttValue length too long\n");
9016 while ((in
< end
) && (*in
!= limit
) && (*in
>= 0x20) &&
9017 (*in
<= 0x7f) && (*in
!= '&') && (*in
!= '<')) {
9019 if ((*in
++ == 0x20) && (*in
== 0x20)) break;
9021 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt
, in
, start
, end
)
9022 if ((in
- start
) > maxLength
) {
9023 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9024 "AttValue length too long\n");
9031 * skip the trailing blanks
9033 while ((last
[-1] == 0x20) && (last
> start
)) last
--;
9034 while ((in
< end
) && (*in
!= limit
) &&
9035 ((*in
== 0x20) || (*in
== 0x9) ||
9036 (*in
== 0xA) || (*in
== 0xD))) {
9044 const xmlChar
*oldbase
= ctxt
->input
->base
;
9046 if (ctxt
->instate
== XML_PARSER_EOF
)
9048 if (oldbase
!= ctxt
->input
->base
) {
9049 ptrdiff_t delta
= ctxt
->input
->base
- oldbase
;
9050 start
= start
+ delta
;
9052 last
= last
+ delta
;
9054 end
= ctxt
->input
->end
;
9055 if ((in
- start
) > maxLength
) {
9056 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9057 "AttValue length too long\n");
9062 if ((in
- start
) > maxLength
) {
9063 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9064 "AttValue length too long\n");
9067 if (*in
!= limit
) goto need_complex
;
9069 while ((in
< end
) && (*in
!= limit
) && (*in
>= 0x20) &&
9070 (*in
<= 0x7f) && (*in
!= '&') && (*in
!= '<')) {
9074 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt
, in
, start
, end
)
9075 if ((in
- start
) > maxLength
) {
9076 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9077 "AttValue length too long\n");
9083 if ((in
- start
) > maxLength
) {
9084 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9085 "AttValue length too long\n");
9088 if (*in
!= limit
) goto need_complex
;
9093 *len
= last
- start
;
9094 ret
= (xmlChar
*) start
;
9096 if (alloc
) *alloc
= 1;
9097 ret
= xmlStrndup(start
, last
- start
);
9100 ctxt
->input
->line
= line
;
9101 ctxt
->input
->col
= col
;
9102 if (alloc
) *alloc
= 0;
9105 if (alloc
) *alloc
= 1;
9106 return xmlParseAttValueComplex(ctxt
, len
, normalize
);
9110 * xmlParseAttribute2:
9111 * @ctxt: an XML parser context
9112 * @pref: the element prefix
9113 * @elem: the element name
9114 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9115 * @value: a xmlChar ** used to store the value of the attribute
9116 * @len: an int * to save the length of the attribute
9117 * @alloc: an int * to indicate if the attribute was allocated
9119 * parse an attribute in the new SAX2 framework.
9121 * Returns the attribute name, and the value in *value.
9124 static const xmlChar
*
9125 xmlParseAttribute2(xmlParserCtxtPtr ctxt
,
9126 const xmlChar
* pref
, const xmlChar
* elem
,
9127 const xmlChar
** prefix
, xmlChar
** value
,
9128 int *len
, int *alloc
)
9130 const xmlChar
*name
;
9131 xmlChar
*val
, *internal_val
= NULL
;
9136 name
= xmlParseQName(ctxt
, prefix
);
9138 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
9139 "error parsing attribute name\n");
9144 * get the type if needed
9146 if (ctxt
->attsSpecial
!= NULL
) {
9149 type
= (int) (ptrdiff_t) xmlHashQLookup2(ctxt
->attsSpecial
,
9150 pref
, elem
, *prefix
, name
);
9162 val
= xmlParseAttValueInternal(ctxt
, len
, alloc
, normalize
);
9165 * Sometimes a second normalisation pass for spaces is needed
9166 * but that only happens if charrefs or entity references
9167 * have been used in the attribute value, i.e. the attribute
9168 * value has been extracted in an allocated string already.
9171 const xmlChar
*val2
;
9173 val2
= xmlAttrNormalizeSpace2(ctxt
, val
, len
);
9174 if ((val2
!= NULL
) && (val2
!= val
)) {
9176 val
= (xmlChar
*) val2
;
9180 ctxt
->instate
= XML_PARSER_CONTENT
;
9182 xmlFatalErrMsgStr(ctxt
, XML_ERR_ATTRIBUTE_WITHOUT_VALUE
,
9183 "Specification mandates value for attribute %s\n",
9188 if (*prefix
== ctxt
->str_xml
) {
9190 * Check that xml:lang conforms to the specification
9191 * No longer registered as an error, just generate a warning now
9192 * since this was deprecated in XML second edition
9194 if ((ctxt
->pedantic
) && (xmlStrEqual(name
, BAD_CAST
"lang"))) {
9195 internal_val
= xmlStrndup(val
, *len
);
9196 if (!xmlCheckLanguageID(internal_val
)) {
9197 xmlWarningMsg(ctxt
, XML_WAR_LANG_VALUE
,
9198 "Malformed value for xml:lang : %s\n",
9199 internal_val
, NULL
);
9204 * Check that xml:space conforms to the specification
9206 if (xmlStrEqual(name
, BAD_CAST
"space")) {
9207 internal_val
= xmlStrndup(val
, *len
);
9208 if (xmlStrEqual(internal_val
, BAD_CAST
"default"))
9210 else if (xmlStrEqual(internal_val
, BAD_CAST
"preserve"))
9213 xmlWarningMsg(ctxt
, XML_WAR_SPACE_VALUE
,
9214 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9215 internal_val
, NULL
);
9219 xmlFree(internal_val
);
9227 * xmlParseStartTag2:
9228 * @ctxt: an XML parser context
9230 * parse a start of tag either for rule element or
9231 * EmptyElement. In both cases we don't parse the tag closing chars.
9232 * This routine is called when running SAX2 parsing
9234 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9236 * [ WFC: Unique Att Spec ]
9237 * No attribute name may appear more than once in the same start-tag or
9238 * empty-element tag.
9240 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9242 * [ WFC: Unique Att Spec ]
9243 * No attribute name may appear more than once in the same start-tag or
9244 * empty-element tag.
9248 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9250 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9252 * Returns the element name parsed
9255 static const xmlChar
*
9256 xmlParseStartTag2(xmlParserCtxtPtr ctxt
, const xmlChar
**pref
,
9257 const xmlChar
**URI
, int *tlen
) {
9258 const xmlChar
*localname
;
9259 const xmlChar
*prefix
;
9260 const xmlChar
*attname
;
9261 const xmlChar
*aprefix
;
9262 const xmlChar
*nsname
;
9264 const xmlChar
**atts
= ctxt
->atts
;
9265 int maxatts
= ctxt
->maxatts
;
9266 int nratts
, nbatts
, nbdef
, inputid
;
9267 int i
, j
, nbNs
, attval
;
9269 int nsNr
= ctxt
->nsNr
;
9271 if (RAW
!= '<') return(NULL
);
9275 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9276 * point since the attribute values may be stored as pointers to
9277 * the buffer and calling SHRINK would destroy them !
9278 * The Shrinking is only possible once the full set of attribute
9279 * callbacks have been done.
9282 cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
9283 inputid
= ctxt
->input
->id
;
9289 /* Forget any namespaces added during an earlier parse of this element. */
9292 localname
= xmlParseQName(ctxt
, &prefix
);
9293 if (localname
== NULL
) {
9294 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
9295 "StartTag: invalid element name\n");
9298 *tlen
= ctxt
->input
->cur
- ctxt
->input
->base
- cur
;
9301 * Now parse the attributes, it ends up with the ending
9308 while (((RAW
!= '>') &&
9309 ((RAW
!= '/') || (NXT(1) != '>')) &&
9310 (IS_BYTE_CHAR(RAW
))) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
9311 int id
= ctxt
->input
->id
;
9312 unsigned long cons
= CUR_CONSUMED
;
9313 int len
= -1, alloc
= 0;
9315 attname
= xmlParseAttribute2(ctxt
, prefix
, localname
,
9316 &aprefix
, &attvalue
, &len
, &alloc
);
9317 if ((attname
== NULL
) || (attvalue
== NULL
))
9319 if (len
< 0) len
= xmlStrlen(attvalue
);
9321 if ((attname
== ctxt
->str_xmlns
) && (aprefix
== NULL
)) {
9322 const xmlChar
*URL
= xmlDictLookup(ctxt
->dict
, attvalue
, len
);
9326 xmlErrMemory(ctxt
, "dictionary allocation failure");
9327 if ((attvalue
!= NULL
) && (alloc
!= 0))
9333 uri
= xmlParseURI((const char *) URL
);
9335 xmlNsErr(ctxt
, XML_WAR_NS_URI
,
9336 "xmlns: '%s' is not a valid URI\n",
9339 if (uri
->scheme
== NULL
) {
9340 xmlNsWarn(ctxt
, XML_WAR_NS_URI_RELATIVE
,
9341 "xmlns: URI %s is not absolute\n",
9346 if (URL
== ctxt
->str_xml_ns
) {
9347 if (attname
!= ctxt
->str_xml
) {
9348 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9349 "xml namespace URI cannot be the default namespace\n",
9356 BAD_CAST
"http://www.w3.org/2000/xmlns/"))) {
9357 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9358 "reuse of the xmlns namespace name is forbidden\n",
9364 * check that it's not a defined namespace
9366 for (j
= 1;j
<= nbNs
;j
++)
9367 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == NULL
)
9370 xmlErrAttributeDup(ctxt
, NULL
, attname
);
9372 if (nsPush(ctxt
, NULL
, URL
) > 0) nbNs
++;
9374 } else if (aprefix
== ctxt
->str_xmlns
) {
9375 const xmlChar
*URL
= xmlDictLookup(ctxt
->dict
, attvalue
, len
);
9378 if (attname
== ctxt
->str_xml
) {
9379 if (URL
!= ctxt
->str_xml_ns
) {
9380 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9381 "xml namespace prefix mapped to wrong URI\n",
9385 * Do not keep a namespace definition node
9389 if (URL
== ctxt
->str_xml_ns
) {
9390 if (attname
!= ctxt
->str_xml
) {
9391 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9392 "xml namespace URI mapped to wrong prefix\n",
9397 if (attname
== ctxt
->str_xmlns
) {
9398 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9399 "redefinition of the xmlns prefix is forbidden\n",
9405 BAD_CAST
"http://www.w3.org/2000/xmlns/"))) {
9406 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9407 "reuse of the xmlns namespace name is forbidden\n",
9411 if ((URL
== NULL
) || (URL
[0] == 0)) {
9412 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9413 "xmlns:%s: Empty XML namespace is not allowed\n",
9414 attname
, NULL
, NULL
);
9417 uri
= xmlParseURI((const char *) URL
);
9419 xmlNsErr(ctxt
, XML_WAR_NS_URI
,
9420 "xmlns:%s: '%s' is not a valid URI\n",
9421 attname
, URL
, NULL
);
9423 if ((ctxt
->pedantic
) && (uri
->scheme
== NULL
)) {
9424 xmlNsWarn(ctxt
, XML_WAR_NS_URI_RELATIVE
,
9425 "xmlns:%s: URI %s is not absolute\n",
9426 attname
, URL
, NULL
);
9433 * check that it's not a defined namespace
9435 for (j
= 1;j
<= nbNs
;j
++)
9436 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == attname
)
9439 xmlErrAttributeDup(ctxt
, aprefix
, attname
);
9441 if (nsPush(ctxt
, attname
, URL
) > 0) nbNs
++;
9445 * Add the pair to atts
9447 if ((atts
== NULL
) || (nbatts
+ 5 > maxatts
)) {
9448 if (xmlCtxtGrowAttrs(ctxt
, nbatts
+ 5) < 0) {
9451 maxatts
= ctxt
->maxatts
;
9454 ctxt
->attallocs
[nratts
++] = alloc
;
9455 atts
[nbatts
++] = attname
;
9456 atts
[nbatts
++] = aprefix
;
9458 * The namespace URI field is used temporarily to point at the
9459 * base of the current input buffer for non-alloced attributes.
9460 * When the input buffer is reallocated, all the pointers become
9461 * invalid, but they can be reconstructed later.
9464 atts
[nbatts
++] = NULL
;
9466 atts
[nbatts
++] = ctxt
->input
->base
;
9467 atts
[nbatts
++] = attvalue
;
9469 atts
[nbatts
++] = attvalue
;
9471 * tag if some deallocation is needed
9473 if (alloc
!= 0) attval
= 1;
9474 attvalue
= NULL
; /* moved into atts */
9478 if ((attvalue
!= NULL
) && (alloc
!= 0)) {
9484 if (ctxt
->instate
== XML_PARSER_EOF
)
9486 if ((RAW
== '>') || (((RAW
== '/') && (NXT(1) == '>'))))
9488 if (SKIP_BLANKS
== 0) {
9489 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
9490 "attributes construct error\n");
9493 if ((cons
== CUR_CONSUMED
) && (id
== ctxt
->input
->id
) &&
9494 (attname
== NULL
) && (attvalue
== NULL
)) {
9495 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
9496 "xmlParseStartTag: problem parsing attributes\n");
9502 if (ctxt
->input
->id
!= inputid
) {
9503 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
9504 "Unexpected change of input\n");
9509 /* Reconstruct attribute value pointers. */
9510 for (i
= 0, j
= 0; j
< nratts
; i
+= 5, j
++) {
9511 if (atts
[i
+2] != NULL
) {
9513 * Arithmetic on dangling pointers is technically undefined
9514 * behavior, but well...
9516 ptrdiff_t offset
= ctxt
->input
->base
- atts
[i
+2];
9517 atts
[i
+2] = NULL
; /* Reset repurposed namespace URI */
9518 atts
[i
+3] += offset
; /* value */
9519 atts
[i
+4] += offset
; /* valuend */
9524 * The attributes defaulting
9526 if (ctxt
->attsDefault
!= NULL
) {
9527 xmlDefAttrsPtr defaults
;
9529 defaults
= xmlHashLookup2(ctxt
->attsDefault
, localname
, prefix
);
9530 if (defaults
!= NULL
) {
9531 for (i
= 0;i
< defaults
->nbAttrs
;i
++) {
9532 attname
= defaults
->values
[5 * i
];
9533 aprefix
= defaults
->values
[5 * i
+ 1];
9536 * special work for namespaces defaulted defs
9538 if ((attname
== ctxt
->str_xmlns
) && (aprefix
== NULL
)) {
9540 * check that it's not a defined namespace
9542 for (j
= 1;j
<= nbNs
;j
++)
9543 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == NULL
)
9545 if (j
<= nbNs
) continue;
9547 nsname
= xmlGetNamespace(ctxt
, NULL
);
9548 if (nsname
!= defaults
->values
[5 * i
+ 2]) {
9549 if (nsPush(ctxt
, NULL
,
9550 defaults
->values
[5 * i
+ 2]) > 0)
9553 } else if (aprefix
== ctxt
->str_xmlns
) {
9555 * check that it's not a defined namespace
9557 for (j
= 1;j
<= nbNs
;j
++)
9558 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == attname
)
9560 if (j
<= nbNs
) continue;
9562 nsname
= xmlGetNamespace(ctxt
, attname
);
9563 if (nsname
!= defaults
->values
[2]) {
9564 if (nsPush(ctxt
, attname
,
9565 defaults
->values
[5 * i
+ 2]) > 0)
9570 * check that it's not a defined attribute
9572 for (j
= 0;j
< nbatts
;j
+=5) {
9573 if ((attname
== atts
[j
]) && (aprefix
== atts
[j
+1]))
9576 if (j
< nbatts
) continue;
9578 if ((atts
== NULL
) || (nbatts
+ 5 > maxatts
)) {
9579 if (xmlCtxtGrowAttrs(ctxt
, nbatts
+ 5) < 0) {
9583 maxatts
= ctxt
->maxatts
;
9586 atts
[nbatts
++] = attname
;
9587 atts
[nbatts
++] = aprefix
;
9588 if (aprefix
== NULL
)
9589 atts
[nbatts
++] = NULL
;
9591 atts
[nbatts
++] = xmlGetNamespace(ctxt
, aprefix
);
9592 atts
[nbatts
++] = defaults
->values
[5 * i
+ 2];
9593 atts
[nbatts
++] = defaults
->values
[5 * i
+ 3];
9594 if ((ctxt
->standalone
== 1) &&
9595 (defaults
->values
[5 * i
+ 4] != NULL
)) {
9596 xmlValidityError(ctxt
, XML_DTD_STANDALONE_DEFAULTED
,
9597 "standalone: attribute %s on %s defaulted from external subset\n",
9598 attname
, localname
);
9607 * The attributes checkings
9609 for (i
= 0; i
< nbatts
;i
+= 5) {
9611 * The default namespace does not apply to attribute names.
9613 if (atts
[i
+ 1] != NULL
) {
9614 nsname
= xmlGetNamespace(ctxt
, atts
[i
+ 1]);
9615 if (nsname
== NULL
) {
9616 xmlNsErr(ctxt
, XML_NS_ERR_UNDEFINED_NAMESPACE
,
9617 "Namespace prefix %s for %s on %s is not defined\n",
9618 atts
[i
+ 1], atts
[i
], localname
);
9620 atts
[i
+ 2] = nsname
;
9624 * [ WFC: Unique Att Spec ]
9625 * No attribute name may appear more than once in the same
9626 * start-tag or empty-element tag.
9627 * As extended by the Namespace in XML REC.
9629 for (j
= 0; j
< i
;j
+= 5) {
9630 if (atts
[i
] == atts
[j
]) {
9631 if (atts
[i
+1] == atts
[j
+1]) {
9632 xmlErrAttributeDup(ctxt
, atts
[i
+1], atts
[i
]);
9635 if ((nsname
!= NULL
) && (atts
[j
+ 2] == nsname
)) {
9636 xmlNsErr(ctxt
, XML_NS_ERR_ATTRIBUTE_REDEFINED
,
9637 "Namespaced Attribute %s in '%s' redefined\n",
9638 atts
[i
], nsname
, NULL
);
9645 nsname
= xmlGetNamespace(ctxt
, prefix
);
9646 if ((prefix
!= NULL
) && (nsname
== NULL
)) {
9647 xmlNsErr(ctxt
, XML_NS_ERR_UNDEFINED_NAMESPACE
,
9648 "Namespace prefix %s on %s is not defined\n",
9649 prefix
, localname
, NULL
);
9655 * SAX: Start of Element !
9657 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->startElementNs
!= NULL
) &&
9658 (!ctxt
->disableSAX
)) {
9660 ctxt
->sax
->startElementNs(ctxt
->userData
, localname
, prefix
,
9661 nsname
, nbNs
, &ctxt
->nsTab
[ctxt
->nsNr
- 2 * nbNs
],
9662 nbatts
/ 5, nbdef
, atts
);
9664 ctxt
->sax
->startElementNs(ctxt
->userData
, localname
, prefix
,
9665 nsname
, 0, NULL
, nbatts
/ 5, nbdef
, atts
);
9670 * Free up attribute allocated strings if needed
9673 for (i
= 3,j
= 0; j
< nratts
;i
+= 5,j
++)
9674 if ((ctxt
->attallocs
[j
] != 0) && (atts
[i
] != NULL
))
9675 xmlFree((xmlChar
*) atts
[i
]);
9683 * @ctxt: an XML parser context
9684 * @tag: the start tag data, supplying the prefix, the line of the start tag
9685 * and the number of namespaces on the start tag
9687 * parse an end of tag
9689 * [42] ETag ::= '</' Name S? '>'
9693 * [NS 9] ETag ::= '</' QName S? '>'
9697 xmlParseEndTag2(xmlParserCtxtPtr ctxt
, const xmlStartTag
*tag
) {
9698 const xmlChar
*name
;
9701 if ((RAW
!= '<') || (NXT(1) != '/')) {
9702 xmlFatalErr(ctxt
, XML_ERR_LTSLASH_REQUIRED
, NULL
);
9707 if (tag
->prefix
== NULL
)
9708 name
= xmlParseNameAndCompare(ctxt
, ctxt
->name
);
9710 name
= xmlParseQNameAndCompare(ctxt
, ctxt
->name
, tag
->prefix
);
9713 * We should definitely be at the ending "S? '>'" part
9716 if (ctxt
->instate
== XML_PARSER_EOF
)
9719 if ((!IS_BYTE_CHAR(RAW
)) || (RAW
!= '>')) {
9720 xmlFatalErr(ctxt
, XML_ERR_GT_REQUIRED
, NULL
);
9725 * [ WFC: Element Type Match ]
9726 * The Name in an element's end-tag must match the element type in the
9730 if (name
!= (xmlChar
*)1) {
9731 if (name
== NULL
) name
= BAD_CAST
"unparsable";
9732 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NAME_MISMATCH
,
9733 "Opening and ending tag mismatch: %s line %d and %s\n",
9734 ctxt
->name
, tag
->line
, name
);
9740 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElementNs
!= NULL
) &&
9741 (!ctxt
->disableSAX
))
9742 ctxt
->sax
->endElementNs(ctxt
->userData
, ctxt
->name
, tag
->prefix
,
9747 nsPop(ctxt
, tag
->nsNr
);
9752 * @ctxt: an XML parser context
9754 * Parse escaped pure raw content.
9756 * [18] CDSect ::= CDStart CData CDEnd
9758 * [19] CDStart ::= '<![CDATA['
9760 * [20] CData ::= (Char* - (Char* ']]>' Char*))
9762 * [21] CDEnd ::= ']]>'
9765 xmlParseCDSect(xmlParserCtxtPtr ctxt
) {
9766 xmlChar
*buf
= NULL
;
9768 int size
= XML_PARSER_BUFFER_SIZE
;
9773 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
9774 XML_MAX_HUGE_LENGTH
:
9775 XML_MAX_TEXT_LENGTH
;
9777 /* Check 2.6.0 was NXT(0) not RAW */
9778 if (CMP9(CUR_PTR
, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9783 ctxt
->instate
= XML_PARSER_CDATA_SECTION
;
9786 xmlFatalErr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
, NULL
);
9787 ctxt
->instate
= XML_PARSER_CONTENT
;
9793 xmlFatalErr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
, NULL
);
9794 ctxt
->instate
= XML_PARSER_CONTENT
;
9799 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
9801 xmlErrMemory(ctxt
, NULL
);
9804 while (IS_CHAR(cur
) &&
9805 ((r
!= ']') || (s
!= ']') || (cur
!= '>'))) {
9806 if (len
+ 5 >= size
) {
9809 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* 2 * sizeof(xmlChar
));
9812 xmlErrMemory(ctxt
, NULL
);
9818 COPY_BUF(rl
,buf
,len
,r
);
9827 if (ctxt
->instate
== XML_PARSER_EOF
) {
9835 if (len
> maxLength
) {
9836 xmlFatalErrMsg(ctxt
, XML_ERR_CDATA_NOT_FINISHED
,
9837 "CData section too big found\n");
9843 ctxt
->instate
= XML_PARSER_CONTENT
;
9845 xmlFatalErrMsgStr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
,
9846 "CData section not finished\n%.50s\n", buf
);
9853 * OK the buffer is to be consumed as cdata.
9855 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
9856 if (ctxt
->sax
->cdataBlock
!= NULL
)
9857 ctxt
->sax
->cdataBlock(ctxt
->userData
, buf
, len
);
9858 else if (ctxt
->sax
->characters
!= NULL
)
9859 ctxt
->sax
->characters(ctxt
->userData
, buf
, len
);
9865 * xmlParseContentInternal:
9866 * @ctxt: an XML parser context
9868 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9869 * unexpected EOF to the caller.
9873 xmlParseContentInternal(xmlParserCtxtPtr ctxt
) {
9874 int nameNr
= ctxt
->nameNr
;
9877 while ((RAW
!= 0) &&
9878 (ctxt
->instate
!= XML_PARSER_EOF
)) {
9879 int id
= ctxt
->input
->id
;
9880 unsigned long cons
= CUR_CONSUMED
;
9881 const xmlChar
*cur
= ctxt
->input
->cur
;
9884 * First case : a Processing Instruction.
9886 if ((*cur
== '<') && (cur
[1] == '?')) {
9891 * Second case : a CDSection
9893 /* 2.6.0 test was *cur not RAW */
9894 else if (CMP9(CUR_PTR
, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9895 xmlParseCDSect(ctxt
);
9899 * Third case : a comment
9901 else if ((*cur
== '<') && (NXT(1) == '!') &&
9902 (NXT(2) == '-') && (NXT(3) == '-')) {
9903 xmlParseComment(ctxt
);
9904 ctxt
->instate
= XML_PARSER_CONTENT
;
9908 * Fourth case : a sub-element.
9910 else if (*cur
== '<') {
9911 if (NXT(1) == '/') {
9912 if (ctxt
->nameNr
<= nameNr
)
9914 xmlParseElementEnd(ctxt
);
9916 xmlParseElementStart(ctxt
);
9921 * Fifth case : a reference. If it has not been resolved,
9922 * parsing returns its Name, create the node
9925 else if (*cur
== '&') {
9926 xmlParseReference(ctxt
);
9930 * Last case, text. Note that References are handled directly.
9933 xmlParseCharData(ctxt
, 0);
9939 if ((cons
== CUR_CONSUMED
) && (id
== ctxt
->input
->id
)) {
9940 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
9941 "detected an error in element content\n");
9942 xmlHaltParser(ctxt
);
9950 * @ctxt: an XML parser context
9952 * Parse a content sequence. Stops at EOF or '</'.
9954 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9958 xmlParseContent(xmlParserCtxtPtr ctxt
) {
9959 int nameNr
= ctxt
->nameNr
;
9961 xmlParseContentInternal(ctxt
);
9963 if ((ctxt
->instate
!= XML_PARSER_EOF
) && (ctxt
->nameNr
> nameNr
)) {
9964 const xmlChar
*name
= ctxt
->nameTab
[ctxt
->nameNr
- 1];
9965 int line
= ctxt
->pushTab
[ctxt
->nameNr
- 1].line
;
9966 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NOT_FINISHED
,
9967 "Premature end of data in tag %s line %d\n",
9974 * @ctxt: an XML parser context
9976 * parse an XML element
9978 * [39] element ::= EmptyElemTag | STag content ETag
9980 * [ WFC: Element Type Match ]
9981 * The Name in an element's end-tag must match the element type in the
9987 xmlParseElement(xmlParserCtxtPtr ctxt
) {
9988 if (xmlParseElementStart(ctxt
) != 0)
9991 xmlParseContentInternal(ctxt
);
9992 if (ctxt
->instate
== XML_PARSER_EOF
)
9996 const xmlChar
*name
= ctxt
->nameTab
[ctxt
->nameNr
- 1];
9997 int line
= ctxt
->pushTab
[ctxt
->nameNr
- 1].line
;
9998 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NOT_FINISHED
,
9999 "Premature end of data in tag %s line %d\n",
10004 xmlParseElementEnd(ctxt
);
10008 * xmlParseElementStart:
10009 * @ctxt: an XML parser context
10011 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10012 * opening tag was parsed, 1 if an empty element was parsed.
10015 xmlParseElementStart(xmlParserCtxtPtr ctxt
) {
10016 const xmlChar
*name
;
10017 const xmlChar
*prefix
= NULL
;
10018 const xmlChar
*URI
= NULL
;
10019 xmlParserNodeInfo node_info
;
10020 int line
, tlen
= 0;
10022 int nsNr
= ctxt
->nsNr
;
10024 if (((unsigned int) ctxt
->nameNr
> xmlParserMaxDepth
) &&
10025 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
10026 xmlFatalErrMsgInt(ctxt
, XML_ERR_INTERNAL_ERROR
,
10027 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10028 xmlParserMaxDepth
);
10029 xmlHaltParser(ctxt
);
10033 /* Capture start position */
10034 if (ctxt
->record_info
) {
10035 node_info
.begin_pos
= ctxt
->input
->consumed
+
10036 (CUR_PTR
- ctxt
->input
->base
);
10037 node_info
.begin_line
= ctxt
->input
->line
;
10040 if (ctxt
->spaceNr
== 0)
10041 spacePush(ctxt
, -1);
10042 else if (*ctxt
->space
== -2)
10043 spacePush(ctxt
, -1);
10045 spacePush(ctxt
, *ctxt
->space
);
10047 line
= ctxt
->input
->line
;
10048 #ifdef LIBXML_SAX1_ENABLED
10050 #endif /* LIBXML_SAX1_ENABLED */
10051 name
= xmlParseStartTag2(ctxt
, &prefix
, &URI
, &tlen
);
10052 #ifdef LIBXML_SAX1_ENABLED
10054 name
= xmlParseStartTag(ctxt
);
10055 #endif /* LIBXML_SAX1_ENABLED */
10056 if (ctxt
->instate
== XML_PARSER_EOF
)
10058 if (name
== NULL
) {
10062 nameNsPush(ctxt
, name
, prefix
, URI
, line
, ctxt
->nsNr
- nsNr
);
10065 #ifdef LIBXML_VALID_ENABLED
10067 * [ VC: Root Element Type ]
10068 * The Name in the document type declaration must match the element
10069 * type of the root element.
10071 if (ctxt
->validate
&& ctxt
->wellFormed
&& ctxt
->myDoc
&&
10072 ctxt
->node
&& (ctxt
->node
== ctxt
->myDoc
->children
))
10073 ctxt
->valid
&= xmlValidateRoot(&ctxt
->vctxt
, ctxt
->myDoc
);
10074 #endif /* LIBXML_VALID_ENABLED */
10077 * Check for an Empty Element.
10079 if ((RAW
== '/') && (NXT(1) == '>')) {
10082 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElementNs
!= NULL
) &&
10083 (!ctxt
->disableSAX
))
10084 ctxt
->sax
->endElementNs(ctxt
->userData
, name
, prefix
, URI
);
10085 #ifdef LIBXML_SAX1_ENABLED
10087 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElement
!= NULL
) &&
10088 (!ctxt
->disableSAX
))
10089 ctxt
->sax
->endElement(ctxt
->userData
, name
);
10090 #endif /* LIBXML_SAX1_ENABLED */
10094 if (nsNr
!= ctxt
->nsNr
)
10095 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
10096 if ( ret
!= NULL
&& ctxt
->record_info
) {
10097 node_info
.end_pos
= ctxt
->input
->consumed
+
10098 (CUR_PTR
- ctxt
->input
->base
);
10099 node_info
.end_line
= ctxt
->input
->line
;
10100 node_info
.node
= ret
;
10101 xmlParserAddNodeInfo(ctxt
, &node_info
);
10108 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_GT_REQUIRED
,
10109 "Couldn't find end of Start Tag %s line %d\n",
10113 * end of parsing of this node.
10118 if (nsNr
!= ctxt
->nsNr
)
10119 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
10122 * Capture end position and add node
10124 if ( ret
!= NULL
&& ctxt
->record_info
) {
10125 node_info
.end_pos
= ctxt
->input
->consumed
+
10126 (CUR_PTR
- ctxt
->input
->base
);
10127 node_info
.end_line
= ctxt
->input
->line
;
10128 node_info
.node
= ret
;
10129 xmlParserAddNodeInfo(ctxt
, &node_info
);
10138 * xmlParseElementEnd:
10139 * @ctxt: an XML parser context
10141 * Parse the end of an XML element.
10144 xmlParseElementEnd(xmlParserCtxtPtr ctxt
) {
10145 xmlParserNodeInfo node_info
;
10146 xmlNodePtr ret
= ctxt
->node
;
10148 if (ctxt
->nameNr
<= 0)
10152 * parse the end of tag: '</' should be here.
10155 xmlParseEndTag2(ctxt
, &ctxt
->pushTab
[ctxt
->nameNr
- 1]);
10158 #ifdef LIBXML_SAX1_ENABLED
10160 xmlParseEndTag1(ctxt
, 0);
10161 #endif /* LIBXML_SAX1_ENABLED */
10164 * Capture end position and add node
10166 if ( ret
!= NULL
&& ctxt
->record_info
) {
10167 node_info
.end_pos
= ctxt
->input
->consumed
+
10168 (CUR_PTR
- ctxt
->input
->base
);
10169 node_info
.end_line
= ctxt
->input
->line
;
10170 node_info
.node
= ret
;
10171 xmlParserAddNodeInfo(ctxt
, &node_info
);
10176 * xmlParseVersionNum:
10177 * @ctxt: an XML parser context
10179 * parse the XML version value.
10181 * [26] VersionNum ::= '1.' [0-9]+
10183 * In practice allow [0-9].[0-9]+ at that level
10185 * Returns the string giving the XML version number, or NULL
10188 xmlParseVersionNum(xmlParserCtxtPtr ctxt
) {
10189 xmlChar
*buf
= NULL
;
10194 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
10196 xmlErrMemory(ctxt
, NULL
);
10200 if (!((cur
>= '0') && (cur
<= '9'))) {
10214 while ((cur
>= '0') && (cur
<= '9')) {
10215 if (len
+ 1 >= size
) {
10219 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
10222 xmlErrMemory(ctxt
, NULL
);
10236 * xmlParseVersionInfo:
10237 * @ctxt: an XML parser context
10239 * parse the XML version.
10241 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10243 * [25] Eq ::= S? '=' S?
10245 * Returns the version string, e.g. "1.0"
10249 xmlParseVersionInfo(xmlParserCtxtPtr ctxt
) {
10250 xmlChar
*version
= NULL
;
10252 if (CMP7(CUR_PTR
, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10256 xmlFatalErr(ctxt
, XML_ERR_EQUAL_REQUIRED
, NULL
);
10263 version
= xmlParseVersionNum(ctxt
);
10265 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10268 } else if (RAW
== '\''){
10270 version
= xmlParseVersionNum(ctxt
);
10272 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10276 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_STARTED
, NULL
);
10284 * @ctxt: an XML parser context
10286 * parse the XML encoding name
10288 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10290 * Returns the encoding name value or NULL
10293 xmlParseEncName(xmlParserCtxtPtr ctxt
) {
10294 xmlChar
*buf
= NULL
;
10300 if (((cur
>= 'a') && (cur
<= 'z')) ||
10301 ((cur
>= 'A') && (cur
<= 'Z'))) {
10302 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
10304 xmlErrMemory(ctxt
, NULL
);
10311 while (((cur
>= 'a') && (cur
<= 'z')) ||
10312 ((cur
>= 'A') && (cur
<= 'Z')) ||
10313 ((cur
>= '0') && (cur
<= '9')) ||
10314 (cur
== '.') || (cur
== '_') ||
10316 if (len
+ 1 >= size
) {
10320 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
10322 xmlErrMemory(ctxt
, NULL
);
10339 xmlFatalErr(ctxt
, XML_ERR_ENCODING_NAME
, NULL
);
10345 * xmlParseEncodingDecl:
10346 * @ctxt: an XML parser context
10348 * parse the XML encoding declaration
10350 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10352 * This sets up the conversion filters.
10354 * Returns the encoding value or NULL
10358 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt
) {
10359 xmlChar
*encoding
= NULL
;
10362 if (CMP8(CUR_PTR
, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10366 xmlFatalErr(ctxt
, XML_ERR_EQUAL_REQUIRED
, NULL
);
10373 encoding
= xmlParseEncName(ctxt
);
10375 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10376 xmlFree((xmlChar
*) encoding
);
10380 } else if (RAW
== '\''){
10382 encoding
= xmlParseEncName(ctxt
);
10384 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10385 xmlFree((xmlChar
*) encoding
);
10390 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_STARTED
, NULL
);
10394 * Non standard parsing, allowing the user to ignore encoding
10396 if (ctxt
->options
& XML_PARSE_IGNORE_ENC
) {
10397 xmlFree((xmlChar
*) encoding
);
10402 * UTF-16 encoding switch has already taken place at this stage,
10403 * moreover the little-endian/big-endian selection is already done
10405 if ((encoding
!= NULL
) &&
10406 ((!xmlStrcasecmp(encoding
, BAD_CAST
"UTF-16")) ||
10407 (!xmlStrcasecmp(encoding
, BAD_CAST
"UTF16")))) {
10409 * If no encoding was passed to the parser, the declaration says
10410 * UTF-16 and no decoder is present, i.e. the
10411 * document is apparently UTF-8 compatible, then raise an
10412 * encoding mismatch fatal error
10414 if ((ctxt
->encoding
== NULL
) &&
10415 (ctxt
->input
->buf
!= NULL
) &&
10416 (ctxt
->input
->buf
->encoder
== NULL
)) {
10417 xmlFatalErrMsg(ctxt
, XML_ERR_INVALID_ENCODING
,
10418 "Document labelled UTF-16 but has UTF-8 content\n");
10420 if (ctxt
->encoding
!= NULL
)
10421 xmlFree((xmlChar
*) ctxt
->encoding
);
10422 ctxt
->encoding
= encoding
;
10425 * UTF-8 encoding is handled natively
10427 else if ((encoding
!= NULL
) &&
10428 ((!xmlStrcasecmp(encoding
, BAD_CAST
"UTF-8")) ||
10429 (!xmlStrcasecmp(encoding
, BAD_CAST
"UTF8")))) {
10430 if (ctxt
->encoding
!= NULL
)
10431 xmlFree((xmlChar
*) ctxt
->encoding
);
10432 ctxt
->encoding
= encoding
;
10434 else if (encoding
!= NULL
) {
10435 xmlCharEncodingHandlerPtr handler
;
10437 if (ctxt
->input
->encoding
!= NULL
)
10438 xmlFree((xmlChar
*) ctxt
->input
->encoding
);
10439 ctxt
->input
->encoding
= encoding
;
10441 handler
= xmlFindCharEncodingHandler((const char *) encoding
);
10442 if (handler
!= NULL
) {
10443 if (xmlSwitchToEncoding(ctxt
, handler
) < 0) {
10444 /* failed to convert */
10445 ctxt
->errNo
= XML_ERR_UNSUPPORTED_ENCODING
;
10449 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNSUPPORTED_ENCODING
,
10450 "Unsupported encoding %s\n", encoding
);
10460 * @ctxt: an XML parser context
10462 * parse the XML standalone declaration
10464 * [32] SDDecl ::= S 'standalone' Eq
10465 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10467 * [ VC: Standalone Document Declaration ]
10468 * TODO The standalone document declaration must have the value "no"
10469 * if any external markup declarations contain declarations of:
10470 * - attributes with default values, if elements to which these
10471 * attributes apply appear in the document without specifications
10472 * of values for these attributes, or
10473 * - entities (other than amp, lt, gt, apos, quot), if references
10474 * to those entities appear in the document, or
10475 * - attributes with values subject to normalization, where the
10476 * attribute appears in the document with a value which will change
10477 * as a result of normalization, or
10478 * - element types with element content, if white space occurs directly
10479 * within any instance of those types.
10482 * 1 if standalone="yes"
10483 * 0 if standalone="no"
10484 * -2 if standalone attribute is missing or invalid
10485 * (A standalone value of -2 means that the XML declaration was found,
10486 * but no value was specified for the standalone attribute).
10490 xmlParseSDDecl(xmlParserCtxtPtr ctxt
) {
10491 int standalone
= -2;
10494 if (CMP10(CUR_PTR
, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10498 xmlFatalErr(ctxt
, XML_ERR_EQUAL_REQUIRED
, NULL
);
10499 return(standalone
);
10505 if ((RAW
== 'n') && (NXT(1) == 'o')) {
10508 } else if ((RAW
== 'y') && (NXT(1) == 'e') &&
10513 xmlFatalErr(ctxt
, XML_ERR_STANDALONE_VALUE
, NULL
);
10516 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10519 } else if (RAW
== '"'){
10521 if ((RAW
== 'n') && (NXT(1) == 'o')) {
10524 } else if ((RAW
== 'y') && (NXT(1) == 'e') &&
10529 xmlFatalErr(ctxt
, XML_ERR_STANDALONE_VALUE
, NULL
);
10532 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10536 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_STARTED
, NULL
);
10539 return(standalone
);
10544 * @ctxt: an XML parser context
10546 * parse an XML declaration header
10548 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10552 xmlParseXMLDecl(xmlParserCtxtPtr ctxt
) {
10556 * This value for standalone indicates that the document has an
10557 * XML declaration but it does not have a standalone attribute.
10558 * It will be overwritten later if a standalone attribute is found.
10560 ctxt
->input
->standalone
= -2;
10563 * We know that '<?xml' is here.
10567 if (!IS_BLANK_CH(RAW
)) {
10568 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
10569 "Blank needed after '<?xml'\n");
10574 * We must have the VersionInfo here.
10576 version
= xmlParseVersionInfo(ctxt
);
10577 if (version
== NULL
) {
10578 xmlFatalErr(ctxt
, XML_ERR_VERSION_MISSING
, NULL
);
10580 if (!xmlStrEqual(version
, (const xmlChar
*) XML_DEFAULT_VERSION
)) {
10582 * Changed here for XML-1.0 5th edition
10584 if (ctxt
->options
& XML_PARSE_OLD10
) {
10585 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNKNOWN_VERSION
,
10586 "Unsupported version '%s'\n",
10589 if ((version
[0] == '1') && ((version
[1] == '.'))) {
10590 xmlWarningMsg(ctxt
, XML_WAR_UNKNOWN_VERSION
,
10591 "Unsupported version '%s'\n",
10594 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNKNOWN_VERSION
,
10595 "Unsupported version '%s'\n",
10600 if (ctxt
->version
!= NULL
)
10601 xmlFree((void *) ctxt
->version
);
10602 ctxt
->version
= version
;
10606 * We may have the encoding declaration
10608 if (!IS_BLANK_CH(RAW
)) {
10609 if ((RAW
== '?') && (NXT(1) == '>')) {
10613 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
, "Blank needed here\n");
10615 xmlParseEncodingDecl(ctxt
);
10616 if ((ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) ||
10617 (ctxt
->instate
== XML_PARSER_EOF
)) {
10619 * The XML REC instructs us to stop parsing right here
10625 * We may have the standalone status.
10627 if ((ctxt
->input
->encoding
!= NULL
) && (!IS_BLANK_CH(RAW
))) {
10628 if ((RAW
== '?') && (NXT(1) == '>')) {
10632 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
, "Blank needed here\n");
10636 * We can grow the input buffer freely at that point
10641 ctxt
->input
->standalone
= xmlParseSDDecl(ctxt
);
10644 if ((RAW
== '?') && (NXT(1) == '>')) {
10646 } else if (RAW
== '>') {
10647 /* Deprecated old WD ... */
10648 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
10651 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
10652 MOVETO_ENDTAG(CUR_PTR
);
10659 * @ctxt: an XML parser context
10661 * parse an XML Misc* optional field.
10663 * [27] Misc ::= Comment | PI | S
10667 xmlParseMisc(xmlParserCtxtPtr ctxt
) {
10668 while (ctxt
->instate
!= XML_PARSER_EOF
) {
10671 if ((RAW
== '<') && (NXT(1) == '?')) {
10673 } else if (CMP4(CUR_PTR
, '<', '!', '-', '-')) {
10674 xmlParseComment(ctxt
);
10682 * xmlParseDocument:
10683 * @ctxt: an XML parser context
10685 * parse an XML document (and build a tree if using the standard SAX
10688 * [1] document ::= prolog element Misc*
10690 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10692 * Returns 0, -1 in case of error. the parser context is augmented
10693 * as a result of the parsing.
10697 xmlParseDocument(xmlParserCtxtPtr ctxt
) {
10699 xmlCharEncoding enc
;
10703 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
))
10709 * SAX: detecting the level.
10711 xmlDetectSAX2(ctxt
);
10714 * SAX: beginning of the document processing.
10716 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
10717 ctxt
->sax
->setDocumentLocator(ctxt
->userData
, &xmlDefaultSAXLocator
);
10718 if (ctxt
->instate
== XML_PARSER_EOF
)
10721 if ((ctxt
->encoding
== NULL
) &&
10722 ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4)) {
10724 * Get the 4 first bytes and decode the charset
10725 * if enc != XML_CHAR_ENCODING_NONE
10726 * plug some encoding conversion routines.
10732 enc
= xmlDetectCharEncoding(&start
[0], 4);
10733 if (enc
!= XML_CHAR_ENCODING_NONE
) {
10734 xmlSwitchEncoding(ctxt
, enc
);
10740 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
10745 * Check for the XMLDecl in the Prolog.
10746 * Do not GROW here, so that the detected encoder does not decode more
10747 * than just the first line, unless the amount of data is really
10748 * too small to hold "<?xml version="1.0" encoding="foo"
10750 if ((ctxt
->input
->end
- ctxt
->input
->cur
) < 35) {
10753 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10756 * Note that we will switch encoding on the fly.
10758 xmlParseXMLDecl(ctxt
);
10759 if ((ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) ||
10760 (ctxt
->instate
== XML_PARSER_EOF
)) {
10762 * The XML REC instructs us to stop parsing right here
10766 ctxt
->standalone
= ctxt
->input
->standalone
;
10769 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
10771 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) && (!ctxt
->disableSAX
))
10772 ctxt
->sax
->startDocument(ctxt
->userData
);
10773 if (ctxt
->instate
== XML_PARSER_EOF
)
10775 if ((ctxt
->myDoc
!= NULL
) && (ctxt
->input
!= NULL
) &&
10776 (ctxt
->input
->buf
!= NULL
) && (ctxt
->input
->buf
->compressed
>= 0)) {
10777 ctxt
->myDoc
->compression
= ctxt
->input
->buf
->compressed
;
10781 * The Misc part of the Prolog
10783 xmlParseMisc(ctxt
);
10786 * Then possibly doc type declaration(s) and more Misc
10787 * (doctypedecl Misc*)?
10790 if (CMP9(CUR_PTR
, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10792 ctxt
->inSubset
= 1;
10793 xmlParseDocTypeDecl(ctxt
);
10795 ctxt
->instate
= XML_PARSER_DTD
;
10796 xmlParseInternalSubset(ctxt
);
10797 if (ctxt
->instate
== XML_PARSER_EOF
)
10802 * Create and update the external subset.
10804 ctxt
->inSubset
= 2;
10805 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->externalSubset
!= NULL
) &&
10806 (!ctxt
->disableSAX
))
10807 ctxt
->sax
->externalSubset(ctxt
->userData
, ctxt
->intSubName
,
10808 ctxt
->extSubSystem
, ctxt
->extSubURI
);
10809 if (ctxt
->instate
== XML_PARSER_EOF
)
10811 ctxt
->inSubset
= 0;
10813 xmlCleanSpecialAttr(ctxt
);
10815 ctxt
->instate
= XML_PARSER_PROLOG
;
10816 xmlParseMisc(ctxt
);
10820 * Time to start parsing the tree itself
10824 xmlFatalErrMsg(ctxt
, XML_ERR_DOCUMENT_EMPTY
,
10825 "Start tag expected, '<' not found\n");
10827 ctxt
->instate
= XML_PARSER_CONTENT
;
10828 xmlParseElement(ctxt
);
10829 ctxt
->instate
= XML_PARSER_EPILOG
;
10833 * The Misc part at the end
10835 xmlParseMisc(ctxt
);
10838 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
10840 ctxt
->instate
= XML_PARSER_EOF
;
10844 * SAX: end of the document processing.
10846 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
10847 ctxt
->sax
->endDocument(ctxt
->userData
);
10850 * Remove locally kept entity definitions if the tree was not built
10852 if ((ctxt
->myDoc
!= NULL
) &&
10853 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
))) {
10854 xmlFreeDoc(ctxt
->myDoc
);
10855 ctxt
->myDoc
= NULL
;
10858 if ((ctxt
->wellFormed
) && (ctxt
->myDoc
!= NULL
)) {
10859 ctxt
->myDoc
->properties
|= XML_DOC_WELLFORMED
;
10861 ctxt
->myDoc
->properties
|= XML_DOC_DTDVALID
;
10862 if (ctxt
->nsWellFormed
)
10863 ctxt
->myDoc
->properties
|= XML_DOC_NSVALID
;
10864 if (ctxt
->options
& XML_PARSE_OLD10
)
10865 ctxt
->myDoc
->properties
|= XML_DOC_OLD10
;
10867 if (! ctxt
->wellFormed
) {
10875 * xmlParseExtParsedEnt:
10876 * @ctxt: an XML parser context
10878 * parse a general parsed entity
10879 * An external general parsed entity is well-formed if it matches the
10880 * production labeled extParsedEnt.
10882 * [78] extParsedEnt ::= TextDecl? content
10884 * Returns 0, -1 in case of error. the parser context is augmented
10885 * as a result of the parsing.
10889 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt
) {
10891 xmlCharEncoding enc
;
10893 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
))
10896 xmlDetectSAX2(ctxt
);
10901 * SAX: beginning of the document processing.
10903 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
10904 ctxt
->sax
->setDocumentLocator(ctxt
->userData
, &xmlDefaultSAXLocator
);
10907 * Get the 4 first bytes and decode the charset
10908 * if enc != XML_CHAR_ENCODING_NONE
10909 * plug some encoding conversion routines.
10911 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
10916 enc
= xmlDetectCharEncoding(start
, 4);
10917 if (enc
!= XML_CHAR_ENCODING_NONE
) {
10918 xmlSwitchEncoding(ctxt
, enc
);
10924 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
10928 * Check for the XMLDecl in the Prolog.
10931 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10934 * Note that we will switch encoding on the fly.
10936 xmlParseXMLDecl(ctxt
);
10937 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
10939 * The XML REC instructs us to stop parsing right here
10945 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
10947 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) && (!ctxt
->disableSAX
))
10948 ctxt
->sax
->startDocument(ctxt
->userData
);
10949 if (ctxt
->instate
== XML_PARSER_EOF
)
10953 * Doing validity checking on chunk doesn't make sense
10955 ctxt
->instate
= XML_PARSER_CONTENT
;
10956 ctxt
->validate
= 0;
10957 ctxt
->loadsubset
= 0;
10960 xmlParseContent(ctxt
);
10961 if (ctxt
->instate
== XML_PARSER_EOF
)
10964 if ((RAW
== '<') && (NXT(1) == '/')) {
10965 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
10966 } else if (RAW
!= 0) {
10967 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
10971 * SAX: end of the document processing.
10973 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
10974 ctxt
->sax
->endDocument(ctxt
->userData
);
10976 if (! ctxt
->wellFormed
) return(-1);
10980 #ifdef LIBXML_PUSH_ENABLED
10981 /************************************************************************
10983 * Progressive parsing interfaces *
10985 ************************************************************************/
10988 * xmlParseLookupSequence:
10989 * @ctxt: an XML parser context
10990 * @first: the first char to lookup
10991 * @next: the next char to lookup or zero
10992 * @third: the next char to lookup or zero
10994 * Try to find if a sequence (first, next, third) or just (first next) or
10995 * (first) is available in the input stream.
10996 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10997 * to avoid rescanning sequences of bytes, it DOES change the state of the
10998 * parser, do not use liberally.
11000 * Returns the index to the current parsing point if the full sequence
11001 * is available, -1 otherwise.
11004 xmlParseLookupSequence(xmlParserCtxtPtr ctxt
, xmlChar first
,
11005 xmlChar next
, xmlChar third
) {
11007 xmlParserInputPtr in
;
11008 const xmlChar
*buf
;
11011 if (in
== NULL
) return(-1);
11012 base
= in
->cur
- in
->base
;
11013 if (base
< 0) return(-1);
11014 if (ctxt
->checkIndex
> base
)
11015 base
= ctxt
->checkIndex
;
11016 if (in
->buf
== NULL
) {
11020 buf
= xmlBufContent(in
->buf
->buffer
);
11021 len
= xmlBufUse(in
->buf
->buffer
);
11023 /* take into account the sequence length */
11024 if (third
) len
-= 2;
11025 else if (next
) len
--;
11026 for (;base
< len
;base
++) {
11027 if (buf
[base
] == first
) {
11029 if ((buf
[base
+ 1] != next
) ||
11030 (buf
[base
+ 2] != third
)) continue;
11031 } else if (next
!= 0) {
11032 if (buf
[base
+ 1] != next
) continue;
11034 ctxt
->checkIndex
= 0;
11037 xmlGenericError(xmlGenericErrorContext
,
11038 "PP: lookup '%c' found at %d\n",
11040 else if (third
== 0)
11041 xmlGenericError(xmlGenericErrorContext
,
11042 "PP: lookup '%c%c' found at %d\n",
11043 first
, next
, base
);
11045 xmlGenericError(xmlGenericErrorContext
,
11046 "PP: lookup '%c%c%c' found at %d\n",
11047 first
, next
, third
, base
);
11049 return(base
- (in
->cur
- in
->base
));
11052 ctxt
->checkIndex
= base
;
11055 xmlGenericError(xmlGenericErrorContext
,
11056 "PP: lookup '%c' failed\n", first
);
11057 else if (third
== 0)
11058 xmlGenericError(xmlGenericErrorContext
,
11059 "PP: lookup '%c%c' failed\n", first
, next
);
11061 xmlGenericError(xmlGenericErrorContext
,
11062 "PP: lookup '%c%c%c' failed\n", first
, next
, third
);
11068 * xmlParseGetLasts:
11069 * @ctxt: an XML parser context
11070 * @lastlt: pointer to store the last '<' from the input
11071 * @lastgt: pointer to store the last '>' from the input
11073 * Lookup the last < and > in the current chunk
11076 xmlParseGetLasts(xmlParserCtxtPtr ctxt
, const xmlChar
**lastlt
,
11077 const xmlChar
**lastgt
) {
11078 const xmlChar
*tmp
;
11080 if ((ctxt
== NULL
) || (lastlt
== NULL
) || (lastgt
== NULL
)) {
11081 xmlGenericError(xmlGenericErrorContext
,
11082 "Internal error: xmlParseGetLasts\n");
11085 if ((ctxt
->progressive
!= 0) && (ctxt
->inputNr
== 1)) {
11086 tmp
= ctxt
->input
->end
;
11088 while ((tmp
>= ctxt
->input
->base
) && (*tmp
!= '<')) tmp
--;
11089 if (tmp
< ctxt
->input
->base
) {
11095 while ((tmp
< ctxt
->input
->end
) && (*tmp
!= '>')) {
11096 if (*tmp
== '\'') {
11098 while ((tmp
< ctxt
->input
->end
) && (*tmp
!= '\'')) tmp
++;
11099 if (tmp
< ctxt
->input
->end
) tmp
++;
11100 } else if (*tmp
== '"') {
11102 while ((tmp
< ctxt
->input
->end
) && (*tmp
!= '"')) tmp
++;
11103 if (tmp
< ctxt
->input
->end
) tmp
++;
11107 if (tmp
< ctxt
->input
->end
)
11112 while ((tmp
>= ctxt
->input
->base
) && (*tmp
!= '>')) tmp
--;
11113 if (tmp
>= ctxt
->input
->base
)
11125 * xmlCheckCdataPush:
11126 * @utf: pointer to the block of characters
11127 * @len: length of the block in bytes
11128 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11130 * Check that the block of characters is okay as CData content [20]
11132 * Returns the number of bytes to pass if okay, a negative index where an
11133 * UTF-8 error occurred otherwise
11136 xmlCheckCdataPush(const xmlChar
*utf
, int len
, int complete
) {
11141 if ((utf
== NULL
) || (len
<= 0))
11144 for (ix
= 0; ix
< len
;) { /* string is 0-terminated */
11146 if ((c
& 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11149 else if ((c
== 0xA) || (c
== 0xD) || (c
== 0x9))
11153 } else if ((c
& 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11154 if (ix
+ 2 > len
) return(complete
? -ix
: ix
);
11155 if ((utf
[ix
+1] & 0xc0 ) != 0x80)
11157 codepoint
= (utf
[ix
] & 0x1f) << 6;
11158 codepoint
|= utf
[ix
+1] & 0x3f;
11159 if (!xmlIsCharQ(codepoint
))
11162 } else if ((c
& 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11163 if (ix
+ 3 > len
) return(complete
? -ix
: ix
);
11164 if (((utf
[ix
+1] & 0xc0) != 0x80) ||
11165 ((utf
[ix
+2] & 0xc0) != 0x80))
11167 codepoint
= (utf
[ix
] & 0xf) << 12;
11168 codepoint
|= (utf
[ix
+1] & 0x3f) << 6;
11169 codepoint
|= utf
[ix
+2] & 0x3f;
11170 if (!xmlIsCharQ(codepoint
))
11173 } else if ((c
& 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11174 if (ix
+ 4 > len
) return(complete
? -ix
: ix
);
11175 if (((utf
[ix
+1] & 0xc0) != 0x80) ||
11176 ((utf
[ix
+2] & 0xc0) != 0x80) ||
11177 ((utf
[ix
+3] & 0xc0) != 0x80))
11179 codepoint
= (utf
[ix
] & 0x7) << 18;
11180 codepoint
|= (utf
[ix
+1] & 0x3f) << 12;
11181 codepoint
|= (utf
[ix
+2] & 0x3f) << 6;
11182 codepoint
|= utf
[ix
+3] & 0x3f;
11183 if (!xmlIsCharQ(codepoint
))
11186 } else /* unknown encoding */
11193 * xmlParseTryOrFinish:
11194 * @ctxt: an XML parser context
11195 * @terminate: last chunk indicator
11197 * Try to progress on parsing
11199 * Returns zero if no parsing was possible
11202 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt
, int terminate
) {
11206 const xmlChar
*lastlt
, *lastgt
;
11208 if (ctxt
->input
== NULL
)
11212 switch (ctxt
->instate
) {
11213 case XML_PARSER_EOF
:
11214 xmlGenericError(xmlGenericErrorContext
,
11215 "PP: try EOF\n"); break;
11216 case XML_PARSER_START
:
11217 xmlGenericError(xmlGenericErrorContext
,
11218 "PP: try START\n"); break;
11219 case XML_PARSER_MISC
:
11220 xmlGenericError(xmlGenericErrorContext
,
11221 "PP: try MISC\n");break;
11222 case XML_PARSER_COMMENT
:
11223 xmlGenericError(xmlGenericErrorContext
,
11224 "PP: try COMMENT\n");break;
11225 case XML_PARSER_PROLOG
:
11226 xmlGenericError(xmlGenericErrorContext
,
11227 "PP: try PROLOG\n");break;
11228 case XML_PARSER_START_TAG
:
11229 xmlGenericError(xmlGenericErrorContext
,
11230 "PP: try START_TAG\n");break;
11231 case XML_PARSER_CONTENT
:
11232 xmlGenericError(xmlGenericErrorContext
,
11233 "PP: try CONTENT\n");break;
11234 case XML_PARSER_CDATA_SECTION
:
11235 xmlGenericError(xmlGenericErrorContext
,
11236 "PP: try CDATA_SECTION\n");break;
11237 case XML_PARSER_END_TAG
:
11238 xmlGenericError(xmlGenericErrorContext
,
11239 "PP: try END_TAG\n");break;
11240 case XML_PARSER_ENTITY_DECL
:
11241 xmlGenericError(xmlGenericErrorContext
,
11242 "PP: try ENTITY_DECL\n");break;
11243 case XML_PARSER_ENTITY_VALUE
:
11244 xmlGenericError(xmlGenericErrorContext
,
11245 "PP: try ENTITY_VALUE\n");break;
11246 case XML_PARSER_ATTRIBUTE_VALUE
:
11247 xmlGenericError(xmlGenericErrorContext
,
11248 "PP: try ATTRIBUTE_VALUE\n");break;
11249 case XML_PARSER_DTD
:
11250 xmlGenericError(xmlGenericErrorContext
,
11251 "PP: try DTD\n");break;
11252 case XML_PARSER_EPILOG
:
11253 xmlGenericError(xmlGenericErrorContext
,
11254 "PP: try EPILOG\n");break;
11255 case XML_PARSER_PI
:
11256 xmlGenericError(xmlGenericErrorContext
,
11257 "PP: try PI\n");break;
11258 case XML_PARSER_IGNORE
:
11259 xmlGenericError(xmlGenericErrorContext
,
11260 "PP: try IGNORE\n");break;
11264 if ((ctxt
->input
!= NULL
) &&
11265 (ctxt
->input
->cur
- ctxt
->input
->base
> 4096)) {
11267 ctxt
->checkIndex
= 0;
11269 xmlParseGetLasts(ctxt
, &lastlt
, &lastgt
);
11271 while (ctxt
->instate
!= XML_PARSER_EOF
) {
11272 if ((ctxt
->errNo
!= XML_ERR_OK
) && (ctxt
->disableSAX
== 1))
11275 if (ctxt
->input
== NULL
) break;
11276 if (ctxt
->input
->buf
== NULL
)
11277 avail
= ctxt
->input
->length
-
11278 (ctxt
->input
->cur
- ctxt
->input
->base
);
11281 * If we are operating on converted input, try to flush
11282 * remaining chars to avoid them stalling in the non-converted
11283 * buffer. But do not do this in document start where
11284 * encoding="..." may not have been read and we work on a
11285 * guessed encoding.
11287 if ((ctxt
->instate
!= XML_PARSER_START
) &&
11288 (ctxt
->input
->buf
->raw
!= NULL
) &&
11289 (xmlBufIsEmpty(ctxt
->input
->buf
->raw
) == 0)) {
11290 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
,
11292 size_t current
= ctxt
->input
->cur
- ctxt
->input
->base
;
11294 xmlParserInputBufferPush(ctxt
->input
->buf
, 0, "");
11295 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
,
11298 avail
= xmlBufUse(ctxt
->input
->buf
->buffer
) -
11299 (ctxt
->input
->cur
- ctxt
->input
->base
);
11303 switch (ctxt
->instate
) {
11304 case XML_PARSER_EOF
:
11306 * Document parsing is done !
11309 case XML_PARSER_START
:
11310 if (ctxt
->charset
== XML_CHAR_ENCODING_NONE
) {
11312 xmlCharEncoding enc
;
11315 * Very first chars read from the document flow.
11321 * Get the 4 first bytes and decode the charset
11322 * if enc != XML_CHAR_ENCODING_NONE
11323 * plug some encoding conversion routines,
11324 * else xmlSwitchEncoding will set to (default)
11331 enc
= xmlDetectCharEncoding(start
, 4);
11332 xmlSwitchEncoding(ctxt
, enc
);
11338 cur
= ctxt
->input
->cur
[0];
11339 next
= ctxt
->input
->cur
[1];
11341 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
11342 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
11343 &xmlDefaultSAXLocator
);
11344 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
11345 xmlHaltParser(ctxt
);
11347 xmlGenericError(xmlGenericErrorContext
,
11348 "PP: entering EOF\n");
11350 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11351 ctxt
->sax
->endDocument(ctxt
->userData
);
11354 if ((cur
== '<') && (next
== '?')) {
11355 /* PI or XML decl */
11356 if (avail
< 5) return(ret
);
11357 if ((!terminate
) &&
11358 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0))
11360 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
11361 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
11362 &xmlDefaultSAXLocator
);
11363 if ((ctxt
->input
->cur
[2] == 'x') &&
11364 (ctxt
->input
->cur
[3] == 'm') &&
11365 (ctxt
->input
->cur
[4] == 'l') &&
11366 (IS_BLANK_CH(ctxt
->input
->cur
[5]))) {
11369 xmlGenericError(xmlGenericErrorContext
,
11370 "PP: Parsing XML Decl\n");
11372 xmlParseXMLDecl(ctxt
);
11373 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
11375 * The XML REC instructs us to stop parsing right
11378 xmlHaltParser(ctxt
);
11381 ctxt
->standalone
= ctxt
->input
->standalone
;
11382 if ((ctxt
->encoding
== NULL
) &&
11383 (ctxt
->input
->encoding
!= NULL
))
11384 ctxt
->encoding
= xmlStrdup(ctxt
->input
->encoding
);
11385 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
11386 (!ctxt
->disableSAX
))
11387 ctxt
->sax
->startDocument(ctxt
->userData
);
11388 ctxt
->instate
= XML_PARSER_MISC
;
11390 xmlGenericError(xmlGenericErrorContext
,
11391 "PP: entering MISC\n");
11394 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
11395 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
11396 (!ctxt
->disableSAX
))
11397 ctxt
->sax
->startDocument(ctxt
->userData
);
11398 ctxt
->instate
= XML_PARSER_MISC
;
11400 xmlGenericError(xmlGenericErrorContext
,
11401 "PP: entering MISC\n");
11405 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
11406 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
11407 &xmlDefaultSAXLocator
);
11408 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
11409 if (ctxt
->version
== NULL
) {
11410 xmlErrMemory(ctxt
, NULL
);
11413 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
11414 (!ctxt
->disableSAX
))
11415 ctxt
->sax
->startDocument(ctxt
->userData
);
11416 ctxt
->instate
= XML_PARSER_MISC
;
11418 xmlGenericError(xmlGenericErrorContext
,
11419 "PP: entering MISC\n");
11423 case XML_PARSER_START_TAG
: {
11424 const xmlChar
*name
;
11425 const xmlChar
*prefix
= NULL
;
11426 const xmlChar
*URI
= NULL
;
11427 int line
= ctxt
->input
->line
;
11428 int nsNr
= ctxt
->nsNr
;
11430 if ((avail
< 2) && (ctxt
->inputNr
== 1))
11432 cur
= ctxt
->input
->cur
[0];
11434 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
11435 xmlHaltParser(ctxt
);
11436 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11437 ctxt
->sax
->endDocument(ctxt
->userData
);
11441 if (ctxt
->progressive
) {
11442 /* > can be found unescaped in attribute values */
11443 if ((lastgt
== NULL
) || (ctxt
->input
->cur
>= lastgt
))
11445 } else if (xmlParseLookupSequence(ctxt
, '>', 0, 0) < 0) {
11449 if (ctxt
->spaceNr
== 0)
11450 spacePush(ctxt
, -1);
11451 else if (*ctxt
->space
== -2)
11452 spacePush(ctxt
, -1);
11454 spacePush(ctxt
, *ctxt
->space
);
11455 #ifdef LIBXML_SAX1_ENABLED
11457 #endif /* LIBXML_SAX1_ENABLED */
11458 name
= xmlParseStartTag2(ctxt
, &prefix
, &URI
, &tlen
);
11459 #ifdef LIBXML_SAX1_ENABLED
11461 name
= xmlParseStartTag(ctxt
);
11462 #endif /* LIBXML_SAX1_ENABLED */
11463 if (ctxt
->instate
== XML_PARSER_EOF
)
11465 if (name
== NULL
) {
11467 xmlHaltParser(ctxt
);
11468 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11469 ctxt
->sax
->endDocument(ctxt
->userData
);
11472 #ifdef LIBXML_VALID_ENABLED
11474 * [ VC: Root Element Type ]
11475 * The Name in the document type declaration must match
11476 * the element type of the root element.
11478 if (ctxt
->validate
&& ctxt
->wellFormed
&& ctxt
->myDoc
&&
11479 ctxt
->node
&& (ctxt
->node
== ctxt
->myDoc
->children
))
11480 ctxt
->valid
&= xmlValidateRoot(&ctxt
->vctxt
, ctxt
->myDoc
);
11481 #endif /* LIBXML_VALID_ENABLED */
11484 * Check for an Empty Element.
11486 if ((RAW
== '/') && (NXT(1) == '>')) {
11490 if ((ctxt
->sax
!= NULL
) &&
11491 (ctxt
->sax
->endElementNs
!= NULL
) &&
11492 (!ctxt
->disableSAX
))
11493 ctxt
->sax
->endElementNs(ctxt
->userData
, name
,
11495 if (ctxt
->nsNr
- nsNr
> 0)
11496 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
11497 #ifdef LIBXML_SAX1_ENABLED
11499 if ((ctxt
->sax
!= NULL
) &&
11500 (ctxt
->sax
->endElement
!= NULL
) &&
11501 (!ctxt
->disableSAX
))
11502 ctxt
->sax
->endElement(ctxt
->userData
, name
);
11503 #endif /* LIBXML_SAX1_ENABLED */
11505 if (ctxt
->instate
== XML_PARSER_EOF
)
11508 if (ctxt
->nameNr
== 0) {
11509 ctxt
->instate
= XML_PARSER_EPILOG
;
11511 ctxt
->instate
= XML_PARSER_CONTENT
;
11513 ctxt
->progressive
= 1;
11519 xmlFatalErrMsgStr(ctxt
, XML_ERR_GT_REQUIRED
,
11520 "Couldn't find end of Start Tag %s\n",
11525 nameNsPush(ctxt
, name
, prefix
, URI
, line
, ctxt
->nsNr
- nsNr
);
11527 ctxt
->instate
= XML_PARSER_CONTENT
;
11528 ctxt
->progressive
= 1;
11531 case XML_PARSER_CONTENT
: {
11533 unsigned long cons
;
11534 if ((avail
< 2) && (ctxt
->inputNr
== 1))
11536 cur
= ctxt
->input
->cur
[0];
11537 next
= ctxt
->input
->cur
[1];
11539 id
= ctxt
->input
->id
;
11540 cons
= CUR_CONSUMED
;
11541 if ((cur
== '<') && (next
== '/')) {
11542 ctxt
->instate
= XML_PARSER_END_TAG
;
11544 } else if ((cur
== '<') && (next
== '?')) {
11545 if ((!terminate
) &&
11546 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0)) {
11547 ctxt
->progressive
= XML_PARSER_PI
;
11551 ctxt
->instate
= XML_PARSER_CONTENT
;
11552 ctxt
->progressive
= 1;
11553 } else if ((cur
== '<') && (next
!= '!')) {
11554 ctxt
->instate
= XML_PARSER_START_TAG
;
11556 } else if ((cur
== '<') && (next
== '!') &&
11557 (ctxt
->input
->cur
[2] == '-') &&
11558 (ctxt
->input
->cur
[3] == '-')) {
11563 ctxt
->input
->cur
+= 4;
11564 term
= xmlParseLookupSequence(ctxt
, '-', '-', '>');
11565 ctxt
->input
->cur
-= 4;
11566 if ((!terminate
) && (term
< 0)) {
11567 ctxt
->progressive
= XML_PARSER_COMMENT
;
11570 xmlParseComment(ctxt
);
11571 ctxt
->instate
= XML_PARSER_CONTENT
;
11572 ctxt
->progressive
= 1;
11573 } else if ((cur
== '<') && (ctxt
->input
->cur
[1] == '!') &&
11574 (ctxt
->input
->cur
[2] == '[') &&
11575 (ctxt
->input
->cur
[3] == 'C') &&
11576 (ctxt
->input
->cur
[4] == 'D') &&
11577 (ctxt
->input
->cur
[5] == 'A') &&
11578 (ctxt
->input
->cur
[6] == 'T') &&
11579 (ctxt
->input
->cur
[7] == 'A') &&
11580 (ctxt
->input
->cur
[8] == '[')) {
11582 ctxt
->instate
= XML_PARSER_CDATA_SECTION
;
11584 } else if ((cur
== '<') && (next
== '!') &&
11587 } else if (cur
== '&') {
11588 if ((!terminate
) &&
11589 (xmlParseLookupSequence(ctxt
, ';', 0, 0) < 0))
11591 xmlParseReference(ctxt
);
11593 /* TODO Avoid the extra copy, handle directly !!! */
11595 * Goal of the following test is:
11596 * - minimize calls to the SAX 'character' callback
11597 * when they are mergeable
11598 * - handle a problem for isBlank when we only parse
11599 * a sequence of blank chars and the next one is
11600 * not available to check against '<' presence.
11601 * - tries to homogenize the differences in SAX
11602 * callbacks between the push and pull versions
11605 if ((ctxt
->inputNr
== 1) &&
11606 (avail
< XML_PARSER_BIG_BUFFER_SIZE
)) {
11608 if (ctxt
->progressive
) {
11609 if ((lastlt
== NULL
) ||
11610 (ctxt
->input
->cur
> lastlt
))
11612 } else if (xmlParseLookupSequence(ctxt
,
11618 ctxt
->checkIndex
= 0;
11619 xmlParseCharData(ctxt
, 0);
11621 if ((cons
== CUR_CONSUMED
) && (id
== ctxt
->input
->id
)) {
11622 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
11623 "detected an error in element content\n");
11624 xmlHaltParser(ctxt
);
11629 case XML_PARSER_END_TAG
:
11633 if (ctxt
->progressive
) {
11634 /* > can be found unescaped in attribute values */
11635 if ((lastgt
== NULL
) || (ctxt
->input
->cur
>= lastgt
))
11637 } else if (xmlParseLookupSequence(ctxt
, '>', 0, 0) < 0) {
11642 xmlParseEndTag2(ctxt
, &ctxt
->pushTab
[ctxt
->nameNr
- 1]);
11645 #ifdef LIBXML_SAX1_ENABLED
11647 xmlParseEndTag1(ctxt
, 0);
11648 #endif /* LIBXML_SAX1_ENABLED */
11649 if (ctxt
->instate
== XML_PARSER_EOF
) {
11651 } else if (ctxt
->nameNr
== 0) {
11652 ctxt
->instate
= XML_PARSER_EPILOG
;
11654 ctxt
->instate
= XML_PARSER_CONTENT
;
11657 case XML_PARSER_CDATA_SECTION
: {
11659 * Push mode needs the SAX callback for
11660 * cdataBlock to merge back contiguous callbacks.
11664 base
= xmlParseLookupSequence(ctxt
, ']', ']', '>');
11666 if (avail
>= XML_PARSER_BIG_BUFFER_SIZE
+ 2) {
11669 tmp
= xmlCheckCdataPush(ctxt
->input
->cur
,
11670 XML_PARSER_BIG_BUFFER_SIZE
, 0);
11673 ctxt
->input
->cur
+= tmp
;
11674 goto encoding_error
;
11676 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
11677 if (ctxt
->sax
->cdataBlock
!= NULL
)
11678 ctxt
->sax
->cdataBlock(ctxt
->userData
,
11679 ctxt
->input
->cur
, tmp
);
11680 else if (ctxt
->sax
->characters
!= NULL
)
11681 ctxt
->sax
->characters(ctxt
->userData
,
11682 ctxt
->input
->cur
, tmp
);
11684 if (ctxt
->instate
== XML_PARSER_EOF
)
11687 ctxt
->checkIndex
= 0;
11693 tmp
= xmlCheckCdataPush(ctxt
->input
->cur
, base
, 1);
11694 if ((tmp
< 0) || (tmp
!= base
)) {
11696 ctxt
->input
->cur
+= tmp
;
11697 goto encoding_error
;
11699 if ((ctxt
->sax
!= NULL
) && (base
== 0) &&
11700 (ctxt
->sax
->cdataBlock
!= NULL
) &&
11701 (!ctxt
->disableSAX
)) {
11703 * Special case to provide identical behaviour
11704 * between pull and push parsers on empty CDATA
11707 if ((ctxt
->input
->cur
- ctxt
->input
->base
>= 9) &&
11708 (!strncmp((const char *)&ctxt
->input
->cur
[-9],
11710 ctxt
->sax
->cdataBlock(ctxt
->userData
,
11712 } else if ((ctxt
->sax
!= NULL
) && (base
> 0) &&
11713 (!ctxt
->disableSAX
)) {
11714 if (ctxt
->sax
->cdataBlock
!= NULL
)
11715 ctxt
->sax
->cdataBlock(ctxt
->userData
,
11716 ctxt
->input
->cur
, base
);
11717 else if (ctxt
->sax
->characters
!= NULL
)
11718 ctxt
->sax
->characters(ctxt
->userData
,
11719 ctxt
->input
->cur
, base
);
11721 if (ctxt
->instate
== XML_PARSER_EOF
)
11724 ctxt
->checkIndex
= 0;
11725 ctxt
->instate
= XML_PARSER_CONTENT
;
11727 xmlGenericError(xmlGenericErrorContext
,
11728 "PP: entering CONTENT\n");
11733 case XML_PARSER_MISC
:
11735 if (ctxt
->input
->buf
== NULL
)
11736 avail
= ctxt
->input
->length
-
11737 (ctxt
->input
->cur
- ctxt
->input
->base
);
11739 avail
= xmlBufUse(ctxt
->input
->buf
->buffer
) -
11740 (ctxt
->input
->cur
- ctxt
->input
->base
);
11743 cur
= ctxt
->input
->cur
[0];
11744 next
= ctxt
->input
->cur
[1];
11745 if ((cur
== '<') && (next
== '?')) {
11746 if ((!terminate
) &&
11747 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0)) {
11748 ctxt
->progressive
= XML_PARSER_PI
;
11752 xmlGenericError(xmlGenericErrorContext
,
11753 "PP: Parsing PI\n");
11756 if (ctxt
->instate
== XML_PARSER_EOF
)
11758 ctxt
->instate
= XML_PARSER_MISC
;
11759 ctxt
->progressive
= 1;
11760 ctxt
->checkIndex
= 0;
11761 } else if ((cur
== '<') && (next
== '!') &&
11762 (ctxt
->input
->cur
[2] == '-') &&
11763 (ctxt
->input
->cur
[3] == '-')) {
11764 if ((!terminate
) &&
11765 (xmlParseLookupSequence(ctxt
, '-', '-', '>') < 0)) {
11766 ctxt
->progressive
= XML_PARSER_COMMENT
;
11770 xmlGenericError(xmlGenericErrorContext
,
11771 "PP: Parsing Comment\n");
11773 xmlParseComment(ctxt
);
11774 if (ctxt
->instate
== XML_PARSER_EOF
)
11776 ctxt
->instate
= XML_PARSER_MISC
;
11777 ctxt
->progressive
= 1;
11778 ctxt
->checkIndex
= 0;
11779 } else if ((cur
== '<') && (next
== '!') &&
11780 (ctxt
->input
->cur
[2] == 'D') &&
11781 (ctxt
->input
->cur
[3] == 'O') &&
11782 (ctxt
->input
->cur
[4] == 'C') &&
11783 (ctxt
->input
->cur
[5] == 'T') &&
11784 (ctxt
->input
->cur
[6] == 'Y') &&
11785 (ctxt
->input
->cur
[7] == 'P') &&
11786 (ctxt
->input
->cur
[8] == 'E')) {
11787 if ((!terminate
) &&
11788 (xmlParseLookupSequence(ctxt
, '>', 0, 0) < 0)) {
11789 ctxt
->progressive
= XML_PARSER_DTD
;
11793 xmlGenericError(xmlGenericErrorContext
,
11794 "PP: Parsing internal subset\n");
11796 ctxt
->inSubset
= 1;
11797 ctxt
->progressive
= 0;
11798 ctxt
->checkIndex
= 0;
11799 xmlParseDocTypeDecl(ctxt
);
11800 if (ctxt
->instate
== XML_PARSER_EOF
)
11803 ctxt
->instate
= XML_PARSER_DTD
;
11805 xmlGenericError(xmlGenericErrorContext
,
11806 "PP: entering DTD\n");
11810 * Create and update the external subset.
11812 ctxt
->inSubset
= 2;
11813 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
11814 (ctxt
->sax
->externalSubset
!= NULL
))
11815 ctxt
->sax
->externalSubset(ctxt
->userData
,
11816 ctxt
->intSubName
, ctxt
->extSubSystem
,
11818 ctxt
->inSubset
= 0;
11819 xmlCleanSpecialAttr(ctxt
);
11820 ctxt
->instate
= XML_PARSER_PROLOG
;
11822 xmlGenericError(xmlGenericErrorContext
,
11823 "PP: entering PROLOG\n");
11826 } else if ((cur
== '<') && (next
== '!') &&
11830 ctxt
->instate
= XML_PARSER_START_TAG
;
11831 ctxt
->progressive
= XML_PARSER_START_TAG
;
11832 xmlParseGetLasts(ctxt
, &lastlt
, &lastgt
);
11834 xmlGenericError(xmlGenericErrorContext
,
11835 "PP: entering START_TAG\n");
11839 case XML_PARSER_PROLOG
:
11841 if (ctxt
->input
->buf
== NULL
)
11842 avail
= ctxt
->input
->length
- (ctxt
->input
->cur
- ctxt
->input
->base
);
11844 avail
= xmlBufUse(ctxt
->input
->buf
->buffer
) -
11845 (ctxt
->input
->cur
- ctxt
->input
->base
);
11848 cur
= ctxt
->input
->cur
[0];
11849 next
= ctxt
->input
->cur
[1];
11850 if ((cur
== '<') && (next
== '?')) {
11851 if ((!terminate
) &&
11852 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0)) {
11853 ctxt
->progressive
= XML_PARSER_PI
;
11857 xmlGenericError(xmlGenericErrorContext
,
11858 "PP: Parsing PI\n");
11861 if (ctxt
->instate
== XML_PARSER_EOF
)
11863 ctxt
->instate
= XML_PARSER_PROLOG
;
11864 ctxt
->progressive
= 1;
11865 } else if ((cur
== '<') && (next
== '!') &&
11866 (ctxt
->input
->cur
[2] == '-') && (ctxt
->input
->cur
[3] == '-')) {
11867 if ((!terminate
) &&
11868 (xmlParseLookupSequence(ctxt
, '-', '-', '>') < 0)) {
11869 ctxt
->progressive
= XML_PARSER_COMMENT
;
11873 xmlGenericError(xmlGenericErrorContext
,
11874 "PP: Parsing Comment\n");
11876 xmlParseComment(ctxt
);
11877 if (ctxt
->instate
== XML_PARSER_EOF
)
11879 ctxt
->instate
= XML_PARSER_PROLOG
;
11880 ctxt
->progressive
= 1;
11881 } else if ((cur
== '<') && (next
== '!') &&
11885 ctxt
->instate
= XML_PARSER_START_TAG
;
11886 if (ctxt
->progressive
== 0)
11887 ctxt
->progressive
= XML_PARSER_START_TAG
;
11888 xmlParseGetLasts(ctxt
, &lastlt
, &lastgt
);
11890 xmlGenericError(xmlGenericErrorContext
,
11891 "PP: entering START_TAG\n");
11895 case XML_PARSER_EPILOG
:
11897 if (ctxt
->input
->buf
== NULL
)
11898 avail
= ctxt
->input
->length
- (ctxt
->input
->cur
- ctxt
->input
->base
);
11900 avail
= xmlBufUse(ctxt
->input
->buf
->buffer
) -
11901 (ctxt
->input
->cur
- ctxt
->input
->base
);
11904 cur
= ctxt
->input
->cur
[0];
11905 next
= ctxt
->input
->cur
[1];
11906 if ((cur
== '<') && (next
== '?')) {
11907 if ((!terminate
) &&
11908 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0)) {
11909 ctxt
->progressive
= XML_PARSER_PI
;
11913 xmlGenericError(xmlGenericErrorContext
,
11914 "PP: Parsing PI\n");
11917 if (ctxt
->instate
== XML_PARSER_EOF
)
11919 ctxt
->instate
= XML_PARSER_EPILOG
;
11920 ctxt
->progressive
= 1;
11921 } else if ((cur
== '<') && (next
== '!') &&
11922 (ctxt
->input
->cur
[2] == '-') && (ctxt
->input
->cur
[3] == '-')) {
11923 if ((!terminate
) &&
11924 (xmlParseLookupSequence(ctxt
, '-', '-', '>') < 0)) {
11925 ctxt
->progressive
= XML_PARSER_COMMENT
;
11929 xmlGenericError(xmlGenericErrorContext
,
11930 "PP: Parsing Comment\n");
11932 xmlParseComment(ctxt
);
11933 if (ctxt
->instate
== XML_PARSER_EOF
)
11935 ctxt
->instate
= XML_PARSER_EPILOG
;
11936 ctxt
->progressive
= 1;
11937 } else if ((cur
== '<') && (next
== '!') &&
11941 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
11942 xmlHaltParser(ctxt
);
11944 xmlGenericError(xmlGenericErrorContext
,
11945 "PP: entering EOF\n");
11947 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11948 ctxt
->sax
->endDocument(ctxt
->userData
);
11952 case XML_PARSER_DTD
: {
11954 * Sorry but progressive parsing of the internal subset
11955 * is not expected to be supported. We first check that
11956 * the full content of the internal subset is available and
11957 * the parsing is launched only at that point.
11958 * Internal subset ends up with "']' S? '>'" in an unescaped
11959 * section and not in a ']]>' sequence which are conditional
11960 * sections (whoever argued to keep that crap in XML deserve
11961 * a place in hell !).
11968 base
= ctxt
->input
->cur
- ctxt
->input
->base
;
11969 if (base
< 0) return(0);
11970 if (ctxt
->checkIndex
> base
)
11971 base
= ctxt
->checkIndex
;
11972 buf
= xmlBufContent(ctxt
->input
->buf
->buffer
);
11973 use
= xmlBufUse(ctxt
->input
->buf
->buffer
);
11974 for (;(unsigned int) base
< use
; base
++) {
11976 if (buf
[base
] == quote
)
11980 if ((quote
== 0) && (buf
[base
] == '<')) {
11982 /* special handling of comments */
11983 if (((unsigned int) base
+ 4 < use
) &&
11984 (buf
[base
+ 1] == '!') &&
11985 (buf
[base
+ 2] == '-') &&
11986 (buf
[base
+ 3] == '-')) {
11987 for (;(unsigned int) base
+ 3 < use
; base
++) {
11988 if ((buf
[base
] == '-') &&
11989 (buf
[base
+ 1] == '-') &&
11990 (buf
[base
+ 2] == '>')) {
11998 fprintf(stderr
, "unfinished comment\n");
12005 if (buf
[base
] == '"') {
12009 if (buf
[base
] == '\'') {
12013 if (buf
[base
] == ']') {
12015 fprintf(stderr
, "%c%c%c%c: ", buf
[base
],
12016 buf
[base
+ 1], buf
[base
+ 2], buf
[base
+ 3]);
12018 if ((unsigned int) base
+1 >= use
)
12020 if (buf
[base
+ 1] == ']') {
12021 /* conditional crap, skip both ']' ! */
12025 for (i
= 1; (unsigned int) base
+ i
< use
; i
++) {
12026 if (buf
[base
+ i
] == '>') {
12028 fprintf(stderr
, "found\n");
12030 goto found_end_int_subset
;
12032 if (!IS_BLANK_CH(buf
[base
+ i
])) {
12034 fprintf(stderr
, "not found\n");
12036 goto not_end_of_int_subset
;
12040 fprintf(stderr
, "end of stream\n");
12045 not_end_of_int_subset
:
12046 continue; /* for */
12049 * We didn't find the end of the Internal subset
12052 ctxt
->checkIndex
= base
;
12054 ctxt
->checkIndex
= 0;
12057 xmlGenericError(xmlGenericErrorContext
,
12058 "PP: lookup of int subset end filed\n");
12062 found_end_int_subset
:
12063 ctxt
->checkIndex
= 0;
12064 xmlParseInternalSubset(ctxt
);
12065 if (ctxt
->instate
== XML_PARSER_EOF
)
12067 ctxt
->inSubset
= 2;
12068 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
12069 (ctxt
->sax
->externalSubset
!= NULL
))
12070 ctxt
->sax
->externalSubset(ctxt
->userData
, ctxt
->intSubName
,
12071 ctxt
->extSubSystem
, ctxt
->extSubURI
);
12072 ctxt
->inSubset
= 0;
12073 xmlCleanSpecialAttr(ctxt
);
12074 if (ctxt
->instate
== XML_PARSER_EOF
)
12076 ctxt
->instate
= XML_PARSER_PROLOG
;
12077 ctxt
->checkIndex
= 0;
12079 xmlGenericError(xmlGenericErrorContext
,
12080 "PP: entering PROLOG\n");
12084 case XML_PARSER_COMMENT
:
12085 xmlGenericError(xmlGenericErrorContext
,
12086 "PP: internal error, state == COMMENT\n");
12087 ctxt
->instate
= XML_PARSER_CONTENT
;
12089 xmlGenericError(xmlGenericErrorContext
,
12090 "PP: entering CONTENT\n");
12093 case XML_PARSER_IGNORE
:
12094 xmlGenericError(xmlGenericErrorContext
,
12095 "PP: internal error, state == IGNORE");
12096 ctxt
->instate
= XML_PARSER_DTD
;
12098 xmlGenericError(xmlGenericErrorContext
,
12099 "PP: entering DTD\n");
12102 case XML_PARSER_PI
:
12103 xmlGenericError(xmlGenericErrorContext
,
12104 "PP: internal error, state == PI\n");
12105 ctxt
->instate
= XML_PARSER_CONTENT
;
12107 xmlGenericError(xmlGenericErrorContext
,
12108 "PP: entering CONTENT\n");
12111 case XML_PARSER_ENTITY_DECL
:
12112 xmlGenericError(xmlGenericErrorContext
,
12113 "PP: internal error, state == ENTITY_DECL\n");
12114 ctxt
->instate
= XML_PARSER_DTD
;
12116 xmlGenericError(xmlGenericErrorContext
,
12117 "PP: entering DTD\n");
12120 case XML_PARSER_ENTITY_VALUE
:
12121 xmlGenericError(xmlGenericErrorContext
,
12122 "PP: internal error, state == ENTITY_VALUE\n");
12123 ctxt
->instate
= XML_PARSER_CONTENT
;
12125 xmlGenericError(xmlGenericErrorContext
,
12126 "PP: entering DTD\n");
12129 case XML_PARSER_ATTRIBUTE_VALUE
:
12130 xmlGenericError(xmlGenericErrorContext
,
12131 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12132 ctxt
->instate
= XML_PARSER_START_TAG
;
12134 xmlGenericError(xmlGenericErrorContext
,
12135 "PP: entering START_TAG\n");
12138 case XML_PARSER_SYSTEM_LITERAL
:
12139 xmlGenericError(xmlGenericErrorContext
,
12140 "PP: internal error, state == SYSTEM_LITERAL\n");
12141 ctxt
->instate
= XML_PARSER_START_TAG
;
12143 xmlGenericError(xmlGenericErrorContext
,
12144 "PP: entering START_TAG\n");
12147 case XML_PARSER_PUBLIC_LITERAL
:
12148 xmlGenericError(xmlGenericErrorContext
,
12149 "PP: internal error, state == PUBLIC_LITERAL\n");
12150 ctxt
->instate
= XML_PARSER_START_TAG
;
12152 xmlGenericError(xmlGenericErrorContext
,
12153 "PP: entering START_TAG\n");
12160 xmlGenericError(xmlGenericErrorContext
, "PP: done %d\n", ret
);
12167 snprintf(buffer
, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12168 ctxt
->input
->cur
[0], ctxt
->input
->cur
[1],
12169 ctxt
->input
->cur
[2], ctxt
->input
->cur
[3]);
12170 __xmlErrEncoding(ctxt
, XML_ERR_INVALID_CHAR
,
12171 "Input is not proper UTF-8, indicate encoding !\n%s",
12172 BAD_CAST buffer
, NULL
);
12178 * xmlParseCheckTransition:
12179 * @ctxt: an XML parser context
12180 * @chunk: a char array
12181 * @size: the size in byte of the chunk
12183 * Check depending on the current parser state if the chunk given must be
12184 * processed immediately or if more data is needed to advance parsing.
12186 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
/*
 * Inspect the parser state in ctxt and probe the freshly pushed bytes
 * chunk[0..size) for a '>' character. Every state-specific branch below
 * performs the same memchr() probe.
 * NOTE(review): this listing omits some source lines (no return
 * statement is shown), so the value produced by each branch cannot be
 * read off here - confirm against the complete file.
 */
12189 xmlParseCheckTransition(xmlParserCtxtPtr ctxt
, const char *chunk
, int size
) {
/* Argument validation: NULL context, NULL chunk or negative size. */
12190 if ((ctxt
== NULL
) || (chunk
== NULL
) || (size
< 0))
/* Parser is in the start-tag state: look for '>' in the chunk. */
12192 if (ctxt
->instate
== XML_PARSER_START_TAG
) {
12193 if (memchr(chunk
, '>', size
) != NULL
)
/* ctxt->progressive records a pending comment: look for '>'. */
12197 if (ctxt
->progressive
== XML_PARSER_COMMENT
) {
12198 if (memchr(chunk
, '>', size
) != NULL
)
/* Parser is in the CDATA-section state: look for '>'. */
12202 if (ctxt
->instate
== XML_PARSER_CDATA_SECTION
) {
12203 if (memchr(chunk
, '>', size
) != NULL
)
/* ctxt->progressive records a pending processing instruction: look for '>'. */
12207 if (ctxt
->progressive
== XML_PARSER_PI
) {
12208 if (memchr(chunk
, '>', size
) != NULL
)
/* Parser is in the end-tag state: look for '>'. */
12212 if (ctxt
->instate
== XML_PARSER_END_TAG
) {
12213 if (memchr(chunk
, '>', size
) != NULL
)
/* DTD, flagged either through progressive or through instate: look for '>'. */
12217 if ((ctxt
->progressive
== XML_PARSER_DTD
) ||
12218 (ctxt
->instate
== XML_PARSER_DTD
)) {
12219 if (memchr(chunk
, '>', size
) != NULL
)
12228 * @ctxt: an XML parser context
12229 * @chunk: a char array
12230 * @size: the size in byte of the chunk
12231 * @terminate: last chunk indicator
12233 * Parse a Chunk of memory
12235 * Returns zero if no error, the xmlParserErrors otherwise.
/*
 * Push-parser entry point: feeds chunk[0..size) into the current input
 * buffer of ctxt and then drives xmlParseTryOrFinish() on the buffered
 * data; performs end-of-document checks at the bottom.
 * NOTE(review): this listing omits some source lines (several
 * declarations, conditions and closing braces are not shown); comments
 * below describe only what is visible.
 */
12238 xmlParseChunk(xmlParserCtxtPtr ctxt
, const char *chunk
, int size
,
/* Amount of data buffered before this call's push; 0 until measured. */
12242 size_t old_avail
= 0;
12246 return(XML_ERR_INTERNAL_ERROR
);
/* An error is already recorded and SAX is disabled: report that error. */
12247 if ((ctxt
->errNo
!= XML_ERR_OK
) && (ctxt
->disableSAX
== 1))
12248 return(ctxt
->errNo
);
12249 if (ctxt
->instate
== XML_PARSER_EOF
)
/* Still at document start: run xmlDetectSAX2() on the context. */
12251 if (ctxt
->instate
== XML_PARSER_START
)
12252 xmlDetectSAX2(ctxt
);
/*
 * Non-final chunk whose last byte is '\r'. The body of this branch is
 * not shown; presumably it holds the '\r' back and sets end_in_lf,
 * which is tested further down - TODO confirm.
 */
12253 if ((size
> 0) && (chunk
!= NULL
) && (!terminate
) &&
12254 (chunk
[size
- 1] == '\r')) {
/* Main path: there is data to push and an input buffer to push it into. */
12261 if ((size
> 0) && (chunk
!= NULL
) && (ctxt
->input
!= NULL
) &&
12262 (ctxt
->input
->buf
!= NULL
) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
/*
 * Save the input's base and cur as offsets; they are handed back to
 * xmlBufSetInputBaseCur() after the push (presumably because the push
 * may move the underlying buffer - confirm).
 */
12263 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
, ctxt
->input
);
12264 size_t cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
/* Buffer fill level before the push; later used to locate the new bytes. */
12267 old_avail
= xmlBufUse(ctxt
->input
->buf
->buffer
);
12269 * Specific handling if we autodetected an encoding, we should not
12270 * push more than the first line ... which depend on the encoding
12271 * And only push the rest once the final encoding was detected
12273 if ((ctxt
->instate
== XML_PARSER_START
) && (ctxt
->input
!= NULL
) &&
12274 (ctxt
->input
->buf
!= NULL
) && (ctxt
->input
->buf
->encoder
!= NULL
)) {
/*
 * Byte budget for this first push. The adjustments applied when the
 * encoder name contains UTF-16/UTF16 or UCS-4 are not shown here.
 */
12275 unsigned int len
= 45;
12277 if ((xmlStrcasestr(BAD_CAST ctxt
->input
->buf
->encoder
->name
,
12278 BAD_CAST
"UTF-16")) ||
12279 (xmlStrcasestr(BAD_CAST ctxt
->input
->buf
->encoder
->name
,
12280 BAD_CAST
"UTF16")))
12282 else if ((xmlStrcasestr(BAD_CAST ctxt
->input
->buf
->encoder
->name
,
12283 BAD_CAST
"UCS-4")) ||
12284 (xmlStrcasestr(BAD_CAST ctxt
->input
->buf
->encoder
->name
,
/* Deduct raw bytes already consumed from the budget. */
12288 if (ctxt
->input
->buf
->rawconsumed
< len
)
12289 len
-= ctxt
->input
->buf
->rawconsumed
;
12292 * Change size for reading the initial declaration only
12293 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12294 * will blindly copy extra bytes from memory.
/* remain = number of bytes of this chunk beyond the budget. */
12296 if ((unsigned int) size
> len
) {
12297 remain
= size
- len
;
/* Append the data, then re-sync the input pointers from the saved offsets. */
12303 res
= xmlParserInputBufferPush(ctxt
->input
->buf
, size
, chunk
);
12304 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
, base
, cur
);
/*
 * Failure path (its condition is not shown in this listing).
 * NOTE(review): errNo and the return value are set to XML_PARSER_EOF,
 * which elsewhere in this function is compared against ctxt->instate,
 * i.e. it looks like a parser-state constant rather than an error
 * code - confirm this is intended.
 */
12306 ctxt
->errNo
= XML_PARSER_EOF
;
12307 xmlHaltParser(ctxt
);
12308 return (XML_PARSER_EOF
);
12311 xmlGenericError(xmlGenericErrorContext
, "PP: pushed %d\n", size
);
/*
 * Nothing pushed but parser not at EOF: if the input buffer has an
 * encoder plus both raw and converted buffers, run xmlCharEncInput()
 * on it and re-sync the input pointers afterwards.
 */
12314 } else if (ctxt
->instate
!= XML_PARSER_EOF
) {
12315 if ((ctxt
->input
!= NULL
) && ctxt
->input
->buf
!= NULL
) {
12316 xmlParserInputBufferPtr in
= ctxt
->input
->buf
;
12317 if ((in
->encoder
!= NULL
) && (in
->buffer
!= NULL
) &&
12318 (in
->raw
!= NULL
)) {
12320 size_t base
= xmlBufGetInputBase(in
->buffer
, ctxt
->input
);
12321 size_t current
= ctxt
->input
->cur
- ctxt
->input
->base
;
12323 nbchars
= xmlCharEncInput(in
, terminate
);
12324 xmlBufSetInputBaseCur(in
->buffer
, ctxt
->input
, base
, current
);
/* Encoder failure (condition not shown): log, halt, report invalid encoding. */
12327 xmlGenericError(xmlGenericErrorContext
,
12328 "xmlParseChunk: encoder error\n");
12329 xmlHaltParser(ctxt
);
12330 return(XML_ERR_INVALID_ENCODING
);
/* Parse attempt with terminate forced to 0 (enclosing condition not shown). */
12336 xmlParseTryOrFinish(ctxt
, 0);
/* Current buffer fill level, measured after the push. */
12338 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->buf
!= NULL
))
12339 avail
= xmlBufUse(ctxt
->input
->buf
->buffer
);
12341 * Depending on the current state it may not be such
12342 * a good idea to try parsing if there is nothing in the chunk
12343 * which would be worth doing a parser state transition and we
12344 * need to wait for more data
/*
 * Parse when this is the final chunk, when more than
 * XML_MAX_TEXT_LENGTH is buffered, when old_avail or avail is 0, or
 * when xmlParseCheckTransition() returns non-zero for the bytes
 * starting at offset old_avail from input->base.
 */
12346 if ((terminate
) || (avail
> XML_MAX_TEXT_LENGTH
) ||
12347 (old_avail
== 0) || (avail
== 0) ||
12348 (xmlParseCheckTransition(ctxt
,
12349 (const char *)&ctxt
->input
->base
[old_avail
],
12350 avail
- old_avail
)))
12351 xmlParseTryOrFinish(ctxt
, terminate
);
/* Parser reached EOF: report whatever error number is stored. */
12353 if (ctxt
->instate
== XML_PARSER_EOF
)
12354 return(ctxt
->errNo
);
/*
 * Unless XML_PARSE_HUGE is set, raise a fatal error and halt when either
 * the unread part (end - cur) or the consumed part (cur - base) of the
 * input exceeds XML_MAX_LOOKUP_LIMIT.
 */
12356 if ((ctxt
->input
!= NULL
) &&
12357 (((ctxt
->input
->end
- ctxt
->input
->cur
) > XML_MAX_LOOKUP_LIMIT
) ||
12358 ((ctxt
->input
->cur
- ctxt
->input
->base
) > XML_MAX_LOOKUP_LIMIT
)) &&
12359 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
12360 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
, "Huge input lookup");
12361 xmlHaltParser(ctxt
);
12363 if ((ctxt
->errNo
!= XML_ERR_OK
) && (ctxt
->disableSAX
== 1))
12364 return(ctxt
->errNo
);
/*
 * end_in_lf is set: push a single "\r" into the input buffer and
 * re-sync the input pointers (the tail of the re-sync call is not shown).
 */
12372 if ((end_in_lf
== 1) && (ctxt
->input
!= NULL
) &&
12373 (ctxt
->input
->buf
!= NULL
)) {
12374 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
,
12376 size_t current
= ctxt
->input
->cur
- ctxt
->input
->base
;
12378 xmlParserInputBufferPush(ctxt
->input
->buf
, 1, "\r");
12380 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
,
12385 * Check for termination
/*
 * cur_avail = bytes not yet consumed: computed from input->length when
 * there is no input buffer, from xmlBufUse() otherwise.
 */
12389 if (ctxt
->input
!= NULL
) {
12390 if (ctxt
->input
->buf
== NULL
)
12391 cur_avail
= ctxt
->input
->length
-
12392 (ctxt
->input
->cur
- ctxt
->input
->base
);
12394 cur_avail
= xmlBufUse(ctxt
->input
->buf
->buffer
) -
12395 (ctxt
->input
->cur
- ctxt
->input
->base
);
/* Parser in a state other than EOF or EPILOG: XML_ERR_DOCUMENT_END. */
12398 if ((ctxt
->instate
!= XML_PARSER_EOF
) &&
12399 (ctxt
->instate
!= XML_PARSER_EPILOG
)) {
12400 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
/* In the epilog with unconsumed bytes left: also XML_ERR_DOCUMENT_END. */
12402 if ((ctxt
->instate
== XML_PARSER_EPILOG
) && (cur_avail
> 0)) {
12403 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
/* Not yet at EOF: fire the endDocument SAX callback if one is installed. */
12405 if (ctxt
->instate
!= XML_PARSER_EOF
) {
12406 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
12407 ctxt
->sax
->endDocument(ctxt
->userData
);
12409 ctxt
->instate
= XML_PARSER_EOF
;
/* Document was not well-formed: return the stored error number. */
12411 if (ctxt
->wellFormed
== 0)
12412 return((xmlParserErrors
) ctxt
->errNo
);
12417 /************************************************************************
12419 * I/O front end functions to the parser *
12421 ************************************************************************/
12424 * xmlCreatePushParserCtxt:
12425 * @sax: a SAX handler
12426 * @user_data: The user data returned on SAX callbacks
12427 * @chunk: a pointer to an array of chars
12428 * @size: number of chars in the array
12429 * @filename: an optional file name or URI
12431 * Create a parser context for using the XML parser in push mode.
12432 * If @chunk is non-NULL and @size is non-zero, the data is used to detect
12433 * the encoding. The remaining characters will be parsed so they
12434 * don't need to be fed in again through xmlParseChunk.
12435 * To allow content encoding detection, @size should be >= 4
12436 * The value of @filename is used for fetching external entities
12437 * and error/warning reports.
12439 * Returns the new parser context or NULL
/*
 * Builds a parser context for push-mode parsing: allocates an input
 * buffer and a context, installs a private copy of the caller's SAX
 * handler, creates and pushes an input stream backed by the buffer, and
 * optionally seeds it with an initial chunk.
 * NOTE(review): this listing omits some source lines (several returns,
 * else keywords and braces are not shown); comments below describe only
 * what is visible.
 */
12443 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax
, void *user_data
,
12444 const char *chunk
, int size
, const char *filename
) {
12445 xmlParserCtxtPtr ctxt
;
12446 xmlParserInputPtr inputStream
;
12447 xmlParserInputBufferPtr buf
;
12448 xmlCharEncoding enc
= XML_CHAR_ENCODING_NONE
;
12451 * plug some encoding conversion routines
/* Encoding detection is attempted only when at least 4 bytes are supplied. */
12453 if ((chunk
!= NULL
) && (size
>= 4))
12454 enc
= xmlDetectCharEncoding((const xmlChar
*) chunk
, size
);
/* Input buffer allocated for the detected encoding (possibly NONE). */
12456 buf
= xmlAllocParserInputBuffer(enc
);
12457 if (buf
== NULL
) return(NULL
);
/* Context allocation; on failure the input buffer is released. */
12459 ctxt
= xmlNewParserCtxt();
12460 if (ctxt
== NULL
) {
12461 xmlErrMemory(NULL
, "creating parser: out of memory\n");
12462 xmlFreeParserInputBuffer(buf
);
12465 ctxt
->dictNames
= 1;
/*
 * Replace the context's SAX handler with a freshly allocated copy of
 * the caller's. When SAX1 support is compiled in, the old handler is
 * freed only if it is not the global xmlDefaultSAXHandler.
 */
12467 #ifdef LIBXML_SAX1_ENABLED
12468 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
12469 #endif /* LIBXML_SAX1_ENABLED */
12470 xmlFree(ctxt
->sax
);
12471 ctxt
->sax
= (xmlSAXHandlerPtr
) xmlMalloc(sizeof(xmlSAXHandler
));
/* Allocation failure: release both the buffer and the context. */
12472 if (ctxt
->sax
== NULL
) {
12473 xmlErrMemory(ctxt
, NULL
);
12474 xmlFreeParserInputBuffer(buf
);
12475 xmlFreeParserCtxt(ctxt
);
/*
 * Zero the new handler, then copy the full xmlSAXHandler when the
 * caller's handler carries XML_SAX2_MAGIC, or only the xmlSAXHandlerV1
 * prefix otherwise.
 * NOTE(review): sax is dereferenced here and no NULL check on it is
 * visible in this listing - confirm a guard exists in the full file.
 */
12478 memset(ctxt
->sax
, 0, sizeof(xmlSAXHandler
));
12479 if (sax
->initialized
== XML_SAX2_MAGIC
)
12480 memcpy(ctxt
->sax
, sax
, sizeof(xmlSAXHandler
));
12482 memcpy(ctxt
->sax
, sax
, sizeof(xmlSAXHandlerV1
));
/* userData is overridden only when the caller supplied a non-NULL value. */
12483 if (user_data
!= NULL
)
12484 ctxt
->userData
= user_data
;
/* directory is NULL without a filename, else derived via xmlParserGetDirectory(). */
12486 if (filename
== NULL
) {
12487 ctxt
->directory
= NULL
;
12489 ctxt
->directory
= xmlParserGetDirectory(filename
);
/* Input stream creation; on failure release the context and the buffer. */
12492 inputStream
= xmlNewInputStream(ctxt
);
12493 if (inputStream
== NULL
) {
12494 xmlFreeParserCtxt(ctxt
);
12495 xmlFreeParserInputBuffer(buf
);
/* Stream filename: NULL, or the canonical form of @filename. */
12499 if (filename
== NULL
)
12500 inputStream
->filename
= NULL
;
12502 inputStream
->filename
= (char *)
12503 xmlCanonicPath((const xmlChar
*) filename
);
/*
 * NOTE(review): on this failure path inputStream has not yet been
 * handed to inputPush() (that call is further down) and no release of
 * it is visible in this listing - possible leak, confirm.
 */
12504 if (inputStream
->filename
== NULL
) {
12505 xmlFreeParserCtxt(ctxt
);
12506 xmlFreeParserInputBuffer(buf
);
/* Attach the buffer to the stream, reset its pointers, push it on the context. */
12510 inputStream
->buf
= buf
;
12511 xmlBufResetInput(inputStream
->buf
->buffer
, inputStream
);
12512 inputPush(ctxt
, inputStream
);
12515 * If the caller didn't provide an initial 'chunk' for determining
12516 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12517 * that it can be automatically determined later
12519 if ((size
== 0) || (chunk
== NULL
)) {
12520 ctxt
->charset
= XML_CHAR_ENCODING_NONE
;
/*
 * Initial chunk supplied: save base/cur offsets, push the chunk into
 * the input buffer, then restore the input pointers from the offsets.
 */
12521 } else if ((ctxt
->input
!= NULL
) && (ctxt
->input
->buf
!= NULL
)) {
12522 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
, ctxt
->input
);
12523 size_t cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
12525 xmlParserInputBufferPush(ctxt
->input
->buf
, size
, chunk
);
12527 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
, base
, cur
);
12529 xmlGenericError(xmlGenericErrorContext
, "PP: pushed %d\n", size
);
/* Switch the context to the detected encoding, if one was detected. */
12533 if (enc
!= XML_CHAR_ENCODING_NONE
) {
12534 xmlSwitchEncoding(ctxt
, enc
);
12539 #endif /* LIBXML_PUSH_ENABLED */
12543 * @ctxt: an XML parser context
12545 * Blocks further parser processing don't override error
12549 xmlHaltParser(xmlParserCtxtPtr ctxt
) {
12552 ctxt
->instate
= XML_PARSER_EOF
;
12553 ctxt
->disableSAX
= 1;
12554 while (ctxt
->inputNr
> 1)
12555 xmlFreeInputStream(inputPop(ctxt
));
12556 if (ctxt
->input
!= NULL
) {
12558 * in case there was a specific allocation deallocate before
12561 if (ctxt
->input
->free
!= NULL
) {
12562 ctxt
->input
->free((xmlChar
*) ctxt
->input
->base
);
12563 ctxt
->input
->free
= NULL
;
12565 if (ctxt
->input
->buf
!= NULL
) {
12566 xmlFreeParserInputBuffer(ctxt
->input
->buf
);
12567 ctxt
->input
->buf
= NULL
;
12569 ctxt
->input
->cur
= BAD_CAST
"";
12570 ctxt
->input
->length
= 0;
12571 ctxt
->input
->base
= ctxt
->input
->cur
;
12572 ctxt
->input
->end
= ctxt
->input
->cur
;
12578 * @ctxt: an XML parser context
12580 * Blocks further parser processing
12583 xmlStopParser(xmlParserCtxtPtr ctxt
) {
12586 xmlHaltParser(ctxt
);
12587 ctxt
->errNo
= XML_ERR_USER_STOP
;
12591 * xmlCreateIOParserCtxt:
12592 * @sax: a SAX handler
12593 * @user_data: The user data returned on SAX callbacks
12594 * @ioread: an I/O read function
12595 * @ioclose: an I/O close function
12596 * @ioctx: an I/O handler
12597 * @enc: the charset encoding if known
12599 * Create a parser context for using the XML parser with an existing
12602 * Returns the new parser context or NULL
12605 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax
, void *user_data
,
12606 xmlInputReadCallback ioread
, xmlInputCloseCallback ioclose
,
12607 void *ioctx
, xmlCharEncoding enc
) {
12608 xmlParserCtxtPtr ctxt
;
12609 xmlParserInputPtr inputStream
;
12610 xmlParserInputBufferPtr buf
;
12612 if (ioread
== NULL
) return(NULL
);
12614 buf
= xmlParserInputBufferCreateIO(ioread
, ioclose
, ioctx
, enc
);
12616 if (ioclose
!= NULL
)
12621 ctxt
= xmlNewParserCtxt();
12622 if (ctxt
== NULL
) {
12623 xmlFreeParserInputBuffer(buf
);
12627 #ifdef LIBXML_SAX1_ENABLED
12628 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
12629 #endif /* LIBXML_SAX1_ENABLED */
12630 xmlFree(ctxt
->sax
);
12631 ctxt
->sax
= (xmlSAXHandlerPtr
) xmlMalloc(sizeof(xmlSAXHandler
));
12632 if (ctxt
->sax
== NULL
) {
12633 xmlFreeParserInputBuffer(buf
);
12634 xmlErrMemory(ctxt
, NULL
);
12635 xmlFreeParserCtxt(ctxt
);
12638 memset(ctxt
->sax
, 0, sizeof(xmlSAXHandler
));
12639 if (sax
->initialized
== XML_SAX2_MAGIC
)
12640 memcpy(ctxt
->sax
, sax
, sizeof(xmlSAXHandler
));
12642 memcpy(ctxt
->sax
, sax
, sizeof(xmlSAXHandlerV1
));
12643 if (user_data
!= NULL
)
12644 ctxt
->userData
= user_data
;
12647 inputStream
= xmlNewIOInputStream(ctxt
, buf
, enc
);
12648 if (inputStream
== NULL
) {
12649 xmlFreeParserCtxt(ctxt
);
12652 inputPush(ctxt
, inputStream
);
12657 #ifdef LIBXML_VALID_ENABLED
12658 /************************************************************************
12660 * Front ends when parsing a DTD *
12662 ************************************************************************/
12666 * @sax: the SAX handler block or NULL
12667 * @input: an Input Buffer
12668 * @enc: the charset encoding if known
12670 * Load and parse a DTD
12672 * Returns the resulting xmlDtdPtr or NULL in case of error.
12673 * @input will be freed by the function in any case.
12677 xmlIOParseDTD(xmlSAXHandlerPtr sax
, xmlParserInputBufferPtr input
,
12678 xmlCharEncoding enc
) {
12679 xmlDtdPtr ret
= NULL
;
12680 xmlParserCtxtPtr ctxt
;
12681 xmlParserInputPtr pinput
= NULL
;
12687 ctxt
= xmlNewParserCtxt();
12688 if (ctxt
== NULL
) {
12689 xmlFreeParserInputBuffer(input
);
12693 /* We are loading a DTD */
12694 ctxt
->options
|= XML_PARSE_DTDLOAD
;
12697 * Set-up the SAX context
12700 if (ctxt
->sax
!= NULL
)
12701 xmlFree(ctxt
->sax
);
12703 ctxt
->userData
= ctxt
;
12705 xmlDetectSAX2(ctxt
);
12708 * generate a parser input from the I/O handler
12711 pinput
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
12712 if (pinput
== NULL
) {
12713 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12714 xmlFreeParserInputBuffer(input
);
12715 xmlFreeParserCtxt(ctxt
);
12720 * plug some encoding conversion routines here.
12722 if (xmlPushInput(ctxt
, pinput
) < 0) {
12723 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12724 xmlFreeParserCtxt(ctxt
);
12727 if (enc
!= XML_CHAR_ENCODING_NONE
) {
12728 xmlSwitchEncoding(ctxt
, enc
);
12731 pinput
->filename
= NULL
;
12734 pinput
->base
= ctxt
->input
->cur
;
12735 pinput
->cur
= ctxt
->input
->cur
;
12736 pinput
->free
= NULL
;
12739 * let's parse that entity knowing it's an external subset.
12741 ctxt
->inSubset
= 2;
12742 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
12743 if (ctxt
->myDoc
== NULL
) {
12744 xmlErrMemory(ctxt
, "New Doc failed");
12747 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
12748 ctxt
->myDoc
->extSubset
= xmlNewDtd(ctxt
->myDoc
, BAD_CAST
"none",
12749 BAD_CAST
"none", BAD_CAST
"none");
12751 if ((enc
== XML_CHAR_ENCODING_NONE
) &&
12752 ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4)) {
12754 * Get the 4 first bytes and decode the charset
12755 * if enc != XML_CHAR_ENCODING_NONE
12756 * plug some encoding conversion routines.
12762 enc
= xmlDetectCharEncoding(start
, 4);
12763 if (enc
!= XML_CHAR_ENCODING_NONE
) {
12764 xmlSwitchEncoding(ctxt
, enc
);
12768 xmlParseExternalSubset(ctxt
, BAD_CAST
"none", BAD_CAST
"none");
12770 if (ctxt
->myDoc
!= NULL
) {
12771 if (ctxt
->wellFormed
) {
12772 ret
= ctxt
->myDoc
->extSubset
;
12773 ctxt
->myDoc
->extSubset
= NULL
;
12778 tmp
= ret
->children
;
12779 while (tmp
!= NULL
) {
12787 xmlFreeDoc(ctxt
->myDoc
);
12788 ctxt
->myDoc
= NULL
;
12790 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12791 xmlFreeParserCtxt(ctxt
);
12798 * @sax: the SAX handler block
12799 * @ExternalID: a NAME* containing the External ID of the DTD
12800 * @SystemID: a NAME* containing the URL to the DTD
12802 * Load and parse an external subset.
12804 * Returns the resulting xmlDtdPtr or NULL in case of error.
12808 xmlSAXParseDTD(xmlSAXHandlerPtr sax
, const xmlChar
*ExternalID
,
12809 const xmlChar
*SystemID
) {
12810 xmlDtdPtr ret
= NULL
;
12811 xmlParserCtxtPtr ctxt
;
12812 xmlParserInputPtr input
= NULL
;
12813 xmlCharEncoding enc
;
12814 xmlChar
* systemIdCanonic
;
12816 if ((ExternalID
== NULL
) && (SystemID
== NULL
)) return(NULL
);
12818 ctxt
= xmlNewParserCtxt();
12819 if (ctxt
== NULL
) {
12823 /* We are loading a DTD */
12824 ctxt
->options
|= XML_PARSE_DTDLOAD
;
12827 * Set-up the SAX context
12830 if (ctxt
->sax
!= NULL
)
12831 xmlFree(ctxt
->sax
);
12833 ctxt
->userData
= ctxt
;
12837 * Canonicalise the system ID
12839 systemIdCanonic
= xmlCanonicPath(SystemID
);
12840 if ((SystemID
!= NULL
) && (systemIdCanonic
== NULL
)) {
12841 xmlFreeParserCtxt(ctxt
);
12846 * Ask the Entity resolver to load the damn thing
12849 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->resolveEntity
!= NULL
))
12850 input
= ctxt
->sax
->resolveEntity(ctxt
->userData
, ExternalID
,
12852 if (input
== NULL
) {
12853 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12854 xmlFreeParserCtxt(ctxt
);
12855 if (systemIdCanonic
!= NULL
)
12856 xmlFree(systemIdCanonic
);
12861 * plug some encoding conversion routines here.
12863 if (xmlPushInput(ctxt
, input
) < 0) {
12864 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12865 xmlFreeParserCtxt(ctxt
);
12866 if (systemIdCanonic
!= NULL
)
12867 xmlFree(systemIdCanonic
);
12870 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
12871 enc
= xmlDetectCharEncoding(ctxt
->input
->cur
, 4);
12872 xmlSwitchEncoding(ctxt
, enc
);
12875 if (input
->filename
== NULL
)
12876 input
->filename
= (char *) systemIdCanonic
;
12878 xmlFree(systemIdCanonic
);
12881 input
->base
= ctxt
->input
->cur
;
12882 input
->cur
= ctxt
->input
->cur
;
12883 input
->free
= NULL
;
12886 * let's parse that entity knowing it's an external subset.
12888 ctxt
->inSubset
= 2;
12889 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
12890 if (ctxt
->myDoc
== NULL
) {
12891 xmlErrMemory(ctxt
, "New Doc failed");
12892 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12893 xmlFreeParserCtxt(ctxt
);
12896 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
12897 ctxt
->myDoc
->extSubset
= xmlNewDtd(ctxt
->myDoc
, BAD_CAST
"none",
12898 ExternalID
, SystemID
);
12899 xmlParseExternalSubset(ctxt
, ExternalID
, SystemID
);
12901 if (ctxt
->myDoc
!= NULL
) {
12902 if (ctxt
->wellFormed
) {
12903 ret
= ctxt
->myDoc
->extSubset
;
12904 ctxt
->myDoc
->extSubset
= NULL
;
12909 tmp
= ret
->children
;
12910 while (tmp
!= NULL
) {
12918 xmlFreeDoc(ctxt
->myDoc
);
12919 ctxt
->myDoc
= NULL
;
12921 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12922 xmlFreeParserCtxt(ctxt
);
12930 * @ExternalID: a NAME* containing the External ID of the DTD
12931 * @SystemID: a NAME* containing the URL to the DTD
12933 * Load and parse an external subset.
12935 * Returns the resulting xmlDtdPtr or NULL in case of error.
12939 xmlParseDTD(const xmlChar
*ExternalID
, const xmlChar
*SystemID
) {
12940 return(xmlSAXParseDTD(NULL
, ExternalID
, SystemID
));
12942 #endif /* LIBXML_VALID_ENABLED */
12944 /************************************************************************
12946 * Front ends when parsing an Entity *
12948 ************************************************************************/
12951 * xmlParseCtxtExternalEntity:
12952 * @ctx: the existing parsing context
12953 * @URL: the URL for the entity to load
12954 * @ID: the System ID for the entity to load
12955 * @lst: the return value for the set of parsed nodes
12957 * Parse an external general entity within an existing parsing context
12958 * An external general parsed entity is well-formed if it matches the
12959 * production labeled extParsedEnt.
12961 * [78] extParsedEnt ::= TextDecl? content
12963 * Returns 0 if the entity is well formed, -1 in case of args problem and
12964 * the parser error code otherwise
12968 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx
, const xmlChar
*URL
,
12969 const xmlChar
*ID
, xmlNodePtr
*lst
) {
12972 if (ctx
== NULL
) return(-1);
12974 * If the user provided their own SAX callbacks, then reuse the
12975 * userData callback field, otherwise the expected setup in a
12976 * DOM builder is to have userData == ctxt
12978 if (ctx
->userData
== ctx
)
12981 userData
= ctx
->userData
;
12982 return xmlParseExternalEntityPrivate(ctx
->myDoc
, ctx
, ctx
->sax
,
12983 userData
, ctx
->depth
+ 1,
12988 * xmlParseExternalEntityPrivate:
12989 * @doc: the document the chunk pertains to
12990 * @oldctxt: the previous parser context if available
12991 * @sax: the SAX handler block (possibly NULL)
12992 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12993 * @depth: Used for loop detection, use 0
12994 * @URL: the URL for the entity to load
12995 * @ID: the System ID for the entity to load
12996 * @list: the return value for the set of parsed nodes
12998 * Private version of xmlParseExternalEntity()
13000 * Returns 0 if the entity is well formed, -1 in case of args problem and
13001 * the parser error code otherwise
13004 static xmlParserErrors
13005 xmlParseExternalEntityPrivate(xmlDocPtr doc
, xmlParserCtxtPtr oldctxt
,
13006 xmlSAXHandlerPtr sax
,
13007 void *user_data
, int depth
, const xmlChar
*URL
,
13008 const xmlChar
*ID
, xmlNodePtr
*list
) {
13009 xmlParserCtxtPtr ctxt
;
13011 xmlNodePtr newRoot
;
13012 xmlSAXHandlerPtr oldsax
= NULL
;
13013 xmlParserErrors ret
= XML_ERR_OK
;
13015 xmlCharEncoding enc
;
13017 if (((depth
> 40) &&
13018 ((oldctxt
== NULL
) || (oldctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
13020 return(XML_ERR_ENTITY_LOOP
);
13025 if ((URL
== NULL
) && (ID
== NULL
))
13026 return(XML_ERR_INTERNAL_ERROR
);
13028 return(XML_ERR_INTERNAL_ERROR
);
13031 ctxt
= xmlCreateEntityParserCtxtInternal(URL
, ID
, NULL
, oldctxt
);
13032 if (ctxt
== NULL
) return(XML_WAR_UNDECLARED_ENTITY
);
13033 ctxt
->userData
= ctxt
;
13035 oldsax
= ctxt
->sax
;
13037 if (user_data
!= NULL
)
13038 ctxt
->userData
= user_data
;
13040 xmlDetectSAX2(ctxt
);
13041 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
13042 if (newDoc
== NULL
) {
13043 xmlFreeParserCtxt(ctxt
);
13044 return(XML_ERR_INTERNAL_ERROR
);
13046 newDoc
->properties
= XML_DOC_INTERNAL
;
13048 newDoc
->intSubset
= doc
->intSubset
;
13049 newDoc
->extSubset
= doc
->extSubset
;
13051 newDoc
->dict
= doc
->dict
;
13052 xmlDictReference(newDoc
->dict
);
13054 if (doc
->URL
!= NULL
) {
13055 newDoc
->URL
= xmlStrdup(doc
->URL
);
13058 newRoot
= xmlNewDocNode(newDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
13059 if (newRoot
== NULL
) {
13061 ctxt
->sax
= oldsax
;
13062 xmlFreeParserCtxt(ctxt
);
13063 newDoc
->intSubset
= NULL
;
13064 newDoc
->extSubset
= NULL
;
13065 xmlFreeDoc(newDoc
);
13066 return(XML_ERR_INTERNAL_ERROR
);
13068 xmlAddChild((xmlNodePtr
) newDoc
, newRoot
);
13069 nodePush(ctxt
, newDoc
->children
);
13071 ctxt
->myDoc
= newDoc
;
13074 newRoot
->doc
= doc
;
13078 * Get the 4 first bytes and decode the charset
13079 * if enc != XML_CHAR_ENCODING_NONE
13080 * plug some encoding conversion routines.
13083 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
13088 enc
= xmlDetectCharEncoding(start
, 4);
13089 if (enc
!= XML_CHAR_ENCODING_NONE
) {
13090 xmlSwitchEncoding(ctxt
, enc
);
13095 * Parse a possible text declaration first
13097 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13098 xmlParseTextDecl(ctxt
);
13100 * An XML-1.0 document can't reference an entity not XML-1.0
13102 if ((xmlStrEqual(oldctxt
->version
, BAD_CAST
"1.0")) &&
13103 (!xmlStrEqual(ctxt
->input
->version
, BAD_CAST
"1.0"))) {
13104 xmlFatalErrMsg(ctxt
, XML_ERR_VERSION_MISMATCH
,
13105 "Version mismatch between document and entity\n");
13109 ctxt
->instate
= XML_PARSER_CONTENT
;
13110 ctxt
->depth
= depth
;
13111 if (oldctxt
!= NULL
) {
13112 ctxt
->_private
= oldctxt
->_private
;
13113 ctxt
->loadsubset
= oldctxt
->loadsubset
;
13114 ctxt
->validate
= oldctxt
->validate
;
13115 ctxt
->valid
= oldctxt
->valid
;
13116 ctxt
->replaceEntities
= oldctxt
->replaceEntities
;
13117 if (oldctxt
->validate
) {
13118 ctxt
->vctxt
.error
= oldctxt
->vctxt
.error
;
13119 ctxt
->vctxt
.warning
= oldctxt
->vctxt
.warning
;
13120 ctxt
->vctxt
.userData
= oldctxt
->vctxt
.userData
;
13122 ctxt
->external
= oldctxt
->external
;
13123 if (ctxt
->dict
) xmlDictFree(ctxt
->dict
);
13124 ctxt
->dict
= oldctxt
->dict
;
13125 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
13126 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
13127 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
13128 ctxt
->dictNames
= oldctxt
->dictNames
;
13129 ctxt
->attsDefault
= oldctxt
->attsDefault
;
13130 ctxt
->attsSpecial
= oldctxt
->attsSpecial
;
13131 ctxt
->linenumbers
= oldctxt
->linenumbers
;
13132 ctxt
->record_info
= oldctxt
->record_info
;
13133 ctxt
->node_seq
.maximum
= oldctxt
->node_seq
.maximum
;
13134 ctxt
->node_seq
.length
= oldctxt
->node_seq
.length
;
13135 ctxt
->node_seq
.buffer
= oldctxt
->node_seq
.buffer
;
13138 * Doing validity checking on chunk without context
13139 * doesn't make sense
13141 ctxt
->_private
= NULL
;
13142 ctxt
->validate
= 0;
13143 ctxt
->external
= 2;
13144 ctxt
->loadsubset
= 0;
13147 xmlParseContent(ctxt
);
13149 if ((RAW
== '<') && (NXT(1) == '/')) {
13150 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13151 } else if (RAW
!= 0) {
13152 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
13154 if (ctxt
->node
!= newDoc
->children
) {
13155 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13158 if (!ctxt
->wellFormed
) {
13159 if (ctxt
->errNo
== 0)
13160 ret
= XML_ERR_INTERNAL_ERROR
;
13162 ret
= (xmlParserErrors
)ctxt
->errNo
;
13164 if (list
!= NULL
) {
13168 * Return the newly created nodeset after unlinking it from
13169 * they pseudo parent.
13171 cur
= newDoc
->children
->children
;
13173 while (cur
!= NULL
) {
13174 cur
->parent
= NULL
;
13177 newDoc
->children
->children
= NULL
;
13183 * Record in the parent context the number of entities replacement
13184 * done when parsing that reference.
13186 if (oldctxt
!= NULL
)
13187 oldctxt
->nbentities
+= ctxt
->nbentities
;
13190 * Also record the size of the entity parsed
13192 if (ctxt
->input
!= NULL
&& oldctxt
!= NULL
) {
13193 oldctxt
->sizeentities
+= ctxt
->input
->consumed
;
13194 oldctxt
->sizeentities
+= (ctxt
->input
->cur
- ctxt
->input
->base
);
13197 * And record the last error if any
13199 if ((oldctxt
!= NULL
) && (ctxt
->lastError
.code
!= XML_ERR_OK
))
13200 xmlCopyError(&ctxt
->lastError
, &oldctxt
->lastError
);
13203 ctxt
->sax
= oldsax
;
13204 if (oldctxt
!= NULL
) {
13206 ctxt
->attsDefault
= NULL
;
13207 ctxt
->attsSpecial
= NULL
;
13208 oldctxt
->validate
= ctxt
->validate
;
13209 oldctxt
->valid
= ctxt
->valid
;
13210 oldctxt
->node_seq
.maximum
= ctxt
->node_seq
.maximum
;
13211 oldctxt
->node_seq
.length
= ctxt
->node_seq
.length
;
13212 oldctxt
->node_seq
.buffer
= ctxt
->node_seq
.buffer
;
13214 ctxt
->node_seq
.maximum
= 0;
13215 ctxt
->node_seq
.length
= 0;
13216 ctxt
->node_seq
.buffer
= NULL
;
13217 xmlFreeParserCtxt(ctxt
);
13218 newDoc
->intSubset
= NULL
;
13219 newDoc
->extSubset
= NULL
;
13220 xmlFreeDoc(newDoc
);
13225 #ifdef LIBXML_SAX1_ENABLED
13227 * xmlParseExternalEntity:
13228 * @doc: the document the chunk pertains to
13229 * @sax: the SAX handler block (possibly NULL)
13230 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13231 * @depth: Used for loop detection, use 0
13232 * @URL: the URL for the entity to load
13233 * @ID: the System ID for the entity to load
13234 * @lst: the return value for the set of parsed nodes
13236 * Parse an external general entity
13237 * An external general parsed entity is well-formed if it matches the
13238 * production labeled extParsedEnt.
13240 * [78] extParsedEnt ::= TextDecl? content
13242 * Returns 0 if the entity is well formed, -1 in case of args problem and
13243 * the parser error code otherwise
13247 xmlParseExternalEntity(xmlDocPtr doc
, xmlSAXHandlerPtr sax
, void *user_data
,
13248 int depth
, const xmlChar
*URL
, const xmlChar
*ID
, xmlNodePtr
*lst
) {
13249 return(xmlParseExternalEntityPrivate(doc
, NULL
, sax
, user_data
, depth
, URL
,
13254 * xmlParseBalancedChunkMemory:
13255 * @doc: the document the chunk pertains to (must not be NULL)
13256 * @sax: the SAX handler block (possibly NULL)
13257 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13258 * @depth: Used for loop detection, use 0
13259 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13260 * @lst: the return value for the set of parsed nodes
13262 * Parse a well-balanced chunk of an XML document
13263 * called by the parser
13264 * The allowed sequence for the Well Balanced Chunk is the one defined by
13265 * the content production in the XML grammar:
13267 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13269 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13270 * the parser error code otherwise
13274 xmlParseBalancedChunkMemory(xmlDocPtr doc
, xmlSAXHandlerPtr sax
,
13275 void *user_data
, int depth
, const xmlChar
*string
, xmlNodePtr
*lst
) {
13276 return xmlParseBalancedChunkMemoryRecover( doc
, sax
, user_data
,
13277 depth
, string
, lst
, 0 );
13279 #endif /* LIBXML_SAX1_ENABLED */
13282 * xmlParseBalancedChunkMemoryInternal:
13283 * @oldctxt: the existing parsing context
13284 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13285 * @user_data: the user data field for the parser context
13286 * @lst: the return value for the set of parsed nodes
13289 * Parse a well-balanced chunk of an XML document
13290 * called by the parser
13291 * The allowed sequence for the Well Balanced Chunk is the one defined by
13292 * the content production in the XML grammar:
13294 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13296 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13297 * error code otherwise
13299 * In case recover is set to 1, the nodelist will not be empty even if
13300 * the parsed chunk is not well balanced.
13302 static xmlParserErrors
13303 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt
,
13304 const xmlChar
*string
, void *user_data
, xmlNodePtr
*lst
) {
13305 xmlParserCtxtPtr ctxt
;
13306 xmlDocPtr newDoc
= NULL
;
13307 xmlNodePtr newRoot
;
13308 xmlSAXHandlerPtr oldsax
= NULL
;
13309 xmlNodePtr content
= NULL
;
13310 xmlNodePtr last
= NULL
;
13312 xmlParserErrors ret
= XML_ERR_OK
;
13317 if (((oldctxt
->depth
> 40) && ((oldctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
13318 (oldctxt
->depth
> 1024)) {
13319 return(XML_ERR_ENTITY_LOOP
);
13325 if (string
== NULL
)
13326 return(XML_ERR_INTERNAL_ERROR
);
13328 size
= xmlStrlen(string
);
13330 ctxt
= xmlCreateMemoryParserCtxt((char *) string
, size
);
13331 if (ctxt
== NULL
) return(XML_WAR_UNDECLARED_ENTITY
);
13332 if (user_data
!= NULL
)
13333 ctxt
->userData
= user_data
;
13335 ctxt
->userData
= ctxt
;
13336 if (ctxt
->dict
!= NULL
) xmlDictFree(ctxt
->dict
);
13337 ctxt
->dict
= oldctxt
->dict
;
13338 ctxt
->input_id
= oldctxt
->input_id
+ 1;
13339 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
13340 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
13341 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
13344 /* propagate namespaces down the entity */
13345 for (i
= 0;i
< oldctxt
->nsNr
;i
+= 2) {
13346 nsPush(ctxt
, oldctxt
->nsTab
[i
], oldctxt
->nsTab
[i
+1]);
13350 oldsax
= ctxt
->sax
;
13351 ctxt
->sax
= oldctxt
->sax
;
13352 xmlDetectSAX2(ctxt
);
13353 ctxt
->replaceEntities
= oldctxt
->replaceEntities
;
13354 ctxt
->options
= oldctxt
->options
;
13356 ctxt
->_private
= oldctxt
->_private
;
13357 if (oldctxt
->myDoc
== NULL
) {
13358 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
13359 if (newDoc
== NULL
) {
13360 ctxt
->sax
= oldsax
;
13362 xmlFreeParserCtxt(ctxt
);
13363 return(XML_ERR_INTERNAL_ERROR
);
13365 newDoc
->properties
= XML_DOC_INTERNAL
;
13366 newDoc
->dict
= ctxt
->dict
;
13367 xmlDictReference(newDoc
->dict
);
13368 ctxt
->myDoc
= newDoc
;
13370 ctxt
->myDoc
= oldctxt
->myDoc
;
13371 content
= ctxt
->myDoc
->children
;
13372 last
= ctxt
->myDoc
->last
;
13374 newRoot
= xmlNewDocNode(ctxt
->myDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
13375 if (newRoot
== NULL
) {
13376 ctxt
->sax
= oldsax
;
13378 xmlFreeParserCtxt(ctxt
);
13379 if (newDoc
!= NULL
) {
13380 xmlFreeDoc(newDoc
);
13382 return(XML_ERR_INTERNAL_ERROR
);
13384 ctxt
->myDoc
->children
= NULL
;
13385 ctxt
->myDoc
->last
= NULL
;
13386 xmlAddChild((xmlNodePtr
) ctxt
->myDoc
, newRoot
);
13387 nodePush(ctxt
, ctxt
->myDoc
->children
);
13388 ctxt
->instate
= XML_PARSER_CONTENT
;
13389 ctxt
->depth
= oldctxt
->depth
+ 1;
13391 ctxt
->validate
= 0;
13392 ctxt
->loadsubset
= oldctxt
->loadsubset
;
13393 if ((oldctxt
->validate
) || (oldctxt
->replaceEntities
!= 0)) {
13395 * ID/IDREF registration will be done in xmlValidateElement below
13397 ctxt
->loadsubset
|= XML_SKIP_IDS
;
13399 ctxt
->dictNames
= oldctxt
->dictNames
;
13400 ctxt
->attsDefault
= oldctxt
->attsDefault
;
13401 ctxt
->attsSpecial
= oldctxt
->attsSpecial
;
13403 xmlParseContent(ctxt
);
13404 if ((RAW
== '<') && (NXT(1) == '/')) {
13405 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13406 } else if (RAW
!= 0) {
13407 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
13409 if (ctxt
->node
!= ctxt
->myDoc
->children
) {
13410 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13413 if (!ctxt
->wellFormed
) {
13414 if (ctxt
->errNo
== 0)
13415 ret
= XML_ERR_INTERNAL_ERROR
;
13417 ret
= (xmlParserErrors
)ctxt
->errNo
;
13422 if ((lst
!= NULL
) && (ret
== XML_ERR_OK
)) {
13426 * Return the newly created nodeset after unlinking it from
13427 * they pseudo parent.
13429 cur
= ctxt
->myDoc
->children
->children
;
13431 while (cur
!= NULL
) {
13432 #ifdef LIBXML_VALID_ENABLED
13433 if ((oldctxt
->validate
) && (oldctxt
->wellFormed
) &&
13434 (oldctxt
->myDoc
) && (oldctxt
->myDoc
->intSubset
) &&
13435 (cur
->type
== XML_ELEMENT_NODE
)) {
13436 oldctxt
->valid
&= xmlValidateElement(&oldctxt
->vctxt
,
13437 oldctxt
->myDoc
, cur
);
13439 #endif /* LIBXML_VALID_ENABLED */
13440 cur
->parent
= NULL
;
13443 ctxt
->myDoc
->children
->children
= NULL
;
13445 if (ctxt
->myDoc
!= NULL
) {
13446 xmlFreeNode(ctxt
->myDoc
->children
);
13447 ctxt
->myDoc
->children
= content
;
13448 ctxt
->myDoc
->last
= last
;
13452 * Record in the parent context the number of entities replacement
13453 * done when parsing that reference.
13455 if (oldctxt
!= NULL
)
13456 oldctxt
->nbentities
+= ctxt
->nbentities
;
13459 * Also record the last error if any
13461 if (ctxt
->lastError
.code
!= XML_ERR_OK
)
13462 xmlCopyError(&ctxt
->lastError
, &oldctxt
->lastError
);
13464 ctxt
->sax
= oldsax
;
13466 ctxt
->attsDefault
= NULL
;
13467 ctxt
->attsSpecial
= NULL
;
13468 xmlFreeParserCtxt(ctxt
);
13469 if (newDoc
!= NULL
) {
13470 xmlFreeDoc(newDoc
);
13477 * xmlParseInNodeContext:
13478 * @node: the context node
13479 * @data: the input string
13480 * @datalen: the input string length in bytes
13481 * @options: a combination of xmlParserOption
13482 * @lst: the return value for the set of parsed nodes
13484 * Parse a well-balanced chunk of an XML document
13485 * within the context (DTD, namespaces, etc ...) of the given node.
13487 * The allowed sequence for the data is a Well Balanced Chunk defined by
13488 * the content production in the XML grammar:
13490 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13492 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13493 * error code otherwise
13496 xmlParseInNodeContext(xmlNodePtr node
, const char *data
, int datalen
,
13497 int options
, xmlNodePtr
*lst
) {
13499 xmlParserCtxtPtr ctxt
;
13500 xmlDocPtr doc
= NULL
;
13501 xmlNodePtr fake
, cur
;
13504 xmlParserErrors ret
= XML_ERR_OK
;
13507 * check all input parameters, grab the document
13509 if ((lst
== NULL
) || (node
== NULL
) || (data
== NULL
) || (datalen
< 0))
13510 return(XML_ERR_INTERNAL_ERROR
);
13511 switch (node
->type
) {
13512 case XML_ELEMENT_NODE
:
13513 case XML_ATTRIBUTE_NODE
:
13514 case XML_TEXT_NODE
:
13515 case XML_CDATA_SECTION_NODE
:
13516 case XML_ENTITY_REF_NODE
:
13518 case XML_COMMENT_NODE
:
13519 case XML_DOCUMENT_NODE
:
13520 case XML_HTML_DOCUMENT_NODE
:
13523 return(XML_ERR_INTERNAL_ERROR
);
13526 while ((node
!= NULL
) && (node
->type
!= XML_ELEMENT_NODE
) &&
13527 (node
->type
!= XML_DOCUMENT_NODE
) &&
13528 (node
->type
!= XML_HTML_DOCUMENT_NODE
))
13529 node
= node
->parent
;
13531 return(XML_ERR_INTERNAL_ERROR
);
13532 if (node
->type
== XML_ELEMENT_NODE
)
13535 doc
= (xmlDocPtr
) node
;
13537 return(XML_ERR_INTERNAL_ERROR
);
13540 * allocate a context and set-up everything not related to the
13541 * node position in the tree
13543 if (doc
->type
== XML_DOCUMENT_NODE
)
13544 ctxt
= xmlCreateMemoryParserCtxt((char *) data
, datalen
);
13545 #ifdef LIBXML_HTML_ENABLED
13546 else if (doc
->type
== XML_HTML_DOCUMENT_NODE
) {
13547 ctxt
= htmlCreateMemoryParserCtxt((char *) data
, datalen
);
13549 * When parsing in context, it makes no sense to add implied
13550 * elements like html/body/etc...
13552 options
|= HTML_PARSE_NOIMPLIED
;
13556 return(XML_ERR_INTERNAL_ERROR
);
13559 return(XML_ERR_NO_MEMORY
);
13562 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13563 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13564 * we must wait until the last moment to free the original one.
13566 if (doc
->dict
!= NULL
) {
13567 if (ctxt
->dict
!= NULL
)
13568 xmlDictFree(ctxt
->dict
);
13569 ctxt
->dict
= doc
->dict
;
13571 options
|= XML_PARSE_NODICT
;
13573 if (doc
->encoding
!= NULL
) {
13574 xmlCharEncodingHandlerPtr hdlr
;
13576 if (ctxt
->encoding
!= NULL
)
13577 xmlFree((xmlChar
*) ctxt
->encoding
);
13578 ctxt
->encoding
= xmlStrdup((const xmlChar
*) doc
->encoding
);
13580 hdlr
= xmlFindCharEncodingHandler((const char *) doc
->encoding
);
13581 if (hdlr
!= NULL
) {
13582 xmlSwitchToEncoding(ctxt
, hdlr
);
13584 return(XML_ERR_UNSUPPORTED_ENCODING
);
13588 xmlCtxtUseOptionsInternal(ctxt
, options
, NULL
);
13589 xmlDetectSAX2(ctxt
);
13591 /* parsing in context, i.e. as within existing content */
13592 ctxt
->input_id
= 2;
13593 ctxt
->instate
= XML_PARSER_CONTENT
;
13595 fake
= xmlNewDocComment(node
->doc
, NULL
);
13596 if (fake
== NULL
) {
13597 xmlFreeParserCtxt(ctxt
);
13598 return(XML_ERR_NO_MEMORY
);
13600 xmlAddChild(node
, fake
);
13602 if (node
->type
== XML_ELEMENT_NODE
) {
13603 nodePush(ctxt
, node
);
13605 * initialize the SAX2 namespaces stack
13608 while ((cur
!= NULL
) && (cur
->type
== XML_ELEMENT_NODE
)) {
13609 xmlNsPtr ns
= cur
->nsDef
;
13610 const xmlChar
*iprefix
, *ihref
;
13612 while (ns
!= NULL
) {
13614 iprefix
= xmlDictLookup(ctxt
->dict
, ns
->prefix
, -1);
13615 ihref
= xmlDictLookup(ctxt
->dict
, ns
->href
, -1);
13617 iprefix
= ns
->prefix
;
13621 if (xmlGetNamespace(ctxt
, iprefix
) == NULL
) {
13622 nsPush(ctxt
, iprefix
, ihref
);
13631 if ((ctxt
->validate
) || (ctxt
->replaceEntities
!= 0)) {
13633 * ID/IDREF registration will be done in xmlValidateElement below
13635 ctxt
->loadsubset
|= XML_SKIP_IDS
;
13638 #ifdef LIBXML_HTML_ENABLED
13639 if (doc
->type
== XML_HTML_DOCUMENT_NODE
)
13640 __htmlParseContent(ctxt
);
13643 xmlParseContent(ctxt
);
13646 if ((RAW
== '<') && (NXT(1) == '/')) {
13647 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13648 } else if (RAW
!= 0) {
13649 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
13651 if ((ctxt
->node
!= NULL
) && (ctxt
->node
!= node
)) {
13652 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13653 ctxt
->wellFormed
= 0;
13656 if (!ctxt
->wellFormed
) {
13657 if (ctxt
->errNo
== 0)
13658 ret
= XML_ERR_INTERNAL_ERROR
;
13660 ret
= (xmlParserErrors
)ctxt
->errNo
;
13666 * Return the newly created nodeset after unlinking it from
13667 * the pseudo sibling.
13680 while (cur
!= NULL
) {
13681 cur
->parent
= NULL
;
13685 xmlUnlinkNode(fake
);
13689 if (ret
!= XML_ERR_OK
) {
13690 xmlFreeNodeList(*lst
);
13694 if (doc
->dict
!= NULL
)
13696 xmlFreeParserCtxt(ctxt
);
13700 return(XML_ERR_INTERNAL_ERROR
);
13704 #ifdef LIBXML_SAX1_ENABLED
/**
 * xmlParseBalancedChunkMemoryRecover:
 * @doc:  the document the chunk pertains to (must not be NULL)
 * @sax:  the SAX handler block (possibly NULL)
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
 * @depth:  Used for loop detection, use 0
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
 * @lst:  the return value for the set of parsed nodes
 * @recover: return nodes even if the data is broken (use 0)
 *
 * Parse a well-balanced chunk of an XML document
 * called by the parser
 * The allowed sequence for the Well Balanced Chunk is the one defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * The chunk is parsed below a temporary "pseudoroot" element living in a
 * temporary document; on success (or when @recover is 1) the parsed
 * children are detached from that element, re-homed to @doc and handed
 * back through @lst.
 *
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
 *    the parser error code otherwise
 *
 * In case recover is set to 1, the nodelist will not be empty even if
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
 * some extent.
 */
int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
     int recover) {
    xmlParserCtxtPtr ctxt;
    xmlDocPtr newDoc;
    xmlSAXHandlerPtr oldsax = NULL;
    xmlNodePtr content, newRoot;
    int size;
    int ret = 0;

    /* Guard against runaway recursion through nested entities. */
    if (depth > 40) {
        return(XML_ERR_ENTITY_LOOP);
    }


    if (lst != NULL)
        *lst = NULL;
    if (string == NULL)
        return(-1);

    size = xmlStrlen(string);

    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
    if (ctxt == NULL) return(-1);
    ctxt->userData = ctxt;
    if (sax != NULL) {
        /* Remember the context's own handler so it can be restored
         * before the context is freed. */
        oldsax = ctxt->sax;
        ctxt->sax = sax;
        if (user_data != NULL)
            ctxt->userData = user_data;
    }
    newDoc = xmlNewDoc(BAD_CAST "1.0");
    if (newDoc == NULL) {
        xmlFreeParserCtxt(ctxt);
        return(-1);
    }
    newDoc->properties = XML_DOC_INTERNAL;
    if ((doc != NULL) && (doc->dict != NULL)) {
        /* Share the target document's dictionary with the parser. */
        xmlDictFree(ctxt->dict);
        ctxt->dict = doc->dict;
        xmlDictReference(ctxt->dict);
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
        ctxt->dictNames = 1;
    } else {
        xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
    }
    /* doc == NULL is only supported for historic reasons */
    if (doc != NULL) {
        /* Borrowed pointers: they are cleared again before newDoc is freed. */
        newDoc->intSubset = doc->intSubset;
        newDoc->extSubset = doc->extSubset;
    }
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
    if (newRoot == NULL) {
        if (sax != NULL)
            ctxt->sax = oldsax;
        xmlFreeParserCtxt(ctxt);
        newDoc->intSubset = NULL;
        newDoc->extSubset = NULL;
        xmlFreeDoc(newDoc);
        return(-1);
    }
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
    nodePush(ctxt, newRoot);
    /* doc == NULL is only supported for historic reasons */
    if (doc == NULL) {
        ctxt->myDoc = newDoc;
    } else {
        ctxt->myDoc = newDoc;
        newDoc->children->doc = doc;
        /* Ensure that doc has XML spec namespace */
        xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
        newDoc->oldNs = doc->oldNs;
    }
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->input_id = 2;
    ctxt->depth = depth;

    /*
     * Doing validity checking on chunk doesn't make sense
     */
    ctxt->validate = 0;
    ctxt->loadsubset = 0;
    xmlDetectSAX2(ctxt);

    if ( doc != NULL ){
        /* Hide the existing children of doc while the chunk is parsed. */
        content = doc->children;
        doc->children = NULL;
        xmlParseContent(ctxt);
        doc->children = content;
    }
    else {
        xmlParseContent(ctxt);
    }
    if ((RAW == '<') && (NXT(1) == '/')) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }
    /* The node stack must be back at the pseudo root. */
    if (ctxt->node != newDoc->children) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    }

    if (!ctxt->wellFormed) {
        if (ctxt->errNo == 0)
            ret = 1;
        else
            ret = ctxt->errNo;
    } else {
      ret = 0;
    }

    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
        xmlNodePtr cur;

        /*
         * Return the newly created nodeset after unlinking it from
         * the pseudo parent.
         */
        cur = newDoc->children->children;
        *lst = cur;
        while (cur != NULL) {
            xmlSetTreeDoc(cur, doc);
            cur->parent = NULL;
            cur = cur->next;
        }
        newDoc->children->children = NULL;
    }

    if (sax != NULL)
        ctxt->sax = oldsax;
    xmlFreeParserCtxt(ctxt);
    /* Drop the borrowed subsets/namespaces so xmlFreeDoc leaves them alone. */
    newDoc->intSubset = NULL;
    newDoc->extSubset = NULL;
    /* This leaks the namespace list if doc == NULL */
    newDoc->oldNs = NULL;
    xmlFreeDoc(newDoc);

    return(ret);
}
13874 * xmlSAXParseEntity:
13875 * @sax: the SAX handler block
13876 * @filename: the filename
13878 * parse an XML external entity out of context and build a tree.
13879 * It use the given SAX function block to handle the parsing callback.
13880 * If sax is NULL, fallback to the default DOM tree building routines.
13882 * [78] extParsedEnt ::= TextDecl? content
13884 * This correspond to a "Well Balanced" chunk
13886 * Returns the resulting document tree
13890 xmlSAXParseEntity(xmlSAXHandlerPtr sax
, const char *filename
) {
13892 xmlParserCtxtPtr ctxt
;
13894 ctxt
= xmlCreateFileParserCtxt(filename
);
13895 if (ctxt
== NULL
) {
13899 if (ctxt
->sax
!= NULL
)
13900 xmlFree(ctxt
->sax
);
13902 ctxt
->userData
= NULL
;
13905 xmlParseExtParsedEnt(ctxt
);
13907 if (ctxt
->wellFormed
)
13911 xmlFreeDoc(ctxt
->myDoc
);
13912 ctxt
->myDoc
= NULL
;
13916 xmlFreeParserCtxt(ctxt
);
13923 * @filename: the filename
13925 * parse an XML external entity out of context and build a tree.
13927 * [78] extParsedEnt ::= TextDecl? content
13929 * This correspond to a "Well Balanced" chunk
13931 * Returns the resulting document tree
13935 xmlParseEntity(const char *filename
) {
13936 return(xmlSAXParseEntity(NULL
, filename
));
13938 #endif /* LIBXML_SAX1_ENABLED */
13941 * xmlCreateEntityParserCtxtInternal:
13942 * @URL: the entity URL
13943 * @ID: the entity PUBLIC ID
13944 * @base: a possible base for the target URI
13945 * @pctx: parser context used to set options on new context
13947 * Create a parser context for an external entity
13948 * Automatic support for ZLIB/Compress compressed document is provided
13949 * by default if found at compile-time.
13951 * Returns the new parser context or NULL
13953 static xmlParserCtxtPtr
13954 xmlCreateEntityParserCtxtInternal(const xmlChar
*URL
, const xmlChar
*ID
,
13955 const xmlChar
*base
, xmlParserCtxtPtr pctx
) {
13956 xmlParserCtxtPtr ctxt
;
13957 xmlParserInputPtr inputStream
;
13958 char *directory
= NULL
;
13961 ctxt
= xmlNewParserCtxt();
13962 if (ctxt
== NULL
) {
13966 if (pctx
!= NULL
) {
13967 ctxt
->options
= pctx
->options
;
13968 ctxt
->_private
= pctx
->_private
;
13970 * this is a subparser of pctx, so the input_id should be
13971 * incremented to distinguish from main entity
13973 ctxt
->input_id
= pctx
->input_id
+ 1;
13976 /* Don't read from stdin. */
13977 if (xmlStrcmp(URL
, BAD_CAST
"-") == 0)
13978 URL
= BAD_CAST
"./-";
13980 uri
= xmlBuildURI(URL
, base
);
13983 inputStream
= xmlLoadExternalEntity((char *)URL
, (char *)ID
, ctxt
);
13984 if (inputStream
== NULL
) {
13985 xmlFreeParserCtxt(ctxt
);
13989 inputPush(ctxt
, inputStream
);
13991 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
13992 directory
= xmlParserGetDirectory((char *)URL
);
13993 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
13994 ctxt
->directory
= directory
;
13996 inputStream
= xmlLoadExternalEntity((char *)uri
, (char *)ID
, ctxt
);
13997 if (inputStream
== NULL
) {
13999 xmlFreeParserCtxt(ctxt
);
14003 inputPush(ctxt
, inputStream
);
14005 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
14006 directory
= xmlParserGetDirectory((char *)uri
);
14007 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
14008 ctxt
->directory
= directory
;
14015 * xmlCreateEntityParserCtxt:
14016 * @URL: the entity URL
14017 * @ID: the entity PUBLIC ID
14018 * @base: a possible base for the target URI
14020 * Create a parser context for an external entity
14021 * Automatic support for ZLIB/Compress compressed document is provided
14022 * by default if found at compile-time.
14024 * Returns the new parser context or NULL
14027 xmlCreateEntityParserCtxt(const xmlChar
*URL
, const xmlChar
*ID
,
14028 const xmlChar
*base
) {
14029 return xmlCreateEntityParserCtxtInternal(URL
, ID
, base
, NULL
);
/************************************************************************
 *									*
 *		Front ends when parsing from a file			*
 *									*
 ************************************************************************/
14040 * xmlCreateURLParserCtxt:
14041 * @filename: the filename or URL
14042 * @options: a combination of xmlParserOption
14044 * Create a parser context for a file or URL content.
14045 * Automatic support for ZLIB/Compress compressed document is provided
14046 * by default if found at compile-time and for file accesses
14048 * Returns the new parser context or NULL
14051 xmlCreateURLParserCtxt(const char *filename
, int options
)
14053 xmlParserCtxtPtr ctxt
;
14054 xmlParserInputPtr inputStream
;
14055 char *directory
= NULL
;
14057 ctxt
= xmlNewParserCtxt();
14058 if (ctxt
== NULL
) {
14059 xmlErrMemory(NULL
, "cannot allocate parser context");
14064 xmlCtxtUseOptionsInternal(ctxt
, options
, NULL
);
14065 ctxt
->linenumbers
= 1;
14067 inputStream
= xmlLoadExternalEntity(filename
, NULL
, ctxt
);
14068 if (inputStream
== NULL
) {
14069 xmlFreeParserCtxt(ctxt
);
14073 inputPush(ctxt
, inputStream
);
14074 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
14075 directory
= xmlParserGetDirectory(filename
);
14076 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
14077 ctxt
->directory
= directory
;
14083 * xmlCreateFileParserCtxt:
14084 * @filename: the filename
14086 * Create a parser context for a file content.
14087 * Automatic support for ZLIB/Compress compressed document is provided
14088 * by default if found at compile-time.
14090 * Returns the new parser context or NULL
14093 xmlCreateFileParserCtxt(const char *filename
)
14095 return(xmlCreateURLParserCtxt(filename
, 0));
14098 #ifdef LIBXML_SAX1_ENABLED
14100 * xmlSAXParseFileWithData:
14101 * @sax: the SAX handler block
14102 * @filename: the filename
14103 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14105 * @data: the userdata
14107 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14108 * compressed document is provided by default if found at compile-time.
14109 * It use the given SAX function block to handle the parsing callback.
14110 * If sax is NULL, fallback to the default DOM tree building routines.
14112 * User data (void *) is stored within the parser context in the
14113 * context's _private member, so it is available nearly everywhere in libxml
14115 * Returns the resulting document tree
14119 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax
, const char *filename
,
14120 int recovery
, void *data
) {
14122 xmlParserCtxtPtr ctxt
;
14126 ctxt
= xmlCreateFileParserCtxt(filename
);
14127 if (ctxt
== NULL
) {
14131 if (ctxt
->sax
!= NULL
)
14132 xmlFree(ctxt
->sax
);
14135 xmlDetectSAX2(ctxt
);
14137 ctxt
->_private
= data
;
14140 if (ctxt
->directory
== NULL
)
14141 ctxt
->directory
= xmlParserGetDirectory(filename
);
14143 ctxt
->recovery
= recovery
;
14145 xmlParseDocument(ctxt
);
14147 if ((ctxt
->wellFormed
) || recovery
) {
14149 if ((ret
!= NULL
) && (ctxt
->input
->buf
!= NULL
)) {
14150 if (ctxt
->input
->buf
->compressed
> 0)
14151 ret
->compression
= 9;
14153 ret
->compression
= ctxt
->input
->buf
->compressed
;
14158 xmlFreeDoc(ctxt
->myDoc
);
14159 ctxt
->myDoc
= NULL
;
14163 xmlFreeParserCtxt(ctxt
);
14170 * @sax: the SAX handler block
14171 * @filename: the filename
14172 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14175 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14176 * compressed document is provided by default if found at compile-time.
14177 * It use the given SAX function block to handle the parsing callback.
14178 * If sax is NULL, fallback to the default DOM tree building routines.
14180 * Returns the resulting document tree
14184 xmlSAXParseFile(xmlSAXHandlerPtr sax
, const char *filename
,
14186 return(xmlSAXParseFileWithData(sax
,filename
,recovery
,NULL
));
14191 * @cur: a pointer to an array of xmlChar
14193 * parse an XML in-memory document and build a tree.
14194 * In the case the document is not Well Formed, a attempt to build a
14195 * tree is tried anyway
14197 * Returns the resulting document tree or NULL in case of failure
14201 xmlRecoverDoc(const xmlChar
*cur
) {
14202 return(xmlSAXParseDoc(NULL
, cur
, 1));
14207 * @filename: the filename
14209 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14210 * compressed document is provided by default if found at compile-time.
14212 * Returns the resulting document tree if the file was wellformed,
14217 xmlParseFile(const char *filename
) {
14218 return(xmlSAXParseFile(NULL
, filename
, 0));
14223 * @filename: the filename
14225 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14226 * compressed document is provided by default if found at compile-time.
14227 * In the case the document is not Well Formed, it attempts to build
14230 * Returns the resulting document tree or NULL in case of failure
14234 xmlRecoverFile(const char *filename
) {
14235 return(xmlSAXParseFile(NULL
, filename
, 1));
14240 * xmlSetupParserForBuffer:
14241 * @ctxt: an XML parser context
14242 * @buffer: a xmlChar * buffer
14243 * @filename: a file name
14245 * Setup the parser context to parse a new buffer; Clears any prior
14246 * contents from the parser context. The buffer parameter must not be
14247 * NULL, but the filename parameter can be
14250 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt
, const xmlChar
* buffer
,
14251 const char* filename
)
14253 xmlParserInputPtr input
;
14255 if ((ctxt
== NULL
) || (buffer
== NULL
))
14258 input
= xmlNewInputStream(ctxt
);
14259 if (input
== NULL
) {
14260 xmlErrMemory(NULL
, "parsing new buffer: out of memory\n");
14261 xmlClearParserCtxt(ctxt
);
14265 xmlClearParserCtxt(ctxt
);
14266 if (filename
!= NULL
)
14267 input
->filename
= (char *) xmlCanonicPath((const xmlChar
*)filename
);
14268 input
->base
= buffer
;
14269 input
->cur
= buffer
;
14270 input
->end
= &buffer
[xmlStrlen(buffer
)];
14271 inputPush(ctxt
, input
);
14275 * xmlSAXUserParseFile:
14276 * @sax: a SAX handler
14277 * @user_data: The user data returned on SAX callbacks
14278 * @filename: a file name
14280 * parse an XML file and call the given SAX handler routines.
14281 * Automatic support for ZLIB/Compress compressed document is provided
14283 * Returns 0 in case of success or a error number otherwise
14286 xmlSAXUserParseFile(xmlSAXHandlerPtr sax
, void *user_data
,
14287 const char *filename
) {
14289 xmlParserCtxtPtr ctxt
;
14291 ctxt
= xmlCreateFileParserCtxt(filename
);
14292 if (ctxt
== NULL
) return -1;
14293 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
14294 xmlFree(ctxt
->sax
);
14296 xmlDetectSAX2(ctxt
);
14298 if (user_data
!= NULL
)
14299 ctxt
->userData
= user_data
;
14301 xmlParseDocument(ctxt
);
14303 if (ctxt
->wellFormed
)
14306 if (ctxt
->errNo
!= 0)
14313 if (ctxt
->myDoc
!= NULL
) {
14314 xmlFreeDoc(ctxt
->myDoc
);
14315 ctxt
->myDoc
= NULL
;
14317 xmlFreeParserCtxt(ctxt
);
14321 #endif /* LIBXML_SAX1_ENABLED */
/************************************************************************
 *									*
 *		Front ends when parsing from memory			*
 *									*
 ************************************************************************/
14330 * xmlCreateMemoryParserCtxt:
14331 * @buffer: a pointer to a char array
14332 * @size: the size of the array
14334 * Create a parser context for an XML in-memory document.
14336 * Returns the new parser context or NULL
14339 xmlCreateMemoryParserCtxt(const char *buffer
, int size
) {
14340 xmlParserCtxtPtr ctxt
;
14341 xmlParserInputPtr input
;
14342 xmlParserInputBufferPtr buf
;
14344 if (buffer
== NULL
)
14349 ctxt
= xmlNewParserCtxt();
14353 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14354 buf
= xmlParserInputBufferCreateMem(buffer
, size
, XML_CHAR_ENCODING_NONE
);
14356 xmlFreeParserCtxt(ctxt
);
14360 input
= xmlNewInputStream(ctxt
);
14361 if (input
== NULL
) {
14362 xmlFreeParserInputBuffer(buf
);
14363 xmlFreeParserCtxt(ctxt
);
14367 input
->filename
= NULL
;
14369 xmlBufResetInput(input
->buf
->buffer
, input
);
14371 inputPush(ctxt
, input
);
14375 #ifdef LIBXML_SAX1_ENABLED
14377 * xmlSAXParseMemoryWithData:
14378 * @sax: the SAX handler block
14379 * @buffer: an pointer to a char array
14380 * @size: the size of the array
14381 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14383 * @data: the userdata
14385 * parse an XML in-memory block and use the given SAX function block
14386 * to handle the parsing callback. If sax is NULL, fallback to the default
14387 * DOM tree building routines.
14389 * User data (void *) is stored within the parser context in the
14390 * context's _private member, so it is available nearly everywhere in libxml
14392 * Returns the resulting document tree
14396 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax
, const char *buffer
,
14397 int size
, int recovery
, void *data
) {
14399 xmlParserCtxtPtr ctxt
;
14403 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
14404 if (ctxt
== NULL
) return(NULL
);
14406 if (ctxt
->sax
!= NULL
)
14407 xmlFree(ctxt
->sax
);
14410 xmlDetectSAX2(ctxt
);
14412 ctxt
->_private
=data
;
14415 ctxt
->recovery
= recovery
;
14417 xmlParseDocument(ctxt
);
14419 if ((ctxt
->wellFormed
) || recovery
) ret
= ctxt
->myDoc
;
14422 xmlFreeDoc(ctxt
->myDoc
);
14423 ctxt
->myDoc
= NULL
;
14427 xmlFreeParserCtxt(ctxt
);
14433 * xmlSAXParseMemory:
14434 * @sax: the SAX handler block
14435 * @buffer: an pointer to a char array
14436 * @size: the size of the array
14437 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14440 * parse an XML in-memory block and use the given SAX function block
14441 * to handle the parsing callback. If sax is NULL, fallback to the default
14442 * DOM tree building routines.
14444 * Returns the resulting document tree
14447 xmlSAXParseMemory(xmlSAXHandlerPtr sax
, const char *buffer
,
14448 int size
, int recovery
) {
14449 return xmlSAXParseMemoryWithData(sax
, buffer
, size
, recovery
, NULL
);
14454 * @buffer: an pointer to a char array
14455 * @size: the size of the array
14457 * parse an XML in-memory block and build a tree.
14459 * Returns the resulting document tree
14462 xmlDocPtr
xmlParseMemory(const char *buffer
, int size
) {
14463 return(xmlSAXParseMemory(NULL
, buffer
, size
, 0));
14467 * xmlRecoverMemory:
14468 * @buffer: an pointer to a char array
14469 * @size: the size of the array
14471 * parse an XML in-memory block and build a tree.
14472 * In the case the document is not Well Formed, an attempt to
14473 * build a tree is tried anyway
14475 * Returns the resulting document tree or NULL in case of error
14478 xmlDocPtr
xmlRecoverMemory(const char *buffer
, int size
) {
14479 return(xmlSAXParseMemory(NULL
, buffer
, size
, 1));
14483 * xmlSAXUserParseMemory:
14484 * @sax: a SAX handler
14485 * @user_data: The user data returned on SAX callbacks
14486 * @buffer: an in-memory XML document input
14487 * @size: the length of the XML document in bytes
14489 * A better SAX parsing routine.
14490 * parse an XML in-memory buffer and call the given SAX handler routines.
14492 * Returns 0 in case of success or a error number otherwise
14494 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax
, void *user_data
,
14495 const char *buffer
, int size
) {
14497 xmlParserCtxtPtr ctxt
;
14501 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
14502 if (ctxt
== NULL
) return -1;
14503 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
14504 xmlFree(ctxt
->sax
);
14506 xmlDetectSAX2(ctxt
);
14508 if (user_data
!= NULL
)
14509 ctxt
->userData
= user_data
;
14511 xmlParseDocument(ctxt
);
14513 if (ctxt
->wellFormed
)
14516 if (ctxt
->errNo
!= 0)
14523 if (ctxt
->myDoc
!= NULL
) {
14524 xmlFreeDoc(ctxt
->myDoc
);
14525 ctxt
->myDoc
= NULL
;
14527 xmlFreeParserCtxt(ctxt
);
14531 #endif /* LIBXML_SAX1_ENABLED */
14534 * xmlCreateDocParserCtxt:
14535 * @cur: a pointer to an array of xmlChar
14537 * Creates a parser context for an XML in-memory document.
14539 * Returns the new parser context or NULL
14542 xmlCreateDocParserCtxt(const xmlChar
*cur
) {
14547 len
= xmlStrlen(cur
);
14548 return(xmlCreateMemoryParserCtxt((const char *)cur
, len
));
14551 #ifdef LIBXML_SAX1_ENABLED
14554 * @sax: the SAX handler block
14555 * @cur: a pointer to an array of xmlChar
14556 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14559 * parse an XML in-memory document and build a tree.
14560 * It use the given SAX function block to handle the parsing callback.
14561 * If sax is NULL, fallback to the default DOM tree building routines.
14563 * Returns the resulting document tree
14567 xmlSAXParseDoc(xmlSAXHandlerPtr sax
, const xmlChar
*cur
, int recovery
) {
14569 xmlParserCtxtPtr ctxt
;
14570 xmlSAXHandlerPtr oldsax
= NULL
;
14572 if (cur
== NULL
) return(NULL
);
14575 ctxt
= xmlCreateDocParserCtxt(cur
);
14576 if (ctxt
== NULL
) return(NULL
);
14578 oldsax
= ctxt
->sax
;
14580 ctxt
->userData
= NULL
;
14582 xmlDetectSAX2(ctxt
);
14584 xmlParseDocument(ctxt
);
14585 if ((ctxt
->wellFormed
) || recovery
) ret
= ctxt
->myDoc
;
14588 xmlFreeDoc(ctxt
->myDoc
);
14589 ctxt
->myDoc
= NULL
;
14592 ctxt
->sax
= oldsax
;
14593 xmlFreeParserCtxt(ctxt
);
14600 * @cur: a pointer to an array of xmlChar
14602 * parse an XML in-memory document and build a tree.
14604 * Returns the resulting document tree
14608 xmlParseDoc(const xmlChar
*cur
) {
14609 return(xmlSAXParseDoc(NULL
, cur
, 0));
14611 #endif /* LIBXML_SAX1_ENABLED */
14613 #ifdef LIBXML_LEGACY_ENABLED
/************************************************************************
 *									*
 *	Specific function to keep track of entities references		*
 *	and used by the XSLT debugger					*
 *									*
 ************************************************************************/
/* Callback invoked by xmlAddEntityReference(); NULL means none is
 * registered. Installed through xmlSetEntityReferenceFunc(). */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14624 * xmlAddEntityReference:
14625 * @ent : A valid entity
14626 * @firstNode : A valid first node for children of entity
14627 * @lastNode : A valid last node of children entity
14629 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14632 xmlAddEntityReference(xmlEntityPtr ent
, xmlNodePtr firstNode
,
14633 xmlNodePtr lastNode
)
14635 if (xmlEntityRefFunc
!= NULL
) {
14636 (*xmlEntityRefFunc
) (ent
, firstNode
, lastNode
);
/**
 * xmlSetEntityReferenceFunc:
 * @func: A valid function
 *
 * Set the function to call back when an XML entity reference has been
 * made. The pointer is stored as-is; it is not validated here.
 */
void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
{
    xmlEntityRefFunc = func;
}
14652 #endif /* LIBXML_LEGACY_ENABLED */
14654 /************************************************************************
14658 ************************************************************************/
14660 #ifdef LIBXML_XPATH_ENABLED
14661 #include <libxml/xpath.h>
/* Generic error handler defined outside this file; xmlInitParser()
 * compares xmlGenericError against it. */
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 by xmlInitParser() and back to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Subsequent calls return immediately once the initialized flag is set.
 */

void
xmlInitParser(void) {
    /* Fast path: already initialized. */
    if (xmlParserInitialized != 0)
        return;

#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
    /* Only register the exit hook when the default deallocator is in use. */
    if (xmlFree == free)
        atexit(xmlCleanupParser);
#endif

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexLock();
    /* Re-check the flag now that the global init mutex is held. */
    if (xmlParserInitialized == 0) {
#endif
        xmlInitThreads();
        xmlInitGlobals();
        if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
            (xmlGenericError == NULL))
            initGenericErrorDefaultFunc(NULL);
        xmlInitMemory();
        xmlInitializeDict();
        xmlInitCharEncodingHandlers();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
        xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
 *
 * WARNING: if your application is multithreaded or has plugin support
 *          calling this may crash the application if another thread or
 *          a plugin is still using libxml2. It's sometimes very hard to
 *          guess if libxml2 is in use in the application, some libraries
 *          or plugins may use it without notice. In case of doubt abstain
 *          from calling this function or do it just before calling exit()
 *          to avoid leak reports from valgrind !
 */

void
xmlCleanupParser(void) {
    /* Nothing to do if the library was never initialized. */
    if (!xmlParserInitialized)
        return;

    xmlCleanupCharEncodingHandlers();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlDictCleanup();
    xmlCleanupInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlCleanupOutputCallbacks();
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
    xmlSchemaCleanupTypes();
    xmlRelaxNGCleanupTypes();
#endif
    xmlCleanupGlobals();
    xmlCleanupThreads(); /* must be last if called not from the main thread */
    xmlCleanupMemory();
    /* Allow a later xmlInitParser() to run the full initialization again. */
    xmlParserInitialized = 0;
}
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Function marked with ATTRIBUTE_DESTRUCTOR that calls
 * xmlCleanupParser(), but only while xmlFree is still the C library's
 * free().
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree == free)
        xmlCleanupParser();
}
#endif
/************************************************************************
 *									*
 *	New set (2.6.0) of simpler and more flexible APIs		*
 *									*
 ************************************************************************/
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is expanded; a NULL dict means the string is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as a
 * single statement (safe inside an unbraced if/else) and requires the
 * caller's terminating semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
14796 * @ctxt: an XML parser context
14798 * Reset a parser context
14801 xmlCtxtReset(xmlParserCtxtPtr ctxt
)
14803 xmlParserInputPtr input
;
14811 while ((input
= inputPop(ctxt
)) != NULL
) { /* Non consuming */
14812 xmlFreeInputStream(input
);
14815 ctxt
->input
= NULL
;
14818 if (ctxt
->spaceTab
!= NULL
) {
14819 ctxt
->spaceTab
[0] = -1;
14820 ctxt
->space
= &ctxt
->spaceTab
[0];
14822 ctxt
->space
= NULL
;
14834 DICT_FREE(ctxt
->version
);
14835 ctxt
->version
= NULL
;
14836 DICT_FREE(ctxt
->encoding
);
14837 ctxt
->encoding
= NULL
;
14838 DICT_FREE(ctxt
->directory
);
14839 ctxt
->directory
= NULL
;
14840 DICT_FREE(ctxt
->extSubURI
);
14841 ctxt
->extSubURI
= NULL
;
14842 DICT_FREE(ctxt
->extSubSystem
);
14843 ctxt
->extSubSystem
= NULL
;
14844 if (ctxt
->myDoc
!= NULL
)
14845 xmlFreeDoc(ctxt
->myDoc
);
14846 ctxt
->myDoc
= NULL
;
14848 ctxt
->standalone
= -1;
14849 ctxt
->hasExternalSubset
= 0;
14850 ctxt
->hasPErefs
= 0;
14852 ctxt
->external
= 0;
14853 ctxt
->instate
= XML_PARSER_START
;
14856 ctxt
->wellFormed
= 1;
14857 ctxt
->nsWellFormed
= 1;
14858 ctxt
->disableSAX
= 0;
14861 ctxt
->vctxt
.userData
= ctxt
;
14862 ctxt
->vctxt
.error
= xmlParserValidityError
;
14863 ctxt
->vctxt
.warning
= xmlParserValidityWarning
;
14865 ctxt
->record_info
= 0;
14866 ctxt
->checkIndex
= 0;
14867 ctxt
->inSubset
= 0;
14868 ctxt
->errNo
= XML_ERR_OK
;
14870 ctxt
->charset
= XML_CHAR_ENCODING_UTF8
;
14871 ctxt
->catalogs
= NULL
;
14872 ctxt
->nbentities
= 0;
14873 ctxt
->sizeentities
= 0;
14874 ctxt
->sizeentcopy
= 0;
14875 xmlInitNodeInfoSeq(&ctxt
->node_seq
);
14877 if (ctxt
->attsDefault
!= NULL
) {
14878 xmlHashFree(ctxt
->attsDefault
, xmlHashDefaultDeallocator
);
14879 ctxt
->attsDefault
= NULL
;
14881 if (ctxt
->attsSpecial
!= NULL
) {
14882 xmlHashFree(ctxt
->attsSpecial
, NULL
);
14883 ctxt
->attsSpecial
= NULL
;
14886 #ifdef LIBXML_CATALOG_ENABLED
14887 if (ctxt
->catalogs
!= NULL
)
14888 xmlCatalogFreeLocal(ctxt
->catalogs
);
14890 if (ctxt
->lastError
.code
!= XML_ERR_OK
)
14891 xmlResetError(&ctxt
->lastError
);
/**
 * xmlCtxtResetPush:
 * @ctxt: an XML parser context
 * @chunk:  a pointer to an array of chars
 * @size:  number of chars in the array
 * @filename:  an optional file name or URI
 * @encoding:  the document encoding, or NULL
 *
 * Reset a push parser context: the context is reset, a fresh input
 * stream backed by a new input buffer is pushed, the optional first
 * @chunk is fed into it and the encoding is set up.
 *
 * When @encoding is NULL and at least 4 bytes are supplied, the encoding
 * is guessed from the start of @chunk.
 *
 * Returns 0 in case of success and 1 in case of error
 */
int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
                 int size, const char *filename, const char *encoding)
{
    xmlParserInputPtr inputStream;
    xmlParserInputBufferPtr buf;
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;

    if (ctxt == NULL)
        return(1);

    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);

    buf = xmlAllocParserInputBuffer(enc);
    if (buf == NULL)
        return(1);

    /* Redundant with the check at the top; kept as in the original. */
    if (ctxt == NULL) {
        xmlFreeParserInputBuffer(buf);
        return(1);
    }

    xmlCtxtReset(ctxt);

    if (filename == NULL) {
        ctxt->directory = NULL;
    } else {
        ctxt->directory = xmlParserGetDirectory(filename);
    }

    inputStream = xmlNewInputStream(ctxt);
    if (inputStream == NULL) {
        xmlFreeParserInputBuffer(buf);
        return(1);
    }

    if (filename == NULL)
        inputStream->filename = NULL;
    else
        inputStream->filename = (char *)
            xmlCanonicPath((const xmlChar *) filename);
    inputStream->buf = buf;
    xmlBufResetInput(buf->buffer, inputStream);

    inputPush(ctxt, inputStream);

    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
        (ctxt->input->buf != NULL)) {
        /* Save base/cur as offsets and re-apply them after the push. */
        size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
        size_t cur = ctxt->input->cur - ctxt->input->base;

        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);

        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
#ifdef DEBUG_PUSH
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
#endif
    }

    if (encoding != NULL) {
        xmlCharEncodingHandlerPtr hdlr;

        if (ctxt->encoding != NULL)
            xmlFree((xmlChar *) ctxt->encoding);
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);

        hdlr = xmlFindCharEncodingHandler(encoding);
        if (hdlr != NULL) {
            xmlSwitchToEncoding(ctxt, hdlr);
        } else {
            xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                              "Unsupported encoding %s\n", BAD_CAST encoding);
        }
    } else if (enc != XML_CHAR_ENCODING_NONE) {
        /* No explicit encoding: use the one detected from the chunk. */
        xmlSwitchEncoding(ctxt, enc);
    }

    return(0);
}
14989 * xmlCtxtUseOptionsInternal:
14990 * @ctxt: an XML parser context
14991 * @options: a combination of xmlParserOption
14992 * @encoding: the user provided encoding to use
14994 * Applies the options to the parser context
14996 * Returns 0 in case of success, the set of unknown or unimplemented options
14997 * in case of error.
15000 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt
, int options
, const char *encoding
)
15004 if (encoding
!= NULL
) {
15005 if (ctxt
->encoding
!= NULL
)
15006 xmlFree((xmlChar
*) ctxt
->encoding
);
15007 ctxt
->encoding
= xmlStrdup((const xmlChar
*) encoding
);
15009 if (options
& XML_PARSE_RECOVER
) {
15010 ctxt
->recovery
= 1;
15011 options
-= XML_PARSE_RECOVER
;
15012 ctxt
->options
|= XML_PARSE_RECOVER
;
15014 ctxt
->recovery
= 0;
15015 if (options
& XML_PARSE_DTDLOAD
) {
15016 ctxt
->loadsubset
= XML_DETECT_IDS
;
15017 options
-= XML_PARSE_DTDLOAD
;
15018 ctxt
->options
|= XML_PARSE_DTDLOAD
;
15020 ctxt
->loadsubset
= 0;
15021 if (options
& XML_PARSE_DTDATTR
) {
15022 ctxt
->loadsubset
|= XML_COMPLETE_ATTRS
;
15023 options
-= XML_PARSE_DTDATTR
;
15024 ctxt
->options
|= XML_PARSE_DTDATTR
;
15026 if (options
& XML_PARSE_NOENT
) {
15027 ctxt
->replaceEntities
= 1;
15028 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15029 options
-= XML_PARSE_NOENT
;
15030 ctxt
->options
|= XML_PARSE_NOENT
;
15032 ctxt
->replaceEntities
= 0;
15033 if (options
& XML_PARSE_PEDANTIC
) {
15034 ctxt
->pedantic
= 1;
15035 options
-= XML_PARSE_PEDANTIC
;
15036 ctxt
->options
|= XML_PARSE_PEDANTIC
;
15038 ctxt
->pedantic
= 0;
15039 if (options
& XML_PARSE_NOBLANKS
) {
15040 ctxt
->keepBlanks
= 0;
15041 ctxt
->sax
->ignorableWhitespace
= xmlSAX2IgnorableWhitespace
;
15042 options
-= XML_PARSE_NOBLANKS
;
15043 ctxt
->options
|= XML_PARSE_NOBLANKS
;
15045 ctxt
->keepBlanks
= 1;
15046 if (options
& XML_PARSE_DTDVALID
) {
15047 ctxt
->validate
= 1;
15048 if (options
& XML_PARSE_NOWARNING
)
15049 ctxt
->vctxt
.warning
= NULL
;
15050 if (options
& XML_PARSE_NOERROR
)
15051 ctxt
->vctxt
.error
= NULL
;
15052 options
-= XML_PARSE_DTDVALID
;
15053 ctxt
->options
|= XML_PARSE_DTDVALID
;
15055 ctxt
->validate
= 0;
15056 if (options
& XML_PARSE_NOWARNING
) {
15057 ctxt
->sax
->warning
= NULL
;
15058 options
-= XML_PARSE_NOWARNING
;
15060 if (options
& XML_PARSE_NOERROR
) {
15061 ctxt
->sax
->error
= NULL
;
15062 ctxt
->sax
->fatalError
= NULL
;
15063 options
-= XML_PARSE_NOERROR
;
15065 #ifdef LIBXML_SAX1_ENABLED
15066 if (options
& XML_PARSE_SAX1
) {
15067 ctxt
->sax
->startElement
= xmlSAX2StartElement
;
15068 ctxt
->sax
->endElement
= xmlSAX2EndElement
;
15069 ctxt
->sax
->startElementNs
= NULL
;
15070 ctxt
->sax
->endElementNs
= NULL
;
15071 ctxt
->sax
->initialized
= 1;
15072 options
-= XML_PARSE_SAX1
;
15073 ctxt
->options
|= XML_PARSE_SAX1
;
15075 #endif /* LIBXML_SAX1_ENABLED */
15076 if (options
& XML_PARSE_NODICT
) {
15077 ctxt
->dictNames
= 0;
15078 options
-= XML_PARSE_NODICT
;
15079 ctxt
->options
|= XML_PARSE_NODICT
;
15081 ctxt
->dictNames
= 1;
15083 if (options
& XML_PARSE_NOCDATA
) {
15084 ctxt
->sax
->cdataBlock
= NULL
;
15085 options
-= XML_PARSE_NOCDATA
;
15086 ctxt
->options
|= XML_PARSE_NOCDATA
;
15088 if (options
& XML_PARSE_NSCLEAN
) {
15089 ctxt
->options
|= XML_PARSE_NSCLEAN
;
15090 options
-= XML_PARSE_NSCLEAN
;
15092 if (options
& XML_PARSE_NONET
) {
15093 ctxt
->options
|= XML_PARSE_NONET
;
15094 options
-= XML_PARSE_NONET
;
15096 if (options
& XML_PARSE_COMPACT
) {
15097 ctxt
->options
|= XML_PARSE_COMPACT
;
15098 options
-= XML_PARSE_COMPACT
;
15100 if (options
& XML_PARSE_OLD10
) {
15101 ctxt
->options
|= XML_PARSE_OLD10
;
15102 options
-= XML_PARSE_OLD10
;
15104 if (options
& XML_PARSE_NOBASEFIX
) {
15105 ctxt
->options
|= XML_PARSE_NOBASEFIX
;
15106 options
-= XML_PARSE_NOBASEFIX
;
15108 if (options
& XML_PARSE_HUGE
) {
15109 ctxt
->options
|= XML_PARSE_HUGE
;
15110 options
-= XML_PARSE_HUGE
;
15111 if (ctxt
->dict
!= NULL
)
15112 xmlDictSetLimit(ctxt
->dict
, 0);
15114 if (options
& XML_PARSE_OLDSAX
) {
15115 ctxt
->options
|= XML_PARSE_OLDSAX
;
15116 options
-= XML_PARSE_OLDSAX
;
15118 if (options
& XML_PARSE_IGNORE_ENC
) {
15119 ctxt
->options
|= XML_PARSE_IGNORE_ENC
;
15120 options
-= XML_PARSE_IGNORE_ENC
;
15122 if (options
& XML_PARSE_BIG_LINES
) {
15123 ctxt
->options
|= XML_PARSE_BIG_LINES
;
15124 options
-= XML_PARSE_BIG_LINES
;
15126 ctxt
->linenumbers
= 1;
15131 * xmlCtxtUseOptions:
15132 * @ctxt: an XML parser context
15133 * @options: a combination of xmlParserOption
15135 * Applies the options to the parser context
15137 * Returns 0 in case of success, the set of unknown or unimplemented options
15138 * in case of error.
15141 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt
, int options
)
15143 return(xmlCtxtUseOptionsInternal(ctxt
, options
, NULL
));
15148 * @ctxt: an XML parser context
15149 * @URL: the base URL to use for the document
15150 * @encoding: the document encoding, or NULL
15151 * @options: a combination of xmlParserOption
15152 * @reuse: keep the context for reuse
15154 * Common front-end for the xmlRead functions
15156 * Returns the resulting document tree or NULL
15159 xmlDoRead(xmlParserCtxtPtr ctxt
, const char *URL
, const char *encoding
,
15160 int options
, int reuse
)
15164 xmlCtxtUseOptionsInternal(ctxt
, options
, encoding
);
15165 if (encoding
!= NULL
) {
15166 xmlCharEncodingHandlerPtr hdlr
;
15168 hdlr
= xmlFindCharEncodingHandler(encoding
);
15170 xmlSwitchToEncoding(ctxt
, hdlr
);
15172 if ((URL
!= NULL
) && (ctxt
->input
!= NULL
) &&
15173 (ctxt
->input
->filename
== NULL
))
15174 ctxt
->input
->filename
= (char *) xmlStrdup((const xmlChar
*) URL
);
15175 xmlParseDocument(ctxt
);
15176 if ((ctxt
->wellFormed
) || ctxt
->recovery
)
15180 if (ctxt
->myDoc
!= NULL
) {
15181 xmlFreeDoc(ctxt
->myDoc
);
15184 ctxt
->myDoc
= NULL
;
15186 xmlFreeParserCtxt(ctxt
);
15194 * @cur: a pointer to a zero terminated string
15195 * @URL: the base URL to use for the document
15196 * @encoding: the document encoding, or NULL
15197 * @options: a combination of xmlParserOption
15199 * parse an XML in-memory document and build a tree.
15201 * Returns the resulting document tree
15204 xmlReadDoc(const xmlChar
* cur
, const char *URL
, const char *encoding
, int options
)
15206 xmlParserCtxtPtr ctxt
;
15212 ctxt
= xmlCreateDocParserCtxt(cur
);
15215 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
15220 * @filename: a file or URL
15221 * @encoding: the document encoding, or NULL
15222 * @options: a combination of xmlParserOption
15224 * parse an XML file from the filesystem or the network.
15226 * Returns the resulting document tree
15229 xmlReadFile(const char *filename
, const char *encoding
, int options
)
15231 xmlParserCtxtPtr ctxt
;
15234 ctxt
= xmlCreateURLParserCtxt(filename
, options
);
15237 return (xmlDoRead(ctxt
, NULL
, encoding
, options
, 0));
15242 * @buffer: a pointer to a char array
15243 * @size: the size of the array
15244 * @URL: the base URL to use for the document
15245 * @encoding: the document encoding, or NULL
15246 * @options: a combination of xmlParserOption
15248 * parse an XML in-memory document and build a tree.
15250 * Returns the resulting document tree
15253 xmlReadMemory(const char *buffer
, int size
, const char *URL
, const char *encoding
, int options
)
15255 xmlParserCtxtPtr ctxt
;
15258 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
15261 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
15266 * @fd: an open file descriptor
15267 * @URL: the base URL to use for the document
15268 * @encoding: the document encoding, or NULL
15269 * @options: a combination of xmlParserOption
15271 * parse an XML from a file descriptor and build a tree.
15272 * NOTE that the file descriptor will not be closed when the
15273 * reader is closed or reset.
15275 * Returns the resulting document tree
15278 xmlReadFd(int fd
, const char *URL
, const char *encoding
, int options
)
15280 xmlParserCtxtPtr ctxt
;
15281 xmlParserInputBufferPtr input
;
15282 xmlParserInputPtr stream
;
15288 input
= xmlParserInputBufferCreateFd(fd
, XML_CHAR_ENCODING_NONE
);
15291 input
->closecallback
= NULL
;
15292 ctxt
= xmlNewParserCtxt();
15293 if (ctxt
== NULL
) {
15294 xmlFreeParserInputBuffer(input
);
15297 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
15298 if (stream
== NULL
) {
15299 xmlFreeParserInputBuffer(input
);
15300 xmlFreeParserCtxt(ctxt
);
15303 inputPush(ctxt
, stream
);
15304 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
15309 * @ioread: an I/O read function
15310 * @ioclose: an I/O close function
15311 * @ioctx: an I/O handler
15312 * @URL: the base URL to use for the document
15313 * @encoding: the document encoding, or NULL
15314 * @options: a combination of xmlParserOption
15316 * parse an XML document from I/O functions and source and build a tree.
15318 * Returns the resulting document tree
15321 xmlReadIO(xmlInputReadCallback ioread
, xmlInputCloseCallback ioclose
,
15322 void *ioctx
, const char *URL
, const char *encoding
, int options
)
15324 xmlParserCtxtPtr ctxt
;
15325 xmlParserInputBufferPtr input
;
15326 xmlParserInputPtr stream
;
15328 if (ioread
== NULL
)
15332 input
= xmlParserInputBufferCreateIO(ioread
, ioclose
, ioctx
,
15333 XML_CHAR_ENCODING_NONE
);
15334 if (input
== NULL
) {
15335 if (ioclose
!= NULL
)
15339 ctxt
= xmlNewParserCtxt();
15340 if (ctxt
== NULL
) {
15341 xmlFreeParserInputBuffer(input
);
15344 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
15345 if (stream
== NULL
) {
15346 xmlFreeParserInputBuffer(input
);
15347 xmlFreeParserCtxt(ctxt
);
15350 inputPush(ctxt
, stream
);
15351 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
15356 * @ctxt: an XML parser context
15357 * @cur: a pointer to a zero terminated string
15358 * @URL: the base URL to use for the document
15359 * @encoding: the document encoding, or NULL
15360 * @options: a combination of xmlParserOption
15362 * parse an XML in-memory document and build a tree.
15363 * This reuses the existing @ctxt parser context
15365 * Returns the resulting document tree
15368 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt
, const xmlChar
* cur
,
15369 const char *URL
, const char *encoding
, int options
)
15373 return (xmlCtxtReadMemory(ctxt
, (const char *) cur
, xmlStrlen(cur
), URL
,
15374 encoding
, options
));
15379 * @ctxt: an XML parser context
15380 * @filename: a file or URL
15381 * @encoding: the document encoding, or NULL
15382 * @options: a combination of xmlParserOption
15384 * parse an XML file from the filesystem or the network.
15385 * This reuses the existing @ctxt parser context
15387 * Returns the resulting document tree
15390 xmlCtxtReadFile(xmlParserCtxtPtr ctxt
, const char *filename
,
15391 const char *encoding
, int options
)
15393 xmlParserInputPtr stream
;
15395 if (filename
== NULL
)
15401 xmlCtxtReset(ctxt
);
15403 stream
= xmlLoadExternalEntity(filename
, NULL
, ctxt
);
15404 if (stream
== NULL
) {
15407 inputPush(ctxt
, stream
);
15408 return (xmlDoRead(ctxt
, NULL
, encoding
, options
, 1));
15412 * xmlCtxtReadMemory:
15413 * @ctxt: an XML parser context
15414 * @buffer: a pointer to a char array
15415 * @size: the size of the array
15416 * @URL: the base URL to use for the document
15417 * @encoding: the document encoding, or NULL
15418 * @options: a combination of xmlParserOption
15420 * parse an XML in-memory document and build a tree.
15421 * This reuses the existing @ctxt parser context
15423 * Returns the resulting document tree
15426 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt
, const char *buffer
, int size
,
15427 const char *URL
, const char *encoding
, int options
)
15429 xmlParserInputBufferPtr input
;
15430 xmlParserInputPtr stream
;
15434 if (buffer
== NULL
)
15438 xmlCtxtReset(ctxt
);
15440 input
= xmlParserInputBufferCreateMem(buffer
, size
, XML_CHAR_ENCODING_NONE
);
15441 if (input
== NULL
) {
15445 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
15446 if (stream
== NULL
) {
15447 xmlFreeParserInputBuffer(input
);
15451 inputPush(ctxt
, stream
);
15452 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));
15457 * @ctxt: an XML parser context
15458 * @fd: an open file descriptor
15459 * @URL: the base URL to use for the document
15460 * @encoding: the document encoding, or NULL
15461 * @options: a combination of xmlParserOption
15463 * parse an XML from a file descriptor and build a tree.
15464 * This reuses the existing @ctxt parser context
15465 * NOTE that the file descriptor will not be closed when the
15466 * reader is closed or reset.
15468 * Returns the resulting document tree
15471 xmlCtxtReadFd(xmlParserCtxtPtr ctxt
, int fd
,
15472 const char *URL
, const char *encoding
, int options
)
15474 xmlParserInputBufferPtr input
;
15475 xmlParserInputPtr stream
;
15483 xmlCtxtReset(ctxt
);
15486 input
= xmlParserInputBufferCreateFd(fd
, XML_CHAR_ENCODING_NONE
);
15489 input
->closecallback
= NULL
;
15490 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
15491 if (stream
== NULL
) {
15492 xmlFreeParserInputBuffer(input
);
15495 inputPush(ctxt
, stream
);
15496 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));
15501 * @ctxt: an XML parser context
15502 * @ioread: an I/O read function
15503 * @ioclose: an I/O close function
15504 * @ioctx: an I/O handler
15505 * @URL: the base URL to use for the document
15506 * @encoding: the document encoding, or NULL
15507 * @options: a combination of xmlParserOption
15509 * parse an XML document from I/O functions and source and build a tree.
15510 * This reuses the existing @ctxt parser context
15512 * Returns the resulting document tree
15515 xmlCtxtReadIO(xmlParserCtxtPtr ctxt
, xmlInputReadCallback ioread
,
15516 xmlInputCloseCallback ioclose
, void *ioctx
,
15518 const char *encoding
, int options
)
15520 xmlParserInputBufferPtr input
;
15521 xmlParserInputPtr stream
;
15523 if (ioread
== NULL
)
15529 xmlCtxtReset(ctxt
);
15531 input
= xmlParserInputBufferCreateIO(ioread
, ioclose
, ioctx
,
15532 XML_CHAR_ENCODING_NONE
);
15533 if (input
== NULL
) {
15534 if (ioclose
!= NULL
)
15538 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
15539 if (stream
== NULL
) {
15540 xmlFreeParserInputBuffer(input
);
15543 inputPush(ctxt
, stream
);
15544 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));