2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different character ranges are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
28 * See Copyright for the status of this software.
33 /* To avoid EBCDIC trouble when parsing on zOS */
35 #pragma convert("ISO8859-1")
42 #define XML_DIR_SEP '\\'
44 #define XML_DIR_SEP '/'
54 #include <libxml/xmlmemory.h>
55 #include <libxml/threads.h>
56 #include <libxml/globals.h>
57 #include <libxml/tree.h>
58 #include <libxml/parser.h>
59 #include <libxml/parserInternals.h>
60 #include <libxml/valid.h>
61 #include <libxml/entities.h>
62 #include <libxml/xmlerror.h>
63 #include <libxml/encoding.h>
64 #include <libxml/xmlIO.h>
65 #include <libxml/uri.h>
66 #ifdef LIBXML_CATALOG_ENABLED
67 #include <libxml/catalog.h>
69 #ifdef LIBXML_SCHEMAS_ENABLED
70 #include <libxml/xmlschemastypes.h>
71 #include <libxml/relaxng.h>
78 const xmlChar
*prefix
;
85 xmlFatalErr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
, const char *info
);
87 static xmlParserCtxtPtr
88 xmlCreateEntityParserCtxtInternal(const xmlChar
*URL
, const xmlChar
*ID
,
89 const xmlChar
*base
, xmlParserCtxtPtr pctx
);
91 static void xmlHaltParser(xmlParserCtxtPtr ctxt
);
94 xmlParseElementStart(xmlParserCtxtPtr ctxt
);
97 xmlParseElementEnd(xmlParserCtxtPtr ctxt
);
99 /************************************************************************
101 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
103 ************************************************************************/
105 #define XML_PARSER_BIG_ENTITY 1000
106 #define XML_PARSER_LOT_ENTITY 5000
109 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
110 * replacement over the size in bytes of the input indicates that you have
111 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
112 * replacements per byte of input.
114 #define XML_PARSER_NON_LINEAR 10
117 * xmlParserEntityCheck
119 * Function to check non-linear entity expansion behaviour
120 * This is here to detect and stop exponential (non-linear) entity expansion
121 * This is not a limitation of the parser but a safety
122 * boundary feature. It can be disabled with the XML_PARSE_HUGE
126 xmlParserEntityCheck(xmlParserCtxtPtr ctxt
, size_t size
,
127 xmlEntityPtr ent
, size_t replacement
)
132 if ((ctxt
== NULL
) || (ctxt
->options
& XML_PARSE_HUGE
))
134 if (ctxt
->lastError
.code
== XML_ERR_ENTITY_LOOP
)
138 * This may look absurd but is needed to detect
141 if ((ent
!= NULL
) && (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) &&
142 (ent
->content
!= NULL
) && (ent
->checked
== 0) &&
143 (ctxt
->errNo
!= XML_ERR_ENTITY_LOOP
)) {
144 unsigned long oldnbent
= ctxt
->nbentities
, diff
;
150 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
,
151 XML_SUBSTITUTE_REF
, 0, 0, 0);
153 if ((rep
== NULL
) || (ctxt
->errNo
== XML_ERR_ENTITY_LOOP
)) {
157 diff
= ctxt
->nbentities
- oldnbent
+ 1;
158 if (diff
> INT_MAX
/ 2)
160 ent
->checked
= diff
* 2;
162 if (xmlStrchr(rep
, '<'))
170 * Prevent entity exponential check, not just replacement while
172 * The check is potentially costly so do that only once in a thousand
174 if ((ctxt
->instate
== XML_PARSER_DTD
) && (ctxt
->nbentities
> 10000) &&
175 (ctxt
->nbentities
% 1024 == 0)) {
176 for (i
= 0;i
< ctxt
->inputNr
;i
++) {
177 consumed
+= ctxt
->inputTab
[i
]->consumed
+
178 (ctxt
->inputTab
[i
]->cur
- ctxt
->inputTab
[i
]->base
);
180 if (ctxt
->nbentities
> consumed
* XML_PARSER_NON_LINEAR
) {
181 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
182 ctxt
->instate
= XML_PARSER_EOF
;
190 if (replacement
!= 0) {
191 if (replacement
< XML_MAX_TEXT_LENGTH
)
195 * If the volume of entity copy reaches 10 times the
196 * amount of parsed data and over the large text threshold
197 * then that's very likely to be an abuse.
199 if (ctxt
->input
!= NULL
) {
200 consumed
= ctxt
->input
->consumed
+
201 (ctxt
->input
->cur
- ctxt
->input
->base
);
203 consumed
+= ctxt
->sizeentities
;
205 if (replacement
< XML_PARSER_NON_LINEAR
* consumed
)
207 } else if (size
!= 0) {
209 * Do the check based on the replacement size of the entity
211 if (size
< XML_PARSER_BIG_ENTITY
)
215 * A limit on the amount of text data reasonably used
217 if (ctxt
->input
!= NULL
) {
218 consumed
= ctxt
->input
->consumed
+
219 (ctxt
->input
->cur
- ctxt
->input
->base
);
221 consumed
+= ctxt
->sizeentities
;
223 if ((size
< XML_PARSER_NON_LINEAR
* consumed
) &&
224 (ctxt
->nbentities
* 3 < XML_PARSER_NON_LINEAR
* consumed
))
226 } else if (ent
!= NULL
) {
228 * use the number of parsed entities in the replacement
230 size
= ent
->checked
/ 2;
233 * The amount of data parsed counting entities size only once
235 if (ctxt
->input
!= NULL
) {
236 consumed
= ctxt
->input
->consumed
+
237 (ctxt
->input
->cur
- ctxt
->input
->base
);
239 consumed
+= ctxt
->sizeentities
;
242 * Check the density of entities for the amount of data
243 * knowing an entity reference will take at least 3 bytes
245 if (size
* 3 < consumed
* XML_PARSER_NON_LINEAR
)
249 * strange we got no data for checking
251 if (((ctxt
->lastError
.code
!= XML_ERR_UNDECLARED_ENTITY
) &&
252 (ctxt
->lastError
.code
!= XML_WAR_UNDECLARED_ENTITY
)) ||
253 (ctxt
->nbentities
<= 10000))
256 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
263 * arbitrary depth limit for the XML documents that we allow to
264 * process. This is not a limitation of the parser but a safety
265 * boundary feature. It can be disabled with the XML_PARSE_HUGE
268 unsigned int xmlParserMaxDepth
= 256;
273 #define XML_PARSER_BIG_BUFFER_SIZE 300
274 #define XML_PARSER_BUFFER_SIZE 100
275 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
278 * XML_PARSER_CHUNK_SIZE
280 * When calling GROW that's the minimal amount of data
281 * the parser expects to have received. It is not a hard
282 * limit but an optimization when reading strings like Names.
283 * It is not strictly needed as long as the input's available characters
284 * are followed by 0, which should be provided by the I/O level
286 #define XML_PARSER_CHUNK_SIZE 100
289 * List of XML prefixed PI allowed by W3C specs
292 static const char* const xmlW3CPIs
[] = {
299 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
300 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt
,
301 const xmlChar
**str
);
303 static xmlParserErrors
304 xmlParseExternalEntityPrivate(xmlDocPtr doc
, xmlParserCtxtPtr oldctxt
,
305 xmlSAXHandlerPtr sax
,
306 void *user_data
, int depth
, const xmlChar
*URL
,
307 const xmlChar
*ID
, xmlNodePtr
*list
);
310 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt
, int options
,
311 const char *encoding
);
312 #ifdef LIBXML_LEGACY_ENABLED
314 xmlAddEntityReference(xmlEntityPtr ent
, xmlNodePtr firstNode
,
315 xmlNodePtr lastNode
);
316 #endif /* LIBXML_LEGACY_ENABLED */
318 static xmlParserErrors
319 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt
,
320 const xmlChar
*string
, void *user_data
, xmlNodePtr
*lst
);
323 xmlLoadEntityContent(xmlParserCtxtPtr ctxt
, xmlEntityPtr entity
);
325 /************************************************************************
327 * Some factorized error routines *
329 ************************************************************************/
332 * xmlErrAttributeDup:
333 * @ctxt: an XML parser context
334 * @prefix: the attribute prefix
335 * @localname: the attribute localname
337 * Handle a redefinition of attribute error
340 xmlErrAttributeDup(xmlParserCtxtPtr ctxt
, const xmlChar
* prefix
,
341 const xmlChar
* localname
)
343 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
344 (ctxt
->instate
== XML_PARSER_EOF
))
347 ctxt
->errNo
= XML_ERR_ATTRIBUTE_REDEFINED
;
350 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
,
351 XML_ERR_ATTRIBUTE_REDEFINED
, XML_ERR_FATAL
, NULL
, 0,
352 (const char *) localname
, NULL
, NULL
, 0, 0,
353 "Attribute %s redefined\n", localname
);
355 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
,
356 XML_ERR_ATTRIBUTE_REDEFINED
, XML_ERR_FATAL
, NULL
, 0,
357 (const char *) prefix
, (const char *) localname
,
358 NULL
, 0, 0, "Attribute %s:%s redefined\n", prefix
,
361 ctxt
->wellFormed
= 0;
362 if (ctxt
->recovery
== 0)
363 ctxt
->disableSAX
= 1;
369 * @ctxt: an XML parser context
370 * @error: the error number
371 * @info: extra information string
373 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
376 xmlFatalErr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
, const char *info
)
380 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
381 (ctxt
->instate
== XML_PARSER_EOF
))
384 case XML_ERR_INVALID_HEX_CHARREF
:
385 errmsg
= "CharRef: invalid hexadecimal value";
387 case XML_ERR_INVALID_DEC_CHARREF
:
388 errmsg
= "CharRef: invalid decimal value";
390 case XML_ERR_INVALID_CHARREF
:
391 errmsg
= "CharRef: invalid value";
393 case XML_ERR_INTERNAL_ERROR
:
394 errmsg
= "internal error";
396 case XML_ERR_PEREF_AT_EOF
:
397 errmsg
= "PEReference at end of document";
399 case XML_ERR_PEREF_IN_PROLOG
:
400 errmsg
= "PEReference in prolog";
402 case XML_ERR_PEREF_IN_EPILOG
:
403 errmsg
= "PEReference in epilog";
405 case XML_ERR_PEREF_NO_NAME
:
406 errmsg
= "PEReference: no name";
408 case XML_ERR_PEREF_SEMICOL_MISSING
:
409 errmsg
= "PEReference: expecting ';'";
411 case XML_ERR_ENTITY_LOOP
:
412 errmsg
= "Detected an entity reference loop";
414 case XML_ERR_ENTITY_NOT_STARTED
:
415 errmsg
= "EntityValue: \" or ' expected";
417 case XML_ERR_ENTITY_PE_INTERNAL
:
418 errmsg
= "PEReferences forbidden in internal subset";
420 case XML_ERR_ENTITY_NOT_FINISHED
:
421 errmsg
= "EntityValue: \" or ' expected";
423 case XML_ERR_ATTRIBUTE_NOT_STARTED
:
424 errmsg
= "AttValue: \" or ' expected";
426 case XML_ERR_LT_IN_ATTRIBUTE
:
427 errmsg
= "Unescaped '<' not allowed in attributes values";
429 case XML_ERR_LITERAL_NOT_STARTED
:
430 errmsg
= "SystemLiteral \" or ' expected";
432 case XML_ERR_LITERAL_NOT_FINISHED
:
433 errmsg
= "Unfinished System or Public ID \" or ' expected";
435 case XML_ERR_MISPLACED_CDATA_END
:
436 errmsg
= "Sequence ']]>' not allowed in content";
438 case XML_ERR_URI_REQUIRED
:
439 errmsg
= "SYSTEM or PUBLIC, the URI is missing";
441 case XML_ERR_PUBID_REQUIRED
:
442 errmsg
= "PUBLIC, the Public Identifier is missing";
444 case XML_ERR_HYPHEN_IN_COMMENT
:
445 errmsg
= "Comment must not contain '--' (double-hyphen)";
447 case XML_ERR_PI_NOT_STARTED
:
448 errmsg
= "xmlParsePI : no target name";
450 case XML_ERR_RESERVED_XML_NAME
:
451 errmsg
= "Invalid PI name";
453 case XML_ERR_NOTATION_NOT_STARTED
:
454 errmsg
= "NOTATION: Name expected here";
456 case XML_ERR_NOTATION_NOT_FINISHED
:
457 errmsg
= "'>' required to close NOTATION declaration";
459 case XML_ERR_VALUE_REQUIRED
:
460 errmsg
= "Entity value required";
462 case XML_ERR_URI_FRAGMENT
:
463 errmsg
= "Fragment not allowed";
465 case XML_ERR_ATTLIST_NOT_STARTED
:
466 errmsg
= "'(' required to start ATTLIST enumeration";
468 case XML_ERR_NMTOKEN_REQUIRED
:
469 errmsg
= "NmToken expected in ATTLIST enumeration";
471 case XML_ERR_ATTLIST_NOT_FINISHED
:
472 errmsg
= "')' required to finish ATTLIST enumeration";
474 case XML_ERR_MIXED_NOT_STARTED
:
475 errmsg
= "MixedContentDecl : '|' or ')*' expected";
477 case XML_ERR_PCDATA_REQUIRED
:
478 errmsg
= "MixedContentDecl : '#PCDATA' expected";
480 case XML_ERR_ELEMCONTENT_NOT_STARTED
:
481 errmsg
= "ContentDecl : Name or '(' expected";
483 case XML_ERR_ELEMCONTENT_NOT_FINISHED
:
484 errmsg
= "ContentDecl : ',' '|' or ')' expected";
486 case XML_ERR_PEREF_IN_INT_SUBSET
:
488 "PEReference: forbidden within markup decl in internal subset";
490 case XML_ERR_GT_REQUIRED
:
491 errmsg
= "expected '>'";
493 case XML_ERR_CONDSEC_INVALID
:
494 errmsg
= "XML conditional section '[' expected";
496 case XML_ERR_EXT_SUBSET_NOT_FINISHED
:
497 errmsg
= "Content error in the external subset";
499 case XML_ERR_CONDSEC_INVALID_KEYWORD
:
501 "conditional section INCLUDE or IGNORE keyword expected";
503 case XML_ERR_CONDSEC_NOT_FINISHED
:
504 errmsg
= "XML conditional section not closed";
506 case XML_ERR_XMLDECL_NOT_STARTED
:
507 errmsg
= "Text declaration '<?xml' required";
509 case XML_ERR_XMLDECL_NOT_FINISHED
:
510 errmsg
= "parsing XML declaration: '?>' expected";
512 case XML_ERR_EXT_ENTITY_STANDALONE
:
513 errmsg
= "external parsed entities cannot be standalone";
515 case XML_ERR_ENTITYREF_SEMICOL_MISSING
:
516 errmsg
= "EntityRef: expecting ';'";
518 case XML_ERR_DOCTYPE_NOT_FINISHED
:
519 errmsg
= "DOCTYPE improperly terminated";
521 case XML_ERR_LTSLASH_REQUIRED
:
522 errmsg
= "EndTag: '</' not found";
524 case XML_ERR_EQUAL_REQUIRED
:
525 errmsg
= "expected '='";
527 case XML_ERR_STRING_NOT_CLOSED
:
528 errmsg
= "String not closed expecting \" or '";
530 case XML_ERR_STRING_NOT_STARTED
:
531 errmsg
= "String not started expecting ' or \"";
533 case XML_ERR_ENCODING_NAME
:
534 errmsg
= "Invalid XML encoding name";
536 case XML_ERR_STANDALONE_VALUE
:
537 errmsg
= "standalone accepts only 'yes' or 'no'";
539 case XML_ERR_DOCUMENT_EMPTY
:
540 errmsg
= "Document is empty";
542 case XML_ERR_DOCUMENT_END
:
543 errmsg
= "Extra content at the end of the document";
545 case XML_ERR_NOT_WELL_BALANCED
:
546 errmsg
= "chunk is not well balanced";
548 case XML_ERR_EXTRA_CONTENT
:
549 errmsg
= "extra content at the end of well balanced chunk";
551 case XML_ERR_VERSION_MISSING
:
552 errmsg
= "Malformed declaration expecting version";
554 case XML_ERR_NAME_TOO_LONG
:
555 errmsg
= "Name too long use XML_PARSE_HUGE option";
563 errmsg
= "Unregistered error message";
568 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
, error
,
569 XML_ERR_FATAL
, NULL
, 0, info
, NULL
, NULL
, 0, 0, "%s\n",
572 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
, error
,
573 XML_ERR_FATAL
, NULL
, 0, info
, NULL
, NULL
, 0, 0, "%s: %s\n",
577 ctxt
->wellFormed
= 0;
578 if (ctxt
->recovery
== 0)
579 ctxt
->disableSAX
= 1;
585 * @ctxt: an XML parser context
586 * @error: the error number
587 * @msg: the error message
589 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
591 static void LIBXML_ATTR_FORMAT(3,0)
592 xmlFatalErrMsg(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
595 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
596 (ctxt
->instate
== XML_PARSER_EOF
))
600 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
, error
,
601 XML_ERR_FATAL
, NULL
, 0, NULL
, NULL
, NULL
, 0, 0, "%s", msg
);
603 ctxt
->wellFormed
= 0;
604 if (ctxt
->recovery
== 0)
605 ctxt
->disableSAX
= 1;
611 * @ctxt: an XML parser context
612 * @error: the error number
613 * @msg: the error message
619 static void LIBXML_ATTR_FORMAT(3,0)
620 xmlWarningMsg(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
621 const char *msg
, const xmlChar
*str1
, const xmlChar
*str2
)
623 xmlStructuredErrorFunc schannel
= NULL
;
625 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
626 (ctxt
->instate
== XML_PARSER_EOF
))
628 if ((ctxt
!= NULL
) && (ctxt
->sax
!= NULL
) &&
629 (ctxt
->sax
->initialized
== XML_SAX2_MAGIC
))
630 schannel
= ctxt
->sax
->serror
;
632 __xmlRaiseError(schannel
,
633 (ctxt
->sax
) ? ctxt
->sax
->warning
: NULL
,
635 ctxt
, NULL
, XML_FROM_PARSER
, error
,
636 XML_ERR_WARNING
, NULL
, 0,
637 (const char *) str1
, (const char *) str2
, NULL
, 0, 0,
638 msg
, (const char *) str1
, (const char *) str2
);
640 __xmlRaiseError(schannel
, NULL
, NULL
,
641 ctxt
, NULL
, XML_FROM_PARSER
, error
,
642 XML_ERR_WARNING
, NULL
, 0,
643 (const char *) str1
, (const char *) str2
, NULL
, 0, 0,
644 msg
, (const char *) str1
, (const char *) str2
);
650 * @ctxt: an XML parser context
651 * @error: the error number
652 * @msg: the error message
655 * Handle a validity error.
657 static void LIBXML_ATTR_FORMAT(3,0)
658 xmlValidityError(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
659 const char *msg
, const xmlChar
*str1
, const xmlChar
*str2
)
661 xmlStructuredErrorFunc schannel
= NULL
;
663 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
664 (ctxt
->instate
== XML_PARSER_EOF
))
668 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->initialized
== XML_SAX2_MAGIC
))
669 schannel
= ctxt
->sax
->serror
;
672 __xmlRaiseError(schannel
,
673 ctxt
->vctxt
.error
, ctxt
->vctxt
.userData
,
674 ctxt
, NULL
, XML_FROM_DTD
, error
,
675 XML_ERR_ERROR
, NULL
, 0, (const char *) str1
,
676 (const char *) str2
, NULL
, 0, 0,
677 msg
, (const char *) str1
, (const char *) str2
);
680 __xmlRaiseError(schannel
, NULL
, NULL
,
681 ctxt
, NULL
, XML_FROM_DTD
, error
,
682 XML_ERR_ERROR
, NULL
, 0, (const char *) str1
,
683 (const char *) str2
, NULL
, 0, 0,
684 msg
, (const char *) str1
, (const char *) str2
);
690 * @ctxt: an XML parser context
691 * @error: the error number
692 * @msg: the error message
693 * @val: an integer value
695 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
697 static void LIBXML_ATTR_FORMAT(3,0)
698 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
699 const char *msg
, int val
)
701 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
702 (ctxt
->instate
== XML_PARSER_EOF
))
706 __xmlRaiseError(NULL
, NULL
, NULL
,
707 ctxt
, NULL
, XML_FROM_PARSER
, error
, XML_ERR_FATAL
,
708 NULL
, 0, NULL
, NULL
, NULL
, val
, 0, msg
, val
);
710 ctxt
->wellFormed
= 0;
711 if (ctxt
->recovery
== 0)
712 ctxt
->disableSAX
= 1;
717 * xmlFatalErrMsgStrIntStr:
718 * @ctxt: an XML parser context
719 * @error: the error number
720 * @msg: the error message
721 * @str1: a string info
722 * @val: an integer value
723 * @str2: a string info
725 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
727 static void LIBXML_ATTR_FORMAT(3,0)
728 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
729 const char *msg
, const xmlChar
*str1
, int val
,
732 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
733 (ctxt
->instate
== XML_PARSER_EOF
))
737 __xmlRaiseError(NULL
, NULL
, NULL
,
738 ctxt
, NULL
, XML_FROM_PARSER
, error
, XML_ERR_FATAL
,
739 NULL
, 0, (const char *) str1
, (const char *) str2
,
740 NULL
, val
, 0, msg
, str1
, val
, str2
);
742 ctxt
->wellFormed
= 0;
743 if (ctxt
->recovery
== 0)
744 ctxt
->disableSAX
= 1;
750 * @ctxt: an XML parser context
751 * @error: the error number
752 * @msg: the error message
753 * @val: a string value
755 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
757 static void LIBXML_ATTR_FORMAT(3,0)
758 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
759 const char *msg
, const xmlChar
* val
)
761 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
762 (ctxt
->instate
== XML_PARSER_EOF
))
766 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
,
767 XML_FROM_PARSER
, error
, XML_ERR_FATAL
,
768 NULL
, 0, (const char *) val
, NULL
, NULL
, 0, 0, msg
,
771 ctxt
->wellFormed
= 0;
772 if (ctxt
->recovery
== 0)
773 ctxt
->disableSAX
= 1;
779 * @ctxt: an XML parser context
780 * @error: the error number
781 * @msg: the error message
782 * @val: a string value
784 * Handle a non fatal parser error
786 static void LIBXML_ATTR_FORMAT(3,0)
787 xmlErrMsgStr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
788 const char *msg
, const xmlChar
* val
)
790 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
791 (ctxt
->instate
== XML_PARSER_EOF
))
795 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
,
796 XML_FROM_PARSER
, error
, XML_ERR_ERROR
,
797 NULL
, 0, (const char *) val
, NULL
, NULL
, 0, 0, msg
,
803 * @ctxt: an XML parser context
804 * @error: the error number
806 * @info1: extra information string
807 * @info2: extra information string
809 * Handle a namespace error (raised at error level, not fatal); clears nsWellFormed
811 static void LIBXML_ATTR_FORMAT(3,0)
812 xmlNsErr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
814 const xmlChar
* info1
, const xmlChar
* info2
,
815 const xmlChar
* info3
)
817 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
818 (ctxt
->instate
== XML_PARSER_EOF
))
822 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_NAMESPACE
, error
,
823 XML_ERR_ERROR
, NULL
, 0, (const char *) info1
,
824 (const char *) info2
, (const char *) info3
, 0, 0, msg
,
825 info1
, info2
, info3
);
827 ctxt
->nsWellFormed
= 0;
832 * @ctxt: an XML parser context
833 * @error: the error number
835 * @info1: extra information string
836 * @info2: extra information string
838 * Handle a namespace warning error
840 static void LIBXML_ATTR_FORMAT(3,0)
841 xmlNsWarn(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
843 const xmlChar
* info1
, const xmlChar
* info2
,
844 const xmlChar
* info3
)
846 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
847 (ctxt
->instate
== XML_PARSER_EOF
))
849 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_NAMESPACE
, error
,
850 XML_ERR_WARNING
, NULL
, 0, (const char *) info1
,
851 (const char *) info2
, (const char *) info3
, 0, 0, msg
,
852 info1
, info2
, info3
);
855 /************************************************************************
857 * Library wide options *
859 ************************************************************************/
863 * @feature: the feature to be examined
865 * Examines if the library has been compiled with a given feature.
867 * Returns a non-zero value if the feature exists, otherwise zero.
868 * Zero (0) is also returned when an unknown
869 * feature is requested.
872 xmlHasFeature(xmlFeature feature
)
875 case XML_WITH_THREAD
:
876 #ifdef LIBXML_THREAD_ENABLED
882 #ifdef LIBXML_TREE_ENABLED
887 case XML_WITH_OUTPUT
:
888 #ifdef LIBXML_OUTPUT_ENABLED
894 #ifdef LIBXML_PUSH_ENABLED
899 case XML_WITH_READER
:
900 #ifdef LIBXML_READER_ENABLED
905 case XML_WITH_PATTERN
:
906 #ifdef LIBXML_PATTERN_ENABLED
911 case XML_WITH_WRITER
:
912 #ifdef LIBXML_WRITER_ENABLED
918 #ifdef LIBXML_SAX1_ENABLED
924 #ifdef LIBXML_FTP_ENABLED
930 #ifdef LIBXML_HTTP_ENABLED
936 #ifdef LIBXML_VALID_ENABLED
942 #ifdef LIBXML_HTML_ENABLED
947 case XML_WITH_LEGACY
:
948 #ifdef LIBXML_LEGACY_ENABLED
954 #ifdef LIBXML_C14N_ENABLED
959 case XML_WITH_CATALOG
:
960 #ifdef LIBXML_CATALOG_ENABLED
966 #ifdef LIBXML_XPATH_ENABLED
972 #ifdef LIBXML_XPTR_ENABLED
977 case XML_WITH_XINCLUDE
:
978 #ifdef LIBXML_XINCLUDE_ENABLED
984 #ifdef LIBXML_ICONV_ENABLED
989 case XML_WITH_ISO8859X
:
990 #ifdef LIBXML_ISO8859X_ENABLED
995 case XML_WITH_UNICODE
:
996 #ifdef LIBXML_UNICODE_ENABLED
1001 case XML_WITH_REGEXP
:
1002 #ifdef LIBXML_REGEXP_ENABLED
1007 case XML_WITH_AUTOMATA
:
1008 #ifdef LIBXML_AUTOMATA_ENABLED
1014 #ifdef LIBXML_EXPR_ENABLED
1019 case XML_WITH_SCHEMAS
:
1020 #ifdef LIBXML_SCHEMAS_ENABLED
1025 case XML_WITH_SCHEMATRON
:
1026 #ifdef LIBXML_SCHEMATRON_ENABLED
1031 case XML_WITH_MODULES
:
1032 #ifdef LIBXML_MODULES_ENABLED
1037 case XML_WITH_DEBUG
:
1038 #ifdef LIBXML_DEBUG_ENABLED
1043 case XML_WITH_DEBUG_MEM
:
1044 #ifdef DEBUG_MEMORY_LOCATION
1049 case XML_WITH_DEBUG_RUN
:
1050 #ifdef LIBXML_DEBUG_RUNTIME
1056 #ifdef LIBXML_ZLIB_ENABLED
1062 #ifdef LIBXML_LZMA_ENABLED
1068 #ifdef LIBXML_ICU_ENABLED
1079 /************************************************************************
1081 * SAX2 defaulted attributes handling *
1083 ************************************************************************/
1087 * @ctxt: an XML parser context
1089 * Do the SAX2 detection and specific initialization
1092 xmlDetectSAX2(xmlParserCtxtPtr ctxt
) {
1093 xmlSAXHandlerPtr sax
;
1095 /* Avoid unused variable warning if features are disabled. */
1098 if (ctxt
== NULL
) return;
1100 #ifdef LIBXML_SAX1_ENABLED
1101 if ((sax
) && (sax
->initialized
== XML_SAX2_MAGIC
) &&
1102 ((sax
->startElementNs
!= NULL
) ||
1103 (sax
->endElementNs
!= NULL
) ||
1104 ((sax
->startElement
== NULL
) && (sax
->endElement
== NULL
))))
1108 #endif /* LIBXML_SAX1_ENABLED */
1110 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
1111 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
1112 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
1113 if ((ctxt
->str_xml
==NULL
) || (ctxt
->str_xmlns
==NULL
) ||
1114 (ctxt
->str_xml_ns
== NULL
)) {
1115 xmlErrMemory(ctxt
, NULL
);
1119 typedef struct _xmlDefAttrs xmlDefAttrs
;
1120 typedef xmlDefAttrs
*xmlDefAttrsPtr
;
1121 struct _xmlDefAttrs
{
1122 int nbAttrs
; /* number of defaulted attributes on that element */
1123 int maxAttrs
; /* the size of the array */
1124 #if __STDC_VERSION__ >= 199901L
1125 /* Using a C99 flexible array member avoids UBSan errors. */
1126 const xmlChar
*values
[]; /* array of localname/prefix/values/external */
1128 const xmlChar
*values
[5];
1133 * xmlAttrNormalizeSpace:
1134 * @src: the source string
1135 * @dst: the target string
1137 * Normalize the space in non CDATA attribute values:
1138 * If the attribute type is not CDATA, then the XML processor MUST further
1139 * process the normalized attribute value by discarding any leading and
1140 * trailing space (#x20) characters, and by replacing sequences of space
1141 * (#x20) characters by a single space (#x20) character.
1142 * Note that the size of dst needs to be at least that of src, and if one doesn't need
1143 * to preserve src (and it doesn't come from a dictionary or read-only memory) then
1144 * passing src as dst is just fine.
1146 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1150 xmlAttrNormalizeSpace(const xmlChar
*src
, xmlChar
*dst
)
1152 if ((src
== NULL
) || (dst
== NULL
))
1155 while (*src
== 0x20) src
++;
1158 while (*src
== 0x20) src
++;
1172 * xmlAttrNormalizeSpace2:
1173 * @src: the source string
1175 * Normalize the space in non CDATA attribute values, a slightly more complex
1176 * front end to avoid allocation problems when running on attribute values
1177 * coming from the input.
1179 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1182 static const xmlChar
*
1183 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt
, xmlChar
*src
, int *len
)
1186 int remove_head
= 0;
1187 int need_realloc
= 0;
1190 if ((ctxt
== NULL
) || (src
== NULL
) || (len
== NULL
))
1197 while (*cur
== 0x20) {
1204 if ((*cur
== 0x20) || (*cur
== 0)) {
1214 ret
= xmlStrndup(src
+ remove_head
, i
- remove_head
+ 1);
1216 xmlErrMemory(ctxt
, NULL
);
1219 xmlAttrNormalizeSpace(ret
, ret
);
1220 *len
= (int) strlen((const char *)ret
);
1222 } else if (remove_head
) {
1223 *len
-= remove_head
;
1224 memmove(src
, src
+ remove_head
, 1 + *len
);
1232 * @ctxt: an XML parser context
1233 * @fullname: the element fullname
1234 * @fullattr: the attribute fullname
1235 * @value: the attribute value
1237 * Add a defaulted attribute for an element
1240 xmlAddDefAttrs(xmlParserCtxtPtr ctxt
,
1241 const xmlChar
*fullname
,
1242 const xmlChar
*fullattr
,
1243 const xmlChar
*value
) {
1244 xmlDefAttrsPtr defaults
;
1246 const xmlChar
*name
;
1247 const xmlChar
*prefix
;
1250 * Allows to detect attribute redefinitions
1252 if (ctxt
->attsSpecial
!= NULL
) {
1253 if (xmlHashLookup2(ctxt
->attsSpecial
, fullname
, fullattr
) != NULL
)
1257 if (ctxt
->attsDefault
== NULL
) {
1258 ctxt
->attsDefault
= xmlHashCreateDict(10, ctxt
->dict
);
1259 if (ctxt
->attsDefault
== NULL
)
1264 * split the element name into prefix:localname , the string found
1265 * are within the DTD and then not associated to namespace names.
1267 name
= xmlSplitQName3(fullname
, &len
);
1269 name
= xmlDictLookup(ctxt
->dict
, fullname
, -1);
1272 name
= xmlDictLookup(ctxt
->dict
, name
, -1);
1273 prefix
= xmlDictLookup(ctxt
->dict
, fullname
, len
);
1277 * make sure there is some storage
1279 defaults
= xmlHashLookup2(ctxt
->attsDefault
, name
, prefix
);
1280 if (defaults
== NULL
) {
1281 defaults
= (xmlDefAttrsPtr
) xmlMalloc(sizeof(xmlDefAttrs
) +
1282 (4 * 5) * sizeof(const xmlChar
*));
1283 if (defaults
== NULL
)
1285 defaults
->nbAttrs
= 0;
1286 defaults
->maxAttrs
= 4;
1287 if (xmlHashUpdateEntry2(ctxt
->attsDefault
, name
, prefix
,
1288 defaults
, NULL
) < 0) {
1292 } else if (defaults
->nbAttrs
>= defaults
->maxAttrs
) {
1293 xmlDefAttrsPtr temp
;
1295 temp
= (xmlDefAttrsPtr
) xmlRealloc(defaults
, sizeof(xmlDefAttrs
) +
1296 (2 * defaults
->maxAttrs
* 5) * sizeof(const xmlChar
*));
1300 defaults
->maxAttrs
*= 2;
1301 if (xmlHashUpdateEntry2(ctxt
->attsDefault
, name
, prefix
,
1302 defaults
, NULL
) < 0) {
1309 * Split the attribute name into prefix:localname, the strings found
1310 * are within the DTD and then not associated with namespace names.
1312 name
= xmlSplitQName3(fullattr
, &len
);
1314 name
= xmlDictLookup(ctxt
->dict
, fullattr
, -1);
1317 name
= xmlDictLookup(ctxt
->dict
, name
, -1);
1318 prefix
= xmlDictLookup(ctxt
->dict
, fullattr
, len
);
1321 defaults
->values
[5 * defaults
->nbAttrs
] = name
;
1322 defaults
->values
[5 * defaults
->nbAttrs
+ 1] = prefix
;
1323 /* intern the string and precompute the end */
1324 len
= xmlStrlen(value
);
1325 value
= xmlDictLookup(ctxt
->dict
, value
, len
);
1326 defaults
->values
[5 * defaults
->nbAttrs
+ 2] = value
;
1327 defaults
->values
[5 * defaults
->nbAttrs
+ 3] = value
+ len
;
1329 defaults
->values
[5 * defaults
->nbAttrs
+ 4] = BAD_CAST
"external";
1331 defaults
->values
[5 * defaults
->nbAttrs
+ 4] = NULL
;
1332 defaults
->nbAttrs
++;
1337 xmlErrMemory(ctxt
, NULL
);
1342 * xmlAddSpecialAttr:
1343 * @ctxt: an XML parser context
1344 * @fullname: the element fullname
1345 * @fullattr: the attribute fullname
1346 * @type: the attribute type
1348 * Register this attribute type
1351 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt
,
1352 const xmlChar
*fullname
,
1353 const xmlChar
*fullattr
,
1356 if (ctxt
->attsSpecial
== NULL
) {
1357 ctxt
->attsSpecial
= xmlHashCreateDict(10, ctxt
->dict
);
1358 if (ctxt
->attsSpecial
== NULL
)
1362 if (xmlHashLookup2(ctxt
->attsSpecial
, fullname
, fullattr
) != NULL
)
1365 xmlHashAddEntry2(ctxt
->attsSpecial
, fullname
, fullattr
,
1366 (void *) (ptrdiff_t) type
);
1370 xmlErrMemory(ctxt
, NULL
);
1375 * xmlCleanSpecialAttrCallback:
1377 * Removes CDATA attributes from the special attribute table
1380 xmlCleanSpecialAttrCallback(void *payload
, void *data
,
1381 const xmlChar
*fullname
, const xmlChar
*fullattr
,
1382 const xmlChar
*unused ATTRIBUTE_UNUSED
) {
1383 xmlParserCtxtPtr ctxt
= (xmlParserCtxtPtr
) data
;
1385 if (((ptrdiff_t) payload
) == XML_ATTRIBUTE_CDATA
) {
1386 xmlHashRemoveEntry2(ctxt
->attsSpecial
, fullname
, fullattr
, NULL
);
1391 * xmlCleanSpecialAttr:
1392 * @ctxt: an XML parser context
1394 * Trim the list of attributes defined to remove all those of type
1395 * CDATA as they are not special. This call should be made once the DTD
1396 * has been parsed and before starting to parse the document root.
1399 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt
)
1401 if (ctxt
->attsSpecial
== NULL
)
1404 xmlHashScanFull(ctxt
->attsSpecial
, xmlCleanSpecialAttrCallback
, ctxt
);
1406 if (xmlHashSize(ctxt
->attsSpecial
) == 0) {
1407 xmlHashFree(ctxt
->attsSpecial
, NULL
);
1408 ctxt
->attsSpecial
= NULL
;
1414 * xmlCheckLanguageID:
1415 * @lang: pointer to the string value
1417 * Checks that the value conforms to the LanguageID production:
1419 * NOTE: this is somewhat deprecated, those productions were removed from
1420 * the XML Second edition.
1422 * [33] LanguageID ::= Langcode ('-' Subcode)*
1423 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1424 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1425 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1426 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1427 * [38] Subcode ::= ([a-z] | [A-Z])+
1429 * The current REC references the successors of RFC 1766, currently 5646
1431 * http://www.rfc-editor.org/rfc/rfc5646.txt
1432 * langtag = language
1438 * language = 2*3ALPHA ; shortest ISO 639 code
1439 * ["-" extlang] ; sometimes followed by
1440 * ; extended language subtags
1441 * / 4ALPHA ; or reserved for future use
1442 * / 5*8ALPHA ; or registered language subtag
1444 * extlang = 3ALPHA ; selected ISO 639 codes
1445 * *2("-" 3ALPHA) ; permanently reserved
1447 * script = 4ALPHA ; ISO 15924 code
1449 * region = 2ALPHA ; ISO 3166-1 code
1450 * / 3DIGIT ; UN M.49 code
1452 * variant = 5*8alphanum ; registered variants
1453 * / (DIGIT 3alphanum)
1455 * extension = singleton 1*("-" (2*8alphanum))
1457 * ; Single alphanumerics
1458 * ; "x" reserved for private use
1459 * singleton = DIGIT ; 0 - 9
1465 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1466 * The parser below doesn't try to cope with extension or privateuse
1467 * that could be added but that's not interoperable anyway
1469 * Returns 1 if correct 0 otherwise
1472 xmlCheckLanguageID(const xmlChar
* lang
)
1474 const xmlChar
*cur
= lang
, *nxt
;
1478 if (((cur
[0] == 'i') && (cur
[1] == '-')) ||
1479 ((cur
[0] == 'I') && (cur
[1] == '-')) ||
1480 ((cur
[0] == 'x') && (cur
[1] == '-')) ||
1481 ((cur
[0] == 'X') && (cur
[1] == '-'))) {
1483 * Still allow IANA code and user code which were coming
1484 * from the previous version of the XML-1.0 specification
1485 * it's deprecated but we should not fail
1488 while (((cur
[0] >= 'A') && (cur
[0] <= 'Z')) ||
1489 ((cur
[0] >= 'a') && (cur
[0] <= 'z')))
1491 return(cur
[0] == 0);
1494 while (((nxt
[0] >= 'A') && (nxt
[0] <= 'Z')) ||
1495 ((nxt
[0] >= 'a') && (nxt
[0] <= 'z')))
1497 if (nxt
- cur
>= 4) {
1501 if ((nxt
- cur
> 8) || (nxt
[0] != 0))
1507 /* we got an ISO 639 code */
1515 /* now we can have extlang or script or region or variant */
1516 if ((nxt
[0] >= '0') && (nxt
[0] <= '9'))
1519 while (((nxt
[0] >= 'A') && (nxt
[0] <= 'Z')) ||
1520 ((nxt
[0] >= 'a') && (nxt
[0] <= 'z')))
1526 if ((nxt
- cur
>= 5) && (nxt
- cur
<= 8))
1530 /* we parsed an extlang */
1538 /* now we can have script or region or variant */
1539 if ((nxt
[0] >= '0') && (nxt
[0] <= '9'))
1542 while (((nxt
[0] >= 'A') && (nxt
[0] <= 'Z')) ||
1543 ((nxt
[0] >= 'a') && (nxt
[0] <= 'z')))
1547 if ((nxt
- cur
>= 5) && (nxt
- cur
<= 8))
1551 /* we parsed a script */
1560 /* now we can have region or variant */
1561 if ((nxt
[0] >= '0') && (nxt
[0] <= '9'))
1564 while (((nxt
[0] >= 'A') && (nxt
[0] <= 'Z')) ||
1565 ((nxt
[0] >= 'a') && (nxt
[0] <= 'z')))
1568 if ((nxt
- cur
>= 5) && (nxt
- cur
<= 8))
1572 /* we parsed a region */
1581 /* now we can just have a variant */
1582 while (((nxt
[0] >= 'A') && (nxt
[0] <= 'Z')) ||
1583 ((nxt
[0] >= 'a') && (nxt
[0] <= 'z')))
1586 if ((nxt
- cur
< 5) || (nxt
- cur
> 8))
1589 /* we parsed a variant */
1595 /* extensions and private use subtags not checked */
1599 if (((nxt
[1] >= '0') && (nxt
[1] <= '9')) &&
1600 ((nxt
[2] >= '0') && (nxt
[2] <= '9'))) {
1607 /************************************************************************
1609 * Parser stacks related functions and macros *
1611 ************************************************************************/
1613 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt
,
1614 const xmlChar
** str
);
1619 * @ctxt: an XML parser context
1620 * @prefix: the namespace prefix or NULL
1621 * @URL: the namespace name
1623 * Pushes a new parser namespace on top of the ns stack
1625 * Returns -1 in case of error, -2 if the namespace should be discarded
1626 * and the index in the stack otherwise.
1629 nsPush(xmlParserCtxtPtr ctxt
, const xmlChar
*prefix
, const xmlChar
*URL
)
1631 if (ctxt
->options
& XML_PARSE_NSCLEAN
) {
1633 for (i
= ctxt
->nsNr
- 2;i
>= 0;i
-= 2) {
1634 if (ctxt
->nsTab
[i
] == prefix
) {
1636 if (ctxt
->nsTab
[i
+ 1] == URL
)
1638 /* out of scope keep it */
1643 if ((ctxt
->nsMax
== 0) || (ctxt
->nsTab
== NULL
)) {
1646 ctxt
->nsTab
= (const xmlChar
**)
1647 xmlMalloc(ctxt
->nsMax
* sizeof(xmlChar
*));
1648 if (ctxt
->nsTab
== NULL
) {
1649 xmlErrMemory(ctxt
, NULL
);
1653 } else if (ctxt
->nsNr
>= ctxt
->nsMax
) {
1654 const xmlChar
** tmp
;
1656 tmp
= (const xmlChar
**) xmlRealloc((char *) ctxt
->nsTab
,
1657 ctxt
->nsMax
* sizeof(ctxt
->nsTab
[0]));
1659 xmlErrMemory(ctxt
, NULL
);
1665 ctxt
->nsTab
[ctxt
->nsNr
++] = prefix
;
1666 ctxt
->nsTab
[ctxt
->nsNr
++] = URL
;
1667 return (ctxt
->nsNr
);
1671 * @ctxt: an XML parser context
1672 * @nr: the number to pop
1674 * Pops the top @nr parser prefix/namespace from the ns stack
1676 * Returns the number of namespaces removed
1679 nsPop(xmlParserCtxtPtr ctxt
, int nr
)
1683 if (ctxt
->nsTab
== NULL
) return(0);
1684 if (ctxt
->nsNr
< nr
) {
1685 xmlGenericError(xmlGenericErrorContext
, "Pbm popping %d NS\n", nr
);
1688 if (ctxt
->nsNr
<= 0)
1691 for (i
= 0;i
< nr
;i
++) {
1693 ctxt
->nsTab
[ctxt
->nsNr
] = NULL
;
1700 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt
, int nr
) {
1701 const xmlChar
**atts
;
1705 if (ctxt
->atts
== NULL
) {
1706 maxatts
= 55; /* allow for 10 attrs by default */
1707 atts
= (const xmlChar
**)
1708 xmlMalloc(maxatts
* sizeof(xmlChar
*));
1709 if (atts
== NULL
) goto mem_error
;
1711 attallocs
= (int *) xmlMalloc((maxatts
/ 5) * sizeof(int));
1712 if (attallocs
== NULL
) goto mem_error
;
1713 ctxt
->attallocs
= attallocs
;
1714 ctxt
->maxatts
= maxatts
;
1715 } else if (nr
+ 5 > ctxt
->maxatts
) {
1716 maxatts
= (nr
+ 5) * 2;
1717 atts
= (const xmlChar
**) xmlRealloc((void *) ctxt
->atts
,
1718 maxatts
* sizeof(const xmlChar
*));
1719 if (atts
== NULL
) goto mem_error
;
1721 attallocs
= (int *) xmlRealloc((void *) ctxt
->attallocs
,
1722 (maxatts
/ 5) * sizeof(int));
1723 if (attallocs
== NULL
) goto mem_error
;
1724 ctxt
->attallocs
= attallocs
;
1725 ctxt
->maxatts
= maxatts
;
1727 return(ctxt
->maxatts
);
1729 xmlErrMemory(ctxt
, NULL
);
1735 * @ctxt: an XML parser context
1736 * @value: the parser input
1738 * Pushes a new parser input on top of the input stack
1740 * Returns -1 in case of error, the index in the stack otherwise
1743 inputPush(xmlParserCtxtPtr ctxt
, xmlParserInputPtr value
)
1745 if ((ctxt
== NULL
) || (value
== NULL
))
1747 if (ctxt
->inputNr
>= ctxt
->inputMax
) {
1748 ctxt
->inputMax
*= 2;
1750 (xmlParserInputPtr
*) xmlRealloc(ctxt
->inputTab
,
1752 sizeof(ctxt
->inputTab
[0]));
1753 if (ctxt
->inputTab
== NULL
) {
1754 xmlErrMemory(ctxt
, NULL
);
1755 ctxt
->inputMax
/= 2;
1759 ctxt
->inputTab
[ctxt
->inputNr
] = value
;
1760 ctxt
->input
= value
;
1761 return (ctxt
->inputNr
++);
1765 * @ctxt: an XML parser context
1767 * Pops the top parser input from the input stack
1769 * Returns the input just removed
1772 inputPop(xmlParserCtxtPtr ctxt
)
1774 xmlParserInputPtr ret
;
1778 if (ctxt
->inputNr
<= 0)
1781 if (ctxt
->inputNr
> 0)
1782 ctxt
->input
= ctxt
->inputTab
[ctxt
->inputNr
- 1];
1785 ret
= ctxt
->inputTab
[ctxt
->inputNr
];
1786 ctxt
->inputTab
[ctxt
->inputNr
] = NULL
;
1791 * @ctxt: an XML parser context
1792 * @value: the element node
1794 * Pushes a new element node on top of the node stack
1796 * Returns -1 in case of error, the index in the stack otherwise
1799 nodePush(xmlParserCtxtPtr ctxt
, xmlNodePtr value
)
1801 if (ctxt
== NULL
) return(0);
1802 if (ctxt
->nodeNr
>= ctxt
->nodeMax
) {
1805 tmp
= (xmlNodePtr
*) xmlRealloc(ctxt
->nodeTab
,
1807 sizeof(ctxt
->nodeTab
[0]));
1809 xmlErrMemory(ctxt
, NULL
);
1812 ctxt
->nodeTab
= tmp
;
1815 if ((((unsigned int) ctxt
->nodeNr
) > xmlParserMaxDepth
) &&
1816 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
1817 xmlFatalErrMsgInt(ctxt
, XML_ERR_INTERNAL_ERROR
,
1818 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1820 xmlHaltParser(ctxt
);
1823 ctxt
->nodeTab
[ctxt
->nodeNr
] = value
;
1825 return (ctxt
->nodeNr
++);
1830 * @ctxt: an XML parser context
1832 * Pops the top element node from the node stack
1834 * Returns the node just removed
1837 nodePop(xmlParserCtxtPtr ctxt
)
1841 if (ctxt
== NULL
) return(NULL
);
1842 if (ctxt
->nodeNr
<= 0)
1845 if (ctxt
->nodeNr
> 0)
1846 ctxt
->node
= ctxt
->nodeTab
[ctxt
->nodeNr
- 1];
1849 ret
= ctxt
->nodeTab
[ctxt
->nodeNr
];
1850 ctxt
->nodeTab
[ctxt
->nodeNr
] = NULL
;
1856 * @ctxt: an XML parser context
1857 * @value: the element name
1858 * @prefix: the element prefix
1859 * @URI: the element namespace name
1860 * @line: the current line number for error messages
1861 * @nsNr: the number of namespaces pushed on the namespace table
1863 * Pushes a new element name/prefix/URL on top of the name stack
1865 * Returns -1 in case of error, the index in the stack otherwise
1868 nameNsPush(xmlParserCtxtPtr ctxt
, const xmlChar
* value
,
1869 const xmlChar
*prefix
, const xmlChar
*URI
, int line
, int nsNr
)
1873 if (ctxt
->nameNr
>= ctxt
->nameMax
) {
1874 const xmlChar
* *tmp
;
1877 tmp
= (const xmlChar
* *) xmlRealloc((xmlChar
* *)ctxt
->nameTab
,
1879 sizeof(ctxt
->nameTab
[0]));
1884 ctxt
->nameTab
= tmp
;
1885 tmp2
= (xmlStartTag
*) xmlRealloc((void * *)ctxt
->pushTab
,
1887 sizeof(ctxt
->pushTab
[0]));
1892 ctxt
->pushTab
= tmp2
;
1893 } else if (ctxt
->pushTab
== NULL
) {
1894 ctxt
->pushTab
= (xmlStartTag
*) xmlMalloc(ctxt
->nameMax
*
1895 sizeof(ctxt
->pushTab
[0]));
1896 if (ctxt
->pushTab
== NULL
)
1899 ctxt
->nameTab
[ctxt
->nameNr
] = value
;
1901 tag
= &ctxt
->pushTab
[ctxt
->nameNr
];
1902 tag
->prefix
= prefix
;
1906 return (ctxt
->nameNr
++);
1908 xmlErrMemory(ctxt
, NULL
);
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt:  an XML parser context
 *
 * Pops the top element name from the name stack. ctxt->name is set to
 * the new top of the stack, or to NULL when the stack becomes empty.
 *
 * Returns the name just removed, NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *ret;

    if (ctxt->nameNr <= 0)
        return(NULL);

    ctxt->nameNr--;
    ret = ctxt->nameTab[ctxt->nameNr];
    ctxt->nameTab[ctxt->nameNr] = NULL;
    ctxt->name = (ctxt->nameNr > 0) ?
                 ctxt->nameTab[ctxt->nameNr - 1] : NULL;
    return(ret);
}
#endif /* LIBXML_PUSH_ENABLED */
1940 * @ctxt: an XML parser context
1941 * @value: the element name
1943 * Pushes a new element name on top of the name stack
1945 * Returns -1 in case of error, the index in the stack otherwise
1948 namePush(xmlParserCtxtPtr ctxt
, const xmlChar
* value
)
1950 if (ctxt
== NULL
) return (-1);
1952 if (ctxt
->nameNr
>= ctxt
->nameMax
) {
1953 const xmlChar
* *tmp
;
1954 tmp
= (const xmlChar
* *) xmlRealloc((xmlChar
* *)ctxt
->nameTab
,
1956 sizeof(ctxt
->nameTab
[0]));
1960 ctxt
->nameTab
= tmp
;
1963 ctxt
->nameTab
[ctxt
->nameNr
] = value
;
1965 return (ctxt
->nameNr
++);
1967 xmlErrMemory(ctxt
, NULL
);
1972 * @ctxt: an XML parser context
1974 * Pops the top element name from the name stack
1976 * Returns the name just removed
1979 namePop(xmlParserCtxtPtr ctxt
)
1983 if ((ctxt
== NULL
) || (ctxt
->nameNr
<= 0))
1986 if (ctxt
->nameNr
> 0)
1987 ctxt
->name
= ctxt
->nameTab
[ctxt
->nameNr
- 1];
1990 ret
= ctxt
->nameTab
[ctxt
->nameNr
];
1991 ctxt
->nameTab
[ctxt
->nameNr
] = NULL
;
1995 static int spacePush(xmlParserCtxtPtr ctxt
, int val
) {
1996 if (ctxt
->spaceNr
>= ctxt
->spaceMax
) {
1999 ctxt
->spaceMax
*= 2;
2000 tmp
= (int *) xmlRealloc(ctxt
->spaceTab
,
2001 ctxt
->spaceMax
* sizeof(ctxt
->spaceTab
[0]));
2003 xmlErrMemory(ctxt
, NULL
);
2007 ctxt
->spaceTab
= tmp
;
2009 ctxt
->spaceTab
[ctxt
->spaceNr
] = val
;
2010 ctxt
->space
= &ctxt
->spaceTab
[ctxt
->spaceNr
];
2011 return(ctxt
->spaceNr
++);
2014 static int spacePop(xmlParserCtxtPtr ctxt
) {
2016 if (ctxt
->spaceNr
<= 0) return(0);
2018 if (ctxt
->spaceNr
> 0)
2019 ctxt
->space
= &ctxt
->spaceTab
[ctxt
->spaceNr
- 1];
2021 ctxt
->space
= &ctxt
->spaceTab
[0];
2022 ret
= ctxt
->spaceTab
[ctxt
->spaceNr
];
2023 ctxt
->spaceTab
[ctxt
->spaceNr
] = -1;
2028 * Macros for accessing the content. Those should be used only by the parser,
2031 * Dirty macros, i.e. one often need to make assumption on the context to
2034 * CUR_PTR return the current pointer to the xmlChar to be parsed.
2035 * To be used with extreme caution since operations consuming
2036 * characters may move the input buffer to a different location !
2037 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
2038 * This should be used internally by the parser
2039 * only to compare to ASCII values otherwise it would break when
2040 * running with UTF-8 encoding.
2041 * RAW same as CUR but in the input buffer, bypass any token
2042 * extraction that may have been done
2043 * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
2044 * to compare on ASCII based substring.
2045 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2046 * strings without newlines within the parser.
2047 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2048 * defined char within the parser.
2049 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2051 * NEXT Skip to the next character, this does the proper decoding
2052 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
2053 * NEXTL(l) Skip the current unicode character of l xmlChars long.
2054 * CUR_CHAR(l) returns the current unicode character (int), set l
2055 * to the number of xmlChars used for the encoding [0-5].
2056 * CUR_SCHAR same but operate on a string instead of the context
2057 * COPY_BUF copy the current unicode char to the target buffer, increment
2059 * GROW, SHRINK handling of input buffers
2062 #define RAW (*ctxt->input->cur)
2063 #define CUR (*ctxt->input->cur)
2064 #define NXT(val) ctxt->input->cur[(val)]
2065 #define CUR_PTR ctxt->input->cur
2066 #define BASE_PTR ctxt->input->base
2068 #define CMP4( s, c1, c2, c3, c4 ) \
2069 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
2070 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
2071 #define CMP5( s, c1, c2, c3, c4, c5 ) \
2072 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
2073 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
2074 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
2075 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
2076 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
2077 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
2078 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
2079 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
2080 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
2081 ((unsigned char *) s)[ 8 ] == c9 )
2082 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
2083 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
2084 ((unsigned char *) s)[ 9 ] == c10 )
2086 #define SKIP(val) do { \
2087 ctxt->input->cur += (val),ctxt->input->col+=(val); \
2088 if (*ctxt->input->cur == 0) \
2089 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2092 #define SKIPL(val) do { \
2094 for(skipl=0; skipl<val; skipl++) { \
2095 if (*(ctxt->input->cur) == '\n') { \
2096 ctxt->input->line++; ctxt->input->col = 1; \
2097 } else ctxt->input->col++; \
2098 ctxt->input->cur++; \
2100 if (*ctxt->input->cur == 0) \
2101 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2104 #define SHRINK if ((ctxt->progressive == 0) && \
2105 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2106 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2109 static void xmlSHRINK (xmlParserCtxtPtr ctxt
) {
2110 xmlParserInputShrink(ctxt
->input
);
2111 if (*ctxt
->input
->cur
== 0)
2112 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
2115 #define GROW if ((ctxt->progressive == 0) && \
2116 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2119 static void xmlGROW (xmlParserCtxtPtr ctxt
) {
2120 ptrdiff_t curEnd
= ctxt
->input
->end
- ctxt
->input
->cur
;
2121 ptrdiff_t curBase
= ctxt
->input
->cur
- ctxt
->input
->base
;
2123 if (((curEnd
> XML_MAX_LOOKUP_LIMIT
) ||
2124 (curBase
> XML_MAX_LOOKUP_LIMIT
)) &&
2125 ((ctxt
->input
->buf
) &&
2126 (ctxt
->input
->buf
->readcallback
!= xmlInputReadCallbackNop
)) &&
2127 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
2128 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
, "Huge input lookup");
2129 xmlHaltParser(ctxt
);
2132 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
2133 if ((ctxt
->input
->cur
> ctxt
->input
->end
) ||
2134 (ctxt
->input
->cur
< ctxt
->input
->base
)) {
2135 xmlHaltParser(ctxt
);
2136 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
, "cur index out of bound");
2139 if ((ctxt
->input
->cur
!= NULL
) && (*ctxt
->input
->cur
== 0))
2140 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
2143 #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2145 #define NEXT xmlNextChar(ctxt)
2148 ctxt->input->col++; \
2149 ctxt->input->cur++; \
2150 if (*ctxt->input->cur == 0) \
2151 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2154 #define NEXTL(l) do { \
2155 if (*(ctxt->input->cur) == '\n') { \
2156 ctxt->input->line++; ctxt->input->col = 1; \
2157 } else ctxt->input->col++; \
2158 ctxt->input->cur += l; \
2161 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2162 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2164 #define COPY_BUF(l,b,i,v) \
2165 if (l == 1) b[i++] = (xmlChar) v; \
2166 else i += xmlCopyCharMultiByte(&b[i],v)
2168 #define CUR_CONSUMED \
2169 (ctxt->input->consumed + (ctxt->input->cur - ctxt->input->base))
2172 * xmlSkipBlankChars:
2173 * @ctxt: the XML parser context
2175 * skip all blanks character found at that point in the input streams.
2176 * It pops up finished entities in the process if allowable at that point.
2178 * Returns the number of space chars skipped
2182 xmlSkipBlankChars(xmlParserCtxtPtr ctxt
) {
2186 * It's Okay to use CUR/NEXT here since all the blanks are on
2189 if (((ctxt
->inputNr
== 1) && (ctxt
->instate
!= XML_PARSER_DTD
)) ||
2190 (ctxt
->instate
== XML_PARSER_START
)) {
2193 * if we are in the document content, go really fast
2195 cur
= ctxt
->input
->cur
;
2196 while (IS_BLANK_CH(*cur
)) {
2198 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
2206 ctxt
->input
->cur
= cur
;
2207 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
2208 cur
= ctxt
->input
->cur
;
2211 ctxt
->input
->cur
= cur
;
2213 int expandPE
= ((ctxt
->external
!= 0) || (ctxt
->inputNr
!= 1));
2216 if (IS_BLANK_CH(CUR
)) { /* CHECKED tstblanks.xml */
2218 } else if (CUR
== '%') {
2220 * Need to handle support of entities branching here
2222 if ((expandPE
== 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2224 xmlParsePEReference(ctxt
);
2225 } else if (CUR
== 0) {
2226 if (ctxt
->inputNr
<= 1)
2234 * Also increase the counter when entering or exiting a PERef.
2235 * The spec says: "When a parameter-entity reference is recognized
2236 * in the DTD and included, its replacement text MUST be enlarged
2237 * by the attachment of one leading and one following space (#x20)
2247 /************************************************************************
2249 * Commodity functions to handle entities *
2251 ************************************************************************/
2255 * @ctxt: an XML parser context
2257 * xmlPopInput: the current input pointed by ctxt->input came to an end
2258 * pop it and return the next char.
2260 * Returns the current xmlChar in the parser context
2263 xmlPopInput(xmlParserCtxtPtr ctxt
) {
2264 if ((ctxt
== NULL
) || (ctxt
->inputNr
<= 1)) return(0);
2265 if (xmlParserDebugEntities
)
2266 xmlGenericError(xmlGenericErrorContext
,
2267 "Popping input %d\n", ctxt
->inputNr
);
2268 if ((ctxt
->inputNr
> 1) && (ctxt
->inSubset
== 0) &&
2269 (ctxt
->instate
!= XML_PARSER_EOF
))
2270 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
2271 "Unfinished entity outside the DTD");
2272 xmlFreeInputStream(inputPop(ctxt
));
2273 if (*ctxt
->input
->cur
== 0)
2274 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
2280 * @ctxt: an XML parser context
2281 * @input: an XML parser input fragment (entity, XML fragment ...).
2283 * xmlPushInput: switch to a new input stream which is stacked on top
2284 * of the previous one(s).
2285 * Returns -1 in case of error or the index in the input stack
2288 xmlPushInput(xmlParserCtxtPtr ctxt
, xmlParserInputPtr input
) {
2290 if (input
== NULL
) return(-1);
2292 if (xmlParserDebugEntities
) {
2293 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
2294 xmlGenericError(xmlGenericErrorContext
,
2295 "%s(%d): ", ctxt
->input
->filename
,
2297 xmlGenericError(xmlGenericErrorContext
,
2298 "Pushing input %d : %.30s\n", ctxt
->inputNr
+1, input
->cur
);
2300 if (((ctxt
->inputNr
> 40) && ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
2301 (ctxt
->inputNr
> 1024)) {
2302 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
2303 while (ctxt
->inputNr
> 1)
2304 xmlFreeInputStream(inputPop(ctxt
));
2307 ret
= inputPush(ctxt
, input
);
2308 if (ctxt
->instate
== XML_PARSER_EOF
)
2316 * @ctxt: an XML parser context
2318 * parse Reference declarations
2320 * [66] CharRef ::= '&#' [0-9]+ ';' |
2321 * '&#x' [0-9a-fA-F]+ ';'
2323 * [ WFC: Legal Character ]
2324 * Characters referred to using character references must match the
2325 * production for Char.
2327 * Returns the value parsed (as an int), 0 in case of error
2330 xmlParseCharRef(xmlParserCtxtPtr ctxt
) {
2335 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2337 if ((RAW
== '&') && (NXT(1) == '#') &&
2341 while (RAW
!= ';') { /* loop blocked by count */
2345 if (ctxt
->instate
== XML_PARSER_EOF
)
2348 if ((RAW
>= '0') && (RAW
<= '9'))
2349 val
= val
* 16 + (CUR
- '0');
2350 else if ((RAW
>= 'a') && (RAW
<= 'f') && (count
< 20))
2351 val
= val
* 16 + (CUR
- 'a') + 10;
2352 else if ((RAW
>= 'A') && (RAW
<= 'F') && (count
< 20))
2353 val
= val
* 16 + (CUR
- 'A') + 10;
2355 xmlFatalErr(ctxt
, XML_ERR_INVALID_HEX_CHARREF
, NULL
);
2366 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2370 } else if ((RAW
== '&') && (NXT(1) == '#')) {
2373 while (RAW
!= ';') { /* loop blocked by count */
2377 if (ctxt
->instate
== XML_PARSER_EOF
)
2380 if ((RAW
>= '0') && (RAW
<= '9'))
2381 val
= val
* 10 + (CUR
- '0');
2383 xmlFatalErr(ctxt
, XML_ERR_INVALID_DEC_CHARREF
, NULL
);
2394 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2399 xmlFatalErr(ctxt
, XML_ERR_INVALID_CHARREF
, NULL
);
2403 * [ WFC: Legal Character ]
2404 * Characters referred to using character references must match the
2405 * production for Char.
2407 if (val
>= 0x110000) {
2408 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
2409 "xmlParseCharRef: character reference out of bounds\n",
2411 } else if (IS_CHAR(val
)) {
2414 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
2415 "xmlParseCharRef: invalid xmlChar value %d\n",
2422 * xmlParseStringCharRef:
2423 * @ctxt: an XML parser context
2424 * @str: a pointer to an index in the string
2426 * parse Reference declarations, variant parsing from a string rather
2427 * than an input flow.
2429 * [66] CharRef ::= '&#' [0-9]+ ';' |
2430 * '&#x' [0-9a-fA-F]+ ';'
2432 * [ WFC: Legal Character ]
2433 * Characters referred to using character references must match the
2434 * production for Char.
2436 * Returns the value parsed (as an int), 0 in case of error, str will be
2437 * updated to the current value of the index
2440 xmlParseStringCharRef(xmlParserCtxtPtr ctxt
, const xmlChar
**str
) {
2445 if ((str
== NULL
) || (*str
== NULL
)) return(0);
2448 if ((cur
== '&') && (ptr
[1] == '#') && (ptr
[2] == 'x')) {
2451 while (cur
!= ';') { /* Non input consuming loop */
2452 if ((cur
>= '0') && (cur
<= '9'))
2453 val
= val
* 16 + (cur
- '0');
2454 else if ((cur
>= 'a') && (cur
<= 'f'))
2455 val
= val
* 16 + (cur
- 'a') + 10;
2456 else if ((cur
>= 'A') && (cur
<= 'F'))
2457 val
= val
* 16 + (cur
- 'A') + 10;
2459 xmlFatalErr(ctxt
, XML_ERR_INVALID_HEX_CHARREF
, NULL
);
2471 } else if ((cur
== '&') && (ptr
[1] == '#')){
2474 while (cur
!= ';') { /* Non input consuming loops */
2475 if ((cur
>= '0') && (cur
<= '9'))
2476 val
= val
* 10 + (cur
- '0');
2478 xmlFatalErr(ctxt
, XML_ERR_INVALID_DEC_CHARREF
, NULL
);
2491 xmlFatalErr(ctxt
, XML_ERR_INVALID_CHARREF
, NULL
);
2497 * [ WFC: Legal Character ]
2498 * Characters referred to using character references must match the
2499 * production for Char.
2501 if (val
>= 0x110000) {
2502 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
2503 "xmlParseStringCharRef: character reference out of bounds\n",
2505 } else if (IS_CHAR(val
)) {
2508 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
2509 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2516 * xmlParserHandlePEReference:
2517 * @ctxt: the parser context
2519 * [69] PEReference ::= '%' Name ';'
2521 * [ WFC: No Recursion ]
2522 * A parsed entity must not contain a recursive
2523 * reference to itself, either directly or indirectly.
2525 * [ WFC: Entity Declared ]
2526 * In a document without any DTD, a document with only an internal DTD
2527 * subset which contains no parameter entity references, or a document
2528 * with "standalone='yes'", ... ... The declaration of a parameter
2529 * entity must precede any reference to it...
2531 * [ VC: Entity Declared ]
2532 * In a document with an external subset or external parameter entities
2533 * with "standalone='no'", ... ... The declaration of a parameter entity
2534 * must precede any reference to it...
2537 * Parameter-entity references may only appear in the DTD.
2538 * NOTE: misleading but this is handled.
2540 * A PEReference may have been detected in the current input stream
2541 * the handling is done accordingly to
2542 * http://www.w3.org/TR/REC-xml#entproc
2544 * - Included in literal in entity values
2545 * - Included as Parameter Entity reference within DTDs
2548 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt
) {
2549 switch(ctxt
->instate
) {
2550 case XML_PARSER_CDATA_SECTION
:
2552 case XML_PARSER_COMMENT
:
2554 case XML_PARSER_START_TAG
:
2556 case XML_PARSER_END_TAG
:
2558 case XML_PARSER_EOF
:
2559 xmlFatalErr(ctxt
, XML_ERR_PEREF_AT_EOF
, NULL
);
2561 case XML_PARSER_PROLOG
:
2562 case XML_PARSER_START
:
2563 case XML_PARSER_MISC
:
2564 xmlFatalErr(ctxt
, XML_ERR_PEREF_IN_PROLOG
, NULL
);
2566 case XML_PARSER_ENTITY_DECL
:
2567 case XML_PARSER_CONTENT
:
2568 case XML_PARSER_ATTRIBUTE_VALUE
:
2570 case XML_PARSER_SYSTEM_LITERAL
:
2571 case XML_PARSER_PUBLIC_LITERAL
:
2572 /* we just ignore it there */
2574 case XML_PARSER_EPILOG
:
2575 xmlFatalErr(ctxt
, XML_ERR_PEREF_IN_EPILOG
, NULL
);
2577 case XML_PARSER_ENTITY_VALUE
:
2579 * NOTE: in the case of entity values, we don't do the
2580 * substitution here since we need the literal
2581 * entity value to be able to save the internal
2582 * subset of the document.
2583 * This will be handled by xmlStringDecodeEntities
2586 case XML_PARSER_DTD
:
2588 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2589 * In the internal DTD subset, parameter-entity references
2590 * can occur only where markup declarations can occur, not
2591 * within markup declarations.
2592 * In that case this is handled in xmlParseMarkupDecl
2594 if ((ctxt
->external
== 0) && (ctxt
->inputNr
== 1))
2596 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2599 case XML_PARSER_IGNORE
:
2603 xmlParsePEReference(ctxt
);
2607 * Macro used to grow the current buffer.
2608 * buffer##_size is expected to be a size_t
2609 * mem_error: is expected to handle memory allocation failures
2611 #define growBuffer(buffer, n) { \
2613 size_t new_size = buffer##_size * 2 + n; \
2614 if (new_size < buffer##_size) goto mem_error; \
2615 tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
2616 if (tmp == NULL) goto mem_error; \
2618 buffer##_size = new_size; \
2622 * xmlStringLenDecodeEntities:
2623 * @ctxt: the parser context
2624 * @str: the input string
2625 * @len: the string length
2626 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2627 * @end: an end marker xmlChar, 0 if none
2628 * @end2: an end marker xmlChar, 0 if none
2629 * @end3: an end marker xmlChar, 0 if none
2631 * Takes an entity string content and processes it to perform the adequate substitutions.
2633 * [67] Reference ::= EntityRef | CharRef
2635 * [69] PEReference ::= '%' Name ';'
2637 * Returns A newly allocated string with the substitution done. The caller
2638 * must deallocate it !
2641 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int len
,
2642 int what
, xmlChar end
, xmlChar end2
, xmlChar end3
) {
2643 xmlChar
*buffer
= NULL
;
2644 size_t buffer_size
= 0;
2647 xmlChar
*current
= NULL
;
2648 xmlChar
*rep
= NULL
;
2649 const xmlChar
*last
;
2653 if ((ctxt
== NULL
) || (str
== NULL
) || (len
< 0))
2657 if (((ctxt
->depth
> 40) &&
2658 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
2659 (ctxt
->depth
> 1024)) {
2660 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
2665 * allocate a translation buffer.
2667 buffer_size
= XML_PARSER_BIG_BUFFER_SIZE
;
2668 buffer
= (xmlChar
*) xmlMallocAtomic(buffer_size
);
2669 if (buffer
== NULL
) goto mem_error
;
2672 * OK loop until we reach one of the ending char or a size limit.
2673 * we are operating on already parsed values.
2676 c
= CUR_SCHAR(str
, l
);
2679 while ((c
!= 0) && (c
!= end
) && /* non input consuming loop */
2680 (c
!= end2
) && (c
!= end3
) &&
2681 (ctxt
->instate
!= XML_PARSER_EOF
)) {
2684 if ((c
== '&') && (str
[1] == '#')) {
2685 int val
= xmlParseStringCharRef(ctxt
, &str
);
2688 COPY_BUF(0,buffer
,nbchars
,val
);
2689 if (nbchars
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2690 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2692 } else if ((c
== '&') && (what
& XML_SUBSTITUTE_REF
)) {
2693 if (xmlParserDebugEntities
)
2694 xmlGenericError(xmlGenericErrorContext
,
2695 "String decoding Entity Reference: %.30s\n",
2697 ent
= xmlParseStringEntityRef(ctxt
, &str
);
2698 xmlParserEntityCheck(ctxt
, 0, ent
, 0);
2700 ctxt
->nbentities
+= ent
->checked
/ 2;
2701 if ((ent
!= NULL
) &&
2702 (ent
->etype
== XML_INTERNAL_PREDEFINED_ENTITY
)) {
2703 if (ent
->content
!= NULL
) {
2704 COPY_BUF(0,buffer
,nbchars
,ent
->content
[0]);
2705 if (nbchars
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2706 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2709 xmlFatalErrMsg(ctxt
, XML_ERR_INTERNAL_ERROR
,
2710 "predefined entity has no content\n");
2713 } else if ((ent
!= NULL
) && (ent
->content
!= NULL
)) {
2715 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
, what
,
2719 ent
->content
[0] = 0;
2724 while (*current
!= 0) { /* non input consuming loop */
2725 buffer
[nbchars
++] = *current
++;
2726 if (nbchars
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2727 if (xmlParserEntityCheck(ctxt
, nbchars
, ent
, 0))
2729 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2734 } else if (ent
!= NULL
) {
2735 int i
= xmlStrlen(ent
->name
);
2736 const xmlChar
*cur
= ent
->name
;
2738 buffer
[nbchars
++] = '&';
2739 if (nbchars
+ i
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2740 growBuffer(buffer
, i
+ XML_PARSER_BUFFER_SIZE
);
2743 buffer
[nbchars
++] = *cur
++;
2744 buffer
[nbchars
++] = ';';
2746 } else if (c
== '%' && (what
& XML_SUBSTITUTE_PEREF
)) {
2747 if (xmlParserDebugEntities
)
2748 xmlGenericError(xmlGenericErrorContext
,
2749 "String decoding PE Reference: %.30s\n", str
);
2750 ent
= xmlParseStringPEReference(ctxt
, &str
);
2751 xmlParserEntityCheck(ctxt
, 0, ent
, 0);
2753 ctxt
->nbentities
+= ent
->checked
/ 2;
2755 if (ent
->content
== NULL
) {
2757 * Note: external parsed entities will not be loaded,
2758 * it is not required for a non-validating parser to
2759 * complete external PEReferences coming from the
2762 if (((ctxt
->options
& XML_PARSE_NOENT
) != 0) ||
2763 ((ctxt
->options
& XML_PARSE_DTDVALID
) != 0) ||
2764 (ctxt
->validate
!= 0)) {
2765 xmlLoadEntityContent(ctxt
, ent
);
2767 xmlWarningMsg(ctxt
, XML_ERR_ENTITY_PROCESSING
,
2768 "not validating will not read content for PE entity %s\n",
2773 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
, what
,
2777 if (ent
->content
!= NULL
)
2778 ent
->content
[0] = 0;
2782 while (*current
!= 0) { /* non input consuming loop */
2783 buffer
[nbchars
++] = *current
++;
2784 if (nbchars
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2785 if (xmlParserEntityCheck(ctxt
, nbchars
, ent
, 0))
2787 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2794 COPY_BUF(l
,buffer
,nbchars
,c
);
2796 if (nbchars
+ XML_PARSER_BUFFER_SIZE
> buffer_size
) {
2797 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2801 c
= CUR_SCHAR(str
, l
);
2805 buffer
[nbchars
] = 0;
2809 xmlErrMemory(ctxt
, NULL
);
2819 * xmlStringDecodeEntities:
2820 * @ctxt: the parser context
2821 * @str: the input string
2822 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2823 * @end: an end marker xmlChar, 0 if none
2824 * @end2: an end marker xmlChar, 0 if none
2825 * @end3: an end marker xmlChar, 0 if none
2827 * Takes an entity string content and processes it to do the adequate substitutions.
2829 * [67] Reference ::= EntityRef | CharRef
2831 * [69] PEReference ::= '%' Name ';'
2833 * Returns A newly allocated string with the substitution done. The caller
2834 * must deallocate it !
2837 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int what
,
2838 xmlChar end
, xmlChar end2
, xmlChar end3
) {
2839 if ((ctxt
== NULL
) || (str
== NULL
)) return(NULL
);
2840 return(xmlStringLenDecodeEntities(ctxt
, str
, xmlStrlen(str
), what
,
2844 /************************************************************************
2846 * Commodity functions, cleanup needed ? *
2848 ************************************************************************/
2852 * @ctxt: an XML parser context
2854 * @len: the size of @str
2855 * @blank_chars: we know the chars are blanks
2857 * Is this a sequence of blank chars that one can ignore ?
2859 * Returns 1 if ignorable 0 otherwise.
2862 static int areBlanks(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int len
,
2865 xmlNodePtr lastChild
;
2868 * Don't spend time trying to differentiate them, the same callback is
2871 if (ctxt
->sax
->ignorableWhitespace
== ctxt
->sax
->characters
)
2875 * Check for xml:space value.
2877 if ((ctxt
->space
== NULL
) || (*(ctxt
->space
) == 1) ||
2878 (*(ctxt
->space
) == -2))
2882 * Check that the string is made of blanks
2884 if (blank_chars
== 0) {
2885 for (i
= 0;i
< len
;i
++)
2886 if (!(IS_BLANK_CH(str
[i
]))) return(0);
2890 * Look if the element is mixed content in the DTD if available
2892 if (ctxt
->node
== NULL
) return(0);
2893 if (ctxt
->myDoc
!= NULL
) {
2894 ret
= xmlIsMixedElement(ctxt
->myDoc
, ctxt
->node
->name
);
2895 if (ret
== 0) return(1);
2896 if (ret
== 1) return(0);
2900 * Otherwise, heuristic :-\
2902 if ((RAW
!= '<') && (RAW
!= 0xD)) return(0);
2903 if ((ctxt
->node
->children
== NULL
) &&
2904 (RAW
== '<') && (NXT(1) == '/')) return(0);
2906 lastChild
= xmlGetLastChild(ctxt
->node
);
2907 if (lastChild
== NULL
) {
2908 if ((ctxt
->node
->type
!= XML_ELEMENT_NODE
) &&
2909 (ctxt
->node
->content
!= NULL
)) return(0);
2910 } else if (xmlNodeIsText(lastChild
))
2912 else if ((ctxt
->node
->children
!= NULL
) &&
2913 (xmlNodeIsText(ctxt
->node
->children
)))
2918 /************************************************************************
2920 * Extra stuff for namespace support *
2921 * Relates to http://www.w3.org/TR/WD-xml-names *
2923 ************************************************************************/
2927 * @ctxt: an XML parser context
2928 * @name: the XML qualified name string to split
2929 * @prefix: a xmlChar **
2931 * parse an UTF8 encoded XML qualified name string
2933 * [NS 5] QName ::= (Prefix ':')? LocalPart
2935 * [NS 6] Prefix ::= NCName
2937 * [NS 7] LocalPart ::= NCName
2939 * Returns the local part, and prefix is updated
2940 * to get the Prefix if any.
2944 xmlSplitQName(xmlParserCtxtPtr ctxt
, const xmlChar
*name
, xmlChar
**prefix
) {
2945 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
2946 xmlChar
*buffer
= NULL
;
2948 int max
= XML_MAX_NAMELEN
;
2949 xmlChar
*ret
= NULL
;
2950 const xmlChar
*cur
= name
;
2953 if (prefix
== NULL
) return(NULL
);
2956 if (cur
== NULL
) return(NULL
);
2958 #ifndef XML_XML_NAMESPACE
2959 /* xml: prefix is not really a namespace */
2960 if ((cur
[0] == 'x') && (cur
[1] == 'm') &&
2961 (cur
[2] == 'l') && (cur
[3] == ':'))
2962 return(xmlStrdup(name
));
2965 /* nasty but well-formed */
2967 return(xmlStrdup(name
));
2970 while ((c
!= 0) && (c
!= ':') && (len
< max
)) { /* tested bigname.xml */
2976 * Okay someone managed to make a huge name, so he's ready to pay
2977 * for the processing speed.
2981 buffer
= (xmlChar
*) xmlMallocAtomic(max
* sizeof(xmlChar
));
2982 if (buffer
== NULL
) {
2983 xmlErrMemory(ctxt
, NULL
);
2986 memcpy(buffer
, buf
, len
);
2987 while ((c
!= 0) && (c
!= ':')) { /* tested bigname.xml */
2988 if (len
+ 10 > max
) {
2992 tmp
= (xmlChar
*) xmlRealloc(buffer
,
2993 max
* sizeof(xmlChar
));
2996 xmlErrMemory(ctxt
, NULL
);
3007 if ((c
== ':') && (*cur
== 0)) {
3011 return(xmlStrdup(name
));
3015 ret
= xmlStrndup(buf
, len
);
3019 max
= XML_MAX_NAMELEN
;
3027 return(xmlStrndup(BAD_CAST
"", 0));
3032 * Check that the first character is proper to start
3035 if (!(((c
>= 0x61) && (c
<= 0x7A)) ||
3036 ((c
>= 0x41) && (c
<= 0x5A)) ||
3037 (c
== '_') || (c
== ':'))) {
3039 int first
= CUR_SCHAR(cur
, l
);
3041 if (!IS_LETTER(first
) && (first
!= '_')) {
3042 xmlFatalErrMsgStr(ctxt
, XML_NS_ERR_QNAME
,
3043 "Name %s is not XML Namespace compliant\n",
3049 while ((c
!= 0) && (len
< max
)) { /* tested bigname2.xml */
3055 * Okay someone managed to make a huge name, so he's ready to pay
3056 * for the processing speed.
3060 buffer
= (xmlChar
*) xmlMallocAtomic(max
* sizeof(xmlChar
));
3061 if (buffer
== NULL
) {
3062 xmlErrMemory(ctxt
, NULL
);
3065 memcpy(buffer
, buf
, len
);
3066 while (c
!= 0) { /* tested bigname2.xml */
3067 if (len
+ 10 > max
) {
3071 tmp
= (xmlChar
*) xmlRealloc(buffer
,
3072 max
* sizeof(xmlChar
));
3074 xmlErrMemory(ctxt
, NULL
);
3087 ret
= xmlStrndup(buf
, len
);
3096 /************************************************************************
3098 * The parser itself *
3099 * Relates to http://www.w3.org/TR/REC-xml *
3101 ************************************************************************/
3103 /************************************************************************
3105 * Routines to parse Name, NCName and NmToken *
3107 ************************************************************************/
3109 static unsigned long nbParseName
= 0;
3110 static unsigned long nbParseNmToken
= 0;
3111 static unsigned long nbParseNCName
= 0;
3112 static unsigned long nbParseNCNameComplex
= 0;
3113 static unsigned long nbParseNameComplex
= 0;
3114 static unsigned long nbParseStringName
= 0;
3118 * The two following functions are related to the change of accepted
3119 * characters for Name and NmToken in the Revision 5 of XML-1.0
3120 * They correspond to the modified production [4] and the new production [4a]
3121 * changes in that revision. Also note that the macros used for the
3122 * productions Letter, Digit, CombiningChar and Extender are not needed
3124 * We still keep compatibility to pre-revision5 parsing semantic if the
3125 * new XML_PARSE_OLD10 option is given to the parser.
3128 xmlIsNameStartChar(xmlParserCtxtPtr ctxt
, int c
) {
3129 if ((ctxt
->options
& XML_PARSE_OLD10
) == 0) {
3131 * Use the new checks of production [4] [4a] amd [5] of the
3132 * Update 5 of XML-1.0
3134 if ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* accelerators */
3135 (((c
>= 'a') && (c
<= 'z')) ||
3136 ((c
>= 'A') && (c
<= 'Z')) ||
3137 (c
== '_') || (c
== ':') ||
3138 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3139 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3140 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3141 ((c
>= 0x370) && (c
<= 0x37D)) ||
3142 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3143 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3144 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3145 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3146 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3147 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3148 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3149 ((c
>= 0x10000) && (c
<= 0xEFFFF))))
3152 if (IS_LETTER(c
) || (c
== '_') || (c
== ':'))
3159 xmlIsNameChar(xmlParserCtxtPtr ctxt
, int c
) {
3160 if ((ctxt
->options
& XML_PARSE_OLD10
) == 0) {
3162 * Use the new checks of production [4] [4a] amd [5] of the
3163 * Update 5 of XML-1.0
3165 if ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* accelerators */
3166 (((c
>= 'a') && (c
<= 'z')) ||
3167 ((c
>= 'A') && (c
<= 'Z')) ||
3168 ((c
>= '0') && (c
<= '9')) || /* !start */
3169 (c
== '_') || (c
== ':') ||
3170 (c
== '-') || (c
== '.') || (c
== 0xB7) || /* !start */
3171 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3172 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3173 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3174 ((c
>= 0x300) && (c
<= 0x36F)) || /* !start */
3175 ((c
>= 0x370) && (c
<= 0x37D)) ||
3176 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3177 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3178 ((c
>= 0x203F) && (c
<= 0x2040)) || /* !start */
3179 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3180 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3181 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3182 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3183 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3184 ((c
>= 0x10000) && (c
<= 0xEFFFF))))
3187 if ((IS_LETTER(c
)) || (IS_DIGIT(c
)) ||
3188 (c
== '.') || (c
== '-') ||
3189 (c
== '_') || (c
== ':') ||
3190 (IS_COMBINING(c
)) ||
3197 static xmlChar
* xmlParseAttValueInternal(xmlParserCtxtPtr ctxt
,
3198 int *len
, int *alloc
, int normalize
);
3200 static const xmlChar
*
3201 xmlParseNameComplex(xmlParserCtxtPtr ctxt
) {
3207 nbParseNameComplex
++;
3211 * Handler for more complex cases
3214 if (ctxt
->instate
== XML_PARSER_EOF
)
3217 if ((ctxt
->options
& XML_PARSE_OLD10
) == 0) {
3219 * Use the new checks of production [4] [4a] and [5] of the
3220 * Update 5 of XML-1.0
3222 if ((c
== ' ') || (c
== '>') || (c
== '/') || /* accelerators */
3223 (!(((c
>= 'a') && (c
<= 'z')) ||
3224 ((c
>= 'A') && (c
<= 'Z')) ||
3225 (c
== '_') || (c
== ':') ||
3226 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3227 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3228 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3229 ((c
>= 0x370) && (c
<= 0x37D)) ||
3230 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3231 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3232 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3233 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3234 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3235 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3236 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3237 ((c
>= 0x10000) && (c
<= 0xEFFFF))))) {
3243 while ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* accelerators */
3244 (((c
>= 'a') && (c
<= 'z')) ||
3245 ((c
>= 'A') && (c
<= 'Z')) ||
3246 ((c
>= '0') && (c
<= '9')) || /* !start */
3247 (c
== '_') || (c
== ':') ||
3248 (c
== '-') || (c
== '.') || (c
== 0xB7) || /* !start */
3249 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3250 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3251 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3252 ((c
>= 0x300) && (c
<= 0x36F)) || /* !start */
3253 ((c
>= 0x370) && (c
<= 0x37D)) ||
3254 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3255 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3256 ((c
>= 0x203F) && (c
<= 0x2040)) || /* !start */
3257 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3258 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3259 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3260 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3261 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3262 ((c
>= 0x10000) && (c
<= 0xEFFFF))
3264 if (count
++ > XML_PARSER_CHUNK_SIZE
) {
3267 if (ctxt
->instate
== XML_PARSER_EOF
)
3275 if ((c
== ' ') || (c
== '>') || (c
== '/') || /* accelerators */
3276 (!IS_LETTER(c
) && (c
!= '_') &&
3284 while ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* test bigname.xml */
3285 ((IS_LETTER(c
)) || (IS_DIGIT(c
)) ||
3286 (c
== '.') || (c
== '-') ||
3287 (c
== '_') || (c
== ':') ||
3288 (IS_COMBINING(c
)) ||
3289 (IS_EXTENDER(c
)))) {
3290 if (count
++ > XML_PARSER_CHUNK_SIZE
) {
3293 if (ctxt
->instate
== XML_PARSER_EOF
)
3301 if ((len
> XML_MAX_NAME_LENGTH
) &&
3302 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
3303 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "Name");
3306 if (ctxt
->input
->cur
- ctxt
->input
->base
< len
) {
3308 * There were a couple of bugs where PERefs led to a change
3309 * of the buffer. Check the buffer size to avoid passing an invalid
3310 * pointer to xmlDictLookup.
3312 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
3313 "unexpected change of input buffer");
3316 if ((*ctxt
->input
->cur
== '\n') && (ctxt
->input
->cur
[-1] == '\r'))
3317 return(xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
- (len
+ 1), len
));
3318 return(xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
- len
, len
));
3323 * @ctxt: an XML parser context
3325 * parse an XML name.
3327 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3328 * CombiningChar | Extender
3330 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3332 * [6] Names ::= Name (#x20 Name)*
3334 * Returns the Name parsed or NULL
3338 xmlParseName(xmlParserCtxtPtr ctxt
) {
3350 * Accelerator for simple ASCII names
3352 in
= ctxt
->input
->cur
;
3353 if (((*in
>= 0x61) && (*in
<= 0x7A)) ||
3354 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3355 (*in
== '_') || (*in
== ':')) {
3357 while (((*in
>= 0x61) && (*in
<= 0x7A)) ||
3358 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3359 ((*in
>= 0x30) && (*in
<= 0x39)) ||
3360 (*in
== '_') || (*in
== '-') ||
3361 (*in
== ':') || (*in
== '.'))
3363 if ((*in
> 0) && (*in
< 0x80)) {
3364 count
= in
- ctxt
->input
->cur
;
3365 if ((count
> XML_MAX_NAME_LENGTH
) &&
3366 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
3367 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "Name");
3370 ret
= xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
, count
);
3371 ctxt
->input
->cur
= in
;
3372 ctxt
->input
->col
+= count
;
3374 xmlErrMemory(ctxt
, NULL
);
3378 /* accelerator for special cases */
3379 return(xmlParseNameComplex(ctxt
));
3382 static const xmlChar
*
3383 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt
) {
3387 size_t startPosition
= 0;
3390 nbParseNCNameComplex
++;
3394 * Handler for more complex cases
3397 startPosition
= CUR_PTR
- BASE_PTR
;
3399 if ((c
== ' ') || (c
== '>') || (c
== '/') || /* accelerators */
3400 (!xmlIsNameStartChar(ctxt
, c
) || (c
== ':'))) {
3404 while ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* test bigname.xml */
3405 (xmlIsNameChar(ctxt
, c
) && (c
!= ':'))) {
3406 if (count
++ > XML_PARSER_CHUNK_SIZE
) {
3407 if ((len
> XML_MAX_NAME_LENGTH
) &&
3408 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
3409 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NCName");
3414 if (ctxt
->instate
== XML_PARSER_EOF
)
3423 * when shrinking to extend the buffer we really need to preserve
3424 * the part of the name we already parsed. Hence rolling back
3425 * by current length.
3427 ctxt
->input
->cur
-= l
;
3429 if (ctxt
->instate
== XML_PARSER_EOF
)
3431 ctxt
->input
->cur
+= l
;
3435 if ((len
> XML_MAX_NAME_LENGTH
) &&
3436 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
3437 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NCName");
3440 return(xmlDictLookup(ctxt
->dict
, (BASE_PTR
+ startPosition
), len
));
3445 * @ctxt: an XML parser context
3446 * @len: length of the string parsed
3448 * parse an XML name.
3450 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3451 * CombiningChar | Extender
3453 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3455 * Returns the Name parsed or NULL
3458 static const xmlChar
*
3459 xmlParseNCName(xmlParserCtxtPtr ctxt
) {
3460 const xmlChar
*in
, *e
;
3469 * Accelerator for simple ASCII names
3471 in
= ctxt
->input
->cur
;
3472 e
= ctxt
->input
->end
;
3473 if ((((*in
>= 0x61) && (*in
<= 0x7A)) ||
3474 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3475 (*in
== '_')) && (in
< e
)) {
3477 while ((((*in
>= 0x61) && (*in
<= 0x7A)) ||
3478 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3479 ((*in
>= 0x30) && (*in
<= 0x39)) ||
3480 (*in
== '_') || (*in
== '-') ||
3481 (*in
== '.')) && (in
< e
))
3485 if ((*in
> 0) && (*in
< 0x80)) {
3486 count
= in
- ctxt
->input
->cur
;
3487 if ((count
> XML_MAX_NAME_LENGTH
) &&
3488 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
3489 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NCName");
3492 ret
= xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
, count
);
3493 ctxt
->input
->cur
= in
;
3494 ctxt
->input
->col
+= count
;
3496 xmlErrMemory(ctxt
, NULL
);
3502 return(xmlParseNCNameComplex(ctxt
));
3506 * xmlParseNameAndCompare:
3507 * @ctxt: an XML parser context
3509 * parse an XML name and compares for match
3510 * (specialized for endtag parsing)
3512 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3513 * and the name for mismatch
3516 static const xmlChar
*
3517 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt
, xmlChar
const *other
) {
3518 register const xmlChar
*cmp
= other
;
3519 register const xmlChar
*in
;
3523 if (ctxt
->instate
== XML_PARSER_EOF
)
3526 in
= ctxt
->input
->cur
;
3527 while (*in
!= 0 && *in
== *cmp
) {
3531 if (*cmp
== 0 && (*in
== '>' || IS_BLANK_CH (*in
))) {
3533 ctxt
->input
->col
+= in
- ctxt
->input
->cur
;
3534 ctxt
->input
->cur
= in
;
3535 return (const xmlChar
*) 1;
3537 /* failure (or end of input buffer), check with full function */
3538 ret
= xmlParseName (ctxt
);
3539 /* strings coming from the dictionary direct compare possible */
3541 return (const xmlChar
*) 1;
3547 * xmlParseStringName:
3548 * @ctxt: an XML parser context
3549 * @str: a pointer to the string pointer (IN/OUT)
3551 * parse an XML name.
3553 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3554 * CombiningChar | Extender
3556 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3558 * [6] Names ::= Name (#x20 Name)*
3560 * Returns the Name parsed or NULL. The @str pointer
3561 * is updated to the current location in the string.
3565 xmlParseStringName(xmlParserCtxtPtr ctxt
, const xmlChar
** str
) {
3566 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
3567 const xmlChar
*cur
= *str
;
3572 nbParseStringName
++;
3575 c
= CUR_SCHAR(cur
, l
);
3576 if (!xmlIsNameStartChar(ctxt
, c
)) {
3580 COPY_BUF(l
,buf
,len
,c
);
3582 c
= CUR_SCHAR(cur
, l
);
3583 while (xmlIsNameChar(ctxt
, c
)) {
3584 COPY_BUF(l
,buf
,len
,c
);
3586 c
= CUR_SCHAR(cur
, l
);
3587 if (len
>= XML_MAX_NAMELEN
) { /* test bigentname.xml */
3589 * Okay someone managed to make a huge name, so he's ready to pay
3590 * for the processing speed.
3595 buffer
= (xmlChar
*) xmlMallocAtomic(max
* sizeof(xmlChar
));
3596 if (buffer
== NULL
) {
3597 xmlErrMemory(ctxt
, NULL
);
3600 memcpy(buffer
, buf
, len
);
3601 while (xmlIsNameChar(ctxt
, c
)) {
3602 if (len
+ 10 > max
) {
3605 if ((len
> XML_MAX_NAME_LENGTH
) &&
3606 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
3607 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NCName");
3612 tmp
= (xmlChar
*) xmlRealloc(buffer
,
3613 max
* sizeof(xmlChar
));
3615 xmlErrMemory(ctxt
, NULL
);
3621 COPY_BUF(l
,buffer
,len
,c
);
3623 c
= CUR_SCHAR(cur
, l
);
3630 if ((len
> XML_MAX_NAME_LENGTH
) &&
3631 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
3632 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NCName");
3636 return(xmlStrndup(buf
, len
));
3641 * @ctxt: an XML parser context
3643 * parse an XML Nmtoken.
3645 * [7] Nmtoken ::= (NameChar)+
3647 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3649 * Returns the Nmtoken parsed or NULL
3653 xmlParseNmtoken(xmlParserCtxtPtr ctxt
) {
3654 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
3664 if (ctxt
->instate
== XML_PARSER_EOF
)
3668 while (xmlIsNameChar(ctxt
, c
)) {
3669 if (count
++ > XML_PARSER_CHUNK_SIZE
) {
3673 COPY_BUF(l
,buf
,len
,c
);
3679 if (ctxt
->instate
== XML_PARSER_EOF
)
3683 if (len
>= XML_MAX_NAMELEN
) {
3685 * Okay someone managed to make a huge token, so he's ready to pay
3686 * for the processing speed.
3691 buffer
= (xmlChar
*) xmlMallocAtomic(max
* sizeof(xmlChar
));
3692 if (buffer
== NULL
) {
3693 xmlErrMemory(ctxt
, NULL
);
3696 memcpy(buffer
, buf
, len
);
3697 while (xmlIsNameChar(ctxt
, c
)) {
3698 if (count
++ > XML_PARSER_CHUNK_SIZE
) {
3701 if (ctxt
->instate
== XML_PARSER_EOF
) {
3706 if (len
+ 10 > max
) {
3709 if ((max
> XML_MAX_NAME_LENGTH
) &&
3710 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
3711 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NmToken");
3716 tmp
= (xmlChar
*) xmlRealloc(buffer
,
3717 max
* sizeof(xmlChar
));
3719 xmlErrMemory(ctxt
, NULL
);
3725 COPY_BUF(l
,buffer
,len
,c
);
3735 if ((len
> XML_MAX_NAME_LENGTH
) &&
3736 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
3737 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NmToken");
3740 return(xmlStrndup(buf
, len
));
3744 * xmlParseEntityValue:
3745 * @ctxt: an XML parser context
3746 * @orig: if non-NULL store a copy of the original entity value
3748 * parse a value for ENTITY declarations
3750 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3751 * "'" ([^%&'] | PEReference | Reference)* "'"
3753 * Returns the EntityValue parsed with reference substituted or NULL
3757 xmlParseEntityValue(xmlParserCtxtPtr ctxt
, xmlChar
**orig
) {
3758 xmlChar
*buf
= NULL
;
3760 int size
= XML_PARSER_BUFFER_SIZE
;
3763 xmlChar
*ret
= NULL
;
3764 const xmlChar
*cur
= NULL
;
3765 xmlParserInputPtr input
;
3767 if (RAW
== '"') stop
= '"';
3768 else if (RAW
== '\'') stop
= '\'';
3770 xmlFatalErr(ctxt
, XML_ERR_ENTITY_NOT_STARTED
, NULL
);
3773 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
3775 xmlErrMemory(ctxt
, NULL
);
3780 * The content of the entity definition is copied in a buffer.
3783 ctxt
->instate
= XML_PARSER_ENTITY_VALUE
;
3784 input
= ctxt
->input
;
3786 if (ctxt
->instate
== XML_PARSER_EOF
)
3791 * NOTE: 4.4.5 Included in Literal
3792 * When a parameter entity reference appears in a literal entity
3793 * value, ... a single or double quote character in the replacement
3794 * text is always treated as a normal data character and will not
3795 * terminate the literal.
3796 * In practice it means we stop the loop only when back at parsing
3797 * the initial entity and the quote is found
3799 while (((IS_CHAR(c
)) && ((c
!= stop
) || /* checked */
3800 (ctxt
->input
!= input
))) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
3801 if (len
+ 5 >= size
) {
3805 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
3807 xmlErrMemory(ctxt
, NULL
);
3812 COPY_BUF(l
,buf
,len
,c
);
3823 if (ctxt
->instate
== XML_PARSER_EOF
)
3826 xmlFatalErr(ctxt
, XML_ERR_ENTITY_NOT_FINISHED
, NULL
);
3832 * Raise problem w.r.t. '&' and '%' being used in non-entities
3833 * reference constructs. Note Charref will be handled in
3834 * xmlStringDecodeEntities()
3837 while (*cur
!= 0) { /* non input consuming */
3838 if ((*cur
== '%') || ((*cur
== '&') && (cur
[1] != '#'))) {
3844 name
= xmlParseStringName(ctxt
, &cur
);
3849 if ((nameOk
== 0) || (*cur
!= ';')) {
3850 xmlFatalErrMsgInt(ctxt
, XML_ERR_ENTITY_CHAR_ERROR
,
3851 "EntityValue: '%c' forbidden except for entities references\n",
3855 if ((tmp
== '%') && (ctxt
->inSubset
== 1) &&
3856 (ctxt
->inputNr
== 1)) {
3857 xmlFatalErr(ctxt
, XML_ERR_ENTITY_PE_INTERNAL
, NULL
);
3867 * Then PEReference entities are substituted.
3869 * NOTE: 4.4.7 Bypassed
3870 * When a general entity reference appears in the EntityValue in
3871 * an entity declaration, it is bypassed and left as is.
3872 * so XML_SUBSTITUTE_REF is not set here.
3875 ret
= xmlStringDecodeEntities(ctxt
, buf
, XML_SUBSTITUTE_PEREF
,
3890 * xmlParseAttValueComplex:
3891 * @ctxt: an XML parser context
3892 * @len: the resulting attribute len
3893 * @normalize: whether to apply the inner normalization
3895 * parse a value for an attribute, this is the fallback function
3896 * of xmlParseAttValue() when the attribute parsing requires handling
3897 * of non-ASCII characters, or normalization compaction.
3899 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3902 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt
, int *attlen
, int normalize
) {
3904 xmlChar
*buf
= NULL
;
3905 xmlChar
*rep
= NULL
;
3907 size_t buf_size
= 0;
3908 int c
, l
, in_space
= 0;
3909 xmlChar
*current
= NULL
;
3912 if (NXT(0) == '"') {
3913 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
3916 } else if (NXT(0) == '\'') {
3918 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
3921 xmlFatalErr(ctxt
, XML_ERR_ATTRIBUTE_NOT_STARTED
, NULL
);
3926 * allocate a translation buffer.
3928 buf_size
= XML_PARSER_BUFFER_SIZE
;
3929 buf
= (xmlChar
*) xmlMallocAtomic(buf_size
);
3930 if (buf
== NULL
) goto mem_error
;
3933 * OK loop until we reach one of the ending char or a size limit.
3936 while (((NXT(0) != limit
) && /* checked */
3937 (IS_CHAR(c
)) && (c
!= '<')) &&
3938 (ctxt
->instate
!= XML_PARSER_EOF
)) {
3940 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3941 * special option is given
3943 if ((len
> XML_MAX_TEXT_LENGTH
) &&
3944 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
3945 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
3946 "AttValue length too long\n");
3951 if (NXT(1) == '#') {
3952 int val
= xmlParseCharRef(ctxt
);
3955 if (ctxt
->replaceEntities
) {
3956 if (len
+ 10 > buf_size
) {
3957 growBuffer(buf
, 10);
3962 * The reparsing will be done in xmlStringGetNodeList()
3963 * called by the attribute() function in SAX.c
3965 if (len
+ 10 > buf_size
) {
3966 growBuffer(buf
, 10);
3974 } else if (val
!= 0) {
3975 if (len
+ 10 > buf_size
) {
3976 growBuffer(buf
, 10);
3978 len
+= xmlCopyChar(0, &buf
[len
], val
);
3981 ent
= xmlParseEntityRef(ctxt
);
3984 ctxt
->nbentities
+= ent
->owner
;
3985 if ((ent
!= NULL
) &&
3986 (ent
->etype
== XML_INTERNAL_PREDEFINED_ENTITY
)) {
3987 if (len
+ 10 > buf_size
) {
3988 growBuffer(buf
, 10);
3990 if ((ctxt
->replaceEntities
== 0) &&
3991 (ent
->content
[0] == '&')) {
3998 buf
[len
++] = ent
->content
[0];
4000 } else if ((ent
!= NULL
) &&
4001 (ctxt
->replaceEntities
!= 0)) {
4002 if (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) {
4004 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
,
4010 while (*current
!= 0) { /* non input consuming */
4011 if ((*current
== 0xD) || (*current
== 0xA) ||
4012 (*current
== 0x9)) {
4016 buf
[len
++] = *current
++;
4017 if (len
+ 10 > buf_size
) {
4018 growBuffer(buf
, 10);
4025 if (len
+ 10 > buf_size
) {
4026 growBuffer(buf
, 10);
4028 if (ent
->content
!= NULL
)
4029 buf
[len
++] = ent
->content
[0];
4031 } else if (ent
!= NULL
) {
4032 int i
= xmlStrlen(ent
->name
);
4033 const xmlChar
*cur
= ent
->name
;
4036 * This may look absurd but is needed to detect
4039 if ((ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) &&
4040 (ent
->content
!= NULL
) && (ent
->checked
== 0)) {
4041 unsigned long oldnbent
= ctxt
->nbentities
, diff
;
4044 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
,
4045 XML_SUBSTITUTE_REF
, 0, 0, 0);
4048 diff
= ctxt
->nbentities
- oldnbent
+ 1;
4049 if (diff
> INT_MAX
/ 2)
4051 ent
->checked
= diff
* 2;
4053 if (xmlStrchr(rep
, '<'))
4058 ent
->content
[0] = 0;
4063 * Just output the reference
4066 while (len
+ i
+ 10 > buf_size
) {
4067 growBuffer(buf
, i
+ 10);
4070 buf
[len
++] = *cur
++;
4075 if ((c
== 0x20) || (c
== 0xD) || (c
== 0xA) || (c
== 0x9)) {
4076 if ((len
!= 0) || (!normalize
)) {
4077 if ((!normalize
) || (!in_space
)) {
4078 COPY_BUF(l
,buf
,len
,0x20);
4079 while (len
+ 10 > buf_size
) {
4080 growBuffer(buf
, 10);
4087 COPY_BUF(l
,buf
,len
,c
);
4088 if (len
+ 10 > buf_size
) {
4089 growBuffer(buf
, 10);
4097 if (ctxt
->instate
== XML_PARSER_EOF
)
4100 if ((in_space
) && (normalize
)) {
4101 while ((len
> 0) && (buf
[len
- 1] == 0x20)) len
--;
4105 xmlFatalErr(ctxt
, XML_ERR_LT_IN_ATTRIBUTE
, NULL
);
4106 } else if (RAW
!= limit
) {
4107 if ((c
!= 0) && (!IS_CHAR(c
))) {
4108 xmlFatalErrMsg(ctxt
, XML_ERR_INVALID_CHAR
,
4109 "invalid character in attribute value\n");
4111 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
4112 "AttValue: ' expected\n");
4118 * Here we potentially risk an overflow; don't allow attribute values of
4119 * length more than INT_MAX, which is a very reasonable assumption!
4121 if (len
>= INT_MAX
) {
4122 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
4123 "AttValue length too long\n");
4127 if (attlen
!= NULL
) *attlen
= (int) len
;
4131 xmlErrMemory(ctxt
, NULL
);
4142 * @ctxt: an XML parser context
4144 * parse a value for an attribute
4145 * Note: the parser won't do substitution of entities here, this
4146 * will be handled later in xmlStringGetNodeList
4148 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4149 * "'" ([^<&'] | Reference)* "'"
4151 * 3.3.3 Attribute-Value Normalization:
4152 * Before the value of an attribute is passed to the application or
4153 * checked for validity, the XML processor must normalize it as follows:
4154 * - a character reference is processed by appending the referenced
4155 * character to the attribute value
4156 * - an entity reference is processed by recursively processing the
4157 * replacement text of the entity
4158 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4159 * appending #x20 to the normalized value, except that only a single
4160 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4161 * parsed entity or the literal entity value of an internal parsed entity
4162 * - other characters are processed by appending them to the normalized value
4163 * If the declared value is not CDATA, then the XML processor must further
4164 * process the normalized attribute value by discarding any leading and
4165 * trailing space (#x20) characters, and by replacing sequences of space
4166 * (#x20) characters by a single space (#x20) character.
4167 * All attributes for which no declaration has been read should be treated
4168 * by a non-validating parser as if declared CDATA.
4170 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4175 xmlParseAttValue(xmlParserCtxtPtr ctxt
) {
4176 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
)) return(NULL
);
4177 return(xmlParseAttValueInternal(ctxt
, NULL
, NULL
, 0));
4181 * xmlParseSystemLiteral:
4182 * @ctxt: an XML parser context
4184 * parse an XML Literal
4186 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4188 * Returns the SystemLiteral parsed or NULL
4192 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt
) {
4193 xmlChar
*buf
= NULL
;
4195 int size
= XML_PARSER_BUFFER_SIZE
;
4198 int state
= ctxt
->instate
;
4205 } else if (RAW
== '\'') {
4209 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_STARTED
, NULL
);
4213 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
4215 xmlErrMemory(ctxt
, NULL
);
4218 ctxt
->instate
= XML_PARSER_SYSTEM_LITERAL
;
4220 while ((IS_CHAR(cur
)) && (cur
!= stop
)) { /* checked */
4221 if (len
+ 5 >= size
) {
4224 if ((size
> XML_MAX_NAME_LENGTH
) &&
4225 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
4226 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "SystemLiteral");
4228 ctxt
->instate
= (xmlParserInputState
) state
;
4232 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
4235 xmlErrMemory(ctxt
, NULL
);
4236 ctxt
->instate
= (xmlParserInputState
) state
;
4246 if (ctxt
->instate
== XML_PARSER_EOF
) {
4251 COPY_BUF(l
,buf
,len
,cur
);
4261 ctxt
->instate
= (xmlParserInputState
) state
;
4262 if (!IS_CHAR(cur
)) {
4263 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_FINISHED
, NULL
);
4271 * xmlParsePubidLiteral:
4272 * @ctxt: an XML parser context
4274 * parse an XML public literal
4276 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4278 * Returns the PubidLiteral parsed or NULL.
4282 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt
) {
4283 xmlChar
*buf
= NULL
;
4285 int size
= XML_PARSER_BUFFER_SIZE
;
4289 xmlParserInputState oldstate
= ctxt
->instate
;
4295 } else if (RAW
== '\'') {
4299 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_STARTED
, NULL
);
4302 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
4304 xmlErrMemory(ctxt
, NULL
);
4307 ctxt
->instate
= XML_PARSER_PUBLIC_LITERAL
;
4309 while ((IS_PUBIDCHAR_CH(cur
)) && (cur
!= stop
)) { /* checked */
4310 if (len
+ 1 >= size
) {
4313 if ((size
> XML_MAX_NAME_LENGTH
) &&
4314 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
4315 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "Public ID");
4320 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
4322 xmlErrMemory(ctxt
, NULL
);
4334 if (ctxt
->instate
== XML_PARSER_EOF
) {
4349 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_FINISHED
, NULL
);
4353 ctxt
->instate
= oldstate
;
4357 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt
, int cdata
);
4360 * used for the test in the inner loop of the char data testing
4362 static const unsigned char test_char_data
[256] = {
4363 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4364 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4365 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4366 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4367 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4368 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4369 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4370 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4371 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4372 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4373 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4374 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4375 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4376 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4377 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4378 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4379 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4380 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4381 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4382 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4383 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4384 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4385 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4386 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4387 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4388 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4389 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4390 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4391 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4392 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4393 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4394 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4399 * @ctxt: an XML parser context
4400 * @cdata: int indicating whether we are within a CDATA section
4402 * parse a CharData section.
4403 * if we are within a CDATA section ']]>' marks an end of section.
4405 * The right angle bracket (>) may be represented using the string "&gt;",
4406 * and must, for compatibility, be escaped using "&gt;" or a character
4407 * reference when it appears in the string "]]>" in content, when that
4408 * string is not marking the end of a CDATA section.
4410 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4414 xmlParseCharData(xmlParserCtxtPtr ctxt
, int cdata
) {
4417 int line
= ctxt
->input
->line
;
4418 int col
= ctxt
->input
->col
;
4424 * Accelerated common case where the input doesn't need to be
4425 * modified before passing it to the handler.
4428 in
= ctxt
->input
->cur
;
4431 while (*in
== 0x20) { in
++; ctxt
->input
->col
++; }
4434 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4436 } while (*in
== 0xA);
4437 goto get_more_space
;
4440 nbchar
= in
- ctxt
->input
->cur
;
4442 const xmlChar
*tmp
= ctxt
->input
->cur
;
4443 ctxt
->input
->cur
= in
;
4445 if ((ctxt
->sax
!= NULL
) &&
4446 (ctxt
->sax
->ignorableWhitespace
!=
4447 ctxt
->sax
->characters
)) {
4448 if (areBlanks(ctxt
, tmp
, nbchar
, 1)) {
4449 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4450 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
4453 if (ctxt
->sax
->characters
!= NULL
)
4454 ctxt
->sax
->characters(ctxt
->userData
,
4456 if (*ctxt
->space
== -1)
4459 } else if ((ctxt
->sax
!= NULL
) &&
4460 (ctxt
->sax
->characters
!= NULL
)) {
4461 ctxt
->sax
->characters(ctxt
->userData
,
4469 ccol
= ctxt
->input
->col
;
4470 while (test_char_data
[*in
]) {
4474 ctxt
->input
->col
= ccol
;
4477 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4479 } while (*in
== 0xA);
4483 if ((in
[1] == ']') && (in
[2] == '>')) {
4484 xmlFatalErr(ctxt
, XML_ERR_MISPLACED_CDATA_END
, NULL
);
4485 ctxt
->input
->cur
= in
+ 1;
4492 nbchar
= in
- ctxt
->input
->cur
;
4494 if ((ctxt
->sax
!= NULL
) &&
4495 (ctxt
->sax
->ignorableWhitespace
!=
4496 ctxt
->sax
->characters
) &&
4497 (IS_BLANK_CH(*ctxt
->input
->cur
))) {
4498 const xmlChar
*tmp
= ctxt
->input
->cur
;
4499 ctxt
->input
->cur
= in
;
4501 if (areBlanks(ctxt
, tmp
, nbchar
, 0)) {
4502 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4503 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
4506 if (ctxt
->sax
->characters
!= NULL
)
4507 ctxt
->sax
->characters(ctxt
->userData
,
4509 if (*ctxt
->space
== -1)
4512 line
= ctxt
->input
->line
;
4513 col
= ctxt
->input
->col
;
4514 } else if (ctxt
->sax
!= NULL
) {
4515 if (ctxt
->sax
->characters
!= NULL
)
4516 ctxt
->sax
->characters(ctxt
->userData
,
4517 ctxt
->input
->cur
, nbchar
);
4518 line
= ctxt
->input
->line
;
4519 col
= ctxt
->input
->col
;
4521 /* something really bad happened in the SAX callback */
4522 if (ctxt
->instate
!= XML_PARSER_CONTENT
)
4525 ctxt
->input
->cur
= in
;
4529 ctxt
->input
->cur
= in
;
4531 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4532 continue; /* while */
4544 if (ctxt
->instate
== XML_PARSER_EOF
)
4546 in
= ctxt
->input
->cur
;
4547 } while (((*in
>= 0x20) && (*in
<= 0x7F)) || (*in
== 0x09) || (*in
== 0x0a));
4550 ctxt
->input
->line
= line
;
4551 ctxt
->input
->col
= col
;
4552 xmlParseCharDataComplex(ctxt
, cdata
);
4556 * xmlParseCharDataComplex:
4557 * @ctxt: an XML parser context
4558 * @cdata: int indicating whether we are within a CDATA section
4560 * parse a CharData section. This is the fallback function
4561 * of xmlParseCharData() when the parsing requires handling
4562 * of non-ASCII characters.
4565 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt
, int cdata
) {
4566 xmlChar buf
[XML_PARSER_BIG_BUFFER_SIZE
+ 5];
4574 while ((cur
!= '<') && /* checked */
4576 (IS_CHAR(cur
))) /* test also done in xmlCurrentChar() */ {
4577 if ((cur
== ']') && (NXT(1) == ']') &&
4581 xmlFatalErr(ctxt
, XML_ERR_MISPLACED_CDATA_END
, NULL
);
4584 COPY_BUF(l
,buf
,nbchar
,cur
);
4585 /* move current position before possible calling of ctxt->sax->characters */
4588 if (nbchar
>= XML_PARSER_BIG_BUFFER_SIZE
) {
4592 * OK the segment is to be consumed as chars.
4594 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
4595 if (areBlanks(ctxt
, buf
, nbchar
, 0)) {
4596 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4597 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
4600 if (ctxt
->sax
->characters
!= NULL
)
4601 ctxt
->sax
->characters(ctxt
->userData
, buf
, nbchar
);
4602 if ((ctxt
->sax
->characters
!=
4603 ctxt
->sax
->ignorableWhitespace
) &&
4604 (*ctxt
->space
== -1))
4609 /* something really bad happened in the SAX callback */
4610 if (ctxt
->instate
!= XML_PARSER_CONTENT
)
4618 if (ctxt
->instate
== XML_PARSER_EOF
)
4625 * OK the segment is to be consumed as chars.
4627 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
4628 if (areBlanks(ctxt
, buf
, nbchar
, 0)) {
4629 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4630 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
, buf
, nbchar
);
4632 if (ctxt
->sax
->characters
!= NULL
)
4633 ctxt
->sax
->characters(ctxt
->userData
, buf
, nbchar
);
4634 if ((ctxt
->sax
->characters
!= ctxt
->sax
->ignorableWhitespace
) &&
4635 (*ctxt
->space
== -1))
4640 if ((cur
!= 0) && (!IS_CHAR(cur
))) {
4641 /* Generate the error and skip the offending character */
4642 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4643 "PCDATA invalid Char value %d\n",
4650 * xmlParseExternalID:
4651 * @ctxt: an XML parser context
4652 * @publicID: a xmlChar** receiving PubidLiteral
4653 * @strict: indicate whether we should restrict parsing to only
4654 * production [75], see NOTE below
4656 * Parse an External ID or a Public ID
4658 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4659 * 'PUBLIC' S PubidLiteral S SystemLiteral
4661 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4662 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4664 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4666 * Returns the SystemLiteral; in the second case publicID also receives
4667 * the PubidLiteral. If strict is off, it is possible to return NULL
4668 * and still have publicID set.
4672 xmlParseExternalID(xmlParserCtxtPtr ctxt
, xmlChar
**publicID
, int strict
) {
4673 xmlChar
*URI
= NULL
;
4678 if (CMP6(CUR_PTR
, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4680 if (SKIP_BLANKS
== 0) {
4681 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4682 "Space required after 'SYSTEM'\n");
4684 URI
= xmlParseSystemLiteral(ctxt
);
4686 xmlFatalErr(ctxt
, XML_ERR_URI_REQUIRED
, NULL
);
4688 } else if (CMP6(CUR_PTR
, 'P', 'U', 'B', 'L', 'I', 'C')) {
4690 if (SKIP_BLANKS
== 0) {
4691 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4692 "Space required after 'PUBLIC'\n");
4694 *publicID
= xmlParsePubidLiteral(ctxt
);
4695 if (*publicID
== NULL
) {
4696 xmlFatalErr(ctxt
, XML_ERR_PUBID_REQUIRED
, NULL
);
4700 * We don't handle [83] so "S SystemLiteral" is required.
4702 if (SKIP_BLANKS
== 0) {
4703 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4704 "Space required after the Public Identifier\n");
4708 * We handle [83] so we return immediately, if
4709 * "S SystemLiteral" is not detected. We skip blanks if no
4710 * system literal was found, but this is harmless since we must
4711 * be at the end of a NotationDecl.
4713 if (SKIP_BLANKS
== 0) return(NULL
);
4714 if ((CUR
!= '\'') && (CUR
!= '"')) return(NULL
);
4716 URI
= xmlParseSystemLiteral(ctxt
);
4718 xmlFatalErr(ctxt
, XML_ERR_URI_REQUIRED
, NULL
);
4725 * xmlParseCommentComplex:
4726 * @ctxt: an XML parser context
4727 * @buf: the already parsed part of the buffer
4728 * @len: number of bytes in the buffer
4729 * @size: allocated size of the buffer
4731 * Skip an XML (SGML) comment <!-- .... -->
4732 * The spec says that "For compatibility, the string "--" (double-hyphen)
4733 * must not occur within comments. "
4734 * This is the slow routine in case the accelerator for ascii didn't work
4736 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4739 xmlParseCommentComplex(xmlParserCtxtPtr ctxt
, xmlChar
*buf
,
4740 size_t len
, size_t size
) {
4747 inputid
= ctxt
->input
->id
;
4751 size
= XML_PARSER_BUFFER_SIZE
;
4752 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
4754 xmlErrMemory(ctxt
, NULL
);
4758 GROW
; /* Assure there's enough input data */
4761 goto not_terminated
;
4763 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4764 "xmlParseComment: invalid xmlChar value %d\n",
4772 goto not_terminated
;
4774 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4775 "xmlParseComment: invalid xmlChar value %d\n",
4783 goto not_terminated
;
4784 while (IS_CHAR(cur
) && /* checked */
4786 (r
!= '-') || (q
!= '-'))) {
4787 if ((r
== '-') && (q
== '-')) {
4788 xmlFatalErr(ctxt
, XML_ERR_HYPHEN_IN_COMMENT
, NULL
);
4790 if ((len
> XML_MAX_TEXT_LENGTH
) &&
4791 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
4792 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4793 "Comment too big found", NULL
);
4797 if (len
+ 5 >= size
) {
4801 new_size
= size
* 2;
4802 new_buf
= (xmlChar
*) xmlRealloc(buf
, new_size
);
4803 if (new_buf
== NULL
) {
4805 xmlErrMemory(ctxt
, NULL
);
4811 COPY_BUF(ql
,buf
,len
,q
);
4822 if (ctxt
->instate
== XML_PARSER_EOF
) {
4837 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4838 "Comment not terminated \n<!--%.50s\n", buf
);
4839 } else if (!IS_CHAR(cur
)) {
4840 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4841 "xmlParseComment: invalid xmlChar value %d\n",
4844 if (inputid
!= ctxt
->input
->id
) {
4845 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
4846 "Comment doesn't start and stop in the same"
4850 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->comment
!= NULL
) &&
4851 (!ctxt
->disableSAX
))
4852 ctxt
->sax
->comment(ctxt
->userData
, buf
);
4857 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4858 "Comment not terminated\n", NULL
);
4865 * @ctxt: an XML parser context
4867 * Skip an XML (SGML) comment <!-- .... -->
4868 * The spec says that "For compatibility, the string "--" (double-hyphen)
4869 * must not occur within comments. "
4871 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4874 xmlParseComment(xmlParserCtxtPtr ctxt
) {
4875 xmlChar
*buf
= NULL
;
4876 size_t size
= XML_PARSER_BUFFER_SIZE
;
4878 xmlParserInputState state
;
4885 * Check that there is a comment right here.
4887 if ((RAW
!= '<') || (NXT(1) != '!') ||
4888 (NXT(2) != '-') || (NXT(3) != '-')) return;
4889 state
= ctxt
->instate
;
4890 ctxt
->instate
= XML_PARSER_COMMENT
;
4891 inputid
= ctxt
->input
->id
;
4897 * Accelerated common case where the input doesn't need to be
4898 * modified before passing it to the handler.
4900 in
= ctxt
->input
->cur
;
4904 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4906 } while (*in
== 0xA);
4909 ccol
= ctxt
->input
->col
;
4910 while (((*in
> '-') && (*in
<= 0x7F)) ||
4911 ((*in
>= 0x20) && (*in
< '-')) ||
4916 ctxt
->input
->col
= ccol
;
4919 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4921 } while (*in
== 0xA);
4924 nbchar
= in
- ctxt
->input
->cur
;
4926 * save current set of data
4929 if ((ctxt
->sax
!= NULL
) &&
4930 (ctxt
->sax
->comment
!= NULL
)) {
4932 if ((*in
== '-') && (in
[1] == '-'))
4935 size
= XML_PARSER_BUFFER_SIZE
+ nbchar
;
4936 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
4938 xmlErrMemory(ctxt
, NULL
);
4939 ctxt
->instate
= state
;
4943 } else if (len
+ nbchar
+ 1 >= size
) {
4945 size
+= len
+ nbchar
+ XML_PARSER_BUFFER_SIZE
;
4946 new_buf
= (xmlChar
*) xmlRealloc(buf
,
4947 size
* sizeof(xmlChar
));
4948 if (new_buf
== NULL
) {
4950 xmlErrMemory(ctxt
, NULL
);
4951 ctxt
->instate
= state
;
4956 memcpy(&buf
[len
], ctxt
->input
->cur
, nbchar
);
4961 if ((len
> XML_MAX_TEXT_LENGTH
) &&
4962 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
4963 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4964 "Comment too big found", NULL
);
4968 ctxt
->input
->cur
= in
;
4971 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4976 ctxt
->input
->cur
= in
;
4978 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4985 if (ctxt
->instate
== XML_PARSER_EOF
) {
4989 in
= ctxt
->input
->cur
;
4993 if (ctxt
->input
->id
!= inputid
) {
4994 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
4995 "comment doesn't start and stop in the"
4999 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->comment
!= NULL
) &&
5000 (!ctxt
->disableSAX
)) {
5002 ctxt
->sax
->comment(ctxt
->userData
, buf
);
5004 ctxt
->sax
->comment(ctxt
->userData
, BAD_CAST
"");
5008 if (ctxt
->instate
!= XML_PARSER_EOF
)
5009 ctxt
->instate
= state
;
5013 xmlFatalErrMsgStr(ctxt
, XML_ERR_HYPHEN_IN_COMMENT
,
5014 "Double hyphen within comment: "
5018 xmlFatalErrMsgStr(ctxt
, XML_ERR_HYPHEN_IN_COMMENT
,
5019 "Double hyphen within comment\n", NULL
);
5020 if (ctxt
->instate
== XML_PARSER_EOF
) {
5031 } while (((*in
>= 0x20) && (*in
<= 0x7F)) || (*in
== 0x09) || (*in
== 0x0a));
5032 xmlParseCommentComplex(ctxt
, buf
, len
, size
);
5033 ctxt
->instate
= state
;
5040 * @ctxt: an XML parser context
5042 * parse the name of a PI
5044 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5046 * Returns the PITarget name or NULL
5050 xmlParsePITarget(xmlParserCtxtPtr ctxt
) {
5051 const xmlChar
*name
;
5053 name
= xmlParseName(ctxt
);
5054 if ((name
!= NULL
) &&
5055 ((name
[0] == 'x') || (name
[0] == 'X')) &&
5056 ((name
[1] == 'm') || (name
[1] == 'M')) &&
5057 ((name
[2] == 'l') || (name
[2] == 'L'))) {
5059 if ((name
[0] == 'x') && (name
[1] == 'm') &&
5060 (name
[2] == 'l') && (name
[3] == 0)) {
5061 xmlFatalErrMsg(ctxt
, XML_ERR_RESERVED_XML_NAME
,
5062 "XML declaration allowed only at the start of the document\n");
5064 } else if (name
[3] == 0) {
5065 xmlFatalErr(ctxt
, XML_ERR_RESERVED_XML_NAME
, NULL
);
5069 if (xmlW3CPIs
[i
] == NULL
) break;
5070 if (xmlStrEqual(name
, (const xmlChar
*)xmlW3CPIs
[i
]))
5073 xmlWarningMsg(ctxt
, XML_ERR_RESERVED_XML_NAME
,
5074 "xmlParsePITarget: invalid name prefix 'xml'\n",
5077 if ((name
!= NULL
) && (xmlStrchr(name
, ':') != NULL
)) {
5078 xmlNsErr(ctxt
, XML_NS_ERR_COLON
,
5079 "colons are forbidden from PI names '%s'\n", name
, NULL
, NULL
);
5084 #ifdef LIBXML_CATALOG_ENABLED
5086 * xmlParseCatalogPI:
5087 * @ctxt: an XML parser context
5088 * @catalog: the PI value string
5090 * parse an XML Catalog Processing Instruction.
5092 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5094 * Occurs only if allowed by the user and if happening in the Misc
5095 * part of the document before any doctype information
5096 * This will add the given catalog to the parsing context in order
5097 * to be used if there is a resolution need further down in the document
5101 xmlParseCatalogPI(xmlParserCtxtPtr ctxt
, const xmlChar
*catalog
) {
5102 xmlChar
*URL
= NULL
;
5103 const xmlChar
*tmp
, *base
;
5107 while (IS_BLANK_CH(*tmp
)) tmp
++;
5108 if (xmlStrncmp(tmp
, BAD_CAST
"catalog", 7))
5111 while (IS_BLANK_CH(*tmp
)) tmp
++;
5116 while (IS_BLANK_CH(*tmp
)) tmp
++;
5118 if ((marker
!= '\'') && (marker
!= '"'))
5122 while ((*tmp
!= 0) && (*tmp
!= marker
)) tmp
++;
5125 URL
= xmlStrndup(base
, tmp
- base
);
5127 while (IS_BLANK_CH(*tmp
)) tmp
++;
5132 ctxt
->catalogs
= xmlCatalogAddLocal(ctxt
->catalogs
, URL
);
5138 xmlWarningMsg(ctxt
, XML_WAR_CATALOG_PI
,
5139 "Catalog PI syntax error: %s\n",
5148 * @ctxt: an XML parser context
5150 * parse an XML Processing Instruction.
5152 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5154 * The processing is transferred to SAX once parsed.
5158 xmlParsePI(xmlParserCtxtPtr ctxt
) {
5159 xmlChar
*buf
= NULL
;
5161 size_t size
= XML_PARSER_BUFFER_SIZE
;
5163 const xmlChar
*target
;
5164 xmlParserInputState state
;
5167 if ((RAW
== '<') && (NXT(1) == '?')) {
5168 int inputid
= ctxt
->input
->id
;
5169 state
= ctxt
->instate
;
5170 ctxt
->instate
= XML_PARSER_PI
;
5172 * this is a Processing Instruction.
5178 * Parse the target name and check for special support like
5181 target
= xmlParsePITarget(ctxt
);
5182 if (target
!= NULL
) {
5183 if ((RAW
== '?') && (NXT(1) == '>')) {
5184 if (inputid
!= ctxt
->input
->id
) {
5185 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5186 "PI declaration doesn't start and stop in"
5187 " the same entity\n");
5194 if ((ctxt
->sax
) && (!ctxt
->disableSAX
) &&
5195 (ctxt
->sax
->processingInstruction
!= NULL
))
5196 ctxt
->sax
->processingInstruction(ctxt
->userData
,
5198 if (ctxt
->instate
!= XML_PARSER_EOF
)
5199 ctxt
->instate
= state
;
5202 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
5204 xmlErrMemory(ctxt
, NULL
);
5205 ctxt
->instate
= state
;
5208 if (SKIP_BLANKS
== 0) {
5209 xmlFatalErrMsgStr(ctxt
, XML_ERR_SPACE_REQUIRED
,
5210 "ParsePI: PI %s space expected\n", target
);
5213 while (IS_CHAR(cur
) && /* checked */
5214 ((cur
!= '?') || (NXT(1) != '>'))) {
5215 if (len
+ 5 >= size
) {
5217 size_t new_size
= size
* 2;
5218 tmp
= (xmlChar
*) xmlRealloc(buf
, new_size
);
5220 xmlErrMemory(ctxt
, NULL
);
5222 ctxt
->instate
= state
;
5232 if (ctxt
->instate
== XML_PARSER_EOF
) {
5237 if ((len
> XML_MAX_TEXT_LENGTH
) &&
5238 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
5239 xmlFatalErrMsgStr(ctxt
, XML_ERR_PI_NOT_FINISHED
,
5240 "PI %s too big found", target
);
5242 ctxt
->instate
= state
;
5246 COPY_BUF(l
,buf
,len
,cur
);
5255 if ((len
> XML_MAX_TEXT_LENGTH
) &&
5256 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
5257 xmlFatalErrMsgStr(ctxt
, XML_ERR_PI_NOT_FINISHED
,
5258 "PI %s too big found", target
);
5260 ctxt
->instate
= state
;
5265 xmlFatalErrMsgStr(ctxt
, XML_ERR_PI_NOT_FINISHED
,
5266 "ParsePI: PI %s never end ...\n", target
);
5268 if (inputid
!= ctxt
->input
->id
) {
5269 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5270 "PI declaration doesn't start and stop in"
5271 " the same entity\n");
5275 #ifdef LIBXML_CATALOG_ENABLED
5276 if (((state
== XML_PARSER_MISC
) ||
5277 (state
== XML_PARSER_START
)) &&
5278 (xmlStrEqual(target
, XML_CATALOG_PI
))) {
5279 xmlCatalogAllow allow
= xmlCatalogGetDefaults();
5280 if ((allow
== XML_CATA_ALLOW_DOCUMENT
) ||
5281 (allow
== XML_CATA_ALLOW_ALL
))
5282 xmlParseCatalogPI(ctxt
, buf
);
5290 if ((ctxt
->sax
) && (!ctxt
->disableSAX
) &&
5291 (ctxt
->sax
->processingInstruction
!= NULL
))
5292 ctxt
->sax
->processingInstruction(ctxt
->userData
,
5297 xmlFatalErr(ctxt
, XML_ERR_PI_NOT_STARTED
, NULL
);
5299 if (ctxt
->instate
!= XML_PARSER_EOF
)
5300 ctxt
->instate
= state
;
5305 * xmlParseNotationDecl:
5306 * @ctxt: an XML parser context
5308 * parse a notation declaration
5310 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5312 * Hence there are actually 3 choices:
5313 * 'PUBLIC' S PubidLiteral
5314 * 'PUBLIC' S PubidLiteral S SystemLiteral
5315 * and 'SYSTEM' S SystemLiteral
5317 * See the NOTE on xmlParseExternalID().
5321 xmlParseNotationDecl(xmlParserCtxtPtr ctxt
) {
5322 const xmlChar
*name
;
5326 if (CMP10(CUR_PTR
, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5327 int inputid
= ctxt
->input
->id
;
5330 if (SKIP_BLANKS
== 0) {
5331 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5332 "Space required after '<!NOTATION'\n");
5336 name
= xmlParseName(ctxt
);
5338 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_STARTED
, NULL
);
5341 if (xmlStrchr(name
, ':') != NULL
) {
5342 xmlNsErr(ctxt
, XML_NS_ERR_COLON
,
5343 "colons are forbidden from notation names '%s'\n",
5346 if (SKIP_BLANKS
== 0) {
5347 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5348 "Space required after the NOTATION name'\n");
5355 Systemid
= xmlParseExternalID(ctxt
, &Pubid
, 0);
5359 if (inputid
!= ctxt
->input
->id
) {
5360 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5361 "Notation declaration doesn't start and stop"
5362 " in the same entity\n");
5365 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
5366 (ctxt
->sax
->notationDecl
!= NULL
))
5367 ctxt
->sax
->notationDecl(ctxt
->userData
, name
, Pubid
, Systemid
);
5369 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_FINISHED
, NULL
);
5371 if (Systemid
!= NULL
) xmlFree(Systemid
);
5372 if (Pubid
!= NULL
) xmlFree(Pubid
);
5377 * xmlParseEntityDecl:
5378 * @ctxt: an XML parser context
5380 * parse <!ENTITY declarations
5382 * [70] EntityDecl ::= GEDecl | PEDecl
5384 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5386 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5388 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5390 * [74] PEDef ::= EntityValue | ExternalID
5392 * [76] NDataDecl ::= S 'NDATA' S Name
5394 * [ VC: Notation Declared ]
5395 * The Name must match the declared name of a notation.
5399 xmlParseEntityDecl(xmlParserCtxtPtr ctxt
) {
5400 const xmlChar
*name
= NULL
;
5401 xmlChar
*value
= NULL
;
5402 xmlChar
*URI
= NULL
, *literal
= NULL
;
5403 const xmlChar
*ndata
= NULL
;
5404 int isParameter
= 0;
5405 xmlChar
*orig
= NULL
;
5407 /* GROW; done in the caller */
5408 if (CMP8(CUR_PTR
, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5409 int inputid
= ctxt
->input
->id
;
5412 if (SKIP_BLANKS
== 0) {
5413 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5414 "Space required after '<!ENTITY'\n");
5419 if (SKIP_BLANKS
== 0) {
5420 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5421 "Space required after '%%'\n");
5426 name
= xmlParseName(ctxt
);
5428 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5429 "xmlParseEntityDecl: no name\n");
5432 if (xmlStrchr(name
, ':') != NULL
) {
5433 xmlNsErr(ctxt
, XML_NS_ERR_COLON
,
5434 "colons are forbidden from entities names '%s'\n",
5437 if (SKIP_BLANKS
== 0) {
5438 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5439 "Space required after the entity name\n");
5442 ctxt
->instate
= XML_PARSER_ENTITY_DECL
;
5444 * handle the various case of definitions...
5447 if ((RAW
== '"') || (RAW
== '\'')) {
5448 value
= xmlParseEntityValue(ctxt
, &orig
);
5450 if ((ctxt
->sax
!= NULL
) &&
5451 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
5452 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5453 XML_INTERNAL_PARAMETER_ENTITY
,
5457 URI
= xmlParseExternalID(ctxt
, &literal
, 1);
5458 if ((URI
== NULL
) && (literal
== NULL
)) {
5459 xmlFatalErr(ctxt
, XML_ERR_VALUE_REQUIRED
, NULL
);
5464 uri
= xmlParseURI((const char *) URI
);
5466 xmlErrMsgStr(ctxt
, XML_ERR_INVALID_URI
,
5467 "Invalid URI: %s\n", URI
);
5469 * This really ought to be a well formedness error
5470 * but the XML Core WG decided otherwise c.f. issue
5471 * E26 of the XML erratas.
5474 if (uri
->fragment
!= NULL
) {
5476 * Okay this is foolish to block those but not
5479 xmlFatalErr(ctxt
, XML_ERR_URI_FRAGMENT
, NULL
);
5481 if ((ctxt
->sax
!= NULL
) &&
5482 (!ctxt
->disableSAX
) &&
5483 (ctxt
->sax
->entityDecl
!= NULL
))
5484 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5485 XML_EXTERNAL_PARAMETER_ENTITY
,
5486 literal
, URI
, NULL
);
5493 if ((RAW
== '"') || (RAW
== '\'')) {
5494 value
= xmlParseEntityValue(ctxt
, &orig
);
5495 if ((ctxt
->sax
!= NULL
) &&
5496 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
5497 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5498 XML_INTERNAL_GENERAL_ENTITY
,
5501 * For expat compatibility in SAX mode.
5503 if ((ctxt
->myDoc
== NULL
) ||
5504 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
))) {
5505 if (ctxt
->myDoc
== NULL
) {
5506 ctxt
->myDoc
= xmlNewDoc(SAX_COMPAT_MODE
);
5507 if (ctxt
->myDoc
== NULL
) {
5508 xmlErrMemory(ctxt
, "New Doc failed");
5511 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
5513 if (ctxt
->myDoc
->intSubset
== NULL
)
5514 ctxt
->myDoc
->intSubset
= xmlNewDtd(ctxt
->myDoc
,
5515 BAD_CAST
"fake", NULL
, NULL
);
5517 xmlSAX2EntityDecl(ctxt
, name
, XML_INTERNAL_GENERAL_ENTITY
,
5521 URI
= xmlParseExternalID(ctxt
, &literal
, 1);
5522 if ((URI
== NULL
) && (literal
== NULL
)) {
5523 xmlFatalErr(ctxt
, XML_ERR_VALUE_REQUIRED
, NULL
);
5528 uri
= xmlParseURI((const char *)URI
);
5530 xmlErrMsgStr(ctxt
, XML_ERR_INVALID_URI
,
5531 "Invalid URI: %s\n", URI
);
5533 * This really ought to be a well formedness error
5534 * but the XML Core WG decided otherwise c.f. issue
5535 * E26 of the XML erratas.
5538 if (uri
->fragment
!= NULL
) {
5540 * Okay this is foolish to block those but not
5543 xmlFatalErr(ctxt
, XML_ERR_URI_FRAGMENT
, NULL
);
5548 if ((RAW
!= '>') && (SKIP_BLANKS
== 0)) {
5549 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5550 "Space required before 'NDATA'\n");
5552 if (CMP5(CUR_PTR
, 'N', 'D', 'A', 'T', 'A')) {
5554 if (SKIP_BLANKS
== 0) {
5555 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5556 "Space required after 'NDATA'\n");
5558 ndata
= xmlParseName(ctxt
);
5559 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
5560 (ctxt
->sax
->unparsedEntityDecl
!= NULL
))
5561 ctxt
->sax
->unparsedEntityDecl(ctxt
->userData
, name
,
5562 literal
, URI
, ndata
);
5564 if ((ctxt
->sax
!= NULL
) &&
5565 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
5566 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5567 XML_EXTERNAL_GENERAL_PARSED_ENTITY
,
5568 literal
, URI
, NULL
);
5570 * For expat compatibility in SAX mode.
5571 * assuming the entity replacement was asked for
5573 if ((ctxt
->replaceEntities
!= 0) &&
5574 ((ctxt
->myDoc
== NULL
) ||
5575 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
)))) {
5576 if (ctxt
->myDoc
== NULL
) {
5577 ctxt
->myDoc
= xmlNewDoc(SAX_COMPAT_MODE
);
5578 if (ctxt
->myDoc
== NULL
) {
5579 xmlErrMemory(ctxt
, "New Doc failed");
5582 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
5585 if (ctxt
->myDoc
->intSubset
== NULL
)
5586 ctxt
->myDoc
->intSubset
= xmlNewDtd(ctxt
->myDoc
,
5587 BAD_CAST
"fake", NULL
, NULL
);
5588 xmlSAX2EntityDecl(ctxt
, name
,
5589 XML_EXTERNAL_GENERAL_PARSED_ENTITY
,
5590 literal
, URI
, NULL
);
5595 if (ctxt
->instate
== XML_PARSER_EOF
)
5599 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_NOT_FINISHED
,
5600 "xmlParseEntityDecl: entity %s not terminated\n", name
);
5601 xmlHaltParser(ctxt
);
5603 if (inputid
!= ctxt
->input
->id
) {
5604 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5605 "Entity declaration doesn't start and stop in"
5606 " the same entity\n");
5612 * Ugly mechanism to save the raw entity value.
5614 xmlEntityPtr cur
= NULL
;
5617 if ((ctxt
->sax
!= NULL
) &&
5618 (ctxt
->sax
->getParameterEntity
!= NULL
))
5619 cur
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
5621 if ((ctxt
->sax
!= NULL
) &&
5622 (ctxt
->sax
->getEntity
!= NULL
))
5623 cur
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
5624 if ((cur
== NULL
) && (ctxt
->userData
==ctxt
)) {
5625 cur
= xmlSAX2GetEntity(ctxt
, name
);
5628 if ((cur
!= NULL
) && (cur
->orig
== NULL
)) {
5635 if (value
!= NULL
) xmlFree(value
);
5636 if (URI
!= NULL
) xmlFree(URI
);
5637 if (literal
!= NULL
) xmlFree(literal
);
5638 if (orig
!= NULL
) xmlFree(orig
);
5643 * xmlParseDefaultDecl:
5644 * @ctxt: an XML parser context
5645 * @value: Receive a possible fixed default value for the attribute
5647 * Parse an attribute default declaration
5649 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5651 * [ VC: Required Attribute ]
5652 * if the default declaration is the keyword #REQUIRED, then the
5653 * attribute must be specified for all elements of the type in the
5654 * attribute-list declaration.
5656 * [ VC: Attribute Default Legal ]
5657 * The declared default value must meet the lexical constraints of
5658 * the declared attribute type c.f. xmlValidateAttributeDecl()
5660 * [ VC: Fixed Attribute Default ]
5661 * if an attribute has a default value declared with the #FIXED
5662 * keyword, instances of that attribute must match the default value.
5664 * [ WFC: No < in Attribute Values ]
5665 * handled in xmlParseAttValue()
5667 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5668 * or XML_ATTRIBUTE_FIXED.
5672 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt
, xmlChar
**value
) {
5677 if (CMP9(CUR_PTR
, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5679 return(XML_ATTRIBUTE_REQUIRED
);
5681 if (CMP8(CUR_PTR
, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5683 return(XML_ATTRIBUTE_IMPLIED
);
5685 val
= XML_ATTRIBUTE_NONE
;
5686 if (CMP6(CUR_PTR
, '#', 'F', 'I', 'X', 'E', 'D')) {
5688 val
= XML_ATTRIBUTE_FIXED
;
5689 if (SKIP_BLANKS
== 0) {
5690 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5691 "Space required after '#FIXED'\n");
5694 ret
= xmlParseAttValue(ctxt
);
5695 ctxt
->instate
= XML_PARSER_DTD
;
5697 xmlFatalErrMsg(ctxt
, (xmlParserErrors
)ctxt
->errNo
,
5698 "Attribute default value declaration error\n");
5705 * xmlParseNotationType:
5706 * @ctxt: an XML parser context
5708 * parse a Notation attribute type.
5710 * Note: the leading 'NOTATION' S part has already been parsed...
5712 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5714 * [ VC: Notation Attributes ]
5715 * Values of this type must match one of the notation names included
5716 * in the declaration; all notation names in the declaration must be declared.
5718 * Returns: the notation attribute tree built while parsing
5722 xmlParseNotationType(xmlParserCtxtPtr ctxt
) {
5723 const xmlChar
*name
;
5724 xmlEnumerationPtr ret
= NULL
, last
= NULL
, cur
, tmp
;
5727 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_STARTED
, NULL
);
5734 name
= xmlParseName(ctxt
);
5736 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5737 "Name expected in NOTATION declaration\n");
5738 xmlFreeEnumeration(ret
);
5742 while (tmp
!= NULL
) {
5743 if (xmlStrEqual(name
, tmp
->name
)) {
5744 xmlValidityError(ctxt
, XML_DTD_DUP_TOKEN
,
5745 "standalone: attribute notation value token %s duplicated\n",
5747 if (!xmlDictOwns(ctxt
->dict
, name
))
5748 xmlFree((xmlChar
*) name
);
5754 cur
= xmlCreateEnumeration(name
);
5756 xmlFreeEnumeration(ret
);
5759 if (last
== NULL
) ret
= last
= cur
;
5766 } while (RAW
== '|');
5768 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_FINISHED
, NULL
);
5769 xmlFreeEnumeration(ret
);
5777 * xmlParseEnumerationType:
5778 * @ctxt: an XML parser context
5780 * parse an Enumeration attribute type.
5782 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5784 * [ VC: Enumeration ]
5785 * Values of this type must match one of the Nmtoken tokens in
5788 * Returns: the enumeration attribute tree built while parsing
5792 xmlParseEnumerationType(xmlParserCtxtPtr ctxt
) {
5794 xmlEnumerationPtr ret
= NULL
, last
= NULL
, cur
, tmp
;
5797 xmlFatalErr(ctxt
, XML_ERR_ATTLIST_NOT_STARTED
, NULL
);
5804 name
= xmlParseNmtoken(ctxt
);
5806 xmlFatalErr(ctxt
, XML_ERR_NMTOKEN_REQUIRED
, NULL
);
5810 while (tmp
!= NULL
) {
5811 if (xmlStrEqual(name
, tmp
->name
)) {
5812 xmlValidityError(ctxt
, XML_DTD_DUP_TOKEN
,
5813 "standalone: attribute enumeration value token %s duplicated\n",
5815 if (!xmlDictOwns(ctxt
->dict
, name
))
5822 cur
= xmlCreateEnumeration(name
);
5823 if (!xmlDictOwns(ctxt
->dict
, name
))
5826 xmlFreeEnumeration(ret
);
5829 if (last
== NULL
) ret
= last
= cur
;
5836 } while (RAW
== '|');
5838 xmlFatalErr(ctxt
, XML_ERR_ATTLIST_NOT_FINISHED
, NULL
);
5846 * xmlParseEnumeratedType:
5847 * @ctxt: an XML parser context
5848 * @tree: the enumeration tree built while parsing
5850 * parse an Enumerated attribute type.
5852 * [57] EnumeratedType ::= NotationType | Enumeration
5854 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5857 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5861 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt
, xmlEnumerationPtr
*tree
) {
5862 if (CMP8(CUR_PTR
, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5864 if (SKIP_BLANKS
== 0) {
5865 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5866 "Space required after 'NOTATION'\n");
5869 *tree
= xmlParseNotationType(ctxt
);
5870 if (*tree
== NULL
) return(0);
5871 return(XML_ATTRIBUTE_NOTATION
);
5873 *tree
= xmlParseEnumerationType(ctxt
);
5874 if (*tree
== NULL
) return(0);
5875 return(XML_ATTRIBUTE_ENUMERATION
);
5879 * xmlParseAttributeType:
5880 * @ctxt: an XML parser context
5881 * @tree: the enumeration tree built while parsing
5883 * parse the Attribute list def for an element
5885 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5887 * [55] StringType ::= 'CDATA'
5889 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5890 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5892 * Validity constraints for attribute values syntax are checked in
5893 * xmlValidateAttributeValue()
5896 * Values of type ID must match the Name production. A name must not
5897 * appear more than once in an XML document as a value of this type;
5898 * i.e., ID values must uniquely identify the elements which bear them.
5900 * [ VC: One ID per Element Type ]
5901 * No element type may have more than one ID attribute specified.
5903 * [ VC: ID Attribute Default ]
5904 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5907 * Values of type IDREF must match the Name production, and values
5908 * of type IDREFS must match Names; each IDREF Name must match the value
5909 * of an ID attribute on some element in the XML document; i.e. IDREF
5910 * values must match the value of some ID attribute.
5912 * [ VC: Entity Name ]
5913 * Values of type ENTITY must match the Name production, values
5914 * of type ENTITIES must match Names; each Entity Name must match the
5915 * name of an unparsed entity declared in the DTD.
5917 * [ VC: Name Token ]
5918 * Values of type NMTOKEN must match the Nmtoken production; values
5919 * of type NMTOKENS must match Nmtokens.
5921 * Returns the attribute type
5924 xmlParseAttributeType(xmlParserCtxtPtr ctxt
, xmlEnumerationPtr
*tree
) {
5926 if (CMP5(CUR_PTR
, 'C', 'D', 'A', 'T', 'A')) {
5928 return(XML_ATTRIBUTE_CDATA
);
5929 } else if (CMP6(CUR_PTR
, 'I', 'D', 'R', 'E', 'F', 'S')) {
5931 return(XML_ATTRIBUTE_IDREFS
);
5932 } else if (CMP5(CUR_PTR
, 'I', 'D', 'R', 'E', 'F')) {
5934 return(XML_ATTRIBUTE_IDREF
);
5935 } else if ((RAW
== 'I') && (NXT(1) == 'D')) {
5937 return(XML_ATTRIBUTE_ID
);
5938 } else if (CMP6(CUR_PTR
, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5940 return(XML_ATTRIBUTE_ENTITY
);
5941 } else if (CMP8(CUR_PTR
, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5943 return(XML_ATTRIBUTE_ENTITIES
);
5944 } else if (CMP8(CUR_PTR
, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5946 return(XML_ATTRIBUTE_NMTOKENS
);
5947 } else if (CMP7(CUR_PTR
, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5949 return(XML_ATTRIBUTE_NMTOKEN
);
5951 return(xmlParseEnumeratedType(ctxt
, tree
));
5955 * xmlParseAttributeListDecl:
5956 * @ctxt: an XML parser context
5958 * : parse the Attribute list def for an element
5960 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5962 * [53] AttDef ::= S Name S AttType S DefaultDecl
5966 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt
) {
5967 const xmlChar
*elemName
;
5968 const xmlChar
*attrName
;
5969 xmlEnumerationPtr tree
;
5971 if (CMP9(CUR_PTR
, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5972 int inputid
= ctxt
->input
->id
;
5975 if (SKIP_BLANKS
== 0) {
5976 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5977 "Space required after '<!ATTLIST'\n");
5979 elemName
= xmlParseName(ctxt
);
5980 if (elemName
== NULL
) {
5981 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5982 "ATTLIST: no name for Element\n");
5987 while ((RAW
!= '>') && (ctxt
->instate
!= XML_PARSER_EOF
)) {
5990 xmlChar
*defaultValue
= NULL
;
5994 attrName
= xmlParseName(ctxt
);
5995 if (attrName
== NULL
) {
5996 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5997 "ATTLIST: no name for Attribute\n");
6001 if (SKIP_BLANKS
== 0) {
6002 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6003 "Space required after the attribute name\n");
6007 type
= xmlParseAttributeType(ctxt
, &tree
);
6013 if (SKIP_BLANKS
== 0) {
6014 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6015 "Space required after the attribute type\n");
6017 xmlFreeEnumeration(tree
);
6021 def
= xmlParseDefaultDecl(ctxt
, &defaultValue
);
6023 if (defaultValue
!= NULL
)
6024 xmlFree(defaultValue
);
6026 xmlFreeEnumeration(tree
);
6029 if ((type
!= XML_ATTRIBUTE_CDATA
) && (defaultValue
!= NULL
))
6030 xmlAttrNormalizeSpace(defaultValue
, defaultValue
);
6034 if (SKIP_BLANKS
== 0) {
6035 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6036 "Space required after the attribute default value\n");
6037 if (defaultValue
!= NULL
)
6038 xmlFree(defaultValue
);
6040 xmlFreeEnumeration(tree
);
6044 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
6045 (ctxt
->sax
->attributeDecl
!= NULL
))
6046 ctxt
->sax
->attributeDecl(ctxt
->userData
, elemName
, attrName
,
6047 type
, def
, defaultValue
, tree
);
6048 else if (tree
!= NULL
)
6049 xmlFreeEnumeration(tree
);
6051 if ((ctxt
->sax2
) && (defaultValue
!= NULL
) &&
6052 (def
!= XML_ATTRIBUTE_IMPLIED
) &&
6053 (def
!= XML_ATTRIBUTE_REQUIRED
)) {
6054 xmlAddDefAttrs(ctxt
, elemName
, attrName
, defaultValue
);
6057 xmlAddSpecialAttr(ctxt
, elemName
, attrName
, type
);
6059 if (defaultValue
!= NULL
)
6060 xmlFree(defaultValue
);
6064 if (inputid
!= ctxt
->input
->id
) {
6065 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6066 "Attribute list declaration doesn't start and"
6067 " stop in the same entity\n");
6075 * xmlParseElementMixedContentDecl:
6076 * @ctxt: an XML parser context
6077 * @inputchk: the input used for the current entity, needed for boundary checks
6079 * parse the declaration for a Mixed Element content
6080 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6082 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6083 * '(' S? '#PCDATA' S? ')'
6085 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6087 * [ VC: No Duplicate Types ]
6088 * The same name must not appear more than once in a single
6089 * mixed-content declaration.
6091 * returns: the list of the xmlElementContentPtr describing the element choices
6093 xmlElementContentPtr
6094 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt
, int inputchk
) {
6095 xmlElementContentPtr ret
= NULL
, cur
= NULL
, n
;
6096 const xmlChar
*elem
= NULL
;
6099 if (CMP7(CUR_PTR
, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6104 if (ctxt
->input
->id
!= inputchk
) {
6105 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6106 "Element content declaration doesn't start and"
6107 " stop in the same entity\n");
6110 ret
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_PCDATA
);
6114 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6119 if ((RAW
== '(') || (RAW
== '|')) {
6120 ret
= cur
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_PCDATA
);
6121 if (ret
== NULL
) return(NULL
);
6123 while ((RAW
== '|') && (ctxt
->instate
!= XML_PARSER_EOF
)) {
6126 ret
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_OR
);
6128 xmlFreeDocElementContent(ctxt
->myDoc
, cur
);
6136 n
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_OR
);
6138 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6141 n
->c1
= xmlNewDocElementContent(ctxt
->myDoc
, elem
, XML_ELEMENT_CONTENT_ELEMENT
);
6150 elem
= xmlParseName(ctxt
);
6152 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
6153 "xmlParseElementMixedContentDecl : Name expected\n");
6154 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6160 if ((RAW
== ')') && (NXT(1) == '*')) {
6162 cur
->c2
= xmlNewDocElementContent(ctxt
->myDoc
, elem
,
6163 XML_ELEMENT_CONTENT_ELEMENT
);
6164 if (cur
->c2
!= NULL
)
6165 cur
->c2
->parent
= cur
;
6168 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6169 if (ctxt
->input
->id
!= inputchk
) {
6170 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6171 "Element content declaration doesn't start and"
6172 " stop in the same entity\n");
6176 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6177 xmlFatalErr(ctxt
, XML_ERR_MIXED_NOT_STARTED
, NULL
);
6182 xmlFatalErr(ctxt
, XML_ERR_PCDATA_REQUIRED
, NULL
);
6188 * xmlParseElementChildrenContentDeclPriv:
6189 * @ctxt: an XML parser context
6190 * @inputchk: the input used for the current entity, needed for boundary checks
6191 * @depth: the level of recursion
6193 * parse the declaration for a Mixed Element content
6194 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6197 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6199 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6201 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6203 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6205 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6206 * TODO Parameter-entity replacement text must be properly nested
6207 * with parenthesized groups. That is to say, if either of the
6208 * opening or closing parentheses in a choice, seq, or Mixed
6209 * construct is contained in the replacement text for a parameter
6210 * entity, both must be contained in the same replacement text. For
6211 * interoperability, if a parameter-entity reference appears in a
6212 * choice, seq, or Mixed construct, its replacement text should not
6213 * be empty, and neither the first nor last non-blank character of
6214 * the replacement text should be a connector (| or ,).
6216 * Returns the tree of xmlElementContentPtr describing the element
6219 static xmlElementContentPtr
6220 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt
, int inputchk
,
6222 xmlElementContentPtr ret
= NULL
, cur
= NULL
, last
= NULL
, op
= NULL
;
6223 const xmlChar
*elem
;
6226 if (((depth
> 128) && ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
6228 xmlFatalErrMsgInt(ctxt
, XML_ERR_ELEMCONTENT_NOT_FINISHED
,
6229 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6236 int inputid
= ctxt
->input
->id
;
6238 /* Recurse on first child */
6241 cur
= ret
= xmlParseElementChildrenContentDeclPriv(ctxt
, inputid
,
6248 elem
= xmlParseName(ctxt
);
6250 xmlFatalErr(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
, NULL
);
6253 cur
= ret
= xmlNewDocElementContent(ctxt
->myDoc
, elem
, XML_ELEMENT_CONTENT_ELEMENT
);
6255 xmlErrMemory(ctxt
, NULL
);
6260 cur
->ocur
= XML_ELEMENT_CONTENT_OPT
;
6262 } else if (RAW
== '*') {
6263 cur
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6265 } else if (RAW
== '+') {
6266 cur
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
6269 cur
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6275 while ((RAW
!= ')') && (ctxt
->instate
!= XML_PARSER_EOF
)) {
6277 * Each loop we parse one separator and one element.
6280 if (type
== 0) type
= CUR
;
6283 * Detect "Name | Name , Name" error
6285 else if (type
!= CUR
) {
6286 xmlFatalErrMsgInt(ctxt
, XML_ERR_SEPARATOR_REQUIRED
,
6287 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6289 if ((last
!= NULL
) && (last
!= ret
))
6290 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6292 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6297 op
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_SEQ
);
6299 if ((last
!= NULL
) && (last
!= ret
))
6300 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6301 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6319 } else if (RAW
== '|') {
6320 if (type
== 0) type
= CUR
;
6323 * Detect "Name , Name | Name" error
6325 else if (type
!= CUR
) {
6326 xmlFatalErrMsgInt(ctxt
, XML_ERR_SEPARATOR_REQUIRED
,
6327 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6329 if ((last
!= NULL
) && (last
!= ret
))
6330 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6332 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6337 op
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_OR
);
6339 if ((last
!= NULL
) && (last
!= ret
))
6340 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6342 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6361 xmlFatalErr(ctxt
, XML_ERR_ELEMCONTENT_NOT_FINISHED
, NULL
);
6362 if ((last
!= NULL
) && (last
!= ret
))
6363 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6365 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6372 int inputid
= ctxt
->input
->id
;
6373 /* Recurse on second child */
6376 last
= xmlParseElementChildrenContentDeclPriv(ctxt
, inputid
,
6380 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6385 elem
= xmlParseName(ctxt
);
6387 xmlFatalErr(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
, NULL
);
6389 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6392 last
= xmlNewDocElementContent(ctxt
->myDoc
, elem
, XML_ELEMENT_CONTENT_ELEMENT
);
6395 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6399 last
->ocur
= XML_ELEMENT_CONTENT_OPT
;
6401 } else if (RAW
== '*') {
6402 last
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6404 } else if (RAW
== '+') {
6405 last
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
6408 last
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6414 if ((cur
!= NULL
) && (last
!= NULL
)) {
6419 if (ctxt
->input
->id
!= inputchk
) {
6420 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6421 "Element content declaration doesn't start and stop in"
6422 " the same entity\n");
6427 if ((ret
->ocur
== XML_ELEMENT_CONTENT_PLUS
) ||
6428 (ret
->ocur
== XML_ELEMENT_CONTENT_MULT
))
6429 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6431 ret
->ocur
= XML_ELEMENT_CONTENT_OPT
;
6434 } else if (RAW
== '*') {
6436 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6439 * Some normalization:
6440 * (a | b* | c?)* == (a | b | c)*
6442 while ((cur
!= NULL
) && (cur
->type
== XML_ELEMENT_CONTENT_OR
)) {
6443 if ((cur
->c1
!= NULL
) &&
6444 ((cur
->c1
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6445 (cur
->c1
->ocur
== XML_ELEMENT_CONTENT_MULT
)))
6446 cur
->c1
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6447 if ((cur
->c2
!= NULL
) &&
6448 ((cur
->c2
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6449 (cur
->c2
->ocur
== XML_ELEMENT_CONTENT_MULT
)))
6450 cur
->c2
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6455 } else if (RAW
== '+') {
6459 if ((ret
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6460 (ret
->ocur
== XML_ELEMENT_CONTENT_MULT
))
6461 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6463 ret
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
6465 * Some normalization:
6466 * (a | b*)+ == (a | b)*
6467 * (a | b?)+ == (a | b)*
6469 while ((cur
!= NULL
) && (cur
->type
== XML_ELEMENT_CONTENT_OR
)) {
6470 if ((cur
->c1
!= NULL
) &&
6471 ((cur
->c1
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6472 (cur
->c1
->ocur
== XML_ELEMENT_CONTENT_MULT
))) {
6473 cur
->c1
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6476 if ((cur
->c2
!= NULL
) &&
6477 ((cur
->c2
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6478 (cur
->c2
->ocur
== XML_ELEMENT_CONTENT_MULT
))) {
6479 cur
->c2
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6485 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6493 * xmlParseElementChildrenContentDecl:
6494 * @ctxt: an XML parser context
6495 * @inputchk: the input used for the current entity, needed for boundary checks
6497 * parse the declaration for a Mixed Element content
6498 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6500 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6502 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6504 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6506 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6508 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6509 * TODO Parameter-entity replacement text must be properly nested
6510 * with parenthesized groups. That is to say, if either of the
6511 * opening or closing parentheses in a choice, seq, or Mixed
6512 * construct is contained in the replacement text for a parameter
6513 * entity, both must be contained in the same replacement text. For
6514 * interoperability, if a parameter-entity reference appears in a
6515 * choice, seq, or Mixed construct, its replacement text should not
6516 * be empty, and neither the first nor last non-blank character of
6517 * the replacement text should be a connector (| or ,).
6519 * Returns the tree of xmlElementContentPtr describing the element
6522 xmlElementContentPtr
6523 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt
, int inputchk
) {
6524 /* stub left for API/ABI compat */
6525 return(xmlParseElementChildrenContentDeclPriv(ctxt
, inputchk
, 1));
6529 * xmlParseElementContentDecl:
6530 * @ctxt: an XML parser context
6531 * @name: the name of the element being defined.
6532 * @result: the Element Content pointer will be stored here if any
6534 * parse the declaration for an Element content either Mixed or Children,
6535 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6537 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6539 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6543 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt
, const xmlChar
*name
,
6544 xmlElementContentPtr
*result
) {
6546 xmlElementContentPtr tree
= NULL
;
6547 int inputid
= ctxt
->input
->id
;
6553 xmlFatalErrMsgStr(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
,
6554 "xmlParseElementContentDecl : %s '(' expected\n", name
);
6559 if (ctxt
->instate
== XML_PARSER_EOF
)
6562 if (CMP7(CUR_PTR
, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6563 tree
= xmlParseElementMixedContentDecl(ctxt
, inputid
);
6564 res
= XML_ELEMENT_TYPE_MIXED
;
6566 tree
= xmlParseElementChildrenContentDeclPriv(ctxt
, inputid
, 1);
6567 res
= XML_ELEMENT_TYPE_ELEMENT
;
6575 * xmlParseElementDecl:
6576 * @ctxt: an XML parser context
6578 * parse an Element declaration.
6580 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6582 * [ VC: Unique Element Type Declaration ]
6583 * No element type may be declared more than once
6585 * Returns the type of the element, or -1 in case of error
6588 xmlParseElementDecl(xmlParserCtxtPtr ctxt
) {
6589 const xmlChar
*name
;
6591 xmlElementContentPtr content
= NULL
;
6593 /* GROW; done in the caller */
6594 if (CMP9(CUR_PTR
, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6595 int inputid
= ctxt
->input
->id
;
6598 if (SKIP_BLANKS
== 0) {
6599 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6600 "Space required after 'ELEMENT'\n");
6603 name
= xmlParseName(ctxt
);
6605 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
6606 "xmlParseElementDecl: no name for Element\n");
6609 if (SKIP_BLANKS
== 0) {
6610 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6611 "Space required after the element name\n");
6613 if (CMP5(CUR_PTR
, 'E', 'M', 'P', 'T', 'Y')) {
6616 * Element must always be empty.
6618 ret
= XML_ELEMENT_TYPE_EMPTY
;
6619 } else if ((RAW
== 'A') && (NXT(1) == 'N') &&
6623 * Element is a generic container.
6625 ret
= XML_ELEMENT_TYPE_ANY
;
6626 } else if (RAW
== '(') {
6627 ret
= xmlParseElementContentDecl(ctxt
, name
, &content
);
6630 * [ WFC: PEs in Internal Subset ] error handling.
6632 if ((RAW
== '%') && (ctxt
->external
== 0) &&
6633 (ctxt
->inputNr
== 1)) {
6634 xmlFatalErrMsg(ctxt
, XML_ERR_PEREF_IN_INT_SUBSET
,
6635 "PEReference: forbidden within markup decl in internal subset\n");
6637 xmlFatalErrMsg(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
,
6638 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6646 xmlFatalErr(ctxt
, XML_ERR_GT_REQUIRED
, NULL
);
6647 if (content
!= NULL
) {
6648 xmlFreeDocElementContent(ctxt
->myDoc
, content
);
6651 if (inputid
!= ctxt
->input
->id
) {
6652 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6653 "Element declaration doesn't start and stop in"
6654 " the same entity\n");
6658 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
6659 (ctxt
->sax
->elementDecl
!= NULL
)) {
6660 if (content
!= NULL
)
6661 content
->parent
= NULL
;
6662 ctxt
->sax
->elementDecl(ctxt
->userData
, name
, ret
,
6664 if ((content
!= NULL
) && (content
->parent
== NULL
)) {
6666 * this is a trick: if xmlAddElementDecl is called,
6667 * instead of copying the full tree it is plugged directly
6668 * if called from the parser. Avoid duplicating the
6669 * interfaces or change the API/ABI
6671 xmlFreeDocElementContent(ctxt
->myDoc
, content
);
6673 } else if (content
!= NULL
) {
6674 xmlFreeDocElementContent(ctxt
->myDoc
, content
);
6682 * xmlParseConditionalSections
6683 * @ctxt: an XML parser context
6685 * [61] conditionalSect ::= includeSect | ignoreSect
6686 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6687 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6688 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6689 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6693 xmlParseConditionalSections(xmlParserCtxtPtr ctxt
) {
6694 int *inputIds
= NULL
;
6695 size_t inputIdsSize
= 0;
6698 while (ctxt
->instate
!= XML_PARSER_EOF
) {
6699 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6700 int id
= ctxt
->input
->id
;
6705 if (CMP7(CUR_PTR
, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6709 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_INVALID
, NULL
);
6710 xmlHaltParser(ctxt
);
6713 if (ctxt
->input
->id
!= id
) {
6714 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6715 "All markup of the conditional section is"
6716 " not in the same entity\n");
6720 if (inputIdsSize
<= depth
) {
6723 inputIdsSize
= (inputIdsSize
== 0 ? 4 : inputIdsSize
* 2);
6724 tmp
= (int *) xmlRealloc(inputIds
,
6725 inputIdsSize
* sizeof(int));
6727 xmlErrMemory(ctxt
, NULL
);
6732 inputIds
[depth
] = id
;
6734 } else if (CMP6(CUR_PTR
, 'I', 'G', 'N', 'O', 'R', 'E')) {
6736 xmlParserInputState instate
;
6737 size_t ignoreDepth
= 0;
6742 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_INVALID
, NULL
);
6743 xmlHaltParser(ctxt
);
6746 if (ctxt
->input
->id
!= id
) {
6747 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6748 "All markup of the conditional section is"
6749 " not in the same entity\n");
6754 * Parse up to the end of the conditional section but disable
6755 * SAX event generating DTD building in the meantime
6757 state
= ctxt
->disableSAX
;
6758 instate
= ctxt
->instate
;
6759 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6760 ctxt
->instate
= XML_PARSER_IGNORE
;
6763 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6766 /* Check for integer overflow */
6767 if (ignoreDepth
== 0) {
6768 xmlErrMemory(ctxt
, NULL
);
6771 } else if ((RAW
== ']') && (NXT(1) == ']') &&
6773 if (ignoreDepth
== 0)
6782 ctxt
->disableSAX
= state
;
6783 ctxt
->instate
= instate
;
6786 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_NOT_FINISHED
, NULL
);
6789 if (ctxt
->input
->id
!= id
) {
6790 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6791 "All markup of the conditional section is"
6792 " not in the same entity\n");
6796 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_INVALID_KEYWORD
, NULL
);
6797 xmlHaltParser(ctxt
);
6800 } else if ((depth
> 0) &&
6801 (RAW
== ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6803 if (ctxt
->input
->id
!= inputIds
[depth
]) {
6804 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6805 "All markup of the conditional section is not"
6806 " in the same entity\n");
6810 int id
= ctxt
->input
->id
;
6811 unsigned long cons
= CUR_CONSUMED
;
6813 xmlParseMarkupDecl(ctxt
);
6815 if ((id
== ctxt
->input
->id
) && (cons
== CUR_CONSUMED
)) {
6816 xmlFatalErr(ctxt
, XML_ERR_EXT_SUBSET_NOT_FINISHED
, NULL
);
6817 xmlHaltParser(ctxt
);
6834 * xmlParseMarkupDecl:
6835 * @ctxt: an XML parser context
6837 * parse Markup declarations
6839 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6840 * NotationDecl | PI | Comment
6842 * [ VC: Proper Declaration/PE Nesting ]
6843 * Parameter-entity replacement text must be properly nested with
6844 * markup declarations. That is to say, if either the first character
6845 * or the last character of a markup declaration (markupdecl above) is
6846 * contained in the replacement text for a parameter-entity reference,
6847 * both must be contained in the same replacement text.
6849 * [ WFC: PEs in Internal Subset ]
6850 * In the internal DTD subset, parameter-entity references can occur
6851 * only where markup declarations can occur, not within markup declarations.
6852 * (This does not apply to references that occur in external parameter
6853 * entities or to the external subset.)
6856 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt
) {
6859 if (NXT(1) == '!') {
6863 xmlParseElementDecl(ctxt
);
6864 else if (NXT(3) == 'N')
6865 xmlParseEntityDecl(ctxt
);
6868 xmlParseAttributeListDecl(ctxt
);
6871 xmlParseNotationDecl(ctxt
);
6874 xmlParseComment(ctxt
);
6877 /* there is an error but it will be detected later */
6880 } else if (NXT(1) == '?') {
6886 * detect requirement to exit there and act accordingly
6887 * and avoid having instate overridden later on
6889 if (ctxt
->instate
== XML_PARSER_EOF
)
6892 ctxt
->instate
= XML_PARSER_DTD
;
6897 * @ctxt: an XML parser context
6899 * parse an XML declaration header for external entities
6901 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6905 xmlParseTextDecl(xmlParserCtxtPtr ctxt
) {
6907 const xmlChar
*encoding
;
6911 * We know that '<?xml' is here.
6913 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6916 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_STARTED
, NULL
);
6920 /* Avoid expansion of parameter entities when skipping blanks. */
6921 oldstate
= ctxt
->instate
;
6922 ctxt
->instate
= XML_PARSER_START
;
6924 if (SKIP_BLANKS
== 0) {
6925 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6926 "Space needed after '<?xml'\n");
6930 * We may have the VersionInfo here.
6932 version
= xmlParseVersionInfo(ctxt
);
6933 if (version
== NULL
)
6934 version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
6936 if (SKIP_BLANKS
== 0) {
6937 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6938 "Space needed here\n");
6941 ctxt
->input
->version
= version
;
6944 * We must have the encoding declaration
6946 encoding
= xmlParseEncodingDecl(ctxt
);
6947 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
6949 * The XML REC instructs us to stop parsing right here
6951 ctxt
->instate
= oldstate
;
6954 if ((encoding
== NULL
) && (ctxt
->errNo
== XML_ERR_OK
)) {
6955 xmlFatalErrMsg(ctxt
, XML_ERR_MISSING_ENCODING
,
6956 "Missing encoding in text declaration\n");
6960 if ((RAW
== '?') && (NXT(1) == '>')) {
6962 } else if (RAW
== '>') {
6963 /* Deprecated old WD ... */
6964 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
6967 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
6968 MOVETO_ENDTAG(CUR_PTR
);
6972 ctxt
->instate
= oldstate
;
6976 * xmlParseExternalSubset:
6977 * @ctxt: an XML parser context
6978 * @ExternalID: the external identifier
6979 * @SystemID: the system identifier (or URL)
6981 * parse Markup declarations from an external subset
6983 * [30] extSubset ::= textDecl? extSubsetDecl
6985 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6988 xmlParseExternalSubset(xmlParserCtxtPtr ctxt
, const xmlChar
*ExternalID
,
6989 const xmlChar
*SystemID
) {
6990 xmlDetectSAX2(ctxt
);
6993 if ((ctxt
->encoding
== NULL
) &&
6994 (ctxt
->input
->end
- ctxt
->input
->cur
>= 4)) {
6996 xmlCharEncoding enc
;
7002 enc
= xmlDetectCharEncoding(start
, 4);
7003 if (enc
!= XML_CHAR_ENCODING_NONE
)
7004 xmlSwitchEncoding(ctxt
, enc
);
7007 if (CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) {
7008 xmlParseTextDecl(ctxt
);
7009 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
7011 * The XML REC instructs us to stop parsing right here
7013 xmlHaltParser(ctxt
);
7017 if (ctxt
->myDoc
== NULL
) {
7018 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
7019 if (ctxt
->myDoc
== NULL
) {
7020 xmlErrMemory(ctxt
, "New Doc failed");
7023 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
7025 if ((ctxt
->myDoc
!= NULL
) && (ctxt
->myDoc
->intSubset
== NULL
))
7026 xmlCreateIntSubset(ctxt
->myDoc
, NULL
, ExternalID
, SystemID
);
7028 ctxt
->instate
= XML_PARSER_DTD
;
7031 while (((RAW
== '<') && (NXT(1) == '?')) ||
7032 ((RAW
== '<') && (NXT(1) == '!')) ||
7034 int id
= ctxt
->input
->id
;
7035 unsigned long cons
= CUR_CONSUMED
;
7038 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7039 xmlParseConditionalSections(ctxt
);
7041 xmlParseMarkupDecl(ctxt
);
7044 if ((id
== ctxt
->input
->id
) && (cons
== CUR_CONSUMED
)) {
7045 xmlFatalErr(ctxt
, XML_ERR_EXT_SUBSET_NOT_FINISHED
, NULL
);
7051 xmlFatalErr(ctxt
, XML_ERR_EXT_SUBSET_NOT_FINISHED
, NULL
);
7057 * xmlParseReference:
7058 * @ctxt: an XML parser context
7060 * parse and handle entity references in content, depending on the SAX
7061 * interface, this may end-up in a call to character() if this is a
7062 * CharRef, a predefined entity, if there is no reference() callback.
7063 * or if the parser was asked to switch to that mode.
7065 * [67] Reference ::= EntityRef | CharRef
7068 xmlParseReference(xmlParserCtxtPtr ctxt
) {
7072 xmlNodePtr list
= NULL
;
7073 xmlParserErrors ret
= XML_ERR_OK
;
7080 * Simple case of a CharRef
7082 if (NXT(1) == '#') {
7086 int value
= xmlParseCharRef(ctxt
);
7090 if (ctxt
->charset
!= XML_CHAR_ENCODING_UTF8
) {
7092 * So we are using non-UTF-8 buffers
7093 * Check that the char fit on 8bits, if not
7094 * generate a CharRef.
7096 if (value
<= 0xFF) {
7099 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
7100 (!ctxt
->disableSAX
))
7101 ctxt
->sax
->characters(ctxt
->userData
, out
, 1);
7103 if ((hex
== 'x') || (hex
== 'X'))
7104 snprintf((char *)out
, sizeof(out
), "#x%X", value
);
7106 snprintf((char *)out
, sizeof(out
), "#%d", value
);
7107 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
7108 (!ctxt
->disableSAX
))
7109 ctxt
->sax
->reference(ctxt
->userData
, out
);
7113 * Just encode the value in UTF-8
7115 COPY_BUF(0 ,out
, i
, value
);
7117 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
7118 (!ctxt
->disableSAX
))
7119 ctxt
->sax
->characters(ctxt
->userData
, out
, i
);
7125 * We are seeing an entity reference
7127 ent
= xmlParseEntityRef(ctxt
);
7128 if (ent
== NULL
) return;
7129 if (!ctxt
->wellFormed
)
7131 was_checked
= ent
->checked
;
7133 /* special case of predefined entities */
7134 if ((ent
->name
== NULL
) ||
7135 (ent
->etype
== XML_INTERNAL_PREDEFINED_ENTITY
)) {
7137 if (val
== NULL
) return;
7139 * inline the entity.
7141 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
7142 (!ctxt
->disableSAX
))
7143 ctxt
->sax
->characters(ctxt
->userData
, val
, xmlStrlen(val
));
7148 * The first reference to the entity trigger a parsing phase
7149 * where the ent->children is filled with the result from
7151 * Note: external parsed entities will not be loaded, it is not
7152 * required for a non-validating parser, unless the parsing option
7153 * of validating, or substituting entities were given. Doing so is
7154 * far more secure as the parser will only process data coming from
7155 * the document entity by default.
7157 if (((ent
->checked
== 0) ||
7158 ((ent
->children
== NULL
) && (ctxt
->options
& XML_PARSE_NOENT
))) &&
7159 ((ent
->etype
!= XML_EXTERNAL_GENERAL_PARSED_ENTITY
) ||
7160 (ctxt
->options
& (XML_PARSE_NOENT
| XML_PARSE_DTDVALID
)))) {
7161 unsigned long oldnbent
= ctxt
->nbentities
, diff
;
7164 * This is a bit hackish but this seems the best
7165 * way to make sure both SAX and DOM entity support
7169 if (ctxt
->userData
== ctxt
)
7172 user_data
= ctxt
->userData
;
7175 * Check that this entity is well formed
7176 * 4.3.2: An internal general parsed entity is well-formed
7177 * if its replacement text matches the production labeled
7180 if (ent
->etype
== XML_INTERNAL_GENERAL_ENTITY
) {
7182 ret
= xmlParseBalancedChunkMemoryInternal(ctxt
, ent
->content
,
7186 } else if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
) {
7188 ret
= xmlParseExternalEntityPrivate(ctxt
->myDoc
, ctxt
, ctxt
->sax
,
7189 user_data
, ctxt
->depth
, ent
->URI
,
7190 ent
->ExternalID
, &list
);
7193 ret
= XML_ERR_ENTITY_PE_INTERNAL
;
7194 xmlErrMsgStr(ctxt
, XML_ERR_INTERNAL_ERROR
,
7195 "invalid entity type found\n", NULL
);
7199 * Store the number of entities needing parsing for this entity
7200 * content and do checkings
7202 diff
= ctxt
->nbentities
- oldnbent
+ 1;
7203 if (diff
> INT_MAX
/ 2)
7205 ent
->checked
= diff
* 2;
7206 if ((ent
->content
!= NULL
) && (xmlStrchr(ent
->content
, '<')))
7208 if (ret
== XML_ERR_ENTITY_LOOP
) {
7209 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
7210 xmlHaltParser(ctxt
);
7211 xmlFreeNodeList(list
);
7214 if (xmlParserEntityCheck(ctxt
, 0, ent
, 0)) {
7215 xmlFreeNodeList(list
);
7219 if ((ret
== XML_ERR_OK
) && (list
!= NULL
)) {
7220 if (((ent
->etype
== XML_INTERNAL_GENERAL_ENTITY
) ||
7221 (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
))&&
7222 (ent
->children
== NULL
)) {
7223 ent
->children
= list
;
7225 * Prune it directly in the generated document
7226 * except for single text nodes.
7228 if ((ctxt
->replaceEntities
== 0) ||
7229 (ctxt
->parseMode
== XML_PARSE_READER
) ||
7230 ((list
->type
== XML_TEXT_NODE
) &&
7231 (list
->next
== NULL
))) {
7233 while (list
!= NULL
) {
7234 list
->parent
= (xmlNodePtr
) ent
;
7235 if (list
->doc
!= ent
->doc
)
7236 xmlSetTreeDoc(list
, ent
->doc
);
7237 if (list
->next
== NULL
)
7244 while (list
!= NULL
) {
7245 list
->parent
= (xmlNodePtr
) ctxt
->node
;
7246 list
->doc
= ctxt
->myDoc
;
7247 if (list
->next
== NULL
)
7251 list
= ent
->children
;
7252 #ifdef LIBXML_LEGACY_ENABLED
7253 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
7254 xmlAddEntityReference(ent
, list
, NULL
);
7255 #endif /* LIBXML_LEGACY_ENABLED */
7258 xmlFreeNodeList(list
);
7261 } else if ((ret
!= XML_ERR_OK
) &&
7262 (ret
!= XML_WAR_UNDECLARED_ENTITY
)) {
7263 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7264 "Entity '%s' failed to parse\n", ent
->name
);
7265 if (ent
->content
!= NULL
)
7266 ent
->content
[0] = 0;
7267 xmlParserEntityCheck(ctxt
, 0, ent
, 0);
7268 } else if (list
!= NULL
) {
7269 xmlFreeNodeList(list
);
7272 if (ent
->checked
== 0)
7275 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7277 } else if (ent
->checked
!= 1) {
7278 ctxt
->nbentities
+= ent
->checked
/ 2;
7282 * Now that the entity content has been gathered
7283 * provide it to the application, this can take different forms based
7284 * on the parsing modes.
7286 if (ent
->children
== NULL
) {
7288 * Probably running in SAX mode and the callbacks don't
7289 * build the entity content. So unless we already went
7290 * though parsing for first checking go though the entity
7291 * content to generate callbacks associated to the entity
7293 if (was_checked
!= 0) {
7296 * This is a bit hackish but this seems the best
7297 * way to make sure both SAX and DOM entity support
7300 if (ctxt
->userData
== ctxt
)
7303 user_data
= ctxt
->userData
;
7305 if (ent
->etype
== XML_INTERNAL_GENERAL_ENTITY
) {
7307 ret
= xmlParseBalancedChunkMemoryInternal(ctxt
,
7308 ent
->content
, user_data
, NULL
);
7310 } else if (ent
->etype
==
7311 XML_EXTERNAL_GENERAL_PARSED_ENTITY
) {
7313 ret
= xmlParseExternalEntityPrivate(ctxt
->myDoc
, ctxt
,
7314 ctxt
->sax
, user_data
, ctxt
->depth
,
7315 ent
->URI
, ent
->ExternalID
, NULL
);
7318 ret
= XML_ERR_ENTITY_PE_INTERNAL
;
7319 xmlErrMsgStr(ctxt
, XML_ERR_INTERNAL_ERROR
,
7320 "invalid entity type found\n", NULL
);
7322 if (ret
== XML_ERR_ENTITY_LOOP
) {
7323 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
7327 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
7328 (ctxt
->replaceEntities
== 0) && (!ctxt
->disableSAX
)) {
7330 * Entity reference callback comes second, it's somewhat
7331 * superfluous but a compatibility to historical behaviour
7333 ctxt
->sax
->reference(ctxt
->userData
, ent
->name
);
7339 * If we didn't get any children for the entity being built
7341 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
7342 (ctxt
->replaceEntities
== 0) && (!ctxt
->disableSAX
)) {
7346 ctxt
->sax
->reference(ctxt
->userData
, ent
->name
);
7350 if ((ctxt
->replaceEntities
) || (ent
->children
== NULL
)) {
7352 * There is a problem on the handling of _private for entities
7353 * (bug 155816): Should we copy the content of the field from
7354 * the entity (possibly overwriting some value set by the user
7355 * when a copy is created), should we leave it alone, or should
7356 * we try to take care of different situations? The problem
7357 * is exacerbated by the usage of this field by the xmlReader.
7358 * To fix this bug, we look at _private on the created node
7359 * and, if it's NULL, we copy in whatever was in the entity.
7360 * If it's not NULL we leave it alone. This is somewhat of a
7361 * hack - maybe we should have further tests to determine
7364 if ((ctxt
->node
!= NULL
) && (ent
->children
!= NULL
)) {
7366 * Seems we are generating the DOM content, do
7367 * a simple tree copy for all references except the first
7368 * In the first occurrence list contains the replacement.
7370 if (((list
== NULL
) && (ent
->owner
== 0)) ||
7371 (ctxt
->parseMode
== XML_PARSE_READER
)) {
7372 xmlNodePtr nw
= NULL
, cur
, firstChild
= NULL
;
7375 * We are copying here, make sure there is no abuse
7377 ctxt
->sizeentcopy
+= ent
->length
+ 5;
7378 if (xmlParserEntityCheck(ctxt
, 0, ent
, ctxt
->sizeentcopy
))
7382 * when operating on a reader, the entities definitions
7383 * are always owning the entities subtree.
7384 if (ctxt->parseMode == XML_PARSE_READER)
7388 cur
= ent
->children
;
7389 while (cur
!= NULL
) {
7390 nw
= xmlDocCopyNode(cur
, ctxt
->myDoc
, 1);
7392 if (nw
->_private
== NULL
)
7393 nw
->_private
= cur
->_private
;
7394 if (firstChild
== NULL
){
7397 nw
= xmlAddChild(ctxt
->node
, nw
);
7399 if (cur
== ent
->last
) {
7401 * needed to detect some strange empty
7402 * node cases in the reader tests
7404 if ((ctxt
->parseMode
== XML_PARSE_READER
) &&
7406 (nw
->type
== XML_ELEMENT_NODE
) &&
7407 (nw
->children
== NULL
))
7414 #ifdef LIBXML_LEGACY_ENABLED
7415 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
7416 xmlAddEntityReference(ent
, firstChild
, nw
);
7417 #endif /* LIBXML_LEGACY_ENABLED */
7418 } else if ((list
== NULL
) || (ctxt
->inputNr
> 0)) {
7419 xmlNodePtr nw
= NULL
, cur
, next
, last
,
7423 * We are copying here, make sure there is no abuse
7425 ctxt
->sizeentcopy
+= ent
->length
+ 5;
7426 if (xmlParserEntityCheck(ctxt
, 0, ent
, ctxt
->sizeentcopy
))
7430 * Copy the entity child list and make it the new
7431 * entity child list. The goal is to make sure any
7432 * ID or REF referenced will be the one from the
7433 * document content and not the entity copy.
7435 cur
= ent
->children
;
7436 ent
->children
= NULL
;
7439 while (cur
!= NULL
) {
7443 nw
= xmlDocCopyNode(cur
, ctxt
->myDoc
, 1);
7445 if (nw
->_private
== NULL
)
7446 nw
->_private
= cur
->_private
;
7447 if (firstChild
== NULL
){
7450 xmlAddChild((xmlNodePtr
) ent
, nw
);
7451 xmlAddChild(ctxt
->node
, cur
);
7457 if (ent
->owner
== 0)
7459 #ifdef LIBXML_LEGACY_ENABLED
7460 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
7461 xmlAddEntityReference(ent
, firstChild
, nw
);
7462 #endif /* LIBXML_LEGACY_ENABLED */
7464 const xmlChar
*nbktext
;
7467 * the name change is to avoid coalescing of the
7468 * node with a possible previous text one which
7469 * would make ent->children a dangling pointer
7471 nbktext
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"nbktext",
7473 if (ent
->children
->type
== XML_TEXT_NODE
)
7474 ent
->children
->name
= nbktext
;
7475 if ((ent
->last
!= ent
->children
) &&
7476 (ent
->last
->type
== XML_TEXT_NODE
))
7477 ent
->last
->name
= nbktext
;
7478 xmlAddChildList(ctxt
->node
, ent
->children
);
7482 * This is to avoid a nasty side effect, see
7483 * characters() in SAX.c
7493 * xmlParseEntityRef:
7494 * @ctxt: an XML parser context
7496 * parse ENTITY references declarations
7498 * [68] EntityRef ::= '&' Name ';'
7500 * [ WFC: Entity Declared ]
7501 * In a document without any DTD, a document with only an internal DTD
7502 * subset which contains no parameter entity references, or a document
7503 * with "standalone='yes'", the Name given in the entity reference
7504 * must match that in an entity declaration, except that well-formed
7505 * documents need not declare any of the following entities: amp, lt,
7506 * gt, apos, quot. The declaration of a parameter entity must precede
7507 * any reference to it. Similarly, the declaration of a general entity
7508 * must precede any reference to it which appears in a default value in an
7509 * attribute-list declaration. Note that if entities are declared in the
7510 * external subset or in external parameter entities, a non-validating
7511 * processor is not obligated to read and process their declarations;
7512 * for such documents, the rule that an entity must be declared is a
7513 * well-formedness constraint only if standalone='yes'.
7515 * [ WFC: Parsed Entity ]
7516 * An entity reference must not contain the name of an unparsed entity
7518 * Returns the xmlEntityPtr if found, or NULL otherwise.
7521 xmlParseEntityRef(xmlParserCtxtPtr ctxt
) {
7522 const xmlChar
*name
;
7523 xmlEntityPtr ent
= NULL
;
7526 if (ctxt
->instate
== XML_PARSER_EOF
)
7532 name
= xmlParseName(ctxt
);
7534 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
7535 "xmlParseEntityRef: no name\n");
7539 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
7545 * Predefined entities override any extra definition
7547 if ((ctxt
->options
& XML_PARSE_OLDSAX
) == 0) {
7548 ent
= xmlGetPredefinedEntity(name
);
7554 * Increase the number of entity references parsed
7559 * Ask first SAX for entity resolution, otherwise try the
7560 * entities which may have stored in the parser context.
7562 if (ctxt
->sax
!= NULL
) {
7563 if (ctxt
->sax
->getEntity
!= NULL
)
7564 ent
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
7565 if ((ctxt
->wellFormed
== 1 ) && (ent
== NULL
) &&
7566 (ctxt
->options
& XML_PARSE_OLDSAX
))
7567 ent
= xmlGetPredefinedEntity(name
);
7568 if ((ctxt
->wellFormed
== 1 ) && (ent
== NULL
) &&
7569 (ctxt
->userData
==ctxt
)) {
7570 ent
= xmlSAX2GetEntity(ctxt
, name
);
7573 if (ctxt
->instate
== XML_PARSER_EOF
)
7576 * [ WFC: Entity Declared ]
7577 * In a document without any DTD, a document with only an
7578 * internal DTD subset which contains no parameter entity
7579 * references, or a document with "standalone='yes'", the
7580 * Name given in the entity reference must match that in an
7581 * entity declaration, except that well-formed documents
7582 * need not declare any of the following entities: amp, lt,
7584 * The declaration of a parameter entity must precede any
7586 * Similarly, the declaration of a general entity must
7587 * precede any reference to it which appears in a default
7588 * value in an attribute-list declaration. Note that if
7589 * entities are declared in the external subset or in
7590 * external parameter entities, a non-validating processor
7591 * is not obligated to read and process their declarations;
7592 * for such documents, the rule that an entity must be
7593 * declared is a well-formedness constraint only if
7597 if ((ctxt
->standalone
== 1) ||
7598 ((ctxt
->hasExternalSubset
== 0) &&
7599 (ctxt
->hasPErefs
== 0))) {
7600 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7601 "Entity '%s' not defined\n", name
);
7603 xmlErrMsgStr(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7604 "Entity '%s' not defined\n", name
);
7605 if ((ctxt
->inSubset
== 0) &&
7606 (ctxt
->sax
!= NULL
) &&
7607 (ctxt
->sax
->reference
!= NULL
)) {
7608 ctxt
->sax
->reference(ctxt
->userData
, name
);
7611 xmlParserEntityCheck(ctxt
, 0, ent
, 0);
7616 * [ WFC: Parsed Entity ]
7617 * An entity reference must not contain the name of an
7620 else if (ent
->etype
== XML_EXTERNAL_GENERAL_UNPARSED_ENTITY
) {
7621 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNPARSED_ENTITY
,
7622 "Entity reference to unparsed entity %s\n", name
);
7626 * [ WFC: No External Entity References ]
7627 * Attribute values cannot contain direct or indirect
7628 * entity references to external entities.
7630 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7631 (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) {
7632 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_EXTERNAL
,
7633 "Attribute references external entity '%s'\n", name
);
7636 * [ WFC: No < in Attribute Values ]
7637 * The replacement text of any entity referred to directly or
7638 * indirectly in an attribute value (other than "<") must
7641 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7643 (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
)) {
7644 if (((ent
->checked
& 1) || (ent
->checked
== 0)) &&
7645 (ent
->content
!= NULL
) && (xmlStrchr(ent
->content
, '<'))) {
7646 xmlFatalErrMsgStr(ctxt
, XML_ERR_LT_IN_ATTRIBUTE
,
7647 "'<' in entity '%s' is not allowed in attributes values\n", name
);
7652 * Internal check, no parameter entities here ...
7655 switch (ent
->etype
) {
7656 case XML_INTERNAL_PARAMETER_ENTITY
:
7657 case XML_EXTERNAL_PARAMETER_ENTITY
:
7658 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_PARAMETER
,
7659 "Attempt to reference the parameter entity '%s'\n",
7668 * [ WFC: No Recursion ]
7669 * A parsed entity must not contain a recursive reference
7670 * to itself, either directly or indirectly.
7671 * Done somewhere else
7677 * xmlParseStringEntityRef:
7678 * @ctxt: an XML parser context
7679 * @str: a pointer to an index in the string
7681 * parse ENTITY references declarations, but this version parses it from
7684 * [68] EntityRef ::= '&' Name ';'
7686 * [ WFC: Entity Declared ]
7687 * In a document without any DTD, a document with only an internal DTD
7688 * subset which contains no parameter entity references, or a document
7689 * with "standalone='yes'", the Name given in the entity reference
7690 * must match that in an entity declaration, except that well-formed
7691 * documents need not declare any of the following entities: amp, lt,
7692 * gt, apos, quot. The declaration of a parameter entity must precede
7693 * any reference to it. Similarly, the declaration of a general entity
7694 * must precede any reference to it which appears in a default value in an
7695 * attribute-list declaration. Note that if entities are declared in the
7696 * external subset or in external parameter entities, a non-validating
7697 * processor is not obligated to read and process their declarations;
7698 * for such documents, the rule that an entity must be declared is a
7699 * well-formedness constraint only if standalone='yes'.
7701 * [ WFC: Parsed Entity ]
7702 * An entity reference must not contain the name of an unparsed entity
7704 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7705 * is updated to the current location in the string.
7708 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt
, const xmlChar
** str
) {
7712 xmlEntityPtr ent
= NULL
;
7714 if ((str
== NULL
) || (*str
== NULL
))
7722 name
= xmlParseStringName(ctxt
, &ptr
);
7724 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
7725 "xmlParseStringEntityRef: no name\n");
7730 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
7739 * Predefined entities override any extra definition
7741 if ((ctxt
->options
& XML_PARSE_OLDSAX
) == 0) {
7742 ent
= xmlGetPredefinedEntity(name
);
7751 * Increase the number of entity references parsed
7756 * Ask first SAX for entity resolution, otherwise try the
7757 * entities which may have stored in the parser context.
7759 if (ctxt
->sax
!= NULL
) {
7760 if (ctxt
->sax
->getEntity
!= NULL
)
7761 ent
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
7762 if ((ent
== NULL
) && (ctxt
->options
& XML_PARSE_OLDSAX
))
7763 ent
= xmlGetPredefinedEntity(name
);
7764 if ((ent
== NULL
) && (ctxt
->userData
==ctxt
)) {
7765 ent
= xmlSAX2GetEntity(ctxt
, name
);
7768 if (ctxt
->instate
== XML_PARSER_EOF
) {
7774 * [ WFC: Entity Declared ]
7775 * In a document without any DTD, a document with only an
7776 * internal DTD subset which contains no parameter entity
7777 * references, or a document with "standalone='yes'", the
7778 * Name given in the entity reference must match that in an
7779 * entity declaration, except that well-formed documents
7780 * need not declare any of the following entities: amp, lt,
7782 * The declaration of a parameter entity must precede any
7784 * Similarly, the declaration of a general entity must
7785 * precede any reference to it which appears in a default
7786 * value in an attribute-list declaration. Note that if
7787 * entities are declared in the external subset or in
7788 * external parameter entities, a non-validating processor
7789 * is not obligated to read and process their declarations;
7790 * for such documents, the rule that an entity must be
7791 * declared is a well-formedness constraint only if
7795 if ((ctxt
->standalone
== 1) ||
7796 ((ctxt
->hasExternalSubset
== 0) &&
7797 (ctxt
->hasPErefs
== 0))) {
7798 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7799 "Entity '%s' not defined\n", name
);
7801 xmlErrMsgStr(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7802 "Entity '%s' not defined\n",
7805 xmlParserEntityCheck(ctxt
, 0, ent
, 0);
7806 /* TODO ? check regressions ctxt->valid = 0; */
7810 * [ WFC: Parsed Entity ]
7811 * An entity reference must not contain the name of an
7814 else if (ent
->etype
== XML_EXTERNAL_GENERAL_UNPARSED_ENTITY
) {
7815 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNPARSED_ENTITY
,
7816 "Entity reference to unparsed entity %s\n", name
);
7820 * [ WFC: No External Entity References ]
7821 * Attribute values cannot contain direct or indirect
7822 * entity references to external entities.
7824 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7825 (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) {
7826 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_EXTERNAL
,
7827 "Attribute references external entity '%s'\n", name
);
7830 * [ WFC: No < in Attribute Values ]
7831 * The replacement text of any entity referred to directly or
7832 * indirectly in an attribute value (other than "<") must
7835 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7836 (ent
!= NULL
) && (ent
->content
!= NULL
) &&
7837 (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) &&
7838 (xmlStrchr(ent
->content
, '<'))) {
7839 xmlFatalErrMsgStr(ctxt
, XML_ERR_LT_IN_ATTRIBUTE
,
7840 "'<' in entity '%s' is not allowed in attributes values\n",
7845 * Internal check, no parameter entities here ...
7848 switch (ent
->etype
) {
7849 case XML_INTERNAL_PARAMETER_ENTITY
:
7850 case XML_EXTERNAL_PARAMETER_ENTITY
:
7851 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_PARAMETER
,
7852 "Attempt to reference the parameter entity '%s'\n",
7861 * [ WFC: No Recursion ]
7862 * A parsed entity must not contain a recursive reference
7863 * to itself, either directly or indirectly.
7864 * Done somewhere else
7873 * xmlParsePEReference:
7874 * @ctxt: an XML parser context
7876 * parse PEReference declarations
7877 * The entity content is handled directly by pushing it's content as
7878 * a new input stream.
7880 * [69] PEReference ::= '%' Name ';'
7882 * [ WFC: No Recursion ]
7883 * A parsed entity must not contain a recursive
7884 * reference to itself, either directly or indirectly.
7886 * [ WFC: Entity Declared ]
7887 * In a document without any DTD, a document with only an internal DTD
7888 * subset which contains no parameter entity references, or a document
7889 * with "standalone='yes'", ... ... The declaration of a parameter
7890 * entity must precede any reference to it...
7892 * [ VC: Entity Declared ]
7893 * In a document with an external subset or external parameter entities
7894 * with "standalone='no'", ... ... The declaration of a parameter entity
7895 * must precede any reference to it...
7898 * Parameter-entity references may only appear in the DTD.
7899 * NOTE: misleading but this is handled.
7902 xmlParsePEReference(xmlParserCtxtPtr ctxt
)
7904 const xmlChar
*name
;
7905 xmlEntityPtr entity
= NULL
;
7906 xmlParserInputPtr input
;
7911 name
= xmlParseName(ctxt
);
7913 xmlFatalErrMsg(ctxt
, XML_ERR_PEREF_NO_NAME
, "PEReference: no name\n");
7916 if (xmlParserDebugEntities
)
7917 xmlGenericError(xmlGenericErrorContext
,
7918 "PEReference: %s\n", name
);
7920 xmlFatalErr(ctxt
, XML_ERR_PEREF_SEMICOL_MISSING
, NULL
);
7927 * Increase the number of entity references parsed
7932 * Request the entity from SAX
7934 if ((ctxt
->sax
!= NULL
) &&
7935 (ctxt
->sax
->getParameterEntity
!= NULL
))
7936 entity
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
7937 if (ctxt
->instate
== XML_PARSER_EOF
)
7939 if (entity
== NULL
) {
7941 * [ WFC: Entity Declared ]
7942 * In a document without any DTD, a document with only an
7943 * internal DTD subset which contains no parameter entity
7944 * references, or a document with "standalone='yes'", ...
7945 * ... The declaration of a parameter entity must precede
7946 * any reference to it...
7948 if ((ctxt
->standalone
== 1) ||
7949 ((ctxt
->hasExternalSubset
== 0) &&
7950 (ctxt
->hasPErefs
== 0))) {
7951 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7952 "PEReference: %%%s; not found\n",
7956 * [ VC: Entity Declared ]
7957 * In a document with an external subset or external
7958 * parameter entities with "standalone='no'", ...
7959 * ... The declaration of a parameter entity must
7960 * precede any reference to it...
7962 if ((ctxt
->validate
) && (ctxt
->vctxt
.error
!= NULL
)) {
7963 xmlValidityError(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7964 "PEReference: %%%s; not found\n",
7967 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7968 "PEReference: %%%s; not found\n",
7972 xmlParserEntityCheck(ctxt
, 0, NULL
, 0);
7975 * Internal checking in case the entity quest barfed
7977 if ((entity
->etype
!= XML_INTERNAL_PARAMETER_ENTITY
) &&
7978 (entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
)) {
7979 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7980 "Internal: %%%s; is not a parameter entity\n",
7984 xmlCharEncoding enc
;
7986 if (xmlParserEntityCheck(ctxt
, 0, entity
, 0))
7989 if ((entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
) &&
7990 ((ctxt
->options
& XML_PARSE_NOENT
) == 0) &&
7991 ((ctxt
->options
& XML_PARSE_DTDVALID
) == 0) &&
7992 ((ctxt
->options
& XML_PARSE_DTDLOAD
) == 0) &&
7993 ((ctxt
->options
& XML_PARSE_DTDATTR
) == 0) &&
7994 (ctxt
->replaceEntities
== 0) &&
7995 (ctxt
->validate
== 0))
7998 input
= xmlNewEntityInputStream(ctxt
, entity
);
7999 if (xmlPushInput(ctxt
, input
) < 0) {
8000 xmlFreeInputStream(input
);
8004 if (entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
) {
8006 * Get the 4 first bytes and decode the charset
8007 * if enc != XML_CHAR_ENCODING_NONE
8008 * plug some encoding conversion routines.
8009 * Note that, since we may have some non-UTF8
8010 * encoding (like UTF16, bug 135229), the 'length'
8011 * is not known, but we can calculate based upon
8012 * the amount of data in the buffer.
8015 if (ctxt
->instate
== XML_PARSER_EOF
)
8017 if ((ctxt
->input
->end
- ctxt
->input
->cur
)>=4) {
8022 enc
= xmlDetectCharEncoding(start
, 4);
8023 if (enc
!= XML_CHAR_ENCODING_NONE
) {
8024 xmlSwitchEncoding(ctxt
, enc
);
8028 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) &&
8029 (IS_BLANK_CH(NXT(5)))) {
8030 xmlParseTextDecl(ctxt
);
8035 ctxt
->hasPErefs
= 1;
8039 * xmlLoadEntityContent:
8040 * @ctxt: an XML parser context
8041 * @entity: an unloaded system entity
8043 * Load the original content of the given system entity from the
8044 * ExternalID/SystemID given. This is to be used for Included in Literal
8045 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8047 * Returns 0 in case of success and -1 in case of failure
8050 xmlLoadEntityContent(xmlParserCtxtPtr ctxt
, xmlEntityPtr entity
) {
8051 xmlParserInputPtr input
;
8056 if ((ctxt
== NULL
) || (entity
== NULL
) ||
8057 ((entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
) &&
8058 (entity
->etype
!= XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) ||
8059 (entity
->content
!= NULL
)) {
8060 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
8061 "xmlLoadEntityContent parameter error");
8065 if (xmlParserDebugEntities
)
8066 xmlGenericError(xmlGenericErrorContext
,
8067 "Reading %s entity content input\n", entity
->name
);
8069 buf
= xmlBufferCreate();
8071 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
8072 "xmlLoadEntityContent parameter error");
8075 xmlBufferSetAllocationScheme(buf
, XML_BUFFER_ALLOC_DOUBLEIT
);
8077 input
= xmlNewEntityInputStream(ctxt
, entity
);
8078 if (input
== NULL
) {
8079 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
8080 "xmlLoadEntityContent input error");
8086 * Push the entity as the current input, read char by char
8087 * saving to the buffer until the end of the entity or an error
8089 if (xmlPushInput(ctxt
, input
) < 0) {
8091 xmlFreeInputStream(input
);
8097 while ((ctxt
->input
== input
) && (ctxt
->input
->cur
< ctxt
->input
->end
) &&
8099 xmlBufferAdd(buf
, ctxt
->input
->cur
, l
);
8100 if (count
++ > XML_PARSER_CHUNK_SIZE
) {
8103 if (ctxt
->instate
== XML_PARSER_EOF
) {
8113 if (ctxt
->instate
== XML_PARSER_EOF
) {
8121 if ((ctxt
->input
== input
) && (ctxt
->input
->cur
>= ctxt
->input
->end
)) {
8123 } else if (!IS_CHAR(c
)) {
8124 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
8125 "xmlLoadEntityContent: invalid char value %d\n",
8130 entity
->content
= buf
->content
;
8131 buf
->content
= NULL
;
8138 * xmlParseStringPEReference:
8139 * @ctxt: an XML parser context
8140 * @str: a pointer to an index in the string
8142 * parse PEReference declarations
8144 * [69] PEReference ::= '%' Name ';'
8146 * [ WFC: No Recursion ]
8147 * A parsed entity must not contain a recursive
8148 * reference to itself, either directly or indirectly.
8150 * [ WFC: Entity Declared ]
8151 * In a document without any DTD, a document with only an internal DTD
8152 * subset which contains no parameter entity references, or a document
8153 * with "standalone='yes'", ... ... The declaration of a parameter
8154 * entity must precede any reference to it...
8156 * [ VC: Entity Declared ]
8157 * In a document with an external subset or external parameter entities
8158 * with "standalone='no'", ... ... The declaration of a parameter entity
8159 * must precede any reference to it...
8162 * Parameter-entity references may only appear in the DTD.
8163 * NOTE: misleading but this is handled.
8165 * Returns the string of the entity content.
8166 * str is updated to the current value of the index
8169 xmlParseStringPEReference(xmlParserCtxtPtr ctxt
, const xmlChar
**str
) {
8173 xmlEntityPtr entity
= NULL
;
8175 if ((str
== NULL
) || (*str
== NULL
)) return(NULL
);
8181 name
= xmlParseStringName(ctxt
, &ptr
);
8183 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8184 "xmlParseStringPEReference: no name\n");
8190 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
8198 * Increase the number of entity references parsed
8203 * Request the entity from SAX
8205 if ((ctxt
->sax
!= NULL
) &&
8206 (ctxt
->sax
->getParameterEntity
!= NULL
))
8207 entity
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
8208 if (ctxt
->instate
== XML_PARSER_EOF
) {
8213 if (entity
== NULL
) {
8215 * [ WFC: Entity Declared ]
8216 * In a document without any DTD, a document with only an
8217 * internal DTD subset which contains no parameter entity
8218 * references, or a document with "standalone='yes'", ...
8219 * ... The declaration of a parameter entity must precede
8220 * any reference to it...
8222 if ((ctxt
->standalone
== 1) ||
8223 ((ctxt
->hasExternalSubset
== 0) && (ctxt
->hasPErefs
== 0))) {
8224 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
8225 "PEReference: %%%s; not found\n", name
);
8228 * [ VC: Entity Declared ]
8229 * In a document with an external subset or external
8230 * parameter entities with "standalone='no'", ...
8231 * ... The declaration of a parameter entity must
8232 * precede any reference to it...
8234 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
8235 "PEReference: %%%s; not found\n",
8239 xmlParserEntityCheck(ctxt
, 0, NULL
, 0);
8242 * Internal checking in case the entity quest barfed
8244 if ((entity
->etype
!= XML_INTERNAL_PARAMETER_ENTITY
) &&
8245 (entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
)) {
8246 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
8247 "%%%s; is not a parameter entity\n",
8251 ctxt
->hasPErefs
= 1;
8258 * xmlParseDocTypeDecl:
8259 * @ctxt: an XML parser context
8261 * parse a DOCTYPE declaration
8263 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8264 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8266 * [ VC: Root Element Type ]
8267 * The Name in the document type declaration must match the element
8268 * type of the root element.
8272 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt
) {
8273 const xmlChar
*name
= NULL
;
8274 xmlChar
*ExternalID
= NULL
;
8275 xmlChar
*URI
= NULL
;
8278 * We know that '<!DOCTYPE' has been detected.
8285 * Parse the DOCTYPE name.
8287 name
= xmlParseName(ctxt
);
8289 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8290 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8292 ctxt
->intSubName
= name
;
8297 * Check for SystemID and ExternalID
8299 URI
= xmlParseExternalID(ctxt
, &ExternalID
, 1);
8301 if ((URI
!= NULL
) || (ExternalID
!= NULL
)) {
8302 ctxt
->hasExternalSubset
= 1;
8304 ctxt
->extSubURI
= URI
;
8305 ctxt
->extSubSystem
= ExternalID
;
8310 * Create and update the internal subset.
8312 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->internalSubset
!= NULL
) &&
8313 (!ctxt
->disableSAX
))
8314 ctxt
->sax
->internalSubset(ctxt
->userData
, name
, ExternalID
, URI
);
8315 if (ctxt
->instate
== XML_PARSER_EOF
)
8319 * Is there any internal subset declarations ?
8320 * they are handled separately in xmlParseInternalSubset()
8326 * We should be at the end of the DOCTYPE declaration.
8329 xmlFatalErr(ctxt
, XML_ERR_DOCTYPE_NOT_FINISHED
, NULL
);
8335 * xmlParseInternalSubset:
8336 * @ctxt: an XML parser context
8338 * parse the internal subset declaration
8340 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8344 xmlParseInternalSubset(xmlParserCtxtPtr ctxt
) {
8346 * Is there any DTD definition ?
8349 int baseInputNr
= ctxt
->inputNr
;
8350 ctxt
->instate
= XML_PARSER_DTD
;
8353 * Parse the succession of Markup declarations and
8355 * Subsequence (markupdecl | PEReference | S)*
8357 while (((RAW
!= ']') || (ctxt
->inputNr
> baseInputNr
)) &&
8358 (ctxt
->instate
!= XML_PARSER_EOF
)) {
8359 int id
= ctxt
->input
->id
;
8360 unsigned long cons
= CUR_CONSUMED
;
8363 xmlParseMarkupDecl(ctxt
);
8364 xmlParsePEReference(ctxt
);
8367 * Conditional sections are allowed from external entities included
8368 * by PE References in the internal subset.
8370 if ((ctxt
->inputNr
> 1) && (ctxt
->input
->filename
!= NULL
) &&
8371 (RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8372 xmlParseConditionalSections(ctxt
);
8375 if ((id
== ctxt
->input
->id
) && (cons
== CUR_CONSUMED
)) {
8376 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
8377 "xmlParseInternalSubset: error detected in Markup declaration\n");
8378 if (ctxt
->inputNr
> baseInputNr
)
8391 * We should be at the end of the DOCTYPE declaration.
8394 xmlFatalErr(ctxt
, XML_ERR_DOCTYPE_NOT_FINISHED
, NULL
);
8400 #ifdef LIBXML_SAX1_ENABLED
8402 * xmlParseAttribute:
8403 * @ctxt: an XML parser context
8404 * @value: a xmlChar ** used to store the value of the attribute
8406 * parse an attribute
8408 * [41] Attribute ::= Name Eq AttValue
8410 * [ WFC: No External Entity References ]
8411 * Attribute values cannot contain direct or indirect entity references
8412 * to external entities.
8414 * [ WFC: No < in Attribute Values ]
8415 * The replacement text of any entity referred to directly or indirectly in
8416 * an attribute value (other than "<") must not contain a <.
8418 * [ VC: Attribute Value Type ]
8419 * The attribute must have been declared; the value must be of the type
8422 * [25] Eq ::= S? '=' S?
8426 * [NS 11] Attribute ::= QName Eq AttValue
8428 * Also the case QName == xmlns:??? is handled independently as a namespace
8431 * Returns the attribute name, and the value in *value.
8435 xmlParseAttribute(xmlParserCtxtPtr ctxt
, xmlChar
**value
) {
8436 const xmlChar
*name
;
8441 name
= xmlParseName(ctxt
);
8443 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8444 "error parsing attribute name\n");
8455 val
= xmlParseAttValue(ctxt
);
8456 ctxt
->instate
= XML_PARSER_CONTENT
;
8458 xmlFatalErrMsgStr(ctxt
, XML_ERR_ATTRIBUTE_WITHOUT_VALUE
,
8459 "Specification mandates value for attribute %s\n", name
);
8464 * Check that xml:lang conforms to the specification
8465 * No longer reported as an error; only a warning is generated now,
8466 * since this was deprecated in the XML second edition
8468 if ((ctxt
->pedantic
) && (xmlStrEqual(name
, BAD_CAST
"xml:lang"))) {
8469 if (!xmlCheckLanguageID(val
)) {
8470 xmlWarningMsg(ctxt
, XML_WAR_LANG_VALUE
,
8471 "Malformed value for xml:lang : %s\n",
8477 * Check that xml:space conforms to the specification
8479 if (xmlStrEqual(name
, BAD_CAST
"xml:space")) {
8480 if (xmlStrEqual(val
, BAD_CAST
"default"))
8482 else if (xmlStrEqual(val
, BAD_CAST
"preserve"))
8485 xmlWarningMsg(ctxt
, XML_WAR_SPACE_VALUE
,
8486 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8497 * @ctxt: an XML parser context
8499 * parse a start of tag either for rule element or
8500 * EmptyElement. In both cases we don't parse the tag closing chars.
8502 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8504 * [ WFC: Unique Att Spec ]
8505 * No attribute name may appear more than once in the same start-tag or
8506 * empty-element tag.
8508 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8510 * [ WFC: Unique Att Spec ]
8511 * No attribute name may appear more than once in the same start-tag or
8512 * empty-element tag.
8516 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8518 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8520 * Returns the element name parsed
8524 xmlParseStartTag(xmlParserCtxtPtr ctxt
) {
8525 const xmlChar
*name
;
8526 const xmlChar
*attname
;
8528 const xmlChar
**atts
= ctxt
->atts
;
8530 int maxatts
= ctxt
->maxatts
;
8533 if (RAW
!= '<') return(NULL
);
8536 name
= xmlParseName(ctxt
);
8538 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8539 "xmlParseStartTag: invalid element name\n");
8544 * Now parse the attributes, it ends up with the ending
8551 while (((RAW
!= '>') &&
8552 ((RAW
!= '/') || (NXT(1) != '>')) &&
8553 (IS_BYTE_CHAR(RAW
))) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
8554 int id
= ctxt
->input
->id
;
8555 unsigned long cons
= CUR_CONSUMED
;
8557 attname
= xmlParseAttribute(ctxt
, &attvalue
);
8558 if ((attname
!= NULL
) && (attvalue
!= NULL
)) {
8560 * [ WFC: Unique Att Spec ]
8561 * No attribute name may appear more than once in the same
8562 * start-tag or empty-element tag.
8564 for (i
= 0; i
< nbatts
;i
+= 2) {
8565 if (xmlStrEqual(atts
[i
], attname
)) {
8566 xmlErrAttributeDup(ctxt
, NULL
, attname
);
8572 * Add the pair to atts
8575 maxatts
= 22; /* allow for 10 attrs by default */
8576 atts
= (const xmlChar
**)
8577 xmlMalloc(maxatts
* sizeof(xmlChar
*));
8579 xmlErrMemory(ctxt
, NULL
);
8580 if (attvalue
!= NULL
)
8585 ctxt
->maxatts
= maxatts
;
8586 } else if (nbatts
+ 4 > maxatts
) {
8590 n
= (const xmlChar
**) xmlRealloc((void *) atts
,
8591 maxatts
* sizeof(const xmlChar
*));
8593 xmlErrMemory(ctxt
, NULL
);
8594 if (attvalue
!= NULL
)
8600 ctxt
->maxatts
= maxatts
;
8602 atts
[nbatts
++] = attname
;
8603 atts
[nbatts
++] = attvalue
;
8604 atts
[nbatts
] = NULL
;
8605 atts
[nbatts
+ 1] = NULL
;
8607 if (attvalue
!= NULL
)
8614 if ((RAW
== '>') || (((RAW
== '/') && (NXT(1) == '>'))))
8616 if (SKIP_BLANKS
== 0) {
8617 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
8618 "attributes construct error\n");
8620 if ((cons
== CUR_CONSUMED
) && (id
== ctxt
->input
->id
) &&
8621 (attname
== NULL
) && (attvalue
== NULL
)) {
8622 xmlFatalErrMsg(ctxt
, XML_ERR_INTERNAL_ERROR
,
8623 "xmlParseStartTag: problem parsing attributes\n");
8631 * SAX: Start of Element !
8633 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->startElement
!= NULL
) &&
8634 (!ctxt
->disableSAX
)) {
8636 ctxt
->sax
->startElement(ctxt
->userData
, name
, atts
);
8638 ctxt
->sax
->startElement(ctxt
->userData
, name
, NULL
);
8642 /* Free only the content strings */
8643 for (i
= 1;i
< nbatts
;i
+=2)
8644 if (atts
[i
] != NULL
)
8645 xmlFree((xmlChar
*) atts
[i
]);
8652 * @ctxt: an XML parser context
8653 * @line: line of the start tag, reported in the tag mismatch error
8656 * parse an end of tag
8658 * [42] ETag ::= '</' Name S? '>'
8662 * [NS 9] ETag ::= '</' QName S? '>'
8666 xmlParseEndTag1(xmlParserCtxtPtr ctxt
, int line
) {
8667 const xmlChar
*name
;
8670 if ((RAW
!= '<') || (NXT(1) != '/')) {
8671 xmlFatalErrMsg(ctxt
, XML_ERR_LTSLASH_REQUIRED
,
8672 "xmlParseEndTag: '</' not found\n");
8677 name
= xmlParseNameAndCompare(ctxt
,ctxt
->name
);
8680 * We should definitely be at the ending "S? '>'" part
8684 if ((!IS_BYTE_CHAR(RAW
)) || (RAW
!= '>')) {
8685 xmlFatalErr(ctxt
, XML_ERR_GT_REQUIRED
, NULL
);
8690 * [ WFC: Element Type Match ]
8691 * The Name in an element's end-tag must match the element type in the
8695 if (name
!= (xmlChar
*)1) {
8696 if (name
== NULL
) name
= BAD_CAST
"unparsable";
8697 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NAME_MISMATCH
,
8698 "Opening and ending tag mismatch: %s line %d and %s\n",
8699 ctxt
->name
, line
, name
);
8705 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElement
!= NULL
) &&
8706 (!ctxt
->disableSAX
))
8707 ctxt
->sax
->endElement(ctxt
->userData
, ctxt
->name
);
8716 * @ctxt: an XML parser context
8718 * parse an end of tag
8720 * [42] ETag ::= '</' Name S? '>'
8724 * [NS 9] ETag ::= '</' QName S? '>'
8728 xmlParseEndTag(xmlParserCtxtPtr ctxt
) {
8729 xmlParseEndTag1(ctxt
, 0);
8731 #endif /* LIBXML_SAX1_ENABLED */
8733 /************************************************************************
8735 * SAX 2 specific operations *
8737 ************************************************************************/
8741 * @ctxt: an XML parser context
8742 * @prefix: the prefix to lookup
8744 * Lookup the namespace name for the @prefix (which can be NULL)
8745 * The prefix must come from the @ctxt->dict dictionary
8747 * Returns the namespace name or NULL if not bound
8749 static const xmlChar
*
8750 xmlGetNamespace(xmlParserCtxtPtr ctxt
, const xmlChar
*prefix
) {
8753 if (prefix
== ctxt
->str_xml
) return(ctxt
->str_xml_ns
);
8754 for (i
= ctxt
->nsNr
- 2;i
>= 0;i
-=2)
8755 if (ctxt
->nsTab
[i
] == prefix
) {
8756 if ((prefix
== NULL
) && (*ctxt
->nsTab
[i
+ 1] == 0))
8758 return(ctxt
->nsTab
[i
+ 1]);
8765 * @ctxt: an XML parser context
8766 * @prefix: pointer to store the prefix part
8768 * parse an XML Namespace QName
8770 * [6] QName ::= (Prefix ':')? LocalPart
8771 * [7] Prefix ::= NCName
8772 * [8] LocalPart ::= NCName
8774 * Returns the Name parsed or NULL
8777 static const xmlChar
*
8778 xmlParseQName(xmlParserCtxtPtr ctxt
, const xmlChar
**prefix
) {
8779 const xmlChar
*l
, *p
;
8783 l
= xmlParseNCName(ctxt
);
8786 l
= xmlParseName(ctxt
);
8788 xmlNsErr(ctxt
, XML_NS_ERR_QNAME
,
8789 "Failed to parse QName '%s'\n", l
, NULL
, NULL
);
8799 l
= xmlParseNCName(ctxt
);
8803 if (ctxt
->instate
== XML_PARSER_EOF
)
8805 xmlNsErr(ctxt
, XML_NS_ERR_QNAME
,
8806 "Failed to parse QName '%s:'\n", p
, NULL
, NULL
);
8807 l
= xmlParseNmtoken(ctxt
);
8809 if (ctxt
->instate
== XML_PARSER_EOF
)
8811 tmp
= xmlBuildQName(BAD_CAST
"", p
, NULL
, 0);
8813 tmp
= xmlBuildQName(l
, p
, NULL
, 0);
8816 p
= xmlDictLookup(ctxt
->dict
, tmp
, -1);
8817 if (tmp
!= NULL
) xmlFree(tmp
);
8824 xmlNsErr(ctxt
, XML_NS_ERR_QNAME
,
8825 "Failed to parse QName '%s:%s:'\n", p
, l
, NULL
);
8827 tmp
= (xmlChar
*) xmlParseName(ctxt
);
8829 tmp
= xmlBuildQName(tmp
, l
, NULL
, 0);
8830 l
= xmlDictLookup(ctxt
->dict
, tmp
, -1);
8831 if (tmp
!= NULL
) xmlFree(tmp
);
8835 if (ctxt
->instate
== XML_PARSER_EOF
)
8837 tmp
= xmlBuildQName(BAD_CAST
"", l
, NULL
, 0);
8838 l
= xmlDictLookup(ctxt
->dict
, tmp
, -1);
8839 if (tmp
!= NULL
) xmlFree(tmp
);
8850 * xmlParseQNameAndCompare:
8851 * @ctxt: an XML parser context
8852 * @name: the localname
8853 * @prefix: the prefix, if any.
8855 * parse an XML name and compares for match
8856 * (specialized for endtag parsing)
8858 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8859 * and the name for mismatch
8862 static const xmlChar
*
8863 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt
, xmlChar
const *name
,
8864 xmlChar
const *prefix
) {
8868 const xmlChar
*prefix2
;
8870 if (prefix
== NULL
) return(xmlParseNameAndCompare(ctxt
, name
));
8873 in
= ctxt
->input
->cur
;
8876 while (*in
!= 0 && *in
== *cmp
) {
8880 if ((*cmp
== 0) && (*in
== ':')) {
8883 while (*in
!= 0 && *in
== *cmp
) {
8887 if (*cmp
== 0 && (*in
== '>' || IS_BLANK_CH (*in
))) {
8889 ctxt
->input
->col
+= in
- ctxt
->input
->cur
;
8890 ctxt
->input
->cur
= in
;
8891 return((const xmlChar
*) 1);
8895 * all strings come from the dictionary, equality can be done directly
8897 ret
= xmlParseQName (ctxt
, &prefix2
);
8898 if ((ret
== name
) && (prefix
== prefix2
))
8899 return((const xmlChar
*) 1);
8904 * xmlParseAttValueInternal:
8905 * @ctxt: an XML parser context
8906 * @len: attribute len result
8907 * @alloc: whether the attribute was reallocated as a new string
8908 * @normalize: if 1 then further non-CDATA normalization must be done
8910 * parse a value for an attribute.
8911 * NOTE: if no normalization is needed, the routine will return pointers
8912 * directly from the data buffer.
8914 * 3.3.3 Attribute-Value Normalization:
8915 * Before the value of an attribute is passed to the application or
8916 * checked for validity, the XML processor must normalize it as follows:
8917 * - a character reference is processed by appending the referenced
8918 * character to the attribute value
8919 * - an entity reference is processed by recursively processing the
8920 * replacement text of the entity
8921 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8922 * appending #x20 to the normalized value, except that only a single
8923 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8924 * parsed entity or the literal entity value of an internal parsed entity
8925 * - other characters are processed by appending them to the normalized value
8926 * If the declared value is not CDATA, then the XML processor must further
8927 * process the normalized attribute value by discarding any leading and
8928 * trailing space (#x20) characters, and by replacing sequences of space
8929 * (#x20) characters by a single space (#x20) character.
8930 * All attributes for which no declaration has been read should be treated
8931 * by a non-validating parser as if declared CDATA.
8933 * Returns the AttValue parsed or NULL. The value has to be freed by the
8934 * caller if it was copied, this can be detected by val[*len] == 0.
8937 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8938 const xmlChar *oldbase = ctxt->input->base;\
8940 if (ctxt->instate == XML_PARSER_EOF)\
8942 if (oldbase != ctxt->input->base) {\
8943 ptrdiff_t delta = ctxt->input->base - oldbase;\
8944 start = start + delta;\
8947 end = ctxt->input->end;
8950 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt
, int *len
, int *alloc
,
8954 const xmlChar
*in
= NULL
, *start
, *end
, *last
;
8955 xmlChar
*ret
= NULL
;
8959 in
= (xmlChar
*) CUR_PTR
;
8960 line
= ctxt
->input
->line
;
8961 col
= ctxt
->input
->col
;
8962 if (*in
!= '"' && *in
!= '\'') {
8963 xmlFatalErr(ctxt
, XML_ERR_ATTRIBUTE_NOT_STARTED
, NULL
);
8966 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
8969 * try to handle in this routine the most common case where no
8970 * allocation of a new string is required and where content is
8975 end
= ctxt
->input
->end
;
8978 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt
, in
, start
, end
)
8982 * Skip any leading spaces
8984 while ((in
< end
) && (*in
!= limit
) &&
8985 ((*in
== 0x20) || (*in
== 0x9) ||
8986 (*in
== 0xA) || (*in
== 0xD))) {
8995 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt
, in
, start
, end
)
8996 if (((in
- start
) > XML_MAX_TEXT_LENGTH
) &&
8997 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
8998 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
8999 "AttValue length too long\n");
9004 while ((in
< end
) && (*in
!= limit
) && (*in
>= 0x20) &&
9005 (*in
<= 0x7f) && (*in
!= '&') && (*in
!= '<')) {
9007 if ((*in
++ == 0x20) && (*in
== 0x20)) break;
9009 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt
, in
, start
, end
)
9010 if (((in
- start
) > XML_MAX_TEXT_LENGTH
) &&
9011 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
9012 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9013 "AttValue length too long\n");
9020 * skip the trailing blanks
9022 while ((last
[-1] == 0x20) && (last
> start
)) last
--;
9023 while ((in
< end
) && (*in
!= limit
) &&
9024 ((*in
== 0x20) || (*in
== 0x9) ||
9025 (*in
== 0xA) || (*in
== 0xD))) {
9033 const xmlChar
*oldbase
= ctxt
->input
->base
;
9035 if (ctxt
->instate
== XML_PARSER_EOF
)
9037 if (oldbase
!= ctxt
->input
->base
) {
9038 ptrdiff_t delta
= ctxt
->input
->base
- oldbase
;
9039 start
= start
+ delta
;
9041 last
= last
+ delta
;
9043 end
= ctxt
->input
->end
;
9044 if (((in
- start
) > XML_MAX_TEXT_LENGTH
) &&
9045 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
9046 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9047 "AttValue length too long\n");
9052 if (((in
- start
) > XML_MAX_TEXT_LENGTH
) &&
9053 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
9054 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9055 "AttValue length too long\n");
9058 if (*in
!= limit
) goto need_complex
;
9060 while ((in
< end
) && (*in
!= limit
) && (*in
>= 0x20) &&
9061 (*in
<= 0x7f) && (*in
!= '&') && (*in
!= '<')) {
9065 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt
, in
, start
, end
)
9066 if (((in
- start
) > XML_MAX_TEXT_LENGTH
) &&
9067 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
9068 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9069 "AttValue length too long\n");
9075 if (((in
- start
) > XML_MAX_TEXT_LENGTH
) &&
9076 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
9077 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9078 "AttValue length too long\n");
9081 if (*in
!= limit
) goto need_complex
;
9086 *len
= last
- start
;
9087 ret
= (xmlChar
*) start
;
9089 if (alloc
) *alloc
= 1;
9090 ret
= xmlStrndup(start
, last
- start
);
9093 ctxt
->input
->line
= line
;
9094 ctxt
->input
->col
= col
;
9095 if (alloc
) *alloc
= 0;
9098 if (alloc
) *alloc
= 1;
9099 return xmlParseAttValueComplex(ctxt
, len
, normalize
);
9103 * xmlParseAttribute2:
9104 * @ctxt: an XML parser context
9105 * @pref: the element prefix
9106 * @elem: the element name
9107 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9108 * @value: a xmlChar ** used to store the value of the attribute
9109 * @len: an int * to save the length of the attribute
9110 * @alloc: an int * to indicate if the attribute was allocated
9112 * parse an attribute in the new SAX2 framework.
9114 * Returns the attribute name, and the value in *value.
9117 static const xmlChar
*
9118 xmlParseAttribute2(xmlParserCtxtPtr ctxt
,
9119 const xmlChar
* pref
, const xmlChar
* elem
,
9120 const xmlChar
** prefix
, xmlChar
** value
,
9121 int *len
, int *alloc
)
9123 const xmlChar
*name
;
9124 xmlChar
*val
, *internal_val
= NULL
;
9129 name
= xmlParseQName(ctxt
, prefix
);
9131 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
9132 "error parsing attribute name\n");
9137 * get the type if needed
9139 if (ctxt
->attsSpecial
!= NULL
) {
9142 type
= (int) (ptrdiff_t) xmlHashQLookup2(ctxt
->attsSpecial
,
9143 pref
, elem
, *prefix
, name
);
9155 val
= xmlParseAttValueInternal(ctxt
, len
, alloc
, normalize
);
9158 * Sometimes a second normalisation pass for spaces is needed
9159 * but that only happens if charrefs or entities references
9160 * have been used in the attribute value, i.e. the attribute
9161 * value has been extracted in an allocated string already.
9164 const xmlChar
*val2
;
9166 val2
= xmlAttrNormalizeSpace2(ctxt
, val
, len
);
9167 if ((val2
!= NULL
) && (val2
!= val
)) {
9169 val
= (xmlChar
*) val2
;
9173 ctxt
->instate
= XML_PARSER_CONTENT
;
9175 xmlFatalErrMsgStr(ctxt
, XML_ERR_ATTRIBUTE_WITHOUT_VALUE
,
9176 "Specification mandates value for attribute %s\n",
9181 if (*prefix
== ctxt
->str_xml
) {
9183 * Check that xml:lang conforms to the specification
9184 * No longer reported as an error; only a warning is generated now,
9185 * since this was deprecated in the XML second edition
9187 if ((ctxt
->pedantic
) && (xmlStrEqual(name
, BAD_CAST
"lang"))) {
9188 internal_val
= xmlStrndup(val
, *len
);
9189 if (!xmlCheckLanguageID(internal_val
)) {
9190 xmlWarningMsg(ctxt
, XML_WAR_LANG_VALUE
,
9191 "Malformed value for xml:lang : %s\n",
9192 internal_val
, NULL
);
9197 * Check that xml:space conforms to the specification
9199 if (xmlStrEqual(name
, BAD_CAST
"space")) {
9200 internal_val
= xmlStrndup(val
, *len
);
9201 if (xmlStrEqual(internal_val
, BAD_CAST
"default"))
9203 else if (xmlStrEqual(internal_val
, BAD_CAST
"preserve"))
9206 xmlWarningMsg(ctxt
, XML_WAR_SPACE_VALUE
,
9207 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9208 internal_val
, NULL
);
9212 xmlFree(internal_val
);
9220 * xmlParseStartTag2:
9221 * @ctxt: an XML parser context
9223 * parse a start of tag either for rule element or
9224 * EmptyElement. In both cases we don't parse the tag closing chars.
9225 * This routine is called when running SAX2 parsing
9227 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9229 * [ WFC: Unique Att Spec ]
9230 * No attribute name may appear more than once in the same start-tag or
9231 * empty-element tag.
9233 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9235 * [ WFC: Unique Att Spec ]
9236 * No attribute name may appear more than once in the same start-tag or
9237 * empty-element tag.
9241 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9243 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9245 * Returns the element name parsed
9248 static const xmlChar
*
9249 xmlParseStartTag2(xmlParserCtxtPtr ctxt
, const xmlChar
**pref
,
9250 const xmlChar
**URI
, int *tlen
) {
9251 const xmlChar
*localname
;
9252 const xmlChar
*prefix
;
9253 const xmlChar
*attname
;
9254 const xmlChar
*aprefix
;
9255 const xmlChar
*nsname
;
9257 const xmlChar
**atts
= ctxt
->atts
;
9258 int maxatts
= ctxt
->maxatts
;
9259 int nratts
, nbatts
, nbdef
, inputid
;
9260 int i
, j
, nbNs
, attval
;
9262 int nsNr
= ctxt
->nsNr
;
9264 if (RAW
!= '<') return(NULL
);
9268 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9269 * point since the attribute values may be stored as pointers to
9270 * the buffer and calling SHRINK would destroy them !
9271 * The Shrinking is only possible once the full set of attribute
9272 * callbacks have been done.
9275 cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
9276 inputid
= ctxt
->input
->id
;
9282 /* Forget any namespaces added during an earlier parse of this element. */
9285 localname
= xmlParseQName(ctxt
, &prefix
);
9286 if (localname
== NULL
) {
9287 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
9288 "StartTag: invalid element name\n");
9291 *tlen
= ctxt
->input
->cur
- ctxt
->input
->base
- cur
;
9294 * Now parse the attributes, it ends up with the ending
9301 while (((RAW
!= '>') &&
9302 ((RAW
!= '/') || (NXT(1) != '>')) &&
9303 (IS_BYTE_CHAR(RAW
))) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
9304 int id
= ctxt
->input
->id
;
9305 unsigned long cons
= CUR_CONSUMED
;
9306 int len
= -1, alloc
= 0;
9308 attname
= xmlParseAttribute2(ctxt
, prefix
, localname
,
9309 &aprefix
, &attvalue
, &len
, &alloc
);
9310 if ((attname
== NULL
) || (attvalue
== NULL
))
9312 if (len
< 0) len
= xmlStrlen(attvalue
);
9314 if ((attname
== ctxt
->str_xmlns
) && (aprefix
== NULL
)) {
9315 const xmlChar
*URL
= xmlDictLookup(ctxt
->dict
, attvalue
, len
);
9319 xmlErrMemory(ctxt
, "dictionary allocation failure");
9320 if ((attvalue
!= NULL
) && (alloc
!= 0))
9326 uri
= xmlParseURI((const char *) URL
);
9328 xmlNsErr(ctxt
, XML_WAR_NS_URI
,
9329 "xmlns: '%s' is not a valid URI\n",
9332 if (uri
->scheme
== NULL
) {
9333 xmlNsWarn(ctxt
, XML_WAR_NS_URI_RELATIVE
,
9334 "xmlns: URI %s is not absolute\n",
9339 if (URL
== ctxt
->str_xml_ns
) {
9340 if (attname
!= ctxt
->str_xml
) {
9341 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9342 "xml namespace URI cannot be the default namespace\n",
9349 BAD_CAST
"http://www.w3.org/2000/xmlns/"))) {
9350 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9351 "reuse of the xmlns namespace name is forbidden\n",
9357 * check that it's not a defined namespace
9359 for (j
= 1;j
<= nbNs
;j
++)
9360 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == NULL
)
9363 xmlErrAttributeDup(ctxt
, NULL
, attname
);
9365 if (nsPush(ctxt
, NULL
, URL
) > 0) nbNs
++;
9367 } else if (aprefix
== ctxt
->str_xmlns
) {
9368 const xmlChar
*URL
= xmlDictLookup(ctxt
->dict
, attvalue
, len
);
9371 if (attname
== ctxt
->str_xml
) {
9372 if (URL
!= ctxt
->str_xml_ns
) {
9373 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9374 "xml namespace prefix mapped to wrong URI\n",
9378 * Do not keep a namespace definition node
9382 if (URL
== ctxt
->str_xml_ns
) {
9383 if (attname
!= ctxt
->str_xml
) {
9384 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9385 "xml namespace URI mapped to wrong prefix\n",
9390 if (attname
== ctxt
->str_xmlns
) {
9391 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9392 "redefinition of the xmlns prefix is forbidden\n",
9398 BAD_CAST
"http://www.w3.org/2000/xmlns/"))) {
9399 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9400 "reuse of the xmlns namespace name is forbidden\n",
9404 if ((URL
== NULL
) || (URL
[0] == 0)) {
9405 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9406 "xmlns:%s: Empty XML namespace is not allowed\n",
9407 attname
, NULL
, NULL
);
9410 uri
= xmlParseURI((const char *) URL
);
9412 xmlNsErr(ctxt
, XML_WAR_NS_URI
,
9413 "xmlns:%s: '%s' is not a valid URI\n",
9414 attname
, URL
, NULL
);
9416 if ((ctxt
->pedantic
) && (uri
->scheme
== NULL
)) {
9417 xmlNsWarn(ctxt
, XML_WAR_NS_URI_RELATIVE
,
9418 "xmlns:%s: URI %s is not absolute\n",
9419 attname
, URL
, NULL
);
9426 * check that it's not a defined namespace
9428 for (j
= 1;j
<= nbNs
;j
++)
9429 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == attname
)
9432 xmlErrAttributeDup(ctxt
, aprefix
, attname
);
9434 if (nsPush(ctxt
, attname
, URL
) > 0) nbNs
++;
9438 * Add the pair to atts
9440 if ((atts
== NULL
) || (nbatts
+ 5 > maxatts
)) {
9441 if (xmlCtxtGrowAttrs(ctxt
, nbatts
+ 5) < 0) {
9444 maxatts
= ctxt
->maxatts
;
9447 ctxt
->attallocs
[nratts
++] = alloc
;
9448 atts
[nbatts
++] = attname
;
9449 atts
[nbatts
++] = aprefix
;
9451 * The namespace URI field is used temporarily to point at the
9452 * base of the current input buffer for non-alloced attributes.
9453 * When the input buffer is reallocated, all the pointers become
9454 * invalid, but they can be reconstructed later.
9457 atts
[nbatts
++] = NULL
;
9459 atts
[nbatts
++] = ctxt
->input
->base
;
9460 atts
[nbatts
++] = attvalue
;
9462 atts
[nbatts
++] = attvalue
;
9464 * tag if some deallocation is needed
9466 if (alloc
!= 0) attval
= 1;
9467 attvalue
= NULL
; /* moved into atts */
9471 if ((attvalue
!= NULL
) && (alloc
!= 0)) {
9477 if (ctxt
->instate
== XML_PARSER_EOF
)
9479 if ((RAW
== '>') || (((RAW
== '/') && (NXT(1) == '>'))))
9481 if (SKIP_BLANKS
== 0) {
9482 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
9483 "attributes construct error\n");
9486 if ((cons
== CUR_CONSUMED
) && (id
== ctxt
->input
->id
) &&
9487 (attname
== NULL
) && (attvalue
== NULL
)) {
9488 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
9489 "xmlParseStartTag: problem parsing attributes\n");
9495 if (ctxt
->input
->id
!= inputid
) {
9496 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
9497 "Unexpected change of input\n");
9502 /* Reconstruct attribute value pointers. */
9503 for (i
= 0, j
= 0; j
< nratts
; i
+= 5, j
++) {
9504 if (atts
[i
+2] != NULL
) {
9506 * Arithmetic on dangling pointers is technically undefined
9507 * behavior, but well...
9509 ptrdiff_t offset
= ctxt
->input
->base
- atts
[i
+2];
9510 atts
[i
+2] = NULL
; /* Reset repurposed namespace URI */
9511 atts
[i
+3] += offset
; /* value */
9512 atts
[i
+4] += offset
; /* valuend */
9517 * The attributes defaulting
9519 if (ctxt
->attsDefault
!= NULL
) {
9520 xmlDefAttrsPtr defaults
;
9522 defaults
= xmlHashLookup2(ctxt
->attsDefault
, localname
, prefix
);
9523 if (defaults
!= NULL
) {
9524 for (i
= 0;i
< defaults
->nbAttrs
;i
++) {
9525 attname
= defaults
->values
[5 * i
];
9526 aprefix
= defaults
->values
[5 * i
+ 1];
9529 * special work for namespaces defaulted defs
9531 if ((attname
== ctxt
->str_xmlns
) && (aprefix
== NULL
)) {
9533 * check that it's not a defined namespace
9535 for (j
= 1;j
<= nbNs
;j
++)
9536 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == NULL
)
9538 if (j
<= nbNs
) continue;
9540 nsname
= xmlGetNamespace(ctxt
, NULL
);
9541 if (nsname
!= defaults
->values
[5 * i
+ 2]) {
9542 if (nsPush(ctxt
, NULL
,
9543 defaults
->values
[5 * i
+ 2]) > 0)
9546 } else if (aprefix
== ctxt
->str_xmlns
) {
9548 * check that it's not a defined namespace
9550 for (j
= 1;j
<= nbNs
;j
++)
9551 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == attname
)
9553 if (j
<= nbNs
) continue;
9555 nsname
= xmlGetNamespace(ctxt
, attname
);
9556 if (nsname
!= defaults
->values
[2]) {
9557 if (nsPush(ctxt
, attname
,
9558 defaults
->values
[5 * i
+ 2]) > 0)
9563 * check that it's not a defined attribute
9565 for (j
= 0;j
< nbatts
;j
+=5) {
9566 if ((attname
== atts
[j
]) && (aprefix
== atts
[j
+1]))
9569 if (j
< nbatts
) continue;
9571 if ((atts
== NULL
) || (nbatts
+ 5 > maxatts
)) {
9572 if (xmlCtxtGrowAttrs(ctxt
, nbatts
+ 5) < 0) {
9576 maxatts
= ctxt
->maxatts
;
9579 atts
[nbatts
++] = attname
;
9580 atts
[nbatts
++] = aprefix
;
9581 if (aprefix
== NULL
)
9582 atts
[nbatts
++] = NULL
;
9584 atts
[nbatts
++] = xmlGetNamespace(ctxt
, aprefix
);
9585 atts
[nbatts
++] = defaults
->values
[5 * i
+ 2];
9586 atts
[nbatts
++] = defaults
->values
[5 * i
+ 3];
9587 if ((ctxt
->standalone
== 1) &&
9588 (defaults
->values
[5 * i
+ 4] != NULL
)) {
9589 xmlValidityError(ctxt
, XML_DTD_STANDALONE_DEFAULTED
,
9590 "standalone: attribute %s on %s defaulted from external subset\n",
9591 attname
, localname
);
9600 * The attributes checkings
9602 for (i
= 0; i
< nbatts
;i
+= 5) {
9604 * The default namespace does not apply to attribute names.
9606 if (atts
[i
+ 1] != NULL
) {
9607 nsname
= xmlGetNamespace(ctxt
, atts
[i
+ 1]);
9608 if (nsname
== NULL
) {
9609 xmlNsErr(ctxt
, XML_NS_ERR_UNDEFINED_NAMESPACE
,
9610 "Namespace prefix %s for %s on %s is not defined\n",
9611 atts
[i
+ 1], atts
[i
], localname
);
9613 atts
[i
+ 2] = nsname
;
9617 * [ WFC: Unique Att Spec ]
9618 * No attribute name may appear more than once in the same
9619 * start-tag or empty-element tag.
9620 * As extended by the Namespace in XML REC.
9622 for (j
= 0; j
< i
;j
+= 5) {
9623 if (atts
[i
] == atts
[j
]) {
9624 if (atts
[i
+1] == atts
[j
+1]) {
9625 xmlErrAttributeDup(ctxt
, atts
[i
+1], atts
[i
]);
9628 if ((nsname
!= NULL
) && (atts
[j
+ 2] == nsname
)) {
9629 xmlNsErr(ctxt
, XML_NS_ERR_ATTRIBUTE_REDEFINED
,
9630 "Namespaced Attribute %s in '%s' redefined\n",
9631 atts
[i
], nsname
, NULL
);
9638 nsname
= xmlGetNamespace(ctxt
, prefix
);
9639 if ((prefix
!= NULL
) && (nsname
== NULL
)) {
9640 xmlNsErr(ctxt
, XML_NS_ERR_UNDEFINED_NAMESPACE
,
9641 "Namespace prefix %s on %s is not defined\n",
9642 prefix
, localname
, NULL
);
9648 * SAX: Start of Element !
9650 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->startElementNs
!= NULL
) &&
9651 (!ctxt
->disableSAX
)) {
9653 ctxt
->sax
->startElementNs(ctxt
->userData
, localname
, prefix
,
9654 nsname
, nbNs
, &ctxt
->nsTab
[ctxt
->nsNr
- 2 * nbNs
],
9655 nbatts
/ 5, nbdef
, atts
);
9657 ctxt
->sax
->startElementNs(ctxt
->userData
, localname
, prefix
,
9658 nsname
, 0, NULL
, nbatts
/ 5, nbdef
, atts
);
9663 * Free up attribute allocated strings if needed
9666 for (i
= 3,j
= 0; j
< nratts
;i
+= 5,j
++)
9667 if ((ctxt
->attallocs
[j
] != 0) && (atts
[i
] != NULL
))
9668 xmlFree((xmlChar
*) atts
[i
]);
9676 * @ctxt: an XML parser context
9677 * @tag: the start tag descriptor; its prefix, line and nsNr fields are used
9680 * parse an end of tag
9682 * [42] ETag ::= '</' Name S? '>'
9686 * [NS 9] ETag ::= '</' QName S? '>'
9690 xmlParseEndTag2(xmlParserCtxtPtr ctxt
, const xmlStartTag
*tag
) {
9691 const xmlChar
*name
;
9694 if ((RAW
!= '<') || (NXT(1) != '/')) {
9695 xmlFatalErr(ctxt
, XML_ERR_LTSLASH_REQUIRED
, NULL
);
9700 if (tag
->prefix
== NULL
)
9701 name
= xmlParseNameAndCompare(ctxt
, ctxt
->name
);
9703 name
= xmlParseQNameAndCompare(ctxt
, ctxt
->name
, tag
->prefix
);
9706 * We should definitely be at the ending "S? '>'" part
9709 if (ctxt
->instate
== XML_PARSER_EOF
)
9712 if ((!IS_BYTE_CHAR(RAW
)) || (RAW
!= '>')) {
9713 xmlFatalErr(ctxt
, XML_ERR_GT_REQUIRED
, NULL
);
9718 * [ WFC: Element Type Match ]
9719 * The Name in an element's end-tag must match the element type in the
9723 if (name
!= (xmlChar
*)1) {
9724 if (name
== NULL
) name
= BAD_CAST
"unparsable";
9725 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NAME_MISMATCH
,
9726 "Opening and ending tag mismatch: %s line %d and %s\n",
9727 ctxt
->name
, tag
->line
, name
);
9733 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElementNs
!= NULL
) &&
9734 (!ctxt
->disableSAX
))
9735 ctxt
->sax
->endElementNs(ctxt
->userData
, ctxt
->name
, tag
->prefix
,
9740 nsPop(ctxt
, tag
->nsNr
);
9745 * @ctxt: an XML parser context
9747 * Parse escaped pure raw content.
9749 * [18] CDSect ::= CDStart CData CDEnd
9751 * [19] CDStart ::= '<![CDATA['
9753 * [20] CData ::= (Char* - (Char* ']]>' Char*))
9755 * [21] CDEnd ::= ']]>'
9758 xmlParseCDSect(xmlParserCtxtPtr ctxt
) {
9759 xmlChar
*buf
= NULL
;
9761 int size
= XML_PARSER_BUFFER_SIZE
;
9767 /* Check 2.6.0 was NXT(0) not RAW */
9768 if (CMP9(CUR_PTR
, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9773 ctxt
->instate
= XML_PARSER_CDATA_SECTION
;
9776 xmlFatalErr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
, NULL
);
9777 ctxt
->instate
= XML_PARSER_CONTENT
;
9783 xmlFatalErr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
, NULL
);
9784 ctxt
->instate
= XML_PARSER_CONTENT
;
9789 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
9791 xmlErrMemory(ctxt
, NULL
);
9794 while (IS_CHAR(cur
) &&
9795 ((r
!= ']') || (s
!= ']') || (cur
!= '>'))) {
9796 if (len
+ 5 >= size
) {
9799 if ((size
> XML_MAX_TEXT_LENGTH
) &&
9800 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
9801 xmlFatalErrMsgStr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
,
9802 "CData section too big found", NULL
);
9806 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* 2 * sizeof(xmlChar
));
9809 xmlErrMemory(ctxt
, NULL
);
9815 COPY_BUF(rl
,buf
,len
,r
);
9824 if (ctxt
->instate
== XML_PARSER_EOF
) {
9834 ctxt
->instate
= XML_PARSER_CONTENT
;
9836 xmlFatalErrMsgStr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
,
9837 "CData section not finished\n%.50s\n", buf
);
9844 * OK the buffer is to be consumed as cdata.
9846 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
9847 if (ctxt
->sax
->cdataBlock
!= NULL
)
9848 ctxt
->sax
->cdataBlock(ctxt
->userData
, buf
, len
);
9849 else if (ctxt
->sax
->characters
!= NULL
)
9850 ctxt
->sax
->characters(ctxt
->userData
, buf
, len
);
9856 * xmlParseContentInternal:
9857 * @ctxt: an XML parser context
9859 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9860 * unexpected EOF to the caller.
9864 xmlParseContentInternal(xmlParserCtxtPtr ctxt
) {
9865 int nameNr
= ctxt
->nameNr
;
9868 while ((RAW
!= 0) &&
9869 (ctxt
->instate
!= XML_PARSER_EOF
)) {
9870 int id
= ctxt
->input
->id
;
9871 unsigned long cons
= CUR_CONSUMED
;
9872 const xmlChar
*cur
= ctxt
->input
->cur
;
9875 * First case : a Processing Instruction.
9877 if ((*cur
== '<') && (cur
[1] == '?')) {
9882 * Second case : a CDSection
9884 /* 2.6.0 test was *cur not RAW */
9885 else if (CMP9(CUR_PTR
, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9886 xmlParseCDSect(ctxt
);
9890 * Third case : a comment
9892 else if ((*cur
== '<') && (NXT(1) == '!') &&
9893 (NXT(2) == '-') && (NXT(3) == '-')) {
9894 xmlParseComment(ctxt
);
9895 ctxt
->instate
= XML_PARSER_CONTENT
;
9899 * Fourth case : a sub-element.
9901 else if (*cur
== '<') {
9902 if (NXT(1) == '/') {
9903 if (ctxt
->nameNr
<= nameNr
)
9905 xmlParseElementEnd(ctxt
);
9907 xmlParseElementStart(ctxt
);
9912 * Fifth case : a reference. If it has not been resolved,
9913 * parsing returns its Name, create the node
9916 else if (*cur
== '&') {
9917 xmlParseReference(ctxt
);
9921 * Last case, text. Note that References are handled directly.
9924 xmlParseCharData(ctxt
, 0);
9930 if ((cons
== CUR_CONSUMED
) && (id
== ctxt
->input
->id
)) {
9931 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
9932 "detected an error in element content\n");
9933 xmlHaltParser(ctxt
);
9941 * @ctxt: an XML parser context
9943 * Parse a content sequence. Stops at EOF or '</'.
9945 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9949 xmlParseContent(xmlParserCtxtPtr ctxt
) {
9950 int nameNr
= ctxt
->nameNr
;
9952 xmlParseContentInternal(ctxt
);
9954 if ((ctxt
->instate
!= XML_PARSER_EOF
) && (ctxt
->nameNr
> nameNr
)) {
9955 const xmlChar
*name
= ctxt
->nameTab
[ctxt
->nameNr
- 1];
9956 int line
= ctxt
->pushTab
[ctxt
->nameNr
- 1].line
;
9957 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NOT_FINISHED
,
9958 "Premature end of data in tag %s line %d\n",
9965 * @ctxt: an XML parser context
9967 * parse an XML element
9969 * [39] element ::= EmptyElemTag | STag content ETag
9971 * [ WFC: Element Type Match ]
9972 * The Name in an element's end-tag must match the element type in the
9978 xmlParseElement(xmlParserCtxtPtr ctxt
) {
9979 if (xmlParseElementStart(ctxt
) != 0)
9982 xmlParseContentInternal(ctxt
);
9983 if (ctxt
->instate
== XML_PARSER_EOF
)
9987 const xmlChar
*name
= ctxt
->nameTab
[ctxt
->nameNr
- 1];
9988 int line
= ctxt
->pushTab
[ctxt
->nameNr
- 1].line
;
9989 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NOT_FINISHED
,
9990 "Premature end of data in tag %s line %d\n",
9995 xmlParseElementEnd(ctxt
);
9999 * xmlParseElementStart:
10000 * @ctxt: an XML parser context
10002 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10003 * opening tag was parsed, 1 if an empty element was parsed.
10006 xmlParseElementStart(xmlParserCtxtPtr ctxt
) {
10007 const xmlChar
*name
;
10008 const xmlChar
*prefix
= NULL
;
10009 const xmlChar
*URI
= NULL
;
10010 xmlParserNodeInfo node_info
;
10011 int line
, tlen
= 0;
10013 int nsNr
= ctxt
->nsNr
;
10015 if (((unsigned int) ctxt
->nameNr
> xmlParserMaxDepth
) &&
10016 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
10017 xmlFatalErrMsgInt(ctxt
, XML_ERR_INTERNAL_ERROR
,
10018 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10019 xmlParserMaxDepth
);
10020 xmlHaltParser(ctxt
);
10024 /* Capture start position */
10025 if (ctxt
->record_info
) {
10026 node_info
.begin_pos
= ctxt
->input
->consumed
+
10027 (CUR_PTR
- ctxt
->input
->base
);
10028 node_info
.begin_line
= ctxt
->input
->line
;
10031 if (ctxt
->spaceNr
== 0)
10032 spacePush(ctxt
, -1);
10033 else if (*ctxt
->space
== -2)
10034 spacePush(ctxt
, -1);
10036 spacePush(ctxt
, *ctxt
->space
);
10038 line
= ctxt
->input
->line
;
10039 #ifdef LIBXML_SAX1_ENABLED
10041 #endif /* LIBXML_SAX1_ENABLED */
10042 name
= xmlParseStartTag2(ctxt
, &prefix
, &URI
, &tlen
);
10043 #ifdef LIBXML_SAX1_ENABLED
10045 name
= xmlParseStartTag(ctxt
);
10046 #endif /* LIBXML_SAX1_ENABLED */
10047 if (ctxt
->instate
== XML_PARSER_EOF
)
10049 if (name
== NULL
) {
10053 nameNsPush(ctxt
, name
, prefix
, URI
, line
, ctxt
->nsNr
- nsNr
);
10056 #ifdef LIBXML_VALID_ENABLED
10058 * [ VC: Root Element Type ]
10059 * The Name in the document type declaration must match the element
10060 * type of the root element.
10062 if (ctxt
->validate
&& ctxt
->wellFormed
&& ctxt
->myDoc
&&
10063 ctxt
->node
&& (ctxt
->node
== ctxt
->myDoc
->children
))
10064 ctxt
->valid
&= xmlValidateRoot(&ctxt
->vctxt
, ctxt
->myDoc
);
10065 #endif /* LIBXML_VALID_ENABLED */
10068 * Check for an Empty Element.
10070 if ((RAW
== '/') && (NXT(1) == '>')) {
10073 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElementNs
!= NULL
) &&
10074 (!ctxt
->disableSAX
))
10075 ctxt
->sax
->endElementNs(ctxt
->userData
, name
, prefix
, URI
);
10076 #ifdef LIBXML_SAX1_ENABLED
10078 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElement
!= NULL
) &&
10079 (!ctxt
->disableSAX
))
10080 ctxt
->sax
->endElement(ctxt
->userData
, name
);
10081 #endif /* LIBXML_SAX1_ENABLED */
10085 if (nsNr
!= ctxt
->nsNr
)
10086 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
10087 if ( ret
!= NULL
&& ctxt
->record_info
) {
10088 node_info
.end_pos
= ctxt
->input
->consumed
+
10089 (CUR_PTR
- ctxt
->input
->base
);
10090 node_info
.end_line
= ctxt
->input
->line
;
10091 node_info
.node
= ret
;
10092 xmlParserAddNodeInfo(ctxt
, &node_info
);
10099 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_GT_REQUIRED
,
10100 "Couldn't find end of Start Tag %s line %d\n",
10104 * end of parsing of this node.
10109 if (nsNr
!= ctxt
->nsNr
)
10110 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
10113 * Capture end position and add node
10115 if ( ret
!= NULL
&& ctxt
->record_info
) {
10116 node_info
.end_pos
= ctxt
->input
->consumed
+
10117 (CUR_PTR
- ctxt
->input
->base
);
10118 node_info
.end_line
= ctxt
->input
->line
;
10119 node_info
.node
= ret
;
10120 xmlParserAddNodeInfo(ctxt
, &node_info
);
10129 * xmlParseElementEnd:
10130 * @ctxt: an XML parser context
10132 * Parse the end of an XML element.
10135 xmlParseElementEnd(xmlParserCtxtPtr ctxt
) {
10136 xmlParserNodeInfo node_info
;
10137 xmlNodePtr ret
= ctxt
->node
;
10139 if (ctxt
->nameNr
<= 0)
10143 * parse the end of tag: '</' should be here.
10146 xmlParseEndTag2(ctxt
, &ctxt
->pushTab
[ctxt
->nameNr
- 1]);
10149 #ifdef LIBXML_SAX1_ENABLED
10151 xmlParseEndTag1(ctxt
, 0);
10152 #endif /* LIBXML_SAX1_ENABLED */
10155 * Capture end position and add node
10157 if ( ret
!= NULL
&& ctxt
->record_info
) {
10158 node_info
.end_pos
= ctxt
->input
->consumed
+
10159 (CUR_PTR
- ctxt
->input
->base
);
10160 node_info
.end_line
= ctxt
->input
->line
;
10161 node_info
.node
= ret
;
10162 xmlParserAddNodeInfo(ctxt
, &node_info
);
10167 * xmlParseVersionNum:
10168 * @ctxt: an XML parser context
10170 * parse the XML version value.
10172 * [26] VersionNum ::= '1.' [0-9]+
10174 * In practice allow [0-9].[0-9]+ at that level
10176 * Returns the string giving the XML version number, or NULL
10179 xmlParseVersionNum(xmlParserCtxtPtr ctxt
) {
10180 xmlChar
*buf
= NULL
;
10185 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
10187 xmlErrMemory(ctxt
, NULL
);
10191 if (!((cur
>= '0') && (cur
<= '9'))) {
10205 while ((cur
>= '0') && (cur
<= '9')) {
10206 if (len
+ 1 >= size
) {
10210 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
10213 xmlErrMemory(ctxt
, NULL
);
10227 * xmlParseVersionInfo:
10228 * @ctxt: an XML parser context
10230 * parse the XML version.
10232 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10234 * [25] Eq ::= S? '=' S?
10236 * Returns the version string, e.g. "1.0"
10240 xmlParseVersionInfo(xmlParserCtxtPtr ctxt
) {
10241 xmlChar
*version
= NULL
;
10243 if (CMP7(CUR_PTR
, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10247 xmlFatalErr(ctxt
, XML_ERR_EQUAL_REQUIRED
, NULL
);
10254 version
= xmlParseVersionNum(ctxt
);
10256 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10259 } else if (RAW
== '\''){
10261 version
= xmlParseVersionNum(ctxt
);
10263 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10267 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_STARTED
, NULL
);
10275 * @ctxt: an XML parser context
10277 * parse the XML encoding name
10279 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10281 * Returns the encoding name value or NULL
10284 xmlParseEncName(xmlParserCtxtPtr ctxt
) {
10285 xmlChar
*buf
= NULL
;
10291 if (((cur
>= 'a') && (cur
<= 'z')) ||
10292 ((cur
>= 'A') && (cur
<= 'Z'))) {
10293 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
10295 xmlErrMemory(ctxt
, NULL
);
10302 while (((cur
>= 'a') && (cur
<= 'z')) ||
10303 ((cur
>= 'A') && (cur
<= 'Z')) ||
10304 ((cur
>= '0') && (cur
<= '9')) ||
10305 (cur
== '.') || (cur
== '_') ||
10307 if (len
+ 1 >= size
) {
10311 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
10313 xmlErrMemory(ctxt
, NULL
);
10330 xmlFatalErr(ctxt
, XML_ERR_ENCODING_NAME
, NULL
);
10336 * xmlParseEncodingDecl:
10337 * @ctxt: an XML parser context
10339 * parse the XML encoding declaration
10341 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10343 * This sets up the conversion filters.
10345 * Returns the encoding value or NULL
10349 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt
) {
10350 xmlChar
*encoding
= NULL
;
10353 if (CMP8(CUR_PTR
, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10357 xmlFatalErr(ctxt
, XML_ERR_EQUAL_REQUIRED
, NULL
);
10364 encoding
= xmlParseEncName(ctxt
);
10366 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10367 xmlFree((xmlChar
*) encoding
);
10371 } else if (RAW
== '\''){
10373 encoding
= xmlParseEncName(ctxt
);
10375 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10376 xmlFree((xmlChar
*) encoding
);
10381 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_STARTED
, NULL
);
10385 * Non standard parsing, allowing the user to ignore encoding
10387 if (ctxt
->options
& XML_PARSE_IGNORE_ENC
) {
10388 xmlFree((xmlChar
*) encoding
);
10393 * UTF-16 encoding switch has already taken place at this stage,
10394 * moreover the little-endian/big-endian selection is already done
10396 if ((encoding
!= NULL
) &&
10397 ((!xmlStrcasecmp(encoding
, BAD_CAST
"UTF-16")) ||
10398 (!xmlStrcasecmp(encoding
, BAD_CAST
"UTF16")))) {
10400 * If no encoding was passed to the parser, the document is
10401 * labelled UTF-16, and no decoder is present (i.e. the
10402 * document is apparently UTF-8 compatible), then raise an
10403 * encoding mismatch fatal error
10405 if ((ctxt
->encoding
== NULL
) &&
10406 (ctxt
->input
->buf
!= NULL
) &&
10407 (ctxt
->input
->buf
->encoder
== NULL
)) {
10408 xmlFatalErrMsg(ctxt
, XML_ERR_INVALID_ENCODING
,
10409 "Document labelled UTF-16 but has UTF-8 content\n");
10411 if (ctxt
->encoding
!= NULL
)
10412 xmlFree((xmlChar
*) ctxt
->encoding
);
10413 ctxt
->encoding
= encoding
;
10416 * UTF-8 encoding is handled natively
10418 else if ((encoding
!= NULL
) &&
10419 ((!xmlStrcasecmp(encoding
, BAD_CAST
"UTF-8")) ||
10420 (!xmlStrcasecmp(encoding
, BAD_CAST
"UTF8")))) {
10421 if (ctxt
->encoding
!= NULL
)
10422 xmlFree((xmlChar
*) ctxt
->encoding
);
10423 ctxt
->encoding
= encoding
;
10425 else if (encoding
!= NULL
) {
10426 xmlCharEncodingHandlerPtr handler
;
10428 if (ctxt
->input
->encoding
!= NULL
)
10429 xmlFree((xmlChar
*) ctxt
->input
->encoding
);
10430 ctxt
->input
->encoding
= encoding
;
10432 handler
= xmlFindCharEncodingHandler((const char *) encoding
);
10433 if (handler
!= NULL
) {
10434 if (xmlSwitchToEncoding(ctxt
, handler
) < 0) {
10435 /* failed to convert */
10436 ctxt
->errNo
= XML_ERR_UNSUPPORTED_ENCODING
;
10440 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNSUPPORTED_ENCODING
,
10441 "Unsupported encoding %s\n", encoding
);
10451 * @ctxt: an XML parser context
10453 * parse the XML standalone declaration
10455 * [32] SDDecl ::= S 'standalone' Eq
10456 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10458 * [ VC: Standalone Document Declaration ]
10459 * TODO The standalone document declaration must have the value "no"
10460 * if any external markup declarations contain declarations of:
10461 * - attributes with default values, if elements to which these
10462 * attributes apply appear in the document without specifications
10463 * of values for these attributes, or
10464 * - entities (other than amp, lt, gt, apos, quot), if references
10465 * to those entities appear in the document, or
10466 * - attributes with values subject to normalization, where the
10467 * attribute appears in the document with a value which will change
10468 * as a result of normalization, or
10469 * - element types with element content, if white space occurs directly
10470 * within any instance of those types.
10473 * 1 if standalone="yes"
10474 * 0 if standalone="no"
10475 * -2 if standalone attribute is missing or invalid
10476 * (A standalone value of -2 means that the XML declaration was found,
10477 * but no value was specified for the standalone attribute).
10481 xmlParseSDDecl(xmlParserCtxtPtr ctxt
) {
10482 int standalone
= -2;
10485 if (CMP10(CUR_PTR
, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10489 xmlFatalErr(ctxt
, XML_ERR_EQUAL_REQUIRED
, NULL
);
10490 return(standalone
);
10496 if ((RAW
== 'n') && (NXT(1) == 'o')) {
10499 } else if ((RAW
== 'y') && (NXT(1) == 'e') &&
10504 xmlFatalErr(ctxt
, XML_ERR_STANDALONE_VALUE
, NULL
);
10507 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10510 } else if (RAW
== '"'){
10512 if ((RAW
== 'n') && (NXT(1) == 'o')) {
10515 } else if ((RAW
== 'y') && (NXT(1) == 'e') &&
10520 xmlFatalErr(ctxt
, XML_ERR_STANDALONE_VALUE
, NULL
);
10523 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10527 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_STARTED
, NULL
);
10530 return(standalone
);
10535 * @ctxt: an XML parser context
10537 * parse an XML declaration header
10539 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10543 xmlParseXMLDecl(xmlParserCtxtPtr ctxt
) {
10547 * This value for standalone indicates that the document has an
10548 * XML declaration but it does not have a standalone attribute.
10549 * It will be overwritten later if a standalone attribute is found.
10551 ctxt
->input
->standalone
= -2;
10554 * We know that '<?xml' is here.
10558 if (!IS_BLANK_CH(RAW
)) {
10559 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
10560 "Blank needed after '<?xml'\n");
10565 * We must have the VersionInfo here.
10567 version
= xmlParseVersionInfo(ctxt
);
10568 if (version
== NULL
) {
10569 xmlFatalErr(ctxt
, XML_ERR_VERSION_MISSING
, NULL
);
10571 if (!xmlStrEqual(version
, (const xmlChar
*) XML_DEFAULT_VERSION
)) {
10573 * Changed here for XML-1.0 5th edition
10575 if (ctxt
->options
& XML_PARSE_OLD10
) {
10576 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNKNOWN_VERSION
,
10577 "Unsupported version '%s'\n",
10580 if ((version
[0] == '1') && ((version
[1] == '.'))) {
10581 xmlWarningMsg(ctxt
, XML_WAR_UNKNOWN_VERSION
,
10582 "Unsupported version '%s'\n",
10585 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNKNOWN_VERSION
,
10586 "Unsupported version '%s'\n",
10591 if (ctxt
->version
!= NULL
)
10592 xmlFree((void *) ctxt
->version
);
10593 ctxt
->version
= version
;
10597 * We may have the encoding declaration
10599 if (!IS_BLANK_CH(RAW
)) {
10600 if ((RAW
== '?') && (NXT(1) == '>')) {
10604 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
, "Blank needed here\n");
10606 xmlParseEncodingDecl(ctxt
);
10607 if ((ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) ||
10608 (ctxt
->instate
== XML_PARSER_EOF
)) {
10610 * The XML REC instructs us to stop parsing right here
10616 * We may have the standalone status.
10618 if ((ctxt
->input
->encoding
!= NULL
) && (!IS_BLANK_CH(RAW
))) {
10619 if ((RAW
== '?') && (NXT(1) == '>')) {
10623 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
, "Blank needed here\n");
10627 * We can grow the input buffer freely at that point
10632 ctxt
->input
->standalone
= xmlParseSDDecl(ctxt
);
10635 if ((RAW
== '?') && (NXT(1) == '>')) {
10637 } else if (RAW
== '>') {
10638 /* Deprecated old WD ... */
10639 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
10642 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
10643 MOVETO_ENDTAG(CUR_PTR
);
10650 * @ctxt: an XML parser context
10652 * parse an XML Misc* optional field.
10654 * [27] Misc ::= Comment | PI | S
10658 xmlParseMisc(xmlParserCtxtPtr ctxt
) {
10659 while (ctxt
->instate
!= XML_PARSER_EOF
) {
10662 if ((RAW
== '<') && (NXT(1) == '?')) {
10664 } else if (CMP4(CUR_PTR
, '<', '!', '-', '-')) {
10665 xmlParseComment(ctxt
);
10673 * xmlParseDocument:
10674 * @ctxt: an XML parser context
10676 * parse an XML document (and build a tree if using the standard SAX
10679 * [1] document ::= prolog element Misc*
10681 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10683 * Returns 0, or -1 in case of error. The parser context is augmented
10684 * as a result of the parsing.
10688 xmlParseDocument(xmlParserCtxtPtr ctxt
) {
10690 xmlCharEncoding enc
;
10694 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
))
10700 * SAX: detecting the level.
10702 xmlDetectSAX2(ctxt
);
10705 * SAX: beginning of the document processing.
10707 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
10708 ctxt
->sax
->setDocumentLocator(ctxt
->userData
, &xmlDefaultSAXLocator
);
10709 if (ctxt
->instate
== XML_PARSER_EOF
)
10712 if ((ctxt
->encoding
== NULL
) &&
10713 ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4)) {
10715 * Get the 4 first bytes and decode the charset
10716 * if enc != XML_CHAR_ENCODING_NONE
10717 * plug some encoding conversion routines.
10723 enc
= xmlDetectCharEncoding(&start
[0], 4);
10724 if (enc
!= XML_CHAR_ENCODING_NONE
) {
10725 xmlSwitchEncoding(ctxt
, enc
);
10731 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
10736 * Check for the XMLDecl in the Prolog.
10737 * do not GROW here to avoid the detected encoder to decode more
10738 * than just the first line, unless the amount of data is really
10739 * too small to hold "<?xml version="1.0" encoding="foo"
10741 if ((ctxt
->input
->end
- ctxt
->input
->cur
) < 35) {
10744 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10747 * Note that we will switch encoding on the fly.
10749 xmlParseXMLDecl(ctxt
);
10750 if ((ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) ||
10751 (ctxt
->instate
== XML_PARSER_EOF
)) {
10753 * The XML REC instructs us to stop parsing right here
10757 ctxt
->standalone
= ctxt
->input
->standalone
;
10760 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
10762 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) && (!ctxt
->disableSAX
))
10763 ctxt
->sax
->startDocument(ctxt
->userData
);
10764 if (ctxt
->instate
== XML_PARSER_EOF
)
10766 if ((ctxt
->myDoc
!= NULL
) && (ctxt
->input
!= NULL
) &&
10767 (ctxt
->input
->buf
!= NULL
) && (ctxt
->input
->buf
->compressed
>= 0)) {
10768 ctxt
->myDoc
->compression
= ctxt
->input
->buf
->compressed
;
10772 * The Misc part of the Prolog
10774 xmlParseMisc(ctxt
);
10777 * Then possibly doc type declaration(s) and more Misc
10778 * (doctypedecl Misc*)?
10781 if (CMP9(CUR_PTR
, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10783 ctxt
->inSubset
= 1;
10784 xmlParseDocTypeDecl(ctxt
);
10786 ctxt
->instate
= XML_PARSER_DTD
;
10787 xmlParseInternalSubset(ctxt
);
10788 if (ctxt
->instate
== XML_PARSER_EOF
)
10793 * Create and update the external subset.
10795 ctxt
->inSubset
= 2;
10796 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->externalSubset
!= NULL
) &&
10797 (!ctxt
->disableSAX
))
10798 ctxt
->sax
->externalSubset(ctxt
->userData
, ctxt
->intSubName
,
10799 ctxt
->extSubSystem
, ctxt
->extSubURI
);
10800 if (ctxt
->instate
== XML_PARSER_EOF
)
10802 ctxt
->inSubset
= 0;
10804 xmlCleanSpecialAttr(ctxt
);
10806 ctxt
->instate
= XML_PARSER_PROLOG
;
10807 xmlParseMisc(ctxt
);
10811 * Time to start parsing the tree itself
10815 xmlFatalErrMsg(ctxt
, XML_ERR_DOCUMENT_EMPTY
,
10816 "Start tag expected, '<' not found\n");
10818 ctxt
->instate
= XML_PARSER_CONTENT
;
10819 xmlParseElement(ctxt
);
10820 ctxt
->instate
= XML_PARSER_EPILOG
;
10824 * The Misc part at the end
10826 xmlParseMisc(ctxt
);
10829 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
10831 ctxt
->instate
= XML_PARSER_EOF
;
10835 * SAX: end of the document processing.
10837 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
10838 ctxt
->sax
->endDocument(ctxt
->userData
);
10841 * Remove locally kept entity definitions if the tree was not built
10843 if ((ctxt
->myDoc
!= NULL
) &&
10844 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
))) {
10845 xmlFreeDoc(ctxt
->myDoc
);
10846 ctxt
->myDoc
= NULL
;
10849 if ((ctxt
->wellFormed
) && (ctxt
->myDoc
!= NULL
)) {
10850 ctxt
->myDoc
->properties
|= XML_DOC_WELLFORMED
;
10852 ctxt
->myDoc
->properties
|= XML_DOC_DTDVALID
;
10853 if (ctxt
->nsWellFormed
)
10854 ctxt
->myDoc
->properties
|= XML_DOC_NSVALID
;
10855 if (ctxt
->options
& XML_PARSE_OLD10
)
10856 ctxt
->myDoc
->properties
|= XML_DOC_OLD10
;
10858 if (! ctxt
->wellFormed
) {
10866 * xmlParseExtParsedEnt:
10867 * @ctxt: an XML parser context
10869 * parse a general parsed entity
10870 * An external general parsed entity is well-formed if it matches the
10871 * production labeled extParsedEnt.
10873 * [78] extParsedEnt ::= TextDecl? content
10875 * Returns 0, or -1 in case of error. The parser context is augmented
10876 * as a result of the parsing.
10880 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt
) {
10882 xmlCharEncoding enc
;
10884 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
))
10887 xmlDetectSAX2(ctxt
);
10892 * SAX: beginning of the document processing.
10894 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
10895 ctxt
->sax
->setDocumentLocator(ctxt
->userData
, &xmlDefaultSAXLocator
);
10898 * Get the 4 first bytes and decode the charset
10899 * if enc != XML_CHAR_ENCODING_NONE
10900 * plug some encoding conversion routines.
10902 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
10907 enc
= xmlDetectCharEncoding(start
, 4);
10908 if (enc
!= XML_CHAR_ENCODING_NONE
) {
10909 xmlSwitchEncoding(ctxt
, enc
);
10915 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
10919 * Check for the XMLDecl in the Prolog.
10922 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10925 * Note that we will switch encoding on the fly.
10927 xmlParseXMLDecl(ctxt
);
10928 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
10930 * The XML REC instructs us to stop parsing right here
10936 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
10938 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) && (!ctxt
->disableSAX
))
10939 ctxt
->sax
->startDocument(ctxt
->userData
);
10940 if (ctxt
->instate
== XML_PARSER_EOF
)
10944 * Doing validity checking on chunk doesn't make sense
10946 ctxt
->instate
= XML_PARSER_CONTENT
;
10947 ctxt
->validate
= 0;
10948 ctxt
->loadsubset
= 0;
10951 xmlParseContent(ctxt
);
10952 if (ctxt
->instate
== XML_PARSER_EOF
)
10955 if ((RAW
== '<') && (NXT(1) == '/')) {
10956 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
10957 } else if (RAW
!= 0) {
10958 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
10962 * SAX: end of the document processing.
10964 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
10965 ctxt
->sax
->endDocument(ctxt
->userData
);
10967 if (! ctxt
->wellFormed
) return(-1);
10971 #ifdef LIBXML_PUSH_ENABLED
10972 /************************************************************************
10974 * Progressive parsing interfaces *
10976 ************************************************************************/
10979 * xmlParseLookupSequence:
10980 * @ctxt: an XML parser context
10981 * @first: the first char to lookup
10982 * @next: the next char to lookup or zero
10983 * @third: the third char to lookup or zero
10985 * Try to find if a sequence (first, next, third) or just (first next) or
10986 * (first) is available in the input stream.
10987 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10988 * to avoid rescanning sequences of bytes. It DOES change the state of the
10989 * parser, so do not use it liberally.
10991 * Returns the index to the current parsing point if the full sequence
10992 * is available, -1 otherwise.
10995 xmlParseLookupSequence(xmlParserCtxtPtr ctxt
, xmlChar first
,
10996 xmlChar next
, xmlChar third
) {
10998 xmlParserInputPtr in
;
10999 const xmlChar
*buf
;
11002 if (in
== NULL
) return(-1);
11003 base
= in
->cur
- in
->base
;
11004 if (base
< 0) return(-1);
11005 if (ctxt
->checkIndex
> base
)
11006 base
= ctxt
->checkIndex
;
11007 if (in
->buf
== NULL
) {
11011 buf
= xmlBufContent(in
->buf
->buffer
);
11012 len
= xmlBufUse(in
->buf
->buffer
);
11014 /* take into account the sequence length */
11015 if (third
) len
-= 2;
11016 else if (next
) len
--;
11017 for (;base
< len
;base
++) {
11018 if (buf
[base
] == first
) {
11020 if ((buf
[base
+ 1] != next
) ||
11021 (buf
[base
+ 2] != third
)) continue;
11022 } else if (next
!= 0) {
11023 if (buf
[base
+ 1] != next
) continue;
11025 ctxt
->checkIndex
= 0;
11028 xmlGenericError(xmlGenericErrorContext
,
11029 "PP: lookup '%c' found at %d\n",
11031 else if (third
== 0)
11032 xmlGenericError(xmlGenericErrorContext
,
11033 "PP: lookup '%c%c' found at %d\n",
11034 first
, next
, base
);
11036 xmlGenericError(xmlGenericErrorContext
,
11037 "PP: lookup '%c%c%c' found at %d\n",
11038 first
, next
, third
, base
);
11040 return(base
- (in
->cur
- in
->base
));
11043 ctxt
->checkIndex
= base
;
11046 xmlGenericError(xmlGenericErrorContext
,
11047 "PP: lookup '%c' failed\n", first
);
11048 else if (third
== 0)
11049 xmlGenericError(xmlGenericErrorContext
,
11050 "PP: lookup '%c%c' failed\n", first
, next
);
11052 xmlGenericError(xmlGenericErrorContext
,
11053 "PP: lookup '%c%c%c' failed\n", first
, next
, third
);
11059 * xmlParseGetLasts:
11060 * @ctxt: an XML parser context
11061 * @lastlt: pointer to store the last '<' from the input
11062 * @lastgt: pointer to store the last '>' from the input
11064 * Lookup the last < and > in the current chunk
11067 xmlParseGetLasts(xmlParserCtxtPtr ctxt
, const xmlChar
**lastlt
,
11068 const xmlChar
**lastgt
) {
11069 const xmlChar
*tmp
;
11071 if ((ctxt
== NULL
) || (lastlt
== NULL
) || (lastgt
== NULL
)) {
11072 xmlGenericError(xmlGenericErrorContext
,
11073 "Internal error: xmlParseGetLasts\n");
11076 if ((ctxt
->progressive
!= 0) && (ctxt
->inputNr
== 1)) {
11077 tmp
= ctxt
->input
->end
;
11079 while ((tmp
>= ctxt
->input
->base
) && (*tmp
!= '<')) tmp
--;
11080 if (tmp
< ctxt
->input
->base
) {
11086 while ((tmp
< ctxt
->input
->end
) && (*tmp
!= '>')) {
11087 if (*tmp
== '\'') {
11089 while ((tmp
< ctxt
->input
->end
) && (*tmp
!= '\'')) tmp
++;
11090 if (tmp
< ctxt
->input
->end
) tmp
++;
11091 } else if (*tmp
== '"') {
11093 while ((tmp
< ctxt
->input
->end
) && (*tmp
!= '"')) tmp
++;
11094 if (tmp
< ctxt
->input
->end
) tmp
++;
11098 if (tmp
< ctxt
->input
->end
)
11103 while ((tmp
>= ctxt
->input
->base
) && (*tmp
!= '>')) tmp
--;
11104 if (tmp
>= ctxt
->input
->base
)
11116 * xmlCheckCdataPush:
11117 * @utf: pointer to the block of characters
11118 * @len: length of the block in bytes
11119 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11121 * Check that the block of characters is okay as CData content [20]
11123 * Returns the number of bytes to pass if okay, a negative index where an
11124 * UTF-8 error occurred otherwise
11127 xmlCheckCdataPush(const xmlChar
*utf
, int len
, int complete
) {
11132 if ((utf
== NULL
) || (len
<= 0))
11135 for (ix
= 0; ix
< len
;) { /* string is 0-terminated */
11137 if ((c
& 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11140 else if ((c
== 0xA) || (c
== 0xD) || (c
== 0x9))
11144 } else if ((c
& 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11145 if (ix
+ 2 > len
) return(complete
? -ix
: ix
);
11146 if ((utf
[ix
+1] & 0xc0 ) != 0x80)
11148 codepoint
= (utf
[ix
] & 0x1f) << 6;
11149 codepoint
|= utf
[ix
+1] & 0x3f;
11150 if (!xmlIsCharQ(codepoint
))
11153 } else if ((c
& 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11154 if (ix
+ 3 > len
) return(complete
? -ix
: ix
);
11155 if (((utf
[ix
+1] & 0xc0) != 0x80) ||
11156 ((utf
[ix
+2] & 0xc0) != 0x80))
11158 codepoint
= (utf
[ix
] & 0xf) << 12;
11159 codepoint
|= (utf
[ix
+1] & 0x3f) << 6;
11160 codepoint
|= utf
[ix
+2] & 0x3f;
11161 if (!xmlIsCharQ(codepoint
))
11164 } else if ((c
& 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11165 if (ix
+ 4 > len
) return(complete
? -ix
: ix
);
11166 if (((utf
[ix
+1] & 0xc0) != 0x80) ||
11167 ((utf
[ix
+2] & 0xc0) != 0x80) ||
11168 ((utf
[ix
+3] & 0xc0) != 0x80))
11170 codepoint
= (utf
[ix
] & 0x7) << 18;
11171 codepoint
|= (utf
[ix
+1] & 0x3f) << 12;
11172 codepoint
|= (utf
[ix
+2] & 0x3f) << 6;
11173 codepoint
|= utf
[ix
+3] & 0x3f;
11174 if (!xmlIsCharQ(codepoint
))
11177 } else /* unknown encoding */
11184 * xmlParseTryOrFinish:
11185 * @ctxt: an XML parser context
11186 * @terminate: last chunk indicator
11188 * Try to progress on parsing
11190 * Returns zero if no parsing was possible
11193 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt
, int terminate
) {
11197 const xmlChar
*lastlt
, *lastgt
;
11199 if (ctxt
->input
== NULL
)
11203 switch (ctxt
->instate
) {
11204 case XML_PARSER_EOF
:
11205 xmlGenericError(xmlGenericErrorContext
,
11206 "PP: try EOF\n"); break;
11207 case XML_PARSER_START
:
11208 xmlGenericError(xmlGenericErrorContext
,
11209 "PP: try START\n"); break;
11210 case XML_PARSER_MISC
:
11211 xmlGenericError(xmlGenericErrorContext
,
11212 "PP: try MISC\n");break;
11213 case XML_PARSER_COMMENT
:
11214 xmlGenericError(xmlGenericErrorContext
,
11215 "PP: try COMMENT\n");break;
11216 case XML_PARSER_PROLOG
:
11217 xmlGenericError(xmlGenericErrorContext
,
11218 "PP: try PROLOG\n");break;
11219 case XML_PARSER_START_TAG
:
11220 xmlGenericError(xmlGenericErrorContext
,
11221 "PP: try START_TAG\n");break;
11222 case XML_PARSER_CONTENT
:
11223 xmlGenericError(xmlGenericErrorContext
,
11224 "PP: try CONTENT\n");break;
11225 case XML_PARSER_CDATA_SECTION
:
11226 xmlGenericError(xmlGenericErrorContext
,
11227 "PP: try CDATA_SECTION\n");break;
11228 case XML_PARSER_END_TAG
:
11229 xmlGenericError(xmlGenericErrorContext
,
11230 "PP: try END_TAG\n");break;
11231 case XML_PARSER_ENTITY_DECL
:
11232 xmlGenericError(xmlGenericErrorContext
,
11233 "PP: try ENTITY_DECL\n");break;
11234 case XML_PARSER_ENTITY_VALUE
:
11235 xmlGenericError(xmlGenericErrorContext
,
11236 "PP: try ENTITY_VALUE\n");break;
11237 case XML_PARSER_ATTRIBUTE_VALUE
:
11238 xmlGenericError(xmlGenericErrorContext
,
11239 "PP: try ATTRIBUTE_VALUE\n");break;
11240 case XML_PARSER_DTD
:
11241 xmlGenericError(xmlGenericErrorContext
,
11242 "PP: try DTD\n");break;
11243 case XML_PARSER_EPILOG
:
11244 xmlGenericError(xmlGenericErrorContext
,
11245 "PP: try EPILOG\n");break;
11246 case XML_PARSER_PI
:
11247 xmlGenericError(xmlGenericErrorContext
,
11248 "PP: try PI\n");break;
11249 case XML_PARSER_IGNORE
:
11250 xmlGenericError(xmlGenericErrorContext
,
11251 "PP: try IGNORE\n");break;
11255 if ((ctxt
->input
!= NULL
) &&
11256 (ctxt
->input
->cur
- ctxt
->input
->base
> 4096)) {
11258 ctxt
->checkIndex
= 0;
11260 xmlParseGetLasts(ctxt
, &lastlt
, &lastgt
);
11262 while (ctxt
->instate
!= XML_PARSER_EOF
) {
11263 if ((ctxt
->errNo
!= XML_ERR_OK
) && (ctxt
->disableSAX
== 1))
11266 if (ctxt
->input
== NULL
) break;
11267 if (ctxt
->input
->buf
== NULL
)
11268 avail
= ctxt
->input
->length
-
11269 (ctxt
->input
->cur
- ctxt
->input
->base
);
11272 * If we are operating on converted input, try to flush
11273 * remaining chars to avoid them stalling in the non-converted
11274 * buffer. But do not do this in document start where
11275 * encoding="..." may not have been read and we work on a
11276 * guessed encoding.
11278 if ((ctxt
->instate
!= XML_PARSER_START
) &&
11279 (ctxt
->input
->buf
->raw
!= NULL
) &&
11280 (xmlBufIsEmpty(ctxt
->input
->buf
->raw
) == 0)) {
11281 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
,
11283 size_t current
= ctxt
->input
->cur
- ctxt
->input
->base
;
11285 xmlParserInputBufferPush(ctxt
->input
->buf
, 0, "");
11286 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
,
11289 avail
= xmlBufUse(ctxt
->input
->buf
->buffer
) -
11290 (ctxt
->input
->cur
- ctxt
->input
->base
);
11294 switch (ctxt
->instate
) {
11295 case XML_PARSER_EOF
:
11297 * Document parsing is done !
11300 case XML_PARSER_START
:
11301 if (ctxt
->charset
== XML_CHAR_ENCODING_NONE
) {
11303 xmlCharEncoding enc
;
11306 * Very first chars read from the document flow.
11312 * Get the 4 first bytes and decode the charset
11313 * if enc != XML_CHAR_ENCODING_NONE
11314 * plug some encoding conversion routines,
11315 * else xmlSwitchEncoding will set to (default)
11322 enc
= xmlDetectCharEncoding(start
, 4);
11323 xmlSwitchEncoding(ctxt
, enc
);
11329 cur
= ctxt
->input
->cur
[0];
11330 next
= ctxt
->input
->cur
[1];
11332 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
11333 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
11334 &xmlDefaultSAXLocator
);
11335 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
11336 xmlHaltParser(ctxt
);
11338 xmlGenericError(xmlGenericErrorContext
,
11339 "PP: entering EOF\n");
11341 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11342 ctxt
->sax
->endDocument(ctxt
->userData
);
11345 if ((cur
== '<') && (next
== '?')) {
11346 /* PI or XML decl */
11347 if (avail
< 5) return(ret
);
11348 if ((!terminate
) &&
11349 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0))
11351 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
11352 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
11353 &xmlDefaultSAXLocator
);
11354 if ((ctxt
->input
->cur
[2] == 'x') &&
11355 (ctxt
->input
->cur
[3] == 'm') &&
11356 (ctxt
->input
->cur
[4] == 'l') &&
11357 (IS_BLANK_CH(ctxt
->input
->cur
[5]))) {
11360 xmlGenericError(xmlGenericErrorContext
,
11361 "PP: Parsing XML Decl\n");
11363 xmlParseXMLDecl(ctxt
);
11364 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
11366 * The XML REC instructs us to stop parsing right
11369 xmlHaltParser(ctxt
);
11372 ctxt
->standalone
= ctxt
->input
->standalone
;
11373 if ((ctxt
->encoding
== NULL
) &&
11374 (ctxt
->input
->encoding
!= NULL
))
11375 ctxt
->encoding
= xmlStrdup(ctxt
->input
->encoding
);
11376 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
11377 (!ctxt
->disableSAX
))
11378 ctxt
->sax
->startDocument(ctxt
->userData
);
11379 ctxt
->instate
= XML_PARSER_MISC
;
11381 xmlGenericError(xmlGenericErrorContext
,
11382 "PP: entering MISC\n");
11385 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
11386 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
11387 (!ctxt
->disableSAX
))
11388 ctxt
->sax
->startDocument(ctxt
->userData
);
11389 ctxt
->instate
= XML_PARSER_MISC
;
11391 xmlGenericError(xmlGenericErrorContext
,
11392 "PP: entering MISC\n");
11396 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
11397 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
11398 &xmlDefaultSAXLocator
);
11399 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
11400 if (ctxt
->version
== NULL
) {
11401 xmlErrMemory(ctxt
, NULL
);
11404 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
11405 (!ctxt
->disableSAX
))
11406 ctxt
->sax
->startDocument(ctxt
->userData
);
11407 ctxt
->instate
= XML_PARSER_MISC
;
11409 xmlGenericError(xmlGenericErrorContext
,
11410 "PP: entering MISC\n");
11414 case XML_PARSER_START_TAG
: {
11415 const xmlChar
*name
;
11416 const xmlChar
*prefix
= NULL
;
11417 const xmlChar
*URI
= NULL
;
11418 int line
= ctxt
->input
->line
;
11419 int nsNr
= ctxt
->nsNr
;
11421 if ((avail
< 2) && (ctxt
->inputNr
== 1))
11423 cur
= ctxt
->input
->cur
[0];
11425 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
11426 xmlHaltParser(ctxt
);
11427 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11428 ctxt
->sax
->endDocument(ctxt
->userData
);
11432 if (ctxt
->progressive
) {
11433 /* > can be found unescaped in attribute values */
11434 if ((lastgt
== NULL
) || (ctxt
->input
->cur
>= lastgt
))
11436 } else if (xmlParseLookupSequence(ctxt
, '>', 0, 0) < 0) {
11440 if (ctxt
->spaceNr
== 0)
11441 spacePush(ctxt
, -1);
11442 else if (*ctxt
->space
== -2)
11443 spacePush(ctxt
, -1);
11445 spacePush(ctxt
, *ctxt
->space
);
11446 #ifdef LIBXML_SAX1_ENABLED
11448 #endif /* LIBXML_SAX1_ENABLED */
11449 name
= xmlParseStartTag2(ctxt
, &prefix
, &URI
, &tlen
);
11450 #ifdef LIBXML_SAX1_ENABLED
11452 name
= xmlParseStartTag(ctxt
);
11453 #endif /* LIBXML_SAX1_ENABLED */
11454 if (ctxt
->instate
== XML_PARSER_EOF
)
11456 if (name
== NULL
) {
11458 xmlHaltParser(ctxt
);
11459 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11460 ctxt
->sax
->endDocument(ctxt
->userData
);
11463 #ifdef LIBXML_VALID_ENABLED
11465 * [ VC: Root Element Type ]
11466 * The Name in the document type declaration must match
11467 * the element type of the root element.
11469 if (ctxt
->validate
&& ctxt
->wellFormed
&& ctxt
->myDoc
&&
11470 ctxt
->node
&& (ctxt
->node
== ctxt
->myDoc
->children
))
11471 ctxt
->valid
&= xmlValidateRoot(&ctxt
->vctxt
, ctxt
->myDoc
);
11472 #endif /* LIBXML_VALID_ENABLED */
11475 * Check for an Empty Element.
11477 if ((RAW
== '/') && (NXT(1) == '>')) {
11481 if ((ctxt
->sax
!= NULL
) &&
11482 (ctxt
->sax
->endElementNs
!= NULL
) &&
11483 (!ctxt
->disableSAX
))
11484 ctxt
->sax
->endElementNs(ctxt
->userData
, name
,
11486 if (ctxt
->nsNr
- nsNr
> 0)
11487 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
11488 #ifdef LIBXML_SAX1_ENABLED
11490 if ((ctxt
->sax
!= NULL
) &&
11491 (ctxt
->sax
->endElement
!= NULL
) &&
11492 (!ctxt
->disableSAX
))
11493 ctxt
->sax
->endElement(ctxt
->userData
, name
);
11494 #endif /* LIBXML_SAX1_ENABLED */
11496 if (ctxt
->instate
== XML_PARSER_EOF
)
11499 if (ctxt
->nameNr
== 0) {
11500 ctxt
->instate
= XML_PARSER_EPILOG
;
11502 ctxt
->instate
= XML_PARSER_CONTENT
;
11504 ctxt
->progressive
= 1;
11510 xmlFatalErrMsgStr(ctxt
, XML_ERR_GT_REQUIRED
,
11511 "Couldn't find end of Start Tag %s\n",
11516 nameNsPush(ctxt
, name
, prefix
, URI
, line
, ctxt
->nsNr
- nsNr
);
11518 ctxt
->instate
= XML_PARSER_CONTENT
;
11519 ctxt
->progressive
= 1;
11522 case XML_PARSER_CONTENT
: {
11524 unsigned long cons
;
11525 if ((avail
< 2) && (ctxt
->inputNr
== 1))
11527 cur
= ctxt
->input
->cur
[0];
11528 next
= ctxt
->input
->cur
[1];
11530 id
= ctxt
->input
->id
;
11531 cons
= CUR_CONSUMED
;
11532 if ((cur
== '<') && (next
== '/')) {
11533 ctxt
->instate
= XML_PARSER_END_TAG
;
11535 } else if ((cur
== '<') && (next
== '?')) {
11536 if ((!terminate
) &&
11537 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0)) {
11538 ctxt
->progressive
= XML_PARSER_PI
;
11542 ctxt
->instate
= XML_PARSER_CONTENT
;
11543 ctxt
->progressive
= 1;
11544 } else if ((cur
== '<') && (next
!= '!')) {
11545 ctxt
->instate
= XML_PARSER_START_TAG
;
11547 } else if ((cur
== '<') && (next
== '!') &&
11548 (ctxt
->input
->cur
[2] == '-') &&
11549 (ctxt
->input
->cur
[3] == '-')) {
11554 ctxt
->input
->cur
+= 4;
11555 term
= xmlParseLookupSequence(ctxt
, '-', '-', '>');
11556 ctxt
->input
->cur
-= 4;
11557 if ((!terminate
) && (term
< 0)) {
11558 ctxt
->progressive
= XML_PARSER_COMMENT
;
11561 xmlParseComment(ctxt
);
11562 ctxt
->instate
= XML_PARSER_CONTENT
;
11563 ctxt
->progressive
= 1;
11564 } else if ((cur
== '<') && (ctxt
->input
->cur
[1] == '!') &&
11565 (ctxt
->input
->cur
[2] == '[') &&
11566 (ctxt
->input
->cur
[3] == 'C') &&
11567 (ctxt
->input
->cur
[4] == 'D') &&
11568 (ctxt
->input
->cur
[5] == 'A') &&
11569 (ctxt
->input
->cur
[6] == 'T') &&
11570 (ctxt
->input
->cur
[7] == 'A') &&
11571 (ctxt
->input
->cur
[8] == '[')) {
11573 ctxt
->instate
= XML_PARSER_CDATA_SECTION
;
11575 } else if ((cur
== '<') && (next
== '!') &&
11578 } else if (cur
== '&') {
11579 if ((!terminate
) &&
11580 (xmlParseLookupSequence(ctxt
, ';', 0, 0) < 0))
11582 xmlParseReference(ctxt
);
11584 /* TODO Avoid the extra copy, handle directly !!! */
11586 * Goal of the following test is:
11587 * - minimize calls to the SAX 'character' callback
11588 * when they are mergeable
11589 * - handle a problem for isBlank when we only parse
11590 * a sequence of blank chars and the next one is
11591 * not available to check against '<' presence.
11592 * - tries to homogenize the differences in SAX
11593 * callbacks between the push and pull versions
11596 if ((ctxt
->inputNr
== 1) &&
11597 (avail
< XML_PARSER_BIG_BUFFER_SIZE
)) {
11599 if (ctxt
->progressive
) {
11600 if ((lastlt
== NULL
) ||
11601 (ctxt
->input
->cur
> lastlt
))
11603 } else if (xmlParseLookupSequence(ctxt
,
11609 ctxt
->checkIndex
= 0;
11610 xmlParseCharData(ctxt
, 0);
11612 if ((cons
== CUR_CONSUMED
) && (id
== ctxt
->input
->id
)) {
11613 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
11614 "detected an error in element content\n");
11615 xmlHaltParser(ctxt
);
11620 case XML_PARSER_END_TAG
:
11624 if (ctxt
->progressive
) {
11625 /* > can be found unescaped in attribute values */
11626 if ((lastgt
== NULL
) || (ctxt
->input
->cur
>= lastgt
))
11628 } else if (xmlParseLookupSequence(ctxt
, '>', 0, 0) < 0) {
11633 xmlParseEndTag2(ctxt
, &ctxt
->pushTab
[ctxt
->nameNr
- 1]);
11636 #ifdef LIBXML_SAX1_ENABLED
11638 xmlParseEndTag1(ctxt
, 0);
11639 #endif /* LIBXML_SAX1_ENABLED */
11640 if (ctxt
->instate
== XML_PARSER_EOF
) {
11642 } else if (ctxt
->nameNr
== 0) {
11643 ctxt
->instate
= XML_PARSER_EPILOG
;
11645 ctxt
->instate
= XML_PARSER_CONTENT
;
11648 case XML_PARSER_CDATA_SECTION
: {
11650 * Push mode needs the SAX callback for
11651 * cdataBlock to merge back contiguous callbacks.
11655 base
= xmlParseLookupSequence(ctxt
, ']', ']', '>');
11657 if (avail
>= XML_PARSER_BIG_BUFFER_SIZE
+ 2) {
11660 tmp
= xmlCheckCdataPush(ctxt
->input
->cur
,
11661 XML_PARSER_BIG_BUFFER_SIZE
, 0);
11664 ctxt
->input
->cur
+= tmp
;
11665 goto encoding_error
;
11667 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
11668 if (ctxt
->sax
->cdataBlock
!= NULL
)
11669 ctxt
->sax
->cdataBlock(ctxt
->userData
,
11670 ctxt
->input
->cur
, tmp
);
11671 else if (ctxt
->sax
->characters
!= NULL
)
11672 ctxt
->sax
->characters(ctxt
->userData
,
11673 ctxt
->input
->cur
, tmp
);
11675 if (ctxt
->instate
== XML_PARSER_EOF
)
11678 ctxt
->checkIndex
= 0;
11684 tmp
= xmlCheckCdataPush(ctxt
->input
->cur
, base
, 1);
11685 if ((tmp
< 0) || (tmp
!= base
)) {
11687 ctxt
->input
->cur
+= tmp
;
11688 goto encoding_error
;
11690 if ((ctxt
->sax
!= NULL
) && (base
== 0) &&
11691 (ctxt
->sax
->cdataBlock
!= NULL
) &&
11692 (!ctxt
->disableSAX
)) {
11694 * Special case to provide identical behaviour
11695 * between pull and push parsers on empty CDATA
11698 if ((ctxt
->input
->cur
- ctxt
->input
->base
>= 9) &&
11699 (!strncmp((const char *)&ctxt
->input
->cur
[-9],
11701 ctxt
->sax
->cdataBlock(ctxt
->userData
,
11703 } else if ((ctxt
->sax
!= NULL
) && (base
> 0) &&
11704 (!ctxt
->disableSAX
)) {
11705 if (ctxt
->sax
->cdataBlock
!= NULL
)
11706 ctxt
->sax
->cdataBlock(ctxt
->userData
,
11707 ctxt
->input
->cur
, base
);
11708 else if (ctxt
->sax
->characters
!= NULL
)
11709 ctxt
->sax
->characters(ctxt
->userData
,
11710 ctxt
->input
->cur
, base
);
11712 if (ctxt
->instate
== XML_PARSER_EOF
)
11715 ctxt
->checkIndex
= 0;
11716 ctxt
->instate
= XML_PARSER_CONTENT
;
11718 xmlGenericError(xmlGenericErrorContext
,
11719 "PP: entering CONTENT\n");
11724 case XML_PARSER_MISC
:
11726 if (ctxt
->input
->buf
== NULL
)
11727 avail
= ctxt
->input
->length
-
11728 (ctxt
->input
->cur
- ctxt
->input
->base
);
11730 avail
= xmlBufUse(ctxt
->input
->buf
->buffer
) -
11731 (ctxt
->input
->cur
- ctxt
->input
->base
);
11734 cur
= ctxt
->input
->cur
[0];
11735 next
= ctxt
->input
->cur
[1];
11736 if ((cur
== '<') && (next
== '?')) {
11737 if ((!terminate
) &&
11738 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0)) {
11739 ctxt
->progressive
= XML_PARSER_PI
;
11743 xmlGenericError(xmlGenericErrorContext
,
11744 "PP: Parsing PI\n");
11747 if (ctxt
->instate
== XML_PARSER_EOF
)
11749 ctxt
->instate
= XML_PARSER_MISC
;
11750 ctxt
->progressive
= 1;
11751 ctxt
->checkIndex
= 0;
11752 } else if ((cur
== '<') && (next
== '!') &&
11753 (ctxt
->input
->cur
[2] == '-') &&
11754 (ctxt
->input
->cur
[3] == '-')) {
11755 if ((!terminate
) &&
11756 (xmlParseLookupSequence(ctxt
, '-', '-', '>') < 0)) {
11757 ctxt
->progressive
= XML_PARSER_COMMENT
;
11761 xmlGenericError(xmlGenericErrorContext
,
11762 "PP: Parsing Comment\n");
11764 xmlParseComment(ctxt
);
11765 if (ctxt
->instate
== XML_PARSER_EOF
)
11767 ctxt
->instate
= XML_PARSER_MISC
;
11768 ctxt
->progressive
= 1;
11769 ctxt
->checkIndex
= 0;
11770 } else if ((cur
== '<') && (next
== '!') &&
11771 (ctxt
->input
->cur
[2] == 'D') &&
11772 (ctxt
->input
->cur
[3] == 'O') &&
11773 (ctxt
->input
->cur
[4] == 'C') &&
11774 (ctxt
->input
->cur
[5] == 'T') &&
11775 (ctxt
->input
->cur
[6] == 'Y') &&
11776 (ctxt
->input
->cur
[7] == 'P') &&
11777 (ctxt
->input
->cur
[8] == 'E')) {
11778 if ((!terminate
) &&
11779 (xmlParseLookupSequence(ctxt
, '>', 0, 0) < 0)) {
11780 ctxt
->progressive
= XML_PARSER_DTD
;
11784 xmlGenericError(xmlGenericErrorContext
,
11785 "PP: Parsing internal subset\n");
11787 ctxt
->inSubset
= 1;
11788 ctxt
->progressive
= 0;
11789 ctxt
->checkIndex
= 0;
11790 xmlParseDocTypeDecl(ctxt
);
11791 if (ctxt
->instate
== XML_PARSER_EOF
)
11794 ctxt
->instate
= XML_PARSER_DTD
;
11796 xmlGenericError(xmlGenericErrorContext
,
11797 "PP: entering DTD\n");
11801 * Create and update the external subset.
11803 ctxt
->inSubset
= 2;
11804 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
11805 (ctxt
->sax
->externalSubset
!= NULL
))
11806 ctxt
->sax
->externalSubset(ctxt
->userData
,
11807 ctxt
->intSubName
, ctxt
->extSubSystem
,
11809 ctxt
->inSubset
= 0;
11810 xmlCleanSpecialAttr(ctxt
);
11811 ctxt
->instate
= XML_PARSER_PROLOG
;
11813 xmlGenericError(xmlGenericErrorContext
,
11814 "PP: entering PROLOG\n");
11817 } else if ((cur
== '<') && (next
== '!') &&
11821 ctxt
->instate
= XML_PARSER_START_TAG
;
11822 ctxt
->progressive
= XML_PARSER_START_TAG
;
11823 xmlParseGetLasts(ctxt
, &lastlt
, &lastgt
);
11825 xmlGenericError(xmlGenericErrorContext
,
11826 "PP: entering START_TAG\n");
11830 case XML_PARSER_PROLOG
:
11832 if (ctxt
->input
->buf
== NULL
)
11833 avail
= ctxt
->input
->length
- (ctxt
->input
->cur
- ctxt
->input
->base
);
11835 avail
= xmlBufUse(ctxt
->input
->buf
->buffer
) -
11836 (ctxt
->input
->cur
- ctxt
->input
->base
);
11839 cur
= ctxt
->input
->cur
[0];
11840 next
= ctxt
->input
->cur
[1];
11841 if ((cur
== '<') && (next
== '?')) {
11842 if ((!terminate
) &&
11843 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0)) {
11844 ctxt
->progressive
= XML_PARSER_PI
;
11848 xmlGenericError(xmlGenericErrorContext
,
11849 "PP: Parsing PI\n");
11852 if (ctxt
->instate
== XML_PARSER_EOF
)
11854 ctxt
->instate
= XML_PARSER_PROLOG
;
11855 ctxt
->progressive
= 1;
11856 } else if ((cur
== '<') && (next
== '!') &&
11857 (ctxt
->input
->cur
[2] == '-') && (ctxt
->input
->cur
[3] == '-')) {
11858 if ((!terminate
) &&
11859 (xmlParseLookupSequence(ctxt
, '-', '-', '>') < 0)) {
11860 ctxt
->progressive
= XML_PARSER_COMMENT
;
11864 xmlGenericError(xmlGenericErrorContext
,
11865 "PP: Parsing Comment\n");
11867 xmlParseComment(ctxt
);
11868 if (ctxt
->instate
== XML_PARSER_EOF
)
11870 ctxt
->instate
= XML_PARSER_PROLOG
;
11871 ctxt
->progressive
= 1;
11872 } else if ((cur
== '<') && (next
== '!') &&
11876 ctxt
->instate
= XML_PARSER_START_TAG
;
11877 if (ctxt
->progressive
== 0)
11878 ctxt
->progressive
= XML_PARSER_START_TAG
;
11879 xmlParseGetLasts(ctxt
, &lastlt
, &lastgt
);
11881 xmlGenericError(xmlGenericErrorContext
,
11882 "PP: entering START_TAG\n");
11886 case XML_PARSER_EPILOG
:
11888 if (ctxt
->input
->buf
== NULL
)
11889 avail
= ctxt
->input
->length
- (ctxt
->input
->cur
- ctxt
->input
->base
);
11891 avail
= xmlBufUse(ctxt
->input
->buf
->buffer
) -
11892 (ctxt
->input
->cur
- ctxt
->input
->base
);
11895 cur
= ctxt
->input
->cur
[0];
11896 next
= ctxt
->input
->cur
[1];
11897 if ((cur
== '<') && (next
== '?')) {
11898 if ((!terminate
) &&
11899 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0)) {
11900 ctxt
->progressive
= XML_PARSER_PI
;
11904 xmlGenericError(xmlGenericErrorContext
,
11905 "PP: Parsing PI\n");
11908 if (ctxt
->instate
== XML_PARSER_EOF
)
11910 ctxt
->instate
= XML_PARSER_EPILOG
;
11911 ctxt
->progressive
= 1;
11912 } else if ((cur
== '<') && (next
== '!') &&
11913 (ctxt
->input
->cur
[2] == '-') && (ctxt
->input
->cur
[3] == '-')) {
11914 if ((!terminate
) &&
11915 (xmlParseLookupSequence(ctxt
, '-', '-', '>') < 0)) {
11916 ctxt
->progressive
= XML_PARSER_COMMENT
;
11920 xmlGenericError(xmlGenericErrorContext
,
11921 "PP: Parsing Comment\n");
11923 xmlParseComment(ctxt
);
11924 if (ctxt
->instate
== XML_PARSER_EOF
)
11926 ctxt
->instate
= XML_PARSER_EPILOG
;
11927 ctxt
->progressive
= 1;
11928 } else if ((cur
== '<') && (next
== '!') &&
11932 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
11933 xmlHaltParser(ctxt
);
11935 xmlGenericError(xmlGenericErrorContext
,
11936 "PP: entering EOF\n");
11938 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11939 ctxt
->sax
->endDocument(ctxt
->userData
);
11943 case XML_PARSER_DTD
: {
11945 * Sorry but progressive parsing of the internal subset
11946 * is not expected to be supported. We first check that
11947 * the full content of the internal subset is available and
11948 * the parsing is launched only at that point.
11949 * Internal subset ends up with "']' S? '>'" in an unescaped
11950 * section and not in a ']]>' sequence which are conditional
11951 * sections (whoever argued to keep that crap in XML deserves
11952 * a place in hell !).
11959 base
= ctxt
->input
->cur
- ctxt
->input
->base
;
11960 if (base
< 0) return(0);
11961 if (ctxt
->checkIndex
> base
)
11962 base
= ctxt
->checkIndex
;
11963 buf
= xmlBufContent(ctxt
->input
->buf
->buffer
);
11964 use
= xmlBufUse(ctxt
->input
->buf
->buffer
);
11965 for (;(unsigned int) base
< use
; base
++) {
11967 if (buf
[base
] == quote
)
11971 if ((quote
== 0) && (buf
[base
] == '<')) {
11973 /* special handling of comments */
11974 if (((unsigned int) base
+ 4 < use
) &&
11975 (buf
[base
+ 1] == '!') &&
11976 (buf
[base
+ 2] == '-') &&
11977 (buf
[base
+ 3] == '-')) {
11978 for (;(unsigned int) base
+ 3 < use
; base
++) {
11979 if ((buf
[base
] == '-') &&
11980 (buf
[base
+ 1] == '-') &&
11981 (buf
[base
+ 2] == '>')) {
11989 fprintf(stderr
, "unfinished comment\n");
11996 if (buf
[base
] == '"') {
12000 if (buf
[base
] == '\'') {
12004 if (buf
[base
] == ']') {
12006 fprintf(stderr
, "%c%c%c%c: ", buf
[base
],
12007 buf
[base
+ 1], buf
[base
+ 2], buf
[base
+ 3]);
12009 if ((unsigned int) base
+1 >= use
)
12011 if (buf
[base
+ 1] == ']') {
12012 /* conditional crap, skip both ']' ! */
12016 for (i
= 1; (unsigned int) base
+ i
< use
; i
++) {
12017 if (buf
[base
+ i
] == '>') {
12019 fprintf(stderr
, "found\n");
12021 goto found_end_int_subset
;
12023 if (!IS_BLANK_CH(buf
[base
+ i
])) {
12025 fprintf(stderr
, "not found\n");
12027 goto not_end_of_int_subset
;
12031 fprintf(stderr
, "end of stream\n");
12036 not_end_of_int_subset
:
12037 continue; /* for */
12040 * We didn't find the end of the internal subset
12043 ctxt
->checkIndex
= base
;
12045 ctxt
->checkIndex
= 0;
12048 xmlGenericError(xmlGenericErrorContext
,
12049 "PP: lookup of int subset end filed\n");
12053 found_end_int_subset
:
12054 ctxt
->checkIndex
= 0;
12055 xmlParseInternalSubset(ctxt
);
12056 if (ctxt
->instate
== XML_PARSER_EOF
)
12058 ctxt
->inSubset
= 2;
12059 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
12060 (ctxt
->sax
->externalSubset
!= NULL
))
12061 ctxt
->sax
->externalSubset(ctxt
->userData
, ctxt
->intSubName
,
12062 ctxt
->extSubSystem
, ctxt
->extSubURI
);
12063 ctxt
->inSubset
= 0;
12064 xmlCleanSpecialAttr(ctxt
);
12065 if (ctxt
->instate
== XML_PARSER_EOF
)
12067 ctxt
->instate
= XML_PARSER_PROLOG
;
12068 ctxt
->checkIndex
= 0;
12070 xmlGenericError(xmlGenericErrorContext
,
12071 "PP: entering PROLOG\n");
12075 case XML_PARSER_COMMENT
:
12076 xmlGenericError(xmlGenericErrorContext
,
12077 "PP: internal error, state == COMMENT\n");
12078 ctxt
->instate
= XML_PARSER_CONTENT
;
12080 xmlGenericError(xmlGenericErrorContext
,
12081 "PP: entering CONTENT\n");
12084 case XML_PARSER_IGNORE
:
12085 xmlGenericError(xmlGenericErrorContext
,
12086 "PP: internal error, state == IGNORE");
12087 ctxt
->instate
= XML_PARSER_DTD
;
12089 xmlGenericError(xmlGenericErrorContext
,
12090 "PP: entering DTD\n");
12093 case XML_PARSER_PI
:
12094 xmlGenericError(xmlGenericErrorContext
,
12095 "PP: internal error, state == PI\n");
12096 ctxt
->instate
= XML_PARSER_CONTENT
;
12098 xmlGenericError(xmlGenericErrorContext
,
12099 "PP: entering CONTENT\n");
12102 case XML_PARSER_ENTITY_DECL
:
12103 xmlGenericError(xmlGenericErrorContext
,
12104 "PP: internal error, state == ENTITY_DECL\n");
12105 ctxt
->instate
= XML_PARSER_DTD
;
12107 xmlGenericError(xmlGenericErrorContext
,
12108 "PP: entering DTD\n");
12111 case XML_PARSER_ENTITY_VALUE
:
12112 xmlGenericError(xmlGenericErrorContext
,
12113 "PP: internal error, state == ENTITY_VALUE\n");
12114 ctxt
->instate
= XML_PARSER_CONTENT
;
12116 xmlGenericError(xmlGenericErrorContext
,
12117 "PP: entering DTD\n");
12120 case XML_PARSER_ATTRIBUTE_VALUE
:
12121 xmlGenericError(xmlGenericErrorContext
,
12122 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12123 ctxt
->instate
= XML_PARSER_START_TAG
;
12125 xmlGenericError(xmlGenericErrorContext
,
12126 "PP: entering START_TAG\n");
12129 case XML_PARSER_SYSTEM_LITERAL
:
12130 xmlGenericError(xmlGenericErrorContext
,
12131 "PP: internal error, state == SYSTEM_LITERAL\n");
12132 ctxt
->instate
= XML_PARSER_START_TAG
;
12134 xmlGenericError(xmlGenericErrorContext
,
12135 "PP: entering START_TAG\n");
12138 case XML_PARSER_PUBLIC_LITERAL
:
12139 xmlGenericError(xmlGenericErrorContext
,
12140 "PP: internal error, state == PUBLIC_LITERAL\n");
12141 ctxt
->instate
= XML_PARSER_START_TAG
;
12143 xmlGenericError(xmlGenericErrorContext
,
12144 "PP: entering START_TAG\n");
12151 xmlGenericError(xmlGenericErrorContext
, "PP: done %d\n", ret
);
12158 snprintf(buffer
, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12159 ctxt
->input
->cur
[0], ctxt
->input
->cur
[1],
12160 ctxt
->input
->cur
[2], ctxt
->input
->cur
[3]);
12161 __xmlErrEncoding(ctxt
, XML_ERR_INVALID_CHAR
,
12162 "Input is not proper UTF-8, indicate encoding !\n%s",
12163 BAD_CAST buffer
, NULL
);
12169 * xmlParseCheckTransition:
12170 * @ctxt: an XML parser context
12171 * @chunk: a char array
12172 * @size: the size in bytes of the chunk
12174 * Check, depending on the current parser state, whether the given chunk must
12175 * be processed immediately or whether more data is needed to advance parsing.
12177 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12180 xmlParseCheckTransition(xmlParserCtxtPtr ctxt
, const char *chunk
, int size
) {
12181 if ((ctxt
== NULL
) || (chunk
== NULL
) || (size
< 0))
12183 if (ctxt
->instate
== XML_PARSER_START_TAG
) {
12184 if (memchr(chunk
, '>', size
) != NULL
)
12188 if (ctxt
->progressive
== XML_PARSER_COMMENT
) {
12189 if (memchr(chunk
, '>', size
) != NULL
)
12193 if (ctxt
->instate
== XML_PARSER_CDATA_SECTION
) {
12194 if (memchr(chunk
, '>', size
) != NULL
)
12198 if (ctxt
->progressive
== XML_PARSER_PI
) {
12199 if (memchr(chunk
, '>', size
) != NULL
)
12203 if (ctxt
->instate
== XML_PARSER_END_TAG
) {
12204 if (memchr(chunk
, '>', size
) != NULL
)
12208 if ((ctxt
->progressive
== XML_PARSER_DTD
) ||
12209 (ctxt
->instate
== XML_PARSER_DTD
)) {
12210 if (memchr(chunk
, '>', size
) != NULL
)
12219 * @ctxt: an XML parser context
12220 * @chunk: a char array
12221 * @size: the size in bytes of the chunk
12222 * @terminate: last chunk indicator
12224 * Parse a Chunk of memory
12226 * Returns zero if no error, the xmlParserErrors otherwise.
12229 xmlParseChunk(xmlParserCtxtPtr ctxt
, const char *chunk
, int size
,
12233 size_t old_avail
= 0;
12237 return(XML_ERR_INTERNAL_ERROR
);
12238 if ((ctxt
->errNo
!= XML_ERR_OK
) && (ctxt
->disableSAX
== 1))
12239 return(ctxt
->errNo
);
12240 if (ctxt
->instate
== XML_PARSER_EOF
)
12242 if (ctxt
->instate
== XML_PARSER_START
)
12243 xmlDetectSAX2(ctxt
);
12244 if ((size
> 0) && (chunk
!= NULL
) && (!terminate
) &&
12245 (chunk
[size
- 1] == '\r')) {
12252 if ((size
> 0) && (chunk
!= NULL
) && (ctxt
->input
!= NULL
) &&
12253 (ctxt
->input
->buf
!= NULL
) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
12254 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
, ctxt
->input
);
12255 size_t cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
12258 old_avail
= xmlBufUse(ctxt
->input
->buf
->buffer
);
12260 * Specific handling if we autodetected an encoding, we should not
12261 * push more than the first line ... which depends on the encoding,
12262 * and only push the rest once the final encoding has been detected.
12264 if ((ctxt
->instate
== XML_PARSER_START
) && (ctxt
->input
!= NULL
) &&
12265 (ctxt
->input
->buf
!= NULL
) && (ctxt
->input
->buf
->encoder
!= NULL
)) {
12266 unsigned int len
= 45;
12268 if ((xmlStrcasestr(BAD_CAST ctxt
->input
->buf
->encoder
->name
,
12269 BAD_CAST
"UTF-16")) ||
12270 (xmlStrcasestr(BAD_CAST ctxt
->input
->buf
->encoder
->name
,
12271 BAD_CAST
"UTF16")))
12273 else if ((xmlStrcasestr(BAD_CAST ctxt
->input
->buf
->encoder
->name
,
12274 BAD_CAST
"UCS-4")) ||
12275 (xmlStrcasestr(BAD_CAST ctxt
->input
->buf
->encoder
->name
,
12279 if (ctxt
->input
->buf
->rawconsumed
< len
)
12280 len
-= ctxt
->input
->buf
->rawconsumed
;
12283 * Change size for reading the initial declaration only
12284 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12285 * will blindly copy extra bytes from memory.
12287 if ((unsigned int) size
> len
) {
12288 remain
= size
- len
;
12294 res
= xmlParserInputBufferPush(ctxt
->input
->buf
, size
, chunk
);
12295 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
, base
, cur
);
12297 ctxt
->errNo
= XML_PARSER_EOF
;
12298 xmlHaltParser(ctxt
);
12299 return (XML_PARSER_EOF
);
12302 xmlGenericError(xmlGenericErrorContext
, "PP: pushed %d\n", size
);
12305 } else if (ctxt
->instate
!= XML_PARSER_EOF
) {
12306 if ((ctxt
->input
!= NULL
) && ctxt
->input
->buf
!= NULL
) {
12307 xmlParserInputBufferPtr in
= ctxt
->input
->buf
;
12308 if ((in
->encoder
!= NULL
) && (in
->buffer
!= NULL
) &&
12309 (in
->raw
!= NULL
)) {
12311 size_t base
= xmlBufGetInputBase(in
->buffer
, ctxt
->input
);
12312 size_t current
= ctxt
->input
->cur
- ctxt
->input
->base
;
12314 nbchars
= xmlCharEncInput(in
, terminate
);
12315 xmlBufSetInputBaseCur(in
->buffer
, ctxt
->input
, base
, current
);
12318 xmlGenericError(xmlGenericErrorContext
,
12319 "xmlParseChunk: encoder error\n");
12320 xmlHaltParser(ctxt
);
12321 return(XML_ERR_INVALID_ENCODING
);
12327 xmlParseTryOrFinish(ctxt
, 0);
12329 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->buf
!= NULL
))
12330 avail
= xmlBufUse(ctxt
->input
->buf
->buffer
);
12332 * Depending on the current state it may not be such
12333 * a good idea to try parsing if there is nothing in the chunk
12334 * which would be worth doing a parser state transition and we
12335 * need to wait for more data
12337 if ((terminate
) || (avail
> XML_MAX_TEXT_LENGTH
) ||
12338 (old_avail
== 0) || (avail
== 0) ||
12339 (xmlParseCheckTransition(ctxt
,
12340 (const char *)&ctxt
->input
->base
[old_avail
],
12341 avail
- old_avail
)))
12342 xmlParseTryOrFinish(ctxt
, terminate
);
12344 if (ctxt
->instate
== XML_PARSER_EOF
)
12345 return(ctxt
->errNo
);
12347 if ((ctxt
->input
!= NULL
) &&
12348 (((ctxt
->input
->end
- ctxt
->input
->cur
) > XML_MAX_LOOKUP_LIMIT
) ||
12349 ((ctxt
->input
->cur
- ctxt
->input
->base
) > XML_MAX_LOOKUP_LIMIT
)) &&
12350 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
12351 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
, "Huge input lookup");
12352 xmlHaltParser(ctxt
);
12354 if ((ctxt
->errNo
!= XML_ERR_OK
) && (ctxt
->disableSAX
== 1))
12355 return(ctxt
->errNo
);
12363 if ((end_in_lf
== 1) && (ctxt
->input
!= NULL
) &&
12364 (ctxt
->input
->buf
!= NULL
)) {
12365 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
,
12367 size_t current
= ctxt
->input
->cur
- ctxt
->input
->base
;
12369 xmlParserInputBufferPush(ctxt
->input
->buf
, 1, "\r");
12371 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
,
12376 * Check for termination
12380 if (ctxt
->input
!= NULL
) {
12381 if (ctxt
->input
->buf
== NULL
)
12382 cur_avail
= ctxt
->input
->length
-
12383 (ctxt
->input
->cur
- ctxt
->input
->base
);
12385 cur_avail
= xmlBufUse(ctxt
->input
->buf
->buffer
) -
12386 (ctxt
->input
->cur
- ctxt
->input
->base
);
12389 if ((ctxt
->instate
!= XML_PARSER_EOF
) &&
12390 (ctxt
->instate
!= XML_PARSER_EPILOG
)) {
12391 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
12393 if ((ctxt
->instate
== XML_PARSER_EPILOG
) && (cur_avail
> 0)) {
12394 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
12396 if (ctxt
->instate
!= XML_PARSER_EOF
) {
12397 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
12398 ctxt
->sax
->endDocument(ctxt
->userData
);
12400 ctxt
->instate
= XML_PARSER_EOF
;
12402 if (ctxt
->wellFormed
== 0)
12403 return((xmlParserErrors
) ctxt
->errNo
);
12408 /************************************************************************
12410 * I/O front end functions to the parser *
12412 ************************************************************************/
12415 * xmlCreatePushParserCtxt:
12416 * @sax: a SAX handler
12417 * @user_data: The user data returned on SAX callbacks
12418 * @chunk: a pointer to an array of chars
12419 * @size: number of chars in the array
12420 * @filename: an optional file name or URI
12422 * Create a parser context for using the XML parser in push mode.
12423 * If @chunk is non-NULL and @size is non-zero, the data is used to detect
12424 * the encoding. The remaining characters will be parsed so they
12425 * don't need to be fed in again through xmlParseChunk.
12426 * To allow content encoding detection, @size should be >= 4
12427 * The value of @filename is used for fetching external entities
12428 * and error/warning reports.
12430 * Returns the new parser context or NULL
12434 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax
, void *user_data
,
12435 const char *chunk
, int size
, const char *filename
) {
12436 xmlParserCtxtPtr ctxt
;
12437 xmlParserInputPtr inputStream
;
12438 xmlParserInputBufferPtr buf
;
12439 xmlCharEncoding enc
= XML_CHAR_ENCODING_NONE
;
12442 * plug some encoding conversion routines
12444 if ((chunk
!= NULL
) && (size
>= 4))
12445 enc
= xmlDetectCharEncoding((const xmlChar
*) chunk
, size
);
12447 buf
= xmlAllocParserInputBuffer(enc
);
12448 if (buf
== NULL
) return(NULL
);
12450 ctxt
= xmlNewParserCtxt();
12451 if (ctxt
== NULL
) {
12452 xmlErrMemory(NULL
, "creating parser: out of memory\n");
12453 xmlFreeParserInputBuffer(buf
);
12456 ctxt
->dictNames
= 1;
12458 #ifdef LIBXML_SAX1_ENABLED
12459 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
12460 #endif /* LIBXML_SAX1_ENABLED */
12461 xmlFree(ctxt
->sax
);
12462 ctxt
->sax
= (xmlSAXHandlerPtr
) xmlMalloc(sizeof(xmlSAXHandler
));
12463 if (ctxt
->sax
== NULL
) {
12464 xmlErrMemory(ctxt
, NULL
);
12465 xmlFreeParserInputBuffer(buf
);
12466 xmlFreeParserCtxt(ctxt
);
12469 memset(ctxt
->sax
, 0, sizeof(xmlSAXHandler
));
12470 if (sax
->initialized
== XML_SAX2_MAGIC
)
12471 memcpy(ctxt
->sax
, sax
, sizeof(xmlSAXHandler
));
12473 memcpy(ctxt
->sax
, sax
, sizeof(xmlSAXHandlerV1
));
12474 if (user_data
!= NULL
)
12475 ctxt
->userData
= user_data
;
12477 if (filename
== NULL
) {
12478 ctxt
->directory
= NULL
;
12480 ctxt
->directory
= xmlParserGetDirectory(filename
);
12483 inputStream
= xmlNewInputStream(ctxt
);
12484 if (inputStream
== NULL
) {
12485 xmlFreeParserCtxt(ctxt
);
12486 xmlFreeParserInputBuffer(buf
);
12490 if (filename
== NULL
)
12491 inputStream
->filename
= NULL
;
12493 inputStream
->filename
= (char *)
12494 xmlCanonicPath((const xmlChar
*) filename
);
12495 if (inputStream
->filename
== NULL
) {
12496 xmlFreeParserCtxt(ctxt
);
12497 xmlFreeParserInputBuffer(buf
);
12501 inputStream
->buf
= buf
;
12502 xmlBufResetInput(inputStream
->buf
->buffer
, inputStream
);
12503 inputPush(ctxt
, inputStream
);
12506 * If the caller didn't provide an initial 'chunk' for determining
12507 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12508 * that it can be automatically determined later
12510 if ((size
== 0) || (chunk
== NULL
)) {
12511 ctxt
->charset
= XML_CHAR_ENCODING_NONE
;
12512 } else if ((ctxt
->input
!= NULL
) && (ctxt
->input
->buf
!= NULL
)) {
12513 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
, ctxt
->input
);
12514 size_t cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
12516 xmlParserInputBufferPush(ctxt
->input
->buf
, size
, chunk
);
12518 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
, base
, cur
);
12520 xmlGenericError(xmlGenericErrorContext
, "PP: pushed %d\n", size
);
12524 if (enc
!= XML_CHAR_ENCODING_NONE
) {
12525 xmlSwitchEncoding(ctxt
, enc
);
12530 #endif /* LIBXML_PUSH_ENABLED */
12534 * @ctxt: an XML parser context
12536 * Blocks further parser processing don't override error
12540 xmlHaltParser(xmlParserCtxtPtr ctxt
) {
12543 ctxt
->instate
= XML_PARSER_EOF
;
12544 ctxt
->disableSAX
= 1;
12545 while (ctxt
->inputNr
> 1)
12546 xmlFreeInputStream(inputPop(ctxt
));
12547 if (ctxt
->input
!= NULL
) {
12549 * in case there was a specific allocation deallocate before
12552 if (ctxt
->input
->free
!= NULL
) {
12553 ctxt
->input
->free((xmlChar
*) ctxt
->input
->base
);
12554 ctxt
->input
->free
= NULL
;
12556 if (ctxt
->input
->buf
!= NULL
) {
12557 xmlFreeParserInputBuffer(ctxt
->input
->buf
);
12558 ctxt
->input
->buf
= NULL
;
12560 ctxt
->input
->cur
= BAD_CAST
"";
12561 ctxt
->input
->length
= 0;
12562 ctxt
->input
->base
= ctxt
->input
->cur
;
12563 ctxt
->input
->end
= ctxt
->input
->cur
;
12569 * @ctxt: an XML parser context
12571 * Blocks further parser processing
12574 xmlStopParser(xmlParserCtxtPtr ctxt
) {
12577 xmlHaltParser(ctxt
);
12578 ctxt
->errNo
= XML_ERR_USER_STOP
;
12582 * xmlCreateIOParserCtxt:
12583 * @sax: a SAX handler
12584 * @user_data: The user data returned on SAX callbacks
12585 * @ioread: an I/O read function
12586 * @ioclose: an I/O close function
12587 * @ioctx: an I/O handler
12588 * @enc: the charset encoding if known
12590 * Create a parser context for using the XML parser with an existing
12593 * Returns the new parser context or NULL
12596 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax
, void *user_data
,
12597 xmlInputReadCallback ioread
, xmlInputCloseCallback ioclose
,
12598 void *ioctx
, xmlCharEncoding enc
) {
12599 xmlParserCtxtPtr ctxt
;
12600 xmlParserInputPtr inputStream
;
12601 xmlParserInputBufferPtr buf
;
12603 if (ioread
== NULL
) return(NULL
);
12605 buf
= xmlParserInputBufferCreateIO(ioread
, ioclose
, ioctx
, enc
);
12607 if (ioclose
!= NULL
)
12612 ctxt
= xmlNewParserCtxt();
12613 if (ctxt
== NULL
) {
12614 xmlFreeParserInputBuffer(buf
);
12618 #ifdef LIBXML_SAX1_ENABLED
12619 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
12620 #endif /* LIBXML_SAX1_ENABLED */
12621 xmlFree(ctxt
->sax
);
12622 ctxt
->sax
= (xmlSAXHandlerPtr
) xmlMalloc(sizeof(xmlSAXHandler
));
12623 if (ctxt
->sax
== NULL
) {
12624 xmlFreeParserInputBuffer(buf
);
12625 xmlErrMemory(ctxt
, NULL
);
12626 xmlFreeParserCtxt(ctxt
);
12629 memset(ctxt
->sax
, 0, sizeof(xmlSAXHandler
));
12630 if (sax
->initialized
== XML_SAX2_MAGIC
)
12631 memcpy(ctxt
->sax
, sax
, sizeof(xmlSAXHandler
));
12633 memcpy(ctxt
->sax
, sax
, sizeof(xmlSAXHandlerV1
));
12634 if (user_data
!= NULL
)
12635 ctxt
->userData
= user_data
;
12638 inputStream
= xmlNewIOInputStream(ctxt
, buf
, enc
);
12639 if (inputStream
== NULL
) {
12640 xmlFreeParserCtxt(ctxt
);
12643 inputPush(ctxt
, inputStream
);
12648 #ifdef LIBXML_VALID_ENABLED
12649 /************************************************************************
12651 * Front ends when parsing a DTD *
12653 ************************************************************************/
12657 * @sax: the SAX handler block or NULL
12658 * @input: an Input Buffer
12659 * @enc: the charset encoding if known
12661 * Load and parse a DTD
12663 * Returns the resulting xmlDtdPtr or NULL in case of error.
12664 * @input will be freed by the function in any case.
12668 xmlIOParseDTD(xmlSAXHandlerPtr sax
, xmlParserInputBufferPtr input
,
12669 xmlCharEncoding enc
) {
12670 xmlDtdPtr ret
= NULL
;
12671 xmlParserCtxtPtr ctxt
;
12672 xmlParserInputPtr pinput
= NULL
;
12678 ctxt
= xmlNewParserCtxt();
12679 if (ctxt
== NULL
) {
12680 xmlFreeParserInputBuffer(input
);
12684 /* We are loading a DTD */
12685 ctxt
->options
|= XML_PARSE_DTDLOAD
;
12688 * Set-up the SAX context
12691 if (ctxt
->sax
!= NULL
)
12692 xmlFree(ctxt
->sax
);
12694 ctxt
->userData
= ctxt
;
12696 xmlDetectSAX2(ctxt
);
12699 * generate a parser input from the I/O handler
12702 pinput
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
12703 if (pinput
== NULL
) {
12704 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12705 xmlFreeParserInputBuffer(input
);
12706 xmlFreeParserCtxt(ctxt
);
12711 * plug some encoding conversion routines here.
12713 if (xmlPushInput(ctxt
, pinput
) < 0) {
12714 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12715 xmlFreeParserCtxt(ctxt
);
12718 if (enc
!= XML_CHAR_ENCODING_NONE
) {
12719 xmlSwitchEncoding(ctxt
, enc
);
12722 pinput
->filename
= NULL
;
12725 pinput
->base
= ctxt
->input
->cur
;
12726 pinput
->cur
= ctxt
->input
->cur
;
12727 pinput
->free
= NULL
;
12730 * let's parse that entity knowing it's an external subset.
12732 ctxt
->inSubset
= 2;
12733 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
12734 if (ctxt
->myDoc
== NULL
) {
12735 xmlErrMemory(ctxt
, "New Doc failed");
12738 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
12739 ctxt
->myDoc
->extSubset
= xmlNewDtd(ctxt
->myDoc
, BAD_CAST
"none",
12740 BAD_CAST
"none", BAD_CAST
"none");
12742 if ((enc
== XML_CHAR_ENCODING_NONE
) &&
12743 ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4)) {
12745 * Get the 4 first bytes and decode the charset
12746 * if enc != XML_CHAR_ENCODING_NONE
12747 * plug some encoding conversion routines.
12753 enc
= xmlDetectCharEncoding(start
, 4);
12754 if (enc
!= XML_CHAR_ENCODING_NONE
) {
12755 xmlSwitchEncoding(ctxt
, enc
);
12759 xmlParseExternalSubset(ctxt
, BAD_CAST
"none", BAD_CAST
"none");
12761 if (ctxt
->myDoc
!= NULL
) {
12762 if (ctxt
->wellFormed
) {
12763 ret
= ctxt
->myDoc
->extSubset
;
12764 ctxt
->myDoc
->extSubset
= NULL
;
12769 tmp
= ret
->children
;
12770 while (tmp
!= NULL
) {
12778 xmlFreeDoc(ctxt
->myDoc
);
12779 ctxt
->myDoc
= NULL
;
12781 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12782 xmlFreeParserCtxt(ctxt
);
12789 * @sax: the SAX handler block
12790 * @ExternalID: a NAME* containing the External ID of the DTD
12791 * @SystemID: a NAME* containing the URL to the DTD
12793 * Load and parse an external subset.
12795 * Returns the resulting xmlDtdPtr or NULL in case of error.
12799 xmlSAXParseDTD(xmlSAXHandlerPtr sax
, const xmlChar
*ExternalID
,
12800 const xmlChar
*SystemID
) {
12801 xmlDtdPtr ret
= NULL
;
12802 xmlParserCtxtPtr ctxt
;
12803 xmlParserInputPtr input
= NULL
;
12804 xmlCharEncoding enc
;
12805 xmlChar
* systemIdCanonic
;
12807 if ((ExternalID
== NULL
) && (SystemID
== NULL
)) return(NULL
);
12809 ctxt
= xmlNewParserCtxt();
12810 if (ctxt
== NULL
) {
12814 /* We are loading a DTD */
12815 ctxt
->options
|= XML_PARSE_DTDLOAD
;
12818 * Set-up the SAX context
12821 if (ctxt
->sax
!= NULL
)
12822 xmlFree(ctxt
->sax
);
12824 ctxt
->userData
= ctxt
;
12828 * Canonicalise the system ID
12830 systemIdCanonic
= xmlCanonicPath(SystemID
);
12831 if ((SystemID
!= NULL
) && (systemIdCanonic
== NULL
)) {
12832 xmlFreeParserCtxt(ctxt
);
12837 * Ask the Entity resolver to load the damn thing
12840 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->resolveEntity
!= NULL
))
12841 input
= ctxt
->sax
->resolveEntity(ctxt
->userData
, ExternalID
,
12843 if (input
== NULL
) {
12844 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12845 xmlFreeParserCtxt(ctxt
);
12846 if (systemIdCanonic
!= NULL
)
12847 xmlFree(systemIdCanonic
);
12852 * plug some encoding conversion routines here.
12854 if (xmlPushInput(ctxt
, input
) < 0) {
12855 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12856 xmlFreeParserCtxt(ctxt
);
12857 if (systemIdCanonic
!= NULL
)
12858 xmlFree(systemIdCanonic
);
12861 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
12862 enc
= xmlDetectCharEncoding(ctxt
->input
->cur
, 4);
12863 xmlSwitchEncoding(ctxt
, enc
);
12866 if (input
->filename
== NULL
)
12867 input
->filename
= (char *) systemIdCanonic
;
12869 xmlFree(systemIdCanonic
);
12872 input
->base
= ctxt
->input
->cur
;
12873 input
->cur
= ctxt
->input
->cur
;
12874 input
->free
= NULL
;
12877 * let's parse that entity knowing it's an external subset.
12879 ctxt
->inSubset
= 2;
12880 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
12881 if (ctxt
->myDoc
== NULL
) {
12882 xmlErrMemory(ctxt
, "New Doc failed");
12883 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12884 xmlFreeParserCtxt(ctxt
);
12887 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
12888 ctxt
->myDoc
->extSubset
= xmlNewDtd(ctxt
->myDoc
, BAD_CAST
"none",
12889 ExternalID
, SystemID
);
12890 xmlParseExternalSubset(ctxt
, ExternalID
, SystemID
);
12892 if (ctxt
->myDoc
!= NULL
) {
12893 if (ctxt
->wellFormed
) {
12894 ret
= ctxt
->myDoc
->extSubset
;
12895 ctxt
->myDoc
->extSubset
= NULL
;
12900 tmp
= ret
->children
;
12901 while (tmp
!= NULL
) {
12909 xmlFreeDoc(ctxt
->myDoc
);
12910 ctxt
->myDoc
= NULL
;
12912 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12913 xmlFreeParserCtxt(ctxt
);
12921 * @ExternalID: a NAME* containing the External ID of the DTD
12922 * @SystemID: a NAME* containing the URL to the DTD
12924 * Load and parse an external subset.
12926 * Returns the resulting xmlDtdPtr or NULL in case of error.
12930 xmlParseDTD(const xmlChar
*ExternalID
, const xmlChar
*SystemID
) {
12931 return(xmlSAXParseDTD(NULL
, ExternalID
, SystemID
));
12933 #endif /* LIBXML_VALID_ENABLED */
12935 /************************************************************************
12937 * Front ends when parsing an Entity *
12939 ************************************************************************/
12942 * xmlParseCtxtExternalEntity:
12943 * @ctx: the existing parsing context
12944 * @URL: the URL for the entity to load
12945 * @ID: the System ID for the entity to load
12946 * @lst: the return value for the set of parsed nodes
12948 * Parse an external general entity within an existing parsing context
12949 * An external general parsed entity is well-formed if it matches the
12950 * production labeled extParsedEnt.
12952 * [78] extParsedEnt ::= TextDecl? content
12954 * Returns 0 if the entity is well formed, -1 in case of args problem and
12955 * the parser error code otherwise
12959 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx
, const xmlChar
*URL
,
12960 const xmlChar
*ID
, xmlNodePtr
*lst
) {
12963 if (ctx
== NULL
) return(-1);
12965 * If the user provided their own SAX callbacks, then reuse the
12966 * userData callback field, otherwise the expected setup in a
12967 * DOM builder is to have userData == ctxt
12969 if (ctx
->userData
== ctx
)
12972 userData
= ctx
->userData
;
12973 return xmlParseExternalEntityPrivate(ctx
->myDoc
, ctx
, ctx
->sax
,
12974 userData
, ctx
->depth
+ 1,
12979 * xmlParseExternalEntityPrivate:
12980 * @doc: the document the chunk pertains to
12981 * @oldctxt: the previous parser context if available
12982 * @sax: the SAX handler block (possibly NULL)
12983 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12984 * @depth: Used for loop detection, use 0
12985 * @URL: the URL for the entity to load
12986 * @ID: the System ID for the entity to load
12987 * @list: the return value for the set of parsed nodes
12989 * Private version of xmlParseExternalEntity()
12991 * Returns 0 if the entity is well formed, -1 in case of args problem and
12992 * the parser error code otherwise
12995 static xmlParserErrors
12996 xmlParseExternalEntityPrivate(xmlDocPtr doc
, xmlParserCtxtPtr oldctxt
,
12997 xmlSAXHandlerPtr sax
,
12998 void *user_data
, int depth
, const xmlChar
*URL
,
12999 const xmlChar
*ID
, xmlNodePtr
*list
) {
13000 xmlParserCtxtPtr ctxt
;
13002 xmlNodePtr newRoot
;
13003 xmlSAXHandlerPtr oldsax
= NULL
;
13004 xmlParserErrors ret
= XML_ERR_OK
;
13006 xmlCharEncoding enc
;
13008 if (((depth
> 40) &&
13009 ((oldctxt
== NULL
) || (oldctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
13011 return(XML_ERR_ENTITY_LOOP
);
13016 if ((URL
== NULL
) && (ID
== NULL
))
13017 return(XML_ERR_INTERNAL_ERROR
);
13019 return(XML_ERR_INTERNAL_ERROR
);
13022 ctxt
= xmlCreateEntityParserCtxtInternal(URL
, ID
, NULL
, oldctxt
);
13023 if (ctxt
== NULL
) return(XML_WAR_UNDECLARED_ENTITY
);
13024 ctxt
->userData
= ctxt
;
13026 oldsax
= ctxt
->sax
;
13028 if (user_data
!= NULL
)
13029 ctxt
->userData
= user_data
;
13031 xmlDetectSAX2(ctxt
);
13032 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
13033 if (newDoc
== NULL
) {
13034 xmlFreeParserCtxt(ctxt
);
13035 return(XML_ERR_INTERNAL_ERROR
);
13037 newDoc
->properties
= XML_DOC_INTERNAL
;
13039 newDoc
->intSubset
= doc
->intSubset
;
13040 newDoc
->extSubset
= doc
->extSubset
;
13042 newDoc
->dict
= doc
->dict
;
13043 xmlDictReference(newDoc
->dict
);
13045 if (doc
->URL
!= NULL
) {
13046 newDoc
->URL
= xmlStrdup(doc
->URL
);
13049 newRoot
= xmlNewDocNode(newDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
13050 if (newRoot
== NULL
) {
13052 ctxt
->sax
= oldsax
;
13053 xmlFreeParserCtxt(ctxt
);
13054 newDoc
->intSubset
= NULL
;
13055 newDoc
->extSubset
= NULL
;
13056 xmlFreeDoc(newDoc
);
13057 return(XML_ERR_INTERNAL_ERROR
);
13059 xmlAddChild((xmlNodePtr
) newDoc
, newRoot
);
13060 nodePush(ctxt
, newDoc
->children
);
13062 ctxt
->myDoc
= newDoc
;
13065 newRoot
->doc
= doc
;
13069 * Get the 4 first bytes and decode the charset
13070 * if enc != XML_CHAR_ENCODING_NONE
13071 * plug some encoding conversion routines.
13074 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
13079 enc
= xmlDetectCharEncoding(start
, 4);
13080 if (enc
!= XML_CHAR_ENCODING_NONE
) {
13081 xmlSwitchEncoding(ctxt
, enc
);
13086 * Parse a possible text declaration first
13088 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13089 xmlParseTextDecl(ctxt
);
13091 * An XML-1.0 document can't reference an entity not XML-1.0
13093 if ((xmlStrEqual(oldctxt
->version
, BAD_CAST
"1.0")) &&
13094 (!xmlStrEqual(ctxt
->input
->version
, BAD_CAST
"1.0"))) {
13095 xmlFatalErrMsg(ctxt
, XML_ERR_VERSION_MISMATCH
,
13096 "Version mismatch between document and entity\n");
13100 ctxt
->instate
= XML_PARSER_CONTENT
;
13101 ctxt
->depth
= depth
;
13102 if (oldctxt
!= NULL
) {
13103 ctxt
->_private
= oldctxt
->_private
;
13104 ctxt
->loadsubset
= oldctxt
->loadsubset
;
13105 ctxt
->validate
= oldctxt
->validate
;
13106 ctxt
->valid
= oldctxt
->valid
;
13107 ctxt
->replaceEntities
= oldctxt
->replaceEntities
;
13108 if (oldctxt
->validate
) {
13109 ctxt
->vctxt
.error
= oldctxt
->vctxt
.error
;
13110 ctxt
->vctxt
.warning
= oldctxt
->vctxt
.warning
;
13111 ctxt
->vctxt
.userData
= oldctxt
->vctxt
.userData
;
13113 ctxt
->external
= oldctxt
->external
;
13114 if (ctxt
->dict
) xmlDictFree(ctxt
->dict
);
13115 ctxt
->dict
= oldctxt
->dict
;
13116 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
13117 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
13118 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
13119 ctxt
->dictNames
= oldctxt
->dictNames
;
13120 ctxt
->attsDefault
= oldctxt
->attsDefault
;
13121 ctxt
->attsSpecial
= oldctxt
->attsSpecial
;
13122 ctxt
->linenumbers
= oldctxt
->linenumbers
;
13123 ctxt
->record_info
= oldctxt
->record_info
;
13124 ctxt
->node_seq
.maximum
= oldctxt
->node_seq
.maximum
;
13125 ctxt
->node_seq
.length
= oldctxt
->node_seq
.length
;
13126 ctxt
->node_seq
.buffer
= oldctxt
->node_seq
.buffer
;
13129 * Doing validity checking on chunk without context
13130 * doesn't make sense
13132 ctxt
->_private
= NULL
;
13133 ctxt
->validate
= 0;
13134 ctxt
->external
= 2;
13135 ctxt
->loadsubset
= 0;
13138 xmlParseContent(ctxt
);
13140 if ((RAW
== '<') && (NXT(1) == '/')) {
13141 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13142 } else if (RAW
!= 0) {
13143 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
13145 if (ctxt
->node
!= newDoc
->children
) {
13146 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13149 if (!ctxt
->wellFormed
) {
13150 if (ctxt
->errNo
== 0)
13151 ret
= XML_ERR_INTERNAL_ERROR
;
13153 ret
= (xmlParserErrors
)ctxt
->errNo
;
13155 if (list
!= NULL
) {
13159 * Return the newly created nodeset after unlinking it from
13160 * they pseudo parent.
13162 cur
= newDoc
->children
->children
;
13164 while (cur
!= NULL
) {
13165 cur
->parent
= NULL
;
13168 newDoc
->children
->children
= NULL
;
13174 * Record in the parent context the number of entities replacement
13175 * done when parsing that reference.
13177 if (oldctxt
!= NULL
)
13178 oldctxt
->nbentities
+= ctxt
->nbentities
;
13181 * Also record the size of the entity parsed
13183 if (ctxt
->input
!= NULL
&& oldctxt
!= NULL
) {
13184 oldctxt
->sizeentities
+= ctxt
->input
->consumed
;
13185 oldctxt
->sizeentities
+= (ctxt
->input
->cur
- ctxt
->input
->base
);
13188 * And record the last error if any
13190 if ((oldctxt
!= NULL
) && (ctxt
->lastError
.code
!= XML_ERR_OK
))
13191 xmlCopyError(&ctxt
->lastError
, &oldctxt
->lastError
);
13194 ctxt
->sax
= oldsax
;
13195 if (oldctxt
!= NULL
) {
13197 ctxt
->attsDefault
= NULL
;
13198 ctxt
->attsSpecial
= NULL
;
13199 oldctxt
->validate
= ctxt
->validate
;
13200 oldctxt
->valid
= ctxt
->valid
;
13201 oldctxt
->node_seq
.maximum
= ctxt
->node_seq
.maximum
;
13202 oldctxt
->node_seq
.length
= ctxt
->node_seq
.length
;
13203 oldctxt
->node_seq
.buffer
= ctxt
->node_seq
.buffer
;
13205 ctxt
->node_seq
.maximum
= 0;
13206 ctxt
->node_seq
.length
= 0;
13207 ctxt
->node_seq
.buffer
= NULL
;
13208 xmlFreeParserCtxt(ctxt
);
13209 newDoc
->intSubset
= NULL
;
13210 newDoc
->extSubset
= NULL
;
13211 xmlFreeDoc(newDoc
);
13216 #ifdef LIBXML_SAX1_ENABLED
13218 * xmlParseExternalEntity:
13219 * @doc: the document the chunk pertains to
13220 * @sax: the SAX handler block (possibly NULL)
13221 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13222 * @depth: Used for loop detection, use 0
13223 * @URL: the URL for the entity to load
13224 * @ID: the System ID for the entity to load
13225 * @lst: the return value for the set of parsed nodes
13227 * Parse an external general entity
13228 * An external general parsed entity is well-formed if it matches the
13229 * production labeled extParsedEnt.
13231 * [78] extParsedEnt ::= TextDecl? content
13233 * Returns 0 if the entity is well formed, -1 in case of args problem and
13234 * the parser error code otherwise
13238 xmlParseExternalEntity(xmlDocPtr doc
, xmlSAXHandlerPtr sax
, void *user_data
,
13239 int depth
, const xmlChar
*URL
, const xmlChar
*ID
, xmlNodePtr
*lst
) {
13240 return(xmlParseExternalEntityPrivate(doc
, NULL
, sax
, user_data
, depth
, URL
,
13245 * xmlParseBalancedChunkMemory:
13246 * @doc: the document the chunk pertains to (must not be NULL)
13247 * @sax: the SAX handler block (possibly NULL)
13248 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13249 * @depth: Used for loop detection, use 0
13250 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13251 * @lst: the return value for the set of parsed nodes
13253 * Parse a well-balanced chunk of an XML document
13254 * called by the parser
13255 * The allowed sequence for the Well Balanced Chunk is the one defined by
13256 * the content production in the XML grammar:
13258 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13260 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13261 * the parser error code otherwise
13265 xmlParseBalancedChunkMemory(xmlDocPtr doc
, xmlSAXHandlerPtr sax
,
13266 void *user_data
, int depth
, const xmlChar
*string
, xmlNodePtr
*lst
) {
13267 return xmlParseBalancedChunkMemoryRecover( doc
, sax
, user_data
,
13268 depth
, string
, lst
, 0 );
13270 #endif /* LIBXML_SAX1_ENABLED */
13273 * xmlParseBalancedChunkMemoryInternal:
13274 * @oldctxt: the existing parsing context
13275 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13276 * @user_data: the user data field for the parser context
13277 * @lst: the return value for the set of parsed nodes
13280 * Parse a well-balanced chunk of an XML document
13281 * called by the parser
13282 * The allowed sequence for the Well Balanced Chunk is the one defined by
13283 * the content production in the XML grammar:
13285 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13287 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13288 * error code otherwise
13290 * In case recover is set to 1, the nodelist will not be empty even if
13291 * the parsed chunk is not well balanced.
13293 static xmlParserErrors
13294 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt
,
13295 const xmlChar
*string
, void *user_data
, xmlNodePtr
*lst
) {
13296 xmlParserCtxtPtr ctxt
;
13297 xmlDocPtr newDoc
= NULL
;
13298 xmlNodePtr newRoot
;
13299 xmlSAXHandlerPtr oldsax
= NULL
;
13300 xmlNodePtr content
= NULL
;
13301 xmlNodePtr last
= NULL
;
13303 xmlParserErrors ret
= XML_ERR_OK
;
13308 if (((oldctxt
->depth
> 40) && ((oldctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
13309 (oldctxt
->depth
> 1024)) {
13310 return(XML_ERR_ENTITY_LOOP
);
13316 if (string
== NULL
)
13317 return(XML_ERR_INTERNAL_ERROR
);
13319 size
= xmlStrlen(string
);
13321 ctxt
= xmlCreateMemoryParserCtxt((char *) string
, size
);
13322 if (ctxt
== NULL
) return(XML_WAR_UNDECLARED_ENTITY
);
13323 if (user_data
!= NULL
)
13324 ctxt
->userData
= user_data
;
13326 ctxt
->userData
= ctxt
;
13327 if (ctxt
->dict
!= NULL
) xmlDictFree(ctxt
->dict
);
13328 ctxt
->dict
= oldctxt
->dict
;
13329 ctxt
->input_id
= oldctxt
->input_id
+ 1;
13330 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
13331 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
13332 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
13335 /* propagate namespaces down the entity */
13336 for (i
= 0;i
< oldctxt
->nsNr
;i
+= 2) {
13337 nsPush(ctxt
, oldctxt
->nsTab
[i
], oldctxt
->nsTab
[i
+1]);
13341 oldsax
= ctxt
->sax
;
13342 ctxt
->sax
= oldctxt
->sax
;
13343 xmlDetectSAX2(ctxt
);
13344 ctxt
->replaceEntities
= oldctxt
->replaceEntities
;
13345 ctxt
->options
= oldctxt
->options
;
13347 ctxt
->_private
= oldctxt
->_private
;
13348 if (oldctxt
->myDoc
== NULL
) {
13349 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
13350 if (newDoc
== NULL
) {
13351 ctxt
->sax
= oldsax
;
13353 xmlFreeParserCtxt(ctxt
);
13354 return(XML_ERR_INTERNAL_ERROR
);
13356 newDoc
->properties
= XML_DOC_INTERNAL
;
13357 newDoc
->dict
= ctxt
->dict
;
13358 xmlDictReference(newDoc
->dict
);
13359 ctxt
->myDoc
= newDoc
;
13361 ctxt
->myDoc
= oldctxt
->myDoc
;
13362 content
= ctxt
->myDoc
->children
;
13363 last
= ctxt
->myDoc
->last
;
13365 newRoot
= xmlNewDocNode(ctxt
->myDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
13366 if (newRoot
== NULL
) {
13367 ctxt
->sax
= oldsax
;
13369 xmlFreeParserCtxt(ctxt
);
13370 if (newDoc
!= NULL
) {
13371 xmlFreeDoc(newDoc
);
13373 return(XML_ERR_INTERNAL_ERROR
);
13375 ctxt
->myDoc
->children
= NULL
;
13376 ctxt
->myDoc
->last
= NULL
;
13377 xmlAddChild((xmlNodePtr
) ctxt
->myDoc
, newRoot
);
13378 nodePush(ctxt
, ctxt
->myDoc
->children
);
13379 ctxt
->instate
= XML_PARSER_CONTENT
;
13380 ctxt
->depth
= oldctxt
->depth
+ 1;
13382 ctxt
->validate
= 0;
13383 ctxt
->loadsubset
= oldctxt
->loadsubset
;
13384 if ((oldctxt
->validate
) || (oldctxt
->replaceEntities
!= 0)) {
13386 * ID/IDREF registration will be done in xmlValidateElement below
13388 ctxt
->loadsubset
|= XML_SKIP_IDS
;
13390 ctxt
->dictNames
= oldctxt
->dictNames
;
13391 ctxt
->attsDefault
= oldctxt
->attsDefault
;
13392 ctxt
->attsSpecial
= oldctxt
->attsSpecial
;
13394 xmlParseContent(ctxt
);
13395 if ((RAW
== '<') && (NXT(1) == '/')) {
13396 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13397 } else if (RAW
!= 0) {
13398 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
13400 if (ctxt
->node
!= ctxt
->myDoc
->children
) {
13401 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13404 if (!ctxt
->wellFormed
) {
13405 if (ctxt
->errNo
== 0)
13406 ret
= XML_ERR_INTERNAL_ERROR
;
13408 ret
= (xmlParserErrors
)ctxt
->errNo
;
13413 if ((lst
!= NULL
) && (ret
== XML_ERR_OK
)) {
13417 * Return the newly created nodeset after unlinking it from
13418 * they pseudo parent.
13420 cur
= ctxt
->myDoc
->children
->children
;
13422 while (cur
!= NULL
) {
13423 #ifdef LIBXML_VALID_ENABLED
13424 if ((oldctxt
->validate
) && (oldctxt
->wellFormed
) &&
13425 (oldctxt
->myDoc
) && (oldctxt
->myDoc
->intSubset
) &&
13426 (cur
->type
== XML_ELEMENT_NODE
)) {
13427 oldctxt
->valid
&= xmlValidateElement(&oldctxt
->vctxt
,
13428 oldctxt
->myDoc
, cur
);
13430 #endif /* LIBXML_VALID_ENABLED */
13431 cur
->parent
= NULL
;
13434 ctxt
->myDoc
->children
->children
= NULL
;
13436 if (ctxt
->myDoc
!= NULL
) {
13437 xmlFreeNode(ctxt
->myDoc
->children
);
13438 ctxt
->myDoc
->children
= content
;
13439 ctxt
->myDoc
->last
= last
;
13443 * Record in the parent context the number of entities replacement
13444 * done when parsing that reference.
13446 if (oldctxt
!= NULL
)
13447 oldctxt
->nbentities
+= ctxt
->nbentities
;
13450 * Also record the last error if any
13452 if (ctxt
->lastError
.code
!= XML_ERR_OK
)
13453 xmlCopyError(&ctxt
->lastError
, &oldctxt
->lastError
);
13455 ctxt
->sax
= oldsax
;
13457 ctxt
->attsDefault
= NULL
;
13458 ctxt
->attsSpecial
= NULL
;
13459 xmlFreeParserCtxt(ctxt
);
13460 if (newDoc
!= NULL
) {
13461 xmlFreeDoc(newDoc
);
13468 * xmlParseInNodeContext:
13469 * @node: the context node
13470 * @data: the input string
13471 * @datalen: the input string length in bytes
13472 * @options: a combination of xmlParserOption
13473 * @lst: the return value for the set of parsed nodes
13475 * Parse a well-balanced chunk of an XML document
13476 * within the context (DTD, namespaces, etc ...) of the given node.
13478 * The allowed sequence for the data is a Well Balanced Chunk defined by
13479 * the content production in the XML grammar:
13481 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13483 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13484 * error code otherwise
13487 xmlParseInNodeContext(xmlNodePtr node
, const char *data
, int datalen
,
13488 int options
, xmlNodePtr
*lst
) {
13490 xmlParserCtxtPtr ctxt
;
13491 xmlDocPtr doc
= NULL
;
13492 xmlNodePtr fake
, cur
;
13495 xmlParserErrors ret
= XML_ERR_OK
;
13498 * check all input parameters, grab the document
13500 if ((lst
== NULL
) || (node
== NULL
) || (data
== NULL
) || (datalen
< 0))
13501 return(XML_ERR_INTERNAL_ERROR
);
13502 switch (node
->type
) {
13503 case XML_ELEMENT_NODE
:
13504 case XML_ATTRIBUTE_NODE
:
13505 case XML_TEXT_NODE
:
13506 case XML_CDATA_SECTION_NODE
:
13507 case XML_ENTITY_REF_NODE
:
13509 case XML_COMMENT_NODE
:
13510 case XML_DOCUMENT_NODE
:
13511 case XML_HTML_DOCUMENT_NODE
:
13514 return(XML_ERR_INTERNAL_ERROR
);
13517 while ((node
!= NULL
) && (node
->type
!= XML_ELEMENT_NODE
) &&
13518 (node
->type
!= XML_DOCUMENT_NODE
) &&
13519 (node
->type
!= XML_HTML_DOCUMENT_NODE
))
13520 node
= node
->parent
;
13522 return(XML_ERR_INTERNAL_ERROR
);
13523 if (node
->type
== XML_ELEMENT_NODE
)
13526 doc
= (xmlDocPtr
) node
;
13528 return(XML_ERR_INTERNAL_ERROR
);
13531 * allocate a context and set-up everything not related to the
13532 * node position in the tree
13534 if (doc
->type
== XML_DOCUMENT_NODE
)
13535 ctxt
= xmlCreateMemoryParserCtxt((char *) data
, datalen
);
13536 #ifdef LIBXML_HTML_ENABLED
13537 else if (doc
->type
== XML_HTML_DOCUMENT_NODE
) {
13538 ctxt
= htmlCreateMemoryParserCtxt((char *) data
, datalen
);
13540 * When parsing in context, it makes no sense to add implied
13541 * elements like html/body/etc...
13543 options
|= HTML_PARSE_NOIMPLIED
;
13547 return(XML_ERR_INTERNAL_ERROR
);
13550 return(XML_ERR_NO_MEMORY
);
13553 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13554 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13555 * we must wait until the last moment to free the original one.
13557 if (doc
->dict
!= NULL
) {
13558 if (ctxt
->dict
!= NULL
)
13559 xmlDictFree(ctxt
->dict
);
13560 ctxt
->dict
= doc
->dict
;
13562 options
|= XML_PARSE_NODICT
;
13564 if (doc
->encoding
!= NULL
) {
13565 xmlCharEncodingHandlerPtr hdlr
;
13567 if (ctxt
->encoding
!= NULL
)
13568 xmlFree((xmlChar
*) ctxt
->encoding
);
13569 ctxt
->encoding
= xmlStrdup((const xmlChar
*) doc
->encoding
);
13571 hdlr
= xmlFindCharEncodingHandler((const char *) doc
->encoding
);
13572 if (hdlr
!= NULL
) {
13573 xmlSwitchToEncoding(ctxt
, hdlr
);
13575 return(XML_ERR_UNSUPPORTED_ENCODING
);
13579 xmlCtxtUseOptionsInternal(ctxt
, options
, NULL
);
13580 xmlDetectSAX2(ctxt
);
13582 /* parsing in context, i.e. as within existing content */
13583 ctxt
->input_id
= 2;
13584 ctxt
->instate
= XML_PARSER_CONTENT
;
13586 fake
= xmlNewDocComment(node
->doc
, NULL
);
13587 if (fake
== NULL
) {
13588 xmlFreeParserCtxt(ctxt
);
13589 return(XML_ERR_NO_MEMORY
);
13591 xmlAddChild(node
, fake
);
13593 if (node
->type
== XML_ELEMENT_NODE
) {
13594 nodePush(ctxt
, node
);
13596 * initialize the SAX2 namespaces stack
13599 while ((cur
!= NULL
) && (cur
->type
== XML_ELEMENT_NODE
)) {
13600 xmlNsPtr ns
= cur
->nsDef
;
13601 const xmlChar
*iprefix
, *ihref
;
13603 while (ns
!= NULL
) {
13605 iprefix
= xmlDictLookup(ctxt
->dict
, ns
->prefix
, -1);
13606 ihref
= xmlDictLookup(ctxt
->dict
, ns
->href
, -1);
13608 iprefix
= ns
->prefix
;
13612 if (xmlGetNamespace(ctxt
, iprefix
) == NULL
) {
13613 nsPush(ctxt
, iprefix
, ihref
);
13622 if ((ctxt
->validate
) || (ctxt
->replaceEntities
!= 0)) {
13624 * ID/IDREF registration will be done in xmlValidateElement below
13626 ctxt
->loadsubset
|= XML_SKIP_IDS
;
13629 #ifdef LIBXML_HTML_ENABLED
13630 if (doc
->type
== XML_HTML_DOCUMENT_NODE
)
13631 __htmlParseContent(ctxt
);
13634 xmlParseContent(ctxt
);
13637 if ((RAW
== '<') && (NXT(1) == '/')) {
13638 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13639 } else if (RAW
!= 0) {
13640 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
13642 if ((ctxt
->node
!= NULL
) && (ctxt
->node
!= node
)) {
13643 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13644 ctxt
->wellFormed
= 0;
13647 if (!ctxt
->wellFormed
) {
13648 if (ctxt
->errNo
== 0)
13649 ret
= XML_ERR_INTERNAL_ERROR
;
13651 ret
= (xmlParserErrors
)ctxt
->errNo
;
13657 * Return the newly created nodeset after unlinking it from
13658 * the pseudo sibling.
13671 while (cur
!= NULL
) {
13672 cur
->parent
= NULL
;
13676 xmlUnlinkNode(fake
);
13680 if (ret
!= XML_ERR_OK
) {
13681 xmlFreeNodeList(*lst
);
13685 if (doc
->dict
!= NULL
)
13687 xmlFreeParserCtxt(ctxt
);
13691 return(XML_ERR_INTERNAL_ERROR
);
13695 #ifdef LIBXML_SAX1_ENABLED
13697 * xmlParseBalancedChunkMemoryRecover:
13698 * @doc: the document the chunk pertains to (must not be NULL)
13699 * @sax: the SAX handler block (possibly NULL)
13700 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13701 * @depth: Used for loop detection, use 0
13702 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13703 * @lst: the return value for the set of parsed nodes
13704 * @recover: return nodes even if the data is broken (use 0)
13707 * Parse a well-balanced chunk of an XML document
13708 * called by the parser
13709 * The allowed sequence for the Well Balanced Chunk is the one defined by
13710 * the content production in the XML grammar:
13712 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13714 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13715 * the parser error code otherwise
13717 * In case recover is set to 1, the nodelist will not be empty even if
13718 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13722 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc
, xmlSAXHandlerPtr sax
,
13723 void *user_data
, int depth
, const xmlChar
*string
, xmlNodePtr
*lst
,
13725 xmlParserCtxtPtr ctxt
;
13727 xmlSAXHandlerPtr oldsax
= NULL
;
13728 xmlNodePtr content
, newRoot
;
13733 return(XML_ERR_ENTITY_LOOP
);
13739 if (string
== NULL
)
13742 size
= xmlStrlen(string
);
13744 ctxt
= xmlCreateMemoryParserCtxt((char *) string
, size
);
13745 if (ctxt
== NULL
) return(-1);
13746 ctxt
->userData
= ctxt
;
13748 oldsax
= ctxt
->sax
;
13750 if (user_data
!= NULL
)
13751 ctxt
->userData
= user_data
;
13753 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
13754 if (newDoc
== NULL
) {
13755 xmlFreeParserCtxt(ctxt
);
13758 newDoc
->properties
= XML_DOC_INTERNAL
;
13759 if ((doc
!= NULL
) && (doc
->dict
!= NULL
)) {
13760 xmlDictFree(ctxt
->dict
);
13761 ctxt
->dict
= doc
->dict
;
13762 xmlDictReference(ctxt
->dict
);
13763 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
13764 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
13765 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
13766 ctxt
->dictNames
= 1;
13768 xmlCtxtUseOptionsInternal(ctxt
, XML_PARSE_NODICT
, NULL
);
13770 /* doc == NULL is only supported for historic reasons */
13772 newDoc
->intSubset
= doc
->intSubset
;
13773 newDoc
->extSubset
= doc
->extSubset
;
13775 newRoot
= xmlNewDocNode(newDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
13776 if (newRoot
== NULL
) {
13778 ctxt
->sax
= oldsax
;
13779 xmlFreeParserCtxt(ctxt
);
13780 newDoc
->intSubset
= NULL
;
13781 newDoc
->extSubset
= NULL
;
13782 xmlFreeDoc(newDoc
);
13785 xmlAddChild((xmlNodePtr
) newDoc
, newRoot
);
13786 nodePush(ctxt
, newRoot
);
13787 /* doc == NULL is only supported for historic reasons */
13789 ctxt
->myDoc
= newDoc
;
13791 ctxt
->myDoc
= newDoc
;
13792 newDoc
->children
->doc
= doc
;
13793 /* Ensure that doc has XML spec namespace */
13794 xmlSearchNsByHref(doc
, (xmlNodePtr
)doc
, XML_XML_NAMESPACE
);
13795 newDoc
->oldNs
= doc
->oldNs
;
13797 ctxt
->instate
= XML_PARSER_CONTENT
;
13798 ctxt
->input_id
= 2;
13799 ctxt
->depth
= depth
;
13802 * Doing validity checking on chunk doesn't make sense
13804 ctxt
->validate
= 0;
13805 ctxt
->loadsubset
= 0;
13806 xmlDetectSAX2(ctxt
);
13808 if ( doc
!= NULL
){
13809 content
= doc
->children
;
13810 doc
->children
= NULL
;
13811 xmlParseContent(ctxt
);
13812 doc
->children
= content
;
13815 xmlParseContent(ctxt
);
13817 if ((RAW
== '<') && (NXT(1) == '/')) {
13818 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13819 } else if (RAW
!= 0) {
13820 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
13822 if (ctxt
->node
!= newDoc
->children
) {
13823 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13826 if (!ctxt
->wellFormed
) {
13827 if (ctxt
->errNo
== 0)
13835 if ((lst
!= NULL
) && ((ret
== 0) || (recover
== 1))) {
13839 * Return the newly created nodeset after unlinking it from
13840 * they pseudo parent.
13842 cur
= newDoc
->children
->children
;
13844 while (cur
!= NULL
) {
13845 xmlSetTreeDoc(cur
, doc
);
13846 cur
->parent
= NULL
;
13849 newDoc
->children
->children
= NULL
;
13853 ctxt
->sax
= oldsax
;
13854 xmlFreeParserCtxt(ctxt
);
13855 newDoc
->intSubset
= NULL
;
13856 newDoc
->extSubset
= NULL
;
13857 /* This leaks the namespace list if doc == NULL */
13858 newDoc
->oldNs
= NULL
;
13859 xmlFreeDoc(newDoc
);
13865 * xmlSAXParseEntity:
13866 * @sax: the SAX handler block
13867 * @filename: the filename
13869 * parse an XML external entity out of context and build a tree.
13870 * It use the given SAX function block to handle the parsing callback.
13871 * If sax is NULL, fallback to the default DOM tree building routines.
13873 * [78] extParsedEnt ::= TextDecl? content
13875 * This correspond to a "Well Balanced" chunk
13877 * Returns the resulting document tree
13881 xmlSAXParseEntity(xmlSAXHandlerPtr sax
, const char *filename
) {
13883 xmlParserCtxtPtr ctxt
;
13885 ctxt
= xmlCreateFileParserCtxt(filename
);
13886 if (ctxt
== NULL
) {
13890 if (ctxt
->sax
!= NULL
)
13891 xmlFree(ctxt
->sax
);
13893 ctxt
->userData
= NULL
;
13896 xmlParseExtParsedEnt(ctxt
);
13898 if (ctxt
->wellFormed
)
13902 xmlFreeDoc(ctxt
->myDoc
);
13903 ctxt
->myDoc
= NULL
;
13907 xmlFreeParserCtxt(ctxt
);
13914 * @filename: the filename
13916 * parse an XML external entity out of context and build a tree.
13918 * [78] extParsedEnt ::= TextDecl? content
13920 * This correspond to a "Well Balanced" chunk
13922 * Returns the resulting document tree
13926 xmlParseEntity(const char *filename
) {
13927 return(xmlSAXParseEntity(NULL
, filename
));
13929 #endif /* LIBXML_SAX1_ENABLED */
13932 * xmlCreateEntityParserCtxtInternal:
13933 * @URL: the entity URL
13934 * @ID: the entity PUBLIC ID
13935 * @base: a possible base for the target URI
13936 * @pctx: parser context used to set options on new context
13938 * Create a parser context for an external entity
13939 * Automatic support for ZLIB/Compress compressed document is provided
13940 * by default if found at compile-time.
13942 * Returns the new parser context or NULL
13944 static xmlParserCtxtPtr
13945 xmlCreateEntityParserCtxtInternal(const xmlChar
*URL
, const xmlChar
*ID
,
13946 const xmlChar
*base
, xmlParserCtxtPtr pctx
) {
13947 xmlParserCtxtPtr ctxt
;
13948 xmlParserInputPtr inputStream
;
13949 char *directory
= NULL
;
13952 ctxt
= xmlNewParserCtxt();
13953 if (ctxt
== NULL
) {
13957 if (pctx
!= NULL
) {
13958 ctxt
->options
= pctx
->options
;
13959 ctxt
->_private
= pctx
->_private
;
13961 * this is a subparser of pctx, so the input_id should be
13962 * incremented to distinguish from main entity
13964 ctxt
->input_id
= pctx
->input_id
+ 1;
13967 /* Don't read from stdin. */
13968 if (xmlStrcmp(URL
, BAD_CAST
"-") == 0)
13969 URL
= BAD_CAST
"./-";
13971 uri
= xmlBuildURI(URL
, base
);
13974 inputStream
= xmlLoadExternalEntity((char *)URL
, (char *)ID
, ctxt
);
13975 if (inputStream
== NULL
) {
13976 xmlFreeParserCtxt(ctxt
);
13980 inputPush(ctxt
, inputStream
);
13982 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
13983 directory
= xmlParserGetDirectory((char *)URL
);
13984 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
13985 ctxt
->directory
= directory
;
13987 inputStream
= xmlLoadExternalEntity((char *)uri
, (char *)ID
, ctxt
);
13988 if (inputStream
== NULL
) {
13990 xmlFreeParserCtxt(ctxt
);
13994 inputPush(ctxt
, inputStream
);
13996 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
13997 directory
= xmlParserGetDirectory((char *)uri
);
13998 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
13999 ctxt
->directory
= directory
;
14006 * xmlCreateEntityParserCtxt:
14007 * @URL: the entity URL
14008 * @ID: the entity PUBLIC ID
14009 * @base: a possible base for the target URI
14011 * Create a parser context for an external entity
14012 * Automatic support for ZLIB/Compress compressed document is provided
14013 * by default if found at compile-time.
14015 * Returns the new parser context or NULL
14018 xmlCreateEntityParserCtxt(const xmlChar
*URL
, const xmlChar
*ID
,
14019 const xmlChar
*base
) {
14020 return xmlCreateEntityParserCtxtInternal(URL
, ID
, base
, NULL
);
14024 /************************************************************************
14026 * Front ends when parsing from a file *
14028 ************************************************************************/
14031 * xmlCreateURLParserCtxt:
14032 * @filename: the filename or URL
14033 * @options: a combination of xmlParserOption
14035 * Create a parser context for a file or URL content.
14036 * Automatic support for ZLIB/Compress compressed document is provided
14037 * by default if found at compile-time and for file accesses
14039 * Returns the new parser context or NULL
14042 xmlCreateURLParserCtxt(const char *filename
, int options
)
14044 xmlParserCtxtPtr ctxt
;
14045 xmlParserInputPtr inputStream
;
14046 char *directory
= NULL
;
14048 ctxt
= xmlNewParserCtxt();
14049 if (ctxt
== NULL
) {
14050 xmlErrMemory(NULL
, "cannot allocate parser context");
14055 xmlCtxtUseOptionsInternal(ctxt
, options
, NULL
);
14056 ctxt
->linenumbers
= 1;
14058 inputStream
= xmlLoadExternalEntity(filename
, NULL
, ctxt
);
14059 if (inputStream
== NULL
) {
14060 xmlFreeParserCtxt(ctxt
);
14064 inputPush(ctxt
, inputStream
);
14065 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
14066 directory
= xmlParserGetDirectory(filename
);
14067 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
14068 ctxt
->directory
= directory
;
14074 * xmlCreateFileParserCtxt:
14075 * @filename: the filename
14077 * Create a parser context for a file content.
14078 * Automatic support for ZLIB/Compress compressed document is provided
14079 * by default if found at compile-time.
14081 * Returns the new parser context or NULL
14084 xmlCreateFileParserCtxt(const char *filename
)
14086 return(xmlCreateURLParserCtxt(filename
, 0));
14089 #ifdef LIBXML_SAX1_ENABLED
14091 * xmlSAXParseFileWithData:
14092 * @sax: the SAX handler block
14093 * @filename: the filename
14094 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14096 * @data: the userdata
14098 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14099 * compressed document is provided by default if found at compile-time.
14100 * It use the given SAX function block to handle the parsing callback.
14101 * If sax is NULL, fallback to the default DOM tree building routines.
14103 * User data (void *) is stored within the parser context in the
14104 * context's _private member, so it is available nearly everywhere in libxml
14106 * Returns the resulting document tree
14110 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax
, const char *filename
,
14111 int recovery
, void *data
) {
14113 xmlParserCtxtPtr ctxt
;
14117 ctxt
= xmlCreateFileParserCtxt(filename
);
14118 if (ctxt
== NULL
) {
14122 if (ctxt
->sax
!= NULL
)
14123 xmlFree(ctxt
->sax
);
14126 xmlDetectSAX2(ctxt
);
14128 ctxt
->_private
= data
;
14131 if (ctxt
->directory
== NULL
)
14132 ctxt
->directory
= xmlParserGetDirectory(filename
);
14134 ctxt
->recovery
= recovery
;
14136 xmlParseDocument(ctxt
);
14138 if ((ctxt
->wellFormed
) || recovery
) {
14140 if ((ret
!= NULL
) && (ctxt
->input
->buf
!= NULL
)) {
14141 if (ctxt
->input
->buf
->compressed
> 0)
14142 ret
->compression
= 9;
14144 ret
->compression
= ctxt
->input
->buf
->compressed
;
14149 xmlFreeDoc(ctxt
->myDoc
);
14150 ctxt
->myDoc
= NULL
;
14154 xmlFreeParserCtxt(ctxt
);
14161 * @sax: the SAX handler block
14162 * @filename: the filename
14163 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14166 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14167 * compressed document is provided by default if found at compile-time.
14168 * It use the given SAX function block to handle the parsing callback.
14169 * If sax is NULL, fallback to the default DOM tree building routines.
14171 * Returns the resulting document tree
14175 xmlSAXParseFile(xmlSAXHandlerPtr sax
, const char *filename
,
14177 return(xmlSAXParseFileWithData(sax
,filename
,recovery
,NULL
));
14182 * @cur: a pointer to an array of xmlChar
14184 * parse an XML in-memory document and build a tree.
14185 * In the case the document is not Well Formed, a attempt to build a
14186 * tree is tried anyway
14188 * Returns the resulting document tree or NULL in case of failure
14192 xmlRecoverDoc(const xmlChar
*cur
) {
14193 return(xmlSAXParseDoc(NULL
, cur
, 1));
14198 * @filename: the filename
14200 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14201 * compressed document is provided by default if found at compile-time.
14203 * Returns the resulting document tree if the file was wellformed,
14208 xmlParseFile(const char *filename
) {
14209 return(xmlSAXParseFile(NULL
, filename
, 0));
14214 * @filename: the filename
14216 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14217 * compressed document is provided by default if found at compile-time.
14218 * In the case the document is not Well Formed, it attempts to build
14221 * Returns the resulting document tree or NULL in case of failure
14225 xmlRecoverFile(const char *filename
) {
14226 return(xmlSAXParseFile(NULL
, filename
, 1));
14231 * xmlSetupParserForBuffer:
14232 * @ctxt: an XML parser context
14233 * @buffer: a xmlChar * buffer
14234 * @filename: a file name
14236 * Setup the parser context to parse a new buffer; Clears any prior
14237 * contents from the parser context. The buffer parameter must not be
14238 * NULL, but the filename parameter can be
14241 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt
, const xmlChar
* buffer
,
14242 const char* filename
)
14244 xmlParserInputPtr input
;
14246 if ((ctxt
== NULL
) || (buffer
== NULL
))
14249 input
= xmlNewInputStream(ctxt
);
14250 if (input
== NULL
) {
14251 xmlErrMemory(NULL
, "parsing new buffer: out of memory\n");
14252 xmlClearParserCtxt(ctxt
);
14256 xmlClearParserCtxt(ctxt
);
14257 if (filename
!= NULL
)
14258 input
->filename
= (char *) xmlCanonicPath((const xmlChar
*)filename
);
14259 input
->base
= buffer
;
14260 input
->cur
= buffer
;
14261 input
->end
= &buffer
[xmlStrlen(buffer
)];
14262 inputPush(ctxt
, input
);
14266 * xmlSAXUserParseFile:
14267 * @sax: a SAX handler
14268 * @user_data: The user data returned on SAX callbacks
14269 * @filename: a file name
14271 * parse an XML file and call the given SAX handler routines.
14272 * Automatic support for ZLIB/Compress compressed document is provided
14274 * Returns 0 in case of success or a error number otherwise
14277 xmlSAXUserParseFile(xmlSAXHandlerPtr sax
, void *user_data
,
14278 const char *filename
) {
14280 xmlParserCtxtPtr ctxt
;
14282 ctxt
= xmlCreateFileParserCtxt(filename
);
14283 if (ctxt
== NULL
) return -1;
14284 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
14285 xmlFree(ctxt
->sax
);
14287 xmlDetectSAX2(ctxt
);
14289 if (user_data
!= NULL
)
14290 ctxt
->userData
= user_data
;
14292 xmlParseDocument(ctxt
);
14294 if (ctxt
->wellFormed
)
14297 if (ctxt
->errNo
!= 0)
14304 if (ctxt
->myDoc
!= NULL
) {
14305 xmlFreeDoc(ctxt
->myDoc
);
14306 ctxt
->myDoc
= NULL
;
14308 xmlFreeParserCtxt(ctxt
);
14312 #endif /* LIBXML_SAX1_ENABLED */
14314 /************************************************************************
14316 * Front ends when parsing from memory *
14318 ************************************************************************/
14321 * xmlCreateMemoryParserCtxt:
14322 * @buffer: a pointer to a char array
14323 * @size: the size of the array
14325 * Create a parser context for an XML in-memory document.
14327 * Returns the new parser context or NULL
14330 xmlCreateMemoryParserCtxt(const char *buffer
, int size
) {
14331 xmlParserCtxtPtr ctxt
;
14332 xmlParserInputPtr input
;
14333 xmlParserInputBufferPtr buf
;
14335 if (buffer
== NULL
)
14340 ctxt
= xmlNewParserCtxt();
14344 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14345 buf
= xmlParserInputBufferCreateMem(buffer
, size
, XML_CHAR_ENCODING_NONE
);
14347 xmlFreeParserCtxt(ctxt
);
14351 input
= xmlNewInputStream(ctxt
);
14352 if (input
== NULL
) {
14353 xmlFreeParserInputBuffer(buf
);
14354 xmlFreeParserCtxt(ctxt
);
14358 input
->filename
= NULL
;
14360 xmlBufResetInput(input
->buf
->buffer
, input
);
14362 inputPush(ctxt
, input
);
14366 #ifdef LIBXML_SAX1_ENABLED
14368 * xmlSAXParseMemoryWithData:
14369 * @sax: the SAX handler block
14370 * @buffer: an pointer to a char array
14371 * @size: the size of the array
14372 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14374 * @data: the userdata
14376 * parse an XML in-memory block and use the given SAX function block
14377 * to handle the parsing callback. If sax is NULL, fallback to the default
14378 * DOM tree building routines.
14380 * User data (void *) is stored within the parser context in the
14381 * context's _private member, so it is available nearly everywhere in libxml
14383 * Returns the resulting document tree
14387 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax
, const char *buffer
,
14388 int size
, int recovery
, void *data
) {
14390 xmlParserCtxtPtr ctxt
;
14394 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
14395 if (ctxt
== NULL
) return(NULL
);
14397 if (ctxt
->sax
!= NULL
)
14398 xmlFree(ctxt
->sax
);
14401 xmlDetectSAX2(ctxt
);
14403 ctxt
->_private
=data
;
14406 ctxt
->recovery
= recovery
;
14408 xmlParseDocument(ctxt
);
14410 if ((ctxt
->wellFormed
) || recovery
) ret
= ctxt
->myDoc
;
14413 xmlFreeDoc(ctxt
->myDoc
);
14414 ctxt
->myDoc
= NULL
;
14418 xmlFreeParserCtxt(ctxt
);
14424 * xmlSAXParseMemory:
14425 * @sax: the SAX handler block
14426 * @buffer: an pointer to a char array
14427 * @size: the size of the array
14428 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14431 * parse an XML in-memory block and use the given SAX function block
14432 * to handle the parsing callback. If sax is NULL, fallback to the default
14433 * DOM tree building routines.
14435 * Returns the resulting document tree
14438 xmlSAXParseMemory(xmlSAXHandlerPtr sax
, const char *buffer
,
14439 int size
, int recovery
) {
14440 return xmlSAXParseMemoryWithData(sax
, buffer
, size
, recovery
, NULL
);
14445 * @buffer: an pointer to a char array
14446 * @size: the size of the array
14448 * parse an XML in-memory block and build a tree.
14450 * Returns the resulting document tree
14453 xmlDocPtr
xmlParseMemory(const char *buffer
, int size
) {
14454 return(xmlSAXParseMemory(NULL
, buffer
, size
, 0));
14458 * xmlRecoverMemory:
14459 * @buffer: an pointer to a char array
14460 * @size: the size of the array
14462 * parse an XML in-memory block and build a tree.
14463 * In the case the document is not Well Formed, an attempt to
14464 * build a tree is tried anyway
14466 * Returns the resulting document tree or NULL in case of error
14469 xmlDocPtr
xmlRecoverMemory(const char *buffer
, int size
) {
14470 return(xmlSAXParseMemory(NULL
, buffer
, size
, 1));
14474 * xmlSAXUserParseMemory:
14475 * @sax: a SAX handler
14476 * @user_data: The user data returned on SAX callbacks
14477 * @buffer: an in-memory XML document input
14478 * @size: the length of the XML document in bytes
14480 * A better SAX parsing routine.
14481 * parse an XML in-memory buffer and call the given SAX handler routines.
14483 * Returns 0 in case of success or a error number otherwise
14485 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax
, void *user_data
,
14486 const char *buffer
, int size
) {
14488 xmlParserCtxtPtr ctxt
;
14492 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
14493 if (ctxt
== NULL
) return -1;
14494 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
14495 xmlFree(ctxt
->sax
);
14497 xmlDetectSAX2(ctxt
);
14499 if (user_data
!= NULL
)
14500 ctxt
->userData
= user_data
;
14502 xmlParseDocument(ctxt
);
14504 if (ctxt
->wellFormed
)
14507 if (ctxt
->errNo
!= 0)
14514 if (ctxt
->myDoc
!= NULL
) {
14515 xmlFreeDoc(ctxt
->myDoc
);
14516 ctxt
->myDoc
= NULL
;
14518 xmlFreeParserCtxt(ctxt
);
14522 #endif /* LIBXML_SAX1_ENABLED */
14525 * xmlCreateDocParserCtxt:
14526 * @cur: a pointer to an array of xmlChar
14528 * Creates a parser context for an XML in-memory document.
14530 * Returns the new parser context or NULL
14533 xmlCreateDocParserCtxt(const xmlChar
*cur
) {
14538 len
= xmlStrlen(cur
);
14539 return(xmlCreateMemoryParserCtxt((const char *)cur
, len
));
14542 #ifdef LIBXML_SAX1_ENABLED
14545 * @sax: the SAX handler block
14546 * @cur: a pointer to an array of xmlChar
14547 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14550 * parse an XML in-memory document and build a tree.
14551 * It use the given SAX function block to handle the parsing callback.
14552 * If sax is NULL, fallback to the default DOM tree building routines.
14554 * Returns the resulting document tree
14558 xmlSAXParseDoc(xmlSAXHandlerPtr sax
, const xmlChar
*cur
, int recovery
) {
14560 xmlParserCtxtPtr ctxt
;
14561 xmlSAXHandlerPtr oldsax
= NULL
;
14563 if (cur
== NULL
) return(NULL
);
14566 ctxt
= xmlCreateDocParserCtxt(cur
);
14567 if (ctxt
== NULL
) return(NULL
);
14569 oldsax
= ctxt
->sax
;
14571 ctxt
->userData
= NULL
;
14573 xmlDetectSAX2(ctxt
);
14575 xmlParseDocument(ctxt
);
14576 if ((ctxt
->wellFormed
) || recovery
) ret
= ctxt
->myDoc
;
14579 xmlFreeDoc(ctxt
->myDoc
);
14580 ctxt
->myDoc
= NULL
;
14583 ctxt
->sax
= oldsax
;
14584 xmlFreeParserCtxt(ctxt
);
14591 * @cur: a pointer to an array of xmlChar
14593 * parse an XML in-memory document and build a tree.
14595 * Returns the resulting document tree
14599 xmlParseDoc(const xmlChar
*cur
) {
14600 return(xmlSAXParseDoc(NULL
, cur
, 0));
14602 #endif /* LIBXML_SAX1_ENABLED */
14604 #ifdef LIBXML_LEGACY_ENABLED
14605 /************************************************************************
14607 * Specific function to keep track of entities references *
14608 * and used by the XSLT debugger *
14610 ************************************************************************/
14612 static xmlEntityReferenceFunc xmlEntityRefFunc
= NULL
;
14615 * xmlAddEntityReference:
14616 * @ent : A valid entity
14617 * @firstNode : A valid first node for children of entity
14618 * @lastNode : A valid last node of children entity
14620 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14623 xmlAddEntityReference(xmlEntityPtr ent
, xmlNodePtr firstNode
,
14624 xmlNodePtr lastNode
)
14626 if (xmlEntityRefFunc
!= NULL
) {
14627 (*xmlEntityRefFunc
) (ent
, firstNode
, lastNode
);
14633 * xmlSetEntityReferenceFunc:
14634 * @func: A valid function
14636 * Set the function to call call back when a xml reference has been made
14639 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func
)
14641 xmlEntityRefFunc
= func
;
14643 #endif /* LIBXML_LEGACY_ENABLED */
14645 /************************************************************************
14649 ************************************************************************/
14651 #ifdef LIBXML_XPATH_ENABLED
14652 #include <libxml/xpath.h>
14655 extern void XMLCDECL
xmlGenericErrorDefaultFunc(void *ctx
, const char *msg
, ...);
14656 static int xmlParserInitialized
= 0;
14661 * Initialization function for the XML parser.
14662 * This is not reentrant. Call once before processing in case of
14663 * use in multithreaded programs.
14667 xmlInitParser(void) {
14668 if (xmlParserInitialized
!= 0)
14671 #if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14672 if (xmlFree
== free
)
14673 atexit(xmlCleanupParser
);
14676 #ifdef LIBXML_THREAD_ENABLED
14677 __xmlGlobalInitMutexLock();
14678 if (xmlParserInitialized
== 0) {
14682 if ((xmlGenericError
== xmlGenericErrorDefaultFunc
) ||
14683 (xmlGenericError
== NULL
))
14684 initGenericErrorDefaultFunc(NULL
);
14686 xmlInitializeDict();
14687 xmlInitCharEncodingHandlers();
14688 xmlDefaultSAXHandlerInit();
14689 xmlRegisterDefaultInputCallbacks();
14690 #ifdef LIBXML_OUTPUT_ENABLED
14691 xmlRegisterDefaultOutputCallbacks();
14692 #endif /* LIBXML_OUTPUT_ENABLED */
14693 #ifdef LIBXML_HTML_ENABLED
14694 htmlInitAutoClose();
14695 htmlDefaultSAXHandlerInit();
14697 #ifdef LIBXML_XPATH_ENABLED
14700 xmlParserInitialized
= 1;
14701 #ifdef LIBXML_THREAD_ENABLED
14703 __xmlGlobalInitMutexUnlock();
14708 * xmlCleanupParser:
14710 * This function name is somewhat misleading. It does not clean up
14711 * parser state, it cleans up memory allocated by the library itself.
14712 * It is a cleanup function for the XML library. It tries to reclaim all
14713 * related global memory allocated for the library processing.
14714 * It doesn't deallocate any document related memory. One should
14715 * call xmlCleanupParser() only when the process has finished using
14716 * the library and all XML/HTML documents built with it.
14717 * See also xmlInitParser() which has the opposite function of preparing
14718 * the library for operations.
14720 * WARNING: if your application is multithreaded or has plugin support
14721 * calling this may crash the application if another thread or
14722 * a plugin is still using libxml2. It's sometimes very hard to
14723 * guess if libxml2 is in use in the application, some libraries
14724 * or plugins may use it without notice. In case of doubt abstain
14725 * from calling this function or do it just before calling exit()
14726 * to avoid leak reports from valgrind !
14730 xmlCleanupParser(void) {
14731 if (!xmlParserInitialized
)
14734 xmlCleanupCharEncodingHandlers();
14735 #ifdef LIBXML_CATALOG_ENABLED
14736 xmlCatalogCleanup();
14739 xmlCleanupInputCallbacks();
14740 #ifdef LIBXML_OUTPUT_ENABLED
14741 xmlCleanupOutputCallbacks();
14743 #ifdef LIBXML_SCHEMAS_ENABLED
14744 xmlSchemaCleanupTypes();
14745 xmlRelaxNGCleanupTypes();
14747 xmlCleanupGlobals();
14748 xmlCleanupThreads(); /* must be last if called not from the main thread */
14749 xmlCleanupMemory();
14750 xmlParserInitialized
= 0;
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Library destructor: releases the library globals through
 * xmlCleanupParser(), but only while the default deallocator is in use.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14767 /************************************************************************
14769 * New set (2.6.0) of simpler and more flexible APIs *
14771 ************************************************************************/
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and when "dict" is NULL the
 * string is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement: "DICT_FREE(x);" can then be used safely as the branch
 * of an if/else without capturing the else or leaving a stray ';'.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14787 * @ctxt: an XML parser context
14789 * Reset a parser context
14792 xmlCtxtReset(xmlParserCtxtPtr ctxt
)
14794 xmlParserInputPtr input
;
14802 while ((input
= inputPop(ctxt
)) != NULL
) { /* Non consuming */
14803 xmlFreeInputStream(input
);
14806 ctxt
->input
= NULL
;
14809 if (ctxt
->spaceTab
!= NULL
) {
14810 ctxt
->spaceTab
[0] = -1;
14811 ctxt
->space
= &ctxt
->spaceTab
[0];
14813 ctxt
->space
= NULL
;
14825 DICT_FREE(ctxt
->version
);
14826 ctxt
->version
= NULL
;
14827 DICT_FREE(ctxt
->encoding
);
14828 ctxt
->encoding
= NULL
;
14829 DICT_FREE(ctxt
->directory
);
14830 ctxt
->directory
= NULL
;
14831 DICT_FREE(ctxt
->extSubURI
);
14832 ctxt
->extSubURI
= NULL
;
14833 DICT_FREE(ctxt
->extSubSystem
);
14834 ctxt
->extSubSystem
= NULL
;
14835 if (ctxt
->myDoc
!= NULL
)
14836 xmlFreeDoc(ctxt
->myDoc
);
14837 ctxt
->myDoc
= NULL
;
14839 ctxt
->standalone
= -1;
14840 ctxt
->hasExternalSubset
= 0;
14841 ctxt
->hasPErefs
= 0;
14843 ctxt
->external
= 0;
14844 ctxt
->instate
= XML_PARSER_START
;
14847 ctxt
->wellFormed
= 1;
14848 ctxt
->nsWellFormed
= 1;
14849 ctxt
->disableSAX
= 0;
14852 ctxt
->vctxt
.userData
= ctxt
;
14853 ctxt
->vctxt
.error
= xmlParserValidityError
;
14854 ctxt
->vctxt
.warning
= xmlParserValidityWarning
;
14856 ctxt
->record_info
= 0;
14857 ctxt
->checkIndex
= 0;
14858 ctxt
->inSubset
= 0;
14859 ctxt
->errNo
= XML_ERR_OK
;
14861 ctxt
->charset
= XML_CHAR_ENCODING_UTF8
;
14862 ctxt
->catalogs
= NULL
;
14863 ctxt
->nbentities
= 0;
14864 ctxt
->sizeentities
= 0;
14865 ctxt
->sizeentcopy
= 0;
14866 xmlInitNodeInfoSeq(&ctxt
->node_seq
);
14868 if (ctxt
->attsDefault
!= NULL
) {
14869 xmlHashFree(ctxt
->attsDefault
, xmlHashDefaultDeallocator
);
14870 ctxt
->attsDefault
= NULL
;
14872 if (ctxt
->attsSpecial
!= NULL
) {
14873 xmlHashFree(ctxt
->attsSpecial
, NULL
);
14874 ctxt
->attsSpecial
= NULL
;
14877 #ifdef LIBXML_CATALOG_ENABLED
14878 if (ctxt
->catalogs
!= NULL
)
14879 xmlCatalogFreeLocal(ctxt
->catalogs
);
14881 if (ctxt
->lastError
.code
!= XML_ERR_OK
)
14882 xmlResetError(&ctxt
->lastError
);
14886 * xmlCtxtResetPush:
14887 * @ctxt: an XML parser context
14888 * @chunk: a pointer to an array of chars
14889 * @size: number of chars in the array
14890 * @filename: an optional file name or URI
14891 * @encoding: the document encoding, or NULL
14893 * Reset a push parser context
14895 * Returns 0 in case of success and 1 in case of error
14898 xmlCtxtResetPush(xmlParserCtxtPtr ctxt
, const char *chunk
,
14899 int size
, const char *filename
, const char *encoding
)
14901 xmlParserInputPtr inputStream
;
14902 xmlParserInputBufferPtr buf
;
14903 xmlCharEncoding enc
= XML_CHAR_ENCODING_NONE
;
14908 if ((encoding
== NULL
) && (chunk
!= NULL
) && (size
>= 4))
14909 enc
= xmlDetectCharEncoding((const xmlChar
*) chunk
, size
);
14911 buf
= xmlAllocParserInputBuffer(enc
);
14915 if (ctxt
== NULL
) {
14916 xmlFreeParserInputBuffer(buf
);
14920 xmlCtxtReset(ctxt
);
14922 if (filename
== NULL
) {
14923 ctxt
->directory
= NULL
;
14925 ctxt
->directory
= xmlParserGetDirectory(filename
);
14928 inputStream
= xmlNewInputStream(ctxt
);
14929 if (inputStream
== NULL
) {
14930 xmlFreeParserInputBuffer(buf
);
14934 if (filename
== NULL
)
14935 inputStream
->filename
= NULL
;
14937 inputStream
->filename
= (char *)
14938 xmlCanonicPath((const xmlChar
*) filename
);
14939 inputStream
->buf
= buf
;
14940 xmlBufResetInput(buf
->buffer
, inputStream
);
14942 inputPush(ctxt
, inputStream
);
14944 if ((size
> 0) && (chunk
!= NULL
) && (ctxt
->input
!= NULL
) &&
14945 (ctxt
->input
->buf
!= NULL
)) {
14946 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
, ctxt
->input
);
14947 size_t cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
14949 xmlParserInputBufferPush(ctxt
->input
->buf
, size
, chunk
);
14951 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
, base
, cur
);
14953 xmlGenericError(xmlGenericErrorContext
, "PP: pushed %d\n", size
);
14957 if (encoding
!= NULL
) {
14958 xmlCharEncodingHandlerPtr hdlr
;
14960 if (ctxt
->encoding
!= NULL
)
14961 xmlFree((xmlChar
*) ctxt
->encoding
);
14962 ctxt
->encoding
= xmlStrdup((const xmlChar
*) encoding
);
14964 hdlr
= xmlFindCharEncodingHandler(encoding
);
14965 if (hdlr
!= NULL
) {
14966 xmlSwitchToEncoding(ctxt
, hdlr
);
14968 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNSUPPORTED_ENCODING
,
14969 "Unsupported encoding %s\n", BAD_CAST encoding
);
14971 } else if (enc
!= XML_CHAR_ENCODING_NONE
) {
14972 xmlSwitchEncoding(ctxt
, enc
);
14980 * xmlCtxtUseOptionsInternal:
14981 * @ctxt: an XML parser context
14982 * @options: a combination of xmlParserOption
14983 * @encoding: the user provided encoding to use
14985 * Applies the options to the parser context
14987 * Returns 0 in case of success, the set of unknown or unimplemented options
14988 * in case of error.
14991 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt
, int options
, const char *encoding
)
14995 if (encoding
!= NULL
) {
14996 if (ctxt
->encoding
!= NULL
)
14997 xmlFree((xmlChar
*) ctxt
->encoding
);
14998 ctxt
->encoding
= xmlStrdup((const xmlChar
*) encoding
);
15000 if (options
& XML_PARSE_RECOVER
) {
15001 ctxt
->recovery
= 1;
15002 options
-= XML_PARSE_RECOVER
;
15003 ctxt
->options
|= XML_PARSE_RECOVER
;
15005 ctxt
->recovery
= 0;
15006 if (options
& XML_PARSE_DTDLOAD
) {
15007 ctxt
->loadsubset
= XML_DETECT_IDS
;
15008 options
-= XML_PARSE_DTDLOAD
;
15009 ctxt
->options
|= XML_PARSE_DTDLOAD
;
15011 ctxt
->loadsubset
= 0;
15012 if (options
& XML_PARSE_DTDATTR
) {
15013 ctxt
->loadsubset
|= XML_COMPLETE_ATTRS
;
15014 options
-= XML_PARSE_DTDATTR
;
15015 ctxt
->options
|= XML_PARSE_DTDATTR
;
15017 if (options
& XML_PARSE_NOENT
) {
15018 ctxt
->replaceEntities
= 1;
15019 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15020 options
-= XML_PARSE_NOENT
;
15021 ctxt
->options
|= XML_PARSE_NOENT
;
15023 ctxt
->replaceEntities
= 0;
15024 if (options
& XML_PARSE_PEDANTIC
) {
15025 ctxt
->pedantic
= 1;
15026 options
-= XML_PARSE_PEDANTIC
;
15027 ctxt
->options
|= XML_PARSE_PEDANTIC
;
15029 ctxt
->pedantic
= 0;
15030 if (options
& XML_PARSE_NOBLANKS
) {
15031 ctxt
->keepBlanks
= 0;
15032 ctxt
->sax
->ignorableWhitespace
= xmlSAX2IgnorableWhitespace
;
15033 options
-= XML_PARSE_NOBLANKS
;
15034 ctxt
->options
|= XML_PARSE_NOBLANKS
;
15036 ctxt
->keepBlanks
= 1;
15037 if (options
& XML_PARSE_DTDVALID
) {
15038 ctxt
->validate
= 1;
15039 if (options
& XML_PARSE_NOWARNING
)
15040 ctxt
->vctxt
.warning
= NULL
;
15041 if (options
& XML_PARSE_NOERROR
)
15042 ctxt
->vctxt
.error
= NULL
;
15043 options
-= XML_PARSE_DTDVALID
;
15044 ctxt
->options
|= XML_PARSE_DTDVALID
;
15046 ctxt
->validate
= 0;
15047 if (options
& XML_PARSE_NOWARNING
) {
15048 ctxt
->sax
->warning
= NULL
;
15049 options
-= XML_PARSE_NOWARNING
;
15051 if (options
& XML_PARSE_NOERROR
) {
15052 ctxt
->sax
->error
= NULL
;
15053 ctxt
->sax
->fatalError
= NULL
;
15054 options
-= XML_PARSE_NOERROR
;
15056 #ifdef LIBXML_SAX1_ENABLED
15057 if (options
& XML_PARSE_SAX1
) {
15058 ctxt
->sax
->startElement
= xmlSAX2StartElement
;
15059 ctxt
->sax
->endElement
= xmlSAX2EndElement
;
15060 ctxt
->sax
->startElementNs
= NULL
;
15061 ctxt
->sax
->endElementNs
= NULL
;
15062 ctxt
->sax
->initialized
= 1;
15063 options
-= XML_PARSE_SAX1
;
15064 ctxt
->options
|= XML_PARSE_SAX1
;
15066 #endif /* LIBXML_SAX1_ENABLED */
15067 if (options
& XML_PARSE_NODICT
) {
15068 ctxt
->dictNames
= 0;
15069 options
-= XML_PARSE_NODICT
;
15070 ctxt
->options
|= XML_PARSE_NODICT
;
15072 ctxt
->dictNames
= 1;
15074 if (options
& XML_PARSE_NOCDATA
) {
15075 ctxt
->sax
->cdataBlock
= NULL
;
15076 options
-= XML_PARSE_NOCDATA
;
15077 ctxt
->options
|= XML_PARSE_NOCDATA
;
15079 if (options
& XML_PARSE_NSCLEAN
) {
15080 ctxt
->options
|= XML_PARSE_NSCLEAN
;
15081 options
-= XML_PARSE_NSCLEAN
;
15083 if (options
& XML_PARSE_NONET
) {
15084 ctxt
->options
|= XML_PARSE_NONET
;
15085 options
-= XML_PARSE_NONET
;
15087 if (options
& XML_PARSE_COMPACT
) {
15088 ctxt
->options
|= XML_PARSE_COMPACT
;
15089 options
-= XML_PARSE_COMPACT
;
15091 if (options
& XML_PARSE_OLD10
) {
15092 ctxt
->options
|= XML_PARSE_OLD10
;
15093 options
-= XML_PARSE_OLD10
;
15095 if (options
& XML_PARSE_NOBASEFIX
) {
15096 ctxt
->options
|= XML_PARSE_NOBASEFIX
;
15097 options
-= XML_PARSE_NOBASEFIX
;
15099 if (options
& XML_PARSE_HUGE
) {
15100 ctxt
->options
|= XML_PARSE_HUGE
;
15101 options
-= XML_PARSE_HUGE
;
15102 if (ctxt
->dict
!= NULL
)
15103 xmlDictSetLimit(ctxt
->dict
, 0);
15105 if (options
& XML_PARSE_OLDSAX
) {
15106 ctxt
->options
|= XML_PARSE_OLDSAX
;
15107 options
-= XML_PARSE_OLDSAX
;
15109 if (options
& XML_PARSE_IGNORE_ENC
) {
15110 ctxt
->options
|= XML_PARSE_IGNORE_ENC
;
15111 options
-= XML_PARSE_IGNORE_ENC
;
15113 if (options
& XML_PARSE_BIG_LINES
) {
15114 ctxt
->options
|= XML_PARSE_BIG_LINES
;
15115 options
-= XML_PARSE_BIG_LINES
;
15117 ctxt
->linenumbers
= 1;
15122 * xmlCtxtUseOptions:
15123 * @ctxt: an XML parser context
15124 * @options: a combination of xmlParserOption
15126 * Applies the options to the parser context
15128 * Returns 0 in case of success, the set of unknown or unimplemented options
15129 * in case of error.
15132 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt
, int options
)
15134 return(xmlCtxtUseOptionsInternal(ctxt
, options
, NULL
));
15139 * @ctxt: an XML parser context
15140 * @URL: the base URL to use for the document
15141 * @encoding: the document encoding, or NULL
15142 * @options: a combination of xmlParserOption
15143 * @reuse: keep the context for reuse
15145 * Common front-end for the xmlRead functions
15147 * Returns the resulting document tree or NULL
15150 xmlDoRead(xmlParserCtxtPtr ctxt
, const char *URL
, const char *encoding
,
15151 int options
, int reuse
)
15155 xmlCtxtUseOptionsInternal(ctxt
, options
, encoding
);
15156 if (encoding
!= NULL
) {
15157 xmlCharEncodingHandlerPtr hdlr
;
15159 hdlr
= xmlFindCharEncodingHandler(encoding
);
15161 xmlSwitchToEncoding(ctxt
, hdlr
);
15163 if ((URL
!= NULL
) && (ctxt
->input
!= NULL
) &&
15164 (ctxt
->input
->filename
== NULL
))
15165 ctxt
->input
->filename
= (char *) xmlStrdup((const xmlChar
*) URL
);
15166 xmlParseDocument(ctxt
);
15167 if ((ctxt
->wellFormed
) || ctxt
->recovery
)
15171 if (ctxt
->myDoc
!= NULL
) {
15172 xmlFreeDoc(ctxt
->myDoc
);
15175 ctxt
->myDoc
= NULL
;
15177 xmlFreeParserCtxt(ctxt
);
15185 * @cur: a pointer to a zero terminated string
15186 * @URL: the base URL to use for the document
15187 * @encoding: the document encoding, or NULL
15188 * @options: a combination of xmlParserOption
15190 * parse an XML in-memory document and build a tree.
15192 * Returns the resulting document tree
15195 xmlReadDoc(const xmlChar
* cur
, const char *URL
, const char *encoding
, int options
)
15197 xmlParserCtxtPtr ctxt
;
15203 ctxt
= xmlCreateDocParserCtxt(cur
);
15206 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
15211 * @filename: a file or URL
15212 * @encoding: the document encoding, or NULL
15213 * @options: a combination of xmlParserOption
15215 * parse an XML file from the filesystem or the network.
15217 * Returns the resulting document tree
15220 xmlReadFile(const char *filename
, const char *encoding
, int options
)
15222 xmlParserCtxtPtr ctxt
;
15225 ctxt
= xmlCreateURLParserCtxt(filename
, options
);
15228 return (xmlDoRead(ctxt
, NULL
, encoding
, options
, 0));
15233 * @buffer: a pointer to a char array
15234 * @size: the size of the array
15235 * @URL: the base URL to use for the document
15236 * @encoding: the document encoding, or NULL
15237 * @options: a combination of xmlParserOption
15239 * parse an XML in-memory document and build a tree.
15241 * Returns the resulting document tree
15244 xmlReadMemory(const char *buffer
, int size
, const char *URL
, const char *encoding
, int options
)
15246 xmlParserCtxtPtr ctxt
;
15249 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
15252 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
15257 * @fd: an open file descriptor
15258 * @URL: the base URL to use for the document
15259 * @encoding: the document encoding, or NULL
15260 * @options: a combination of xmlParserOption
15262 * parse an XML from a file descriptor and build a tree.
15263 * NOTE that the file descriptor will not be closed when the
15264 * reader is closed or reset.
15266 * Returns the resulting document tree
15269 xmlReadFd(int fd
, const char *URL
, const char *encoding
, int options
)
15271 xmlParserCtxtPtr ctxt
;
15272 xmlParserInputBufferPtr input
;
15273 xmlParserInputPtr stream
;
15279 input
= xmlParserInputBufferCreateFd(fd
, XML_CHAR_ENCODING_NONE
);
15282 input
->closecallback
= NULL
;
15283 ctxt
= xmlNewParserCtxt();
15284 if (ctxt
== NULL
) {
15285 xmlFreeParserInputBuffer(input
);
15288 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
15289 if (stream
== NULL
) {
15290 xmlFreeParserInputBuffer(input
);
15291 xmlFreeParserCtxt(ctxt
);
15294 inputPush(ctxt
, stream
);
15295 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
15300 * @ioread: an I/O read function
15301 * @ioclose: an I/O close function
15302 * @ioctx: an I/O handler
15303 * @URL: the base URL to use for the document
15304 * @encoding: the document encoding, or NULL
15305 * @options: a combination of xmlParserOption
15307 * parse an XML document from I/O functions and source and build a tree.
15309 * Returns the resulting document tree
15312 xmlReadIO(xmlInputReadCallback ioread
, xmlInputCloseCallback ioclose
,
15313 void *ioctx
, const char *URL
, const char *encoding
, int options
)
15315 xmlParserCtxtPtr ctxt
;
15316 xmlParserInputBufferPtr input
;
15317 xmlParserInputPtr stream
;
15319 if (ioread
== NULL
)
15323 input
= xmlParserInputBufferCreateIO(ioread
, ioclose
, ioctx
,
15324 XML_CHAR_ENCODING_NONE
);
15325 if (input
== NULL
) {
15326 if (ioclose
!= NULL
)
15330 ctxt
= xmlNewParserCtxt();
15331 if (ctxt
== NULL
) {
15332 xmlFreeParserInputBuffer(input
);
15335 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
15336 if (stream
== NULL
) {
15337 xmlFreeParserInputBuffer(input
);
15338 xmlFreeParserCtxt(ctxt
);
15341 inputPush(ctxt
, stream
);
15342 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
15347 * @ctxt: an XML parser context
15348 * @cur: a pointer to a zero terminated string
15349 * @URL: the base URL to use for the document
15350 * @encoding: the document encoding, or NULL
15351 * @options: a combination of xmlParserOption
15353 * parse an XML in-memory document and build a tree.
15354 * This reuses the existing @ctxt parser context
15356 * Returns the resulting document tree
15359 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt
, const xmlChar
* cur
,
15360 const char *URL
, const char *encoding
, int options
)
15362 xmlParserInputPtr stream
;
15370 xmlCtxtReset(ctxt
);
15372 stream
= xmlNewStringInputStream(ctxt
, cur
);
15373 if (stream
== NULL
) {
15376 inputPush(ctxt
, stream
);
15377 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));
15382 * @ctxt: an XML parser context
15383 * @filename: a file or URL
15384 * @encoding: the document encoding, or NULL
15385 * @options: a combination of xmlParserOption
15387 * parse an XML file from the filesystem or the network.
15388 * This reuses the existing @ctxt parser context
15390 * Returns the resulting document tree
15393 xmlCtxtReadFile(xmlParserCtxtPtr ctxt
, const char *filename
,
15394 const char *encoding
, int options
)
15396 xmlParserInputPtr stream
;
15398 if (filename
== NULL
)
15404 xmlCtxtReset(ctxt
);
15406 stream
= xmlLoadExternalEntity(filename
, NULL
, ctxt
);
15407 if (stream
== NULL
) {
15410 inputPush(ctxt
, stream
);
15411 return (xmlDoRead(ctxt
, NULL
, encoding
, options
, 1));
15415 * xmlCtxtReadMemory:
15416 * @ctxt: an XML parser context
15417 * @buffer: a pointer to a char array
15418 * @size: the size of the array
15419 * @URL: the base URL to use for the document
15420 * @encoding: the document encoding, or NULL
15421 * @options: a combination of xmlParserOption
15423 * parse an XML in-memory document and build a tree.
15424 * This reuses the existing @ctxt parser context
15426 * Returns the resulting document tree
15429 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt
, const char *buffer
, int size
,
15430 const char *URL
, const char *encoding
, int options
)
15432 xmlParserInputBufferPtr input
;
15433 xmlParserInputPtr stream
;
15437 if (buffer
== NULL
)
15441 xmlCtxtReset(ctxt
);
15443 input
= xmlParserInputBufferCreateMem(buffer
, size
, XML_CHAR_ENCODING_NONE
);
15444 if (input
== NULL
) {
15448 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
15449 if (stream
== NULL
) {
15450 xmlFreeParserInputBuffer(input
);
15454 inputPush(ctxt
, stream
);
15455 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));
15460 * @ctxt: an XML parser context
15461 * @fd: an open file descriptor
15462 * @URL: the base URL to use for the document
15463 * @encoding: the document encoding, or NULL
15464 * @options: a combination of xmlParserOption
15466 * parse an XML from a file descriptor and build a tree.
15467 * This reuses the existing @ctxt parser context
15468 * NOTE that the file descriptor will not be closed when the
15469 * reader is closed or reset.
15471 * Returns the resulting document tree
15474 xmlCtxtReadFd(xmlParserCtxtPtr ctxt
, int fd
,
15475 const char *URL
, const char *encoding
, int options
)
15477 xmlParserInputBufferPtr input
;
15478 xmlParserInputPtr stream
;
15486 xmlCtxtReset(ctxt
);
15489 input
= xmlParserInputBufferCreateFd(fd
, XML_CHAR_ENCODING_NONE
);
15492 input
->closecallback
= NULL
;
15493 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
15494 if (stream
== NULL
) {
15495 xmlFreeParserInputBuffer(input
);
15498 inputPush(ctxt
, stream
);
15499 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));
15504 * @ctxt: an XML parser context
15505 * @ioread: an I/O read function
15506 * @ioclose: an I/O close function
15507 * @ioctx: an I/O handler
15508 * @URL: the base URL to use for the document
15509 * @encoding: the document encoding, or NULL
15510 * @options: a combination of xmlParserOption
15512 * parse an XML document from I/O functions and source and build a tree.
15513 * This reuses the existing @ctxt parser context
15515 * Returns the resulting document tree
15518 xmlCtxtReadIO(xmlParserCtxtPtr ctxt
, xmlInputReadCallback ioread
,
15519 xmlInputCloseCallback ioclose
, void *ioctx
,
15521 const char *encoding
, int options
)
15523 xmlParserInputBufferPtr input
;
15524 xmlParserInputPtr stream
;
15526 if (ioread
== NULL
)
15532 xmlCtxtReset(ctxt
);
15534 input
= xmlParserInputBufferCreateIO(ioread
, ioclose
, ioctx
,
15535 XML_CHAR_ENCODING_NONE
);
15536 if (input
== NULL
) {
15537 if (ioclose
!= NULL
)
15541 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
15542 if (stream
== NULL
) {
15543 xmlFreeParserInputBuffer(input
);
15546 inputPush(ctxt
, stream
);
15547 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));