2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
28 * See Copyright for the status of this software.
33 /* To avoid EBCDIC trouble when parsing on zOS */
35 #pragma convert("ISO8859-1")
42 #define XML_DIR_SEP '\\'
44 #define XML_DIR_SEP '/'
54 #include <libxml/xmlmemory.h>
55 #include <libxml/threads.h>
56 #include <libxml/globals.h>
57 #include <libxml/tree.h>
58 #include <libxml/parser.h>
59 #include <libxml/parserInternals.h>
60 #include <libxml/HTMLparser.h>
61 #include <libxml/valid.h>
62 #include <libxml/entities.h>
63 #include <libxml/xmlerror.h>
64 #include <libxml/encoding.h>
65 #include <libxml/xmlIO.h>
66 #include <libxml/uri.h>
67 #ifdef LIBXML_CATALOG_ENABLED
68 #include <libxml/catalog.h>
70 #ifdef LIBXML_SCHEMAS_ENABLED
71 #include <libxml/xmlschemastypes.h>
72 #include <libxml/relaxng.h>
74 #if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75 #include <libxml/xpath.h>
78 #include "private/buf.h"
79 #include "private/dict.h"
80 #include "private/enc.h"
81 #include "private/entities.h"
82 #include "private/error.h"
83 #include "private/globals.h"
84 #include "private/html.h"
85 #include "private/io.h"
86 #include "private/memory.h"
87 #include "private/parser.h"
88 #include "private/threads.h"
89 #include "private/xpath.h"
92 const xmlChar
*prefix
;
98 static xmlParserCtxtPtr
99 xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax
, void *userData
,
100 const xmlChar
*URL
, const xmlChar
*ID
, const xmlChar
*base
,
101 xmlParserCtxtPtr pctx
);
104 xmlParseElementStart(xmlParserCtxtPtr ctxt
);
107 xmlParseElementEnd(xmlParserCtxtPtr ctxt
);
109 /************************************************************************
111 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
113 ************************************************************************/
115 #define XML_PARSER_BIG_ENTITY 1000
116 #define XML_PARSER_LOT_ENTITY 5000
119 * Constants for protection against abusive entity expansion
120 * ("billion laughs").
124 * XML_PARSER_NON_LINEAR is roughly the maximum allowed amplification factor
125 * of serialized output after entity expansion.
127 #define XML_PARSER_NON_LINEAR 5
130 * A certain amount is always allowed.
132 #define XML_PARSER_ALLOWED_EXPANSION 1000000
135 * Fixed cost for each entity reference. This crudely models processing time
136 * as well to protect, for example, against exponential expansion of empty
137 * or very short entities.
139 #define XML_ENT_FIXED_COST 20
144 * arbitrary depth limit for the XML documents that we allow to
145 * process. This is not a limitation of the parser but a safety
146 * boundary feature. It can be disabled with the XML_PARSE_HUGE
149 unsigned int xmlParserMaxDepth
= 256;
154 #define XML_PARSER_BIG_BUFFER_SIZE 300
155 #define XML_PARSER_BUFFER_SIZE 100
156 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
159 * XML_PARSER_CHUNK_SIZE
161 * When calling GROW that's the minimal amount of data
162 * the parser expects to have received. It is not a hard
163 * limit but an optimization when reading strings like Names.
164 * It is not strictly needed as long as the input's available characters
165 * are followed by 0, which should be provided by the I/O level.
167 #define XML_PARSER_CHUNK_SIZE 100
170 * List of XML prefixed PI allowed by W3C specs
173 static const char* const xmlW3CPIs
[] = {
180 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
181 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt
,
182 const xmlChar
**str
);
184 static xmlParserErrors
185 xmlParseExternalEntityPrivate(xmlDocPtr doc
, xmlParserCtxtPtr oldctxt
,
186 xmlSAXHandlerPtr sax
,
187 void *user_data
, int depth
, const xmlChar
*URL
,
188 const xmlChar
*ID
, xmlNodePtr
*list
);
191 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt
, int options
,
192 const char *encoding
);
193 #ifdef LIBXML_LEGACY_ENABLED
195 xmlAddEntityReference(xmlEntityPtr ent
, xmlNodePtr firstNode
,
196 xmlNodePtr lastNode
);
197 #endif /* LIBXML_LEGACY_ENABLED */
199 static xmlParserErrors
200 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt
,
201 const xmlChar
*string
, void *user_data
, xmlNodePtr
*lst
);
204 xmlLoadEntityContent(xmlParserCtxtPtr ctxt
, xmlEntityPtr entity
);
206 /************************************************************************
208 * Some factorized error routines *
210 ************************************************************************/
213 * xmlErrAttributeDup:
214 * @ctxt: an XML parser context
215 * @prefix: the attribute prefix
216 * @localname: the attribute localname
218 * Handle a redefinition of attribute error
/*
 * Reports an XML_ERR_ATTRIBUTE_REDEFINED fatal error for the attribute
 * identified by prefix and localname, then marks the context as no
 * longer well-formed.
 */
221 xmlErrAttributeDup(xmlParserCtxtPtr ctxt
, const xmlChar
* prefix
,
222 const xmlChar
* localname
)
/* Guard condition: context present, SAX already disabled, parser at EOF. */
224 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
225 (ctxt
->instate
== XML_PARSER_EOF
))
/* Remember the error code on the context. */
228 ctxt
->errNo
= XML_ERR_ATTRIBUTE_REDEFINED
;
/* Message form that names only the local part of the attribute. */
231 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
,
232 XML_ERR_ATTRIBUTE_REDEFINED
, XML_ERR_FATAL
, NULL
, 0,
233 (const char *) localname
, NULL
, NULL
, 0, 0,
234 "Attribute %s redefined\n", localname
);
/* Message form that names the attribute as prefix:localname. */
236 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
,
237 XML_ERR_ATTRIBUTE_REDEFINED
, XML_ERR_FATAL
, NULL
, 0,
238 (const char *) prefix
, (const char *) localname
,
239 NULL
, 0, 0, "Attribute %s:%s redefined\n", prefix
,
/* Clear wellFormed; when recovery is off, also set disableSAX. */
242 ctxt
->wellFormed
= 0;
243 if (ctxt
->recovery
== 0)
244 ctxt
->disableSAX
= 1;
250 * @ctxt: an XML parser context
251 * @error: the error number
252 * @info: extra information string
254 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
257 xmlFatalErr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
, const char *info
)
261 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
262 (ctxt
->instate
== XML_PARSER_EOF
))
265 case XML_ERR_INVALID_HEX_CHARREF
:
266 errmsg
= "CharRef: invalid hexadecimal value";
268 case XML_ERR_INVALID_DEC_CHARREF
:
269 errmsg
= "CharRef: invalid decimal value";
271 case XML_ERR_INVALID_CHARREF
:
272 errmsg
= "CharRef: invalid value";
274 case XML_ERR_INTERNAL_ERROR
:
275 errmsg
= "internal error";
277 case XML_ERR_PEREF_AT_EOF
:
278 errmsg
= "PEReference at end of document";
280 case XML_ERR_PEREF_IN_PROLOG
:
281 errmsg
= "PEReference in prolog";
283 case XML_ERR_PEREF_IN_EPILOG
:
284 errmsg
= "PEReference in epilog";
286 case XML_ERR_PEREF_NO_NAME
:
287 errmsg
= "PEReference: no name";
289 case XML_ERR_PEREF_SEMICOL_MISSING
:
290 errmsg
= "PEReference: expecting ';'";
292 case XML_ERR_ENTITY_LOOP
:
293 errmsg
= "Detected an entity reference loop";
295 case XML_ERR_ENTITY_NOT_STARTED
:
296 errmsg
= "EntityValue: \" or ' expected";
298 case XML_ERR_ENTITY_PE_INTERNAL
:
299 errmsg
= "PEReferences forbidden in internal subset";
301 case XML_ERR_ENTITY_NOT_FINISHED
:
302 errmsg
= "EntityValue: \" or ' expected";
304 case XML_ERR_ATTRIBUTE_NOT_STARTED
:
305 errmsg
= "AttValue: \" or ' expected";
307 case XML_ERR_LT_IN_ATTRIBUTE
:
308 errmsg
= "Unescaped '<' not allowed in attributes values";
310 case XML_ERR_LITERAL_NOT_STARTED
:
311 errmsg
= "SystemLiteral \" or ' expected";
313 case XML_ERR_LITERAL_NOT_FINISHED
:
314 errmsg
= "Unfinished System or Public ID \" or ' expected";
316 case XML_ERR_MISPLACED_CDATA_END
:
317 errmsg
= "Sequence ']]>' not allowed in content";
319 case XML_ERR_URI_REQUIRED
:
320 errmsg
= "SYSTEM or PUBLIC, the URI is missing";
322 case XML_ERR_PUBID_REQUIRED
:
323 errmsg
= "PUBLIC, the Public Identifier is missing";
325 case XML_ERR_HYPHEN_IN_COMMENT
:
326 errmsg
= "Comment must not contain '--' (double-hyphen)";
328 case XML_ERR_PI_NOT_STARTED
:
329 errmsg
= "xmlParsePI : no target name";
331 case XML_ERR_RESERVED_XML_NAME
:
332 errmsg
= "Invalid PI name";
334 case XML_ERR_NOTATION_NOT_STARTED
:
335 errmsg
= "NOTATION: Name expected here";
337 case XML_ERR_NOTATION_NOT_FINISHED
:
338 errmsg
= "'>' required to close NOTATION declaration";
340 case XML_ERR_VALUE_REQUIRED
:
341 errmsg
= "Entity value required";
343 case XML_ERR_URI_FRAGMENT
:
344 errmsg
= "Fragment not allowed";
346 case XML_ERR_ATTLIST_NOT_STARTED
:
347 errmsg
= "'(' required to start ATTLIST enumeration";
349 case XML_ERR_NMTOKEN_REQUIRED
:
350 errmsg
= "NmToken expected in ATTLIST enumeration";
352 case XML_ERR_ATTLIST_NOT_FINISHED
:
353 errmsg
= "')' required to finish ATTLIST enumeration";
355 case XML_ERR_MIXED_NOT_STARTED
:
356 errmsg
= "MixedContentDecl : '|' or ')*' expected";
358 case XML_ERR_PCDATA_REQUIRED
:
359 errmsg
= "MixedContentDecl : '#PCDATA' expected";
361 case XML_ERR_ELEMCONTENT_NOT_STARTED
:
362 errmsg
= "ContentDecl : Name or '(' expected";
364 case XML_ERR_ELEMCONTENT_NOT_FINISHED
:
365 errmsg
= "ContentDecl : ',' '|' or ')' expected";
367 case XML_ERR_PEREF_IN_INT_SUBSET
:
369 "PEReference: forbidden within markup decl in internal subset";
371 case XML_ERR_GT_REQUIRED
:
372 errmsg
= "expected '>'";
374 case XML_ERR_CONDSEC_INVALID
:
375 errmsg
= "XML conditional section '[' expected";
377 case XML_ERR_EXT_SUBSET_NOT_FINISHED
:
378 errmsg
= "Content error in the external subset";
380 case XML_ERR_CONDSEC_INVALID_KEYWORD
:
382 "conditional section INCLUDE or IGNORE keyword expected";
384 case XML_ERR_CONDSEC_NOT_FINISHED
:
385 errmsg
= "XML conditional section not closed";
387 case XML_ERR_XMLDECL_NOT_STARTED
:
388 errmsg
= "Text declaration '<?xml' required";
390 case XML_ERR_XMLDECL_NOT_FINISHED
:
391 errmsg
= "parsing XML declaration: '?>' expected";
393 case XML_ERR_EXT_ENTITY_STANDALONE
:
394 errmsg
= "external parsed entities cannot be standalone";
396 case XML_ERR_ENTITYREF_SEMICOL_MISSING
:
397 errmsg
= "EntityRef: expecting ';'";
399 case XML_ERR_DOCTYPE_NOT_FINISHED
:
400 errmsg
= "DOCTYPE improperly terminated";
402 case XML_ERR_LTSLASH_REQUIRED
:
403 errmsg
= "EndTag: '</' not found";
405 case XML_ERR_EQUAL_REQUIRED
:
406 errmsg
= "expected '='";
408 case XML_ERR_STRING_NOT_CLOSED
:
409 errmsg
= "String not closed expecting \" or '";
411 case XML_ERR_STRING_NOT_STARTED
:
412 errmsg
= "String not started expecting ' or \"";
414 case XML_ERR_ENCODING_NAME
:
415 errmsg
= "Invalid XML encoding name";
417 case XML_ERR_STANDALONE_VALUE
:
418 errmsg
= "standalone accepts only 'yes' or 'no'";
420 case XML_ERR_DOCUMENT_EMPTY
:
421 errmsg
= "Document is empty";
423 case XML_ERR_DOCUMENT_END
:
424 errmsg
= "Extra content at the end of the document";
426 case XML_ERR_NOT_WELL_BALANCED
:
427 errmsg
= "chunk is not well balanced";
429 case XML_ERR_EXTRA_CONTENT
:
430 errmsg
= "extra content at the end of well balanced chunk";
432 case XML_ERR_VERSION_MISSING
:
433 errmsg
= "Malformed declaration expecting version";
435 case XML_ERR_NAME_TOO_LONG
:
436 errmsg
= "Name too long";
444 errmsg
= "Unregistered error message";
449 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
, error
,
450 XML_ERR_FATAL
, NULL
, 0, info
, NULL
, NULL
, 0, 0, "%s\n",
453 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
, error
,
454 XML_ERR_FATAL
, NULL
, 0, info
, NULL
, NULL
, 0, 0, "%s: %s\n",
458 ctxt
->wellFormed
= 0;
459 if (ctxt
->recovery
== 0)
460 ctxt
->disableSAX
= 1;
466 * @ctxt: an XML parser context
467 * @error: the error number
468 * @msg: the error message
470 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
/*
 * Raises an XML_ERR_FATAL error in the XML_FROM_PARSER domain using a
 * caller-supplied message, then marks the context as not well-formed.
 */
472 static void LIBXML_ATTR_FORMAT(3,0)
473 xmlFatalErrMsg(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
/* Guard condition: context present, SAX already disabled, parser at EOF. */
476 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
477 (ctxt
->instate
== XML_PARSER_EOF
))
/* msg is passed as the argument of a fixed %s format, so it is emitted as plain text. */
481 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
, error
,
482 XML_ERR_FATAL
, NULL
, 0, NULL
, NULL
, NULL
, 0, 0, "%s", msg
);
/* Clear wellFormed; when recovery is off, also set disableSAX. */
484 ctxt
->wellFormed
= 0;
485 if (ctxt
->recovery
== 0)
486 ctxt
->disableSAX
= 1;
492 * @ctxt: an XML parser context
493 * @error: the error number
494 * @msg: the error message
/*
 * Raises an XML_ERR_WARNING level message in the XML_FROM_PARSER domain.
 * msg is used as the format and receives str1 and str2 as arguments.
 */
500 static void LIBXML_ATTR_FORMAT(3,0)
501 xmlWarningMsg(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
502 const char *msg
, const xmlChar
*str1
, const xmlChar
*str2
)
/* Structured error channel; stays NULL unless a SAX2 handler provides one. */
504 xmlStructuredErrorFunc schannel
= NULL
;
/* Guard condition: context present, SAX already disabled, parser at EOF. */
506 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
507 (ctxt
->instate
== XML_PARSER_EOF
))
/* Only a SAX block tagged with XML_SAX2_MAGIC supplies the serror callback. */
509 if ((ctxt
!= NULL
) && (ctxt
->sax
!= NULL
) &&
510 (ctxt
->sax
->initialized
== XML_SAX2_MAGIC
))
511 schannel
= ctxt
->sax
->serror
;
/* Call form that forwards the SAX warning callback when a SAX block exists. */
513 __xmlRaiseError(schannel
,
514 (ctxt
->sax
) ? ctxt
->sax
->warning
: NULL
,
516 ctxt
, NULL
, XML_FROM_PARSER
, error
,
517 XML_ERR_WARNING
, NULL
, 0,
518 (const char *) str1
, (const char *) str2
, NULL
, 0, 0,
519 msg
, (const char *) str1
, (const char *) str2
);
/* Call form with no generic channel; presumably the NULL-context path - TODO confirm. */
521 __xmlRaiseError(schannel
, NULL
, NULL
,
522 ctxt
, NULL
, XML_FROM_PARSER
, error
,
523 XML_ERR_WARNING
, NULL
, 0,
524 (const char *) str1
, (const char *) str2
, NULL
, 0, 0,
525 msg
, (const char *) str1
, (const char *) str2
);
531 * @ctxt: an XML parser context
532 * @error: the error number
533 * @msg: the error message
536 * Handle a validity error.
/*
 * Raises an XML_ERR_ERROR level message in the XML_FROM_DTD domain.
 * msg is used as the format and receives str1 and str2 as arguments.
 */
538 static void LIBXML_ATTR_FORMAT(3,0)
539 xmlValidityError(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
540 const char *msg
, const xmlChar
*str1
, const xmlChar
*str2
)
/* Structured error channel; stays NULL unless a SAX2 handler provides one. */
542 xmlStructuredErrorFunc schannel
= NULL
;
/* Guard condition: context present, SAX already disabled, parser at EOF. */
544 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
545 (ctxt
->instate
== XML_PARSER_EOF
))
/* Only a SAX block tagged with XML_SAX2_MAGIC supplies the serror callback. */
549 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->initialized
== XML_SAX2_MAGIC
))
550 schannel
= ctxt
->sax
->serror
;
/* Call form routed through the validation context callback and its user data. */
553 __xmlRaiseError(schannel
,
554 ctxt
->vctxt
.error
, ctxt
->vctxt
.userData
,
555 ctxt
, NULL
, XML_FROM_DTD
, error
,
556 XML_ERR_ERROR
, NULL
, 0, (const char *) str1
,
557 (const char *) str2
, NULL
, 0, 0,
558 msg
, (const char *) str1
, (const char *) str2
);
/* Call form with no generic channel; presumably the NULL-context path - TODO confirm. */
561 __xmlRaiseError(schannel
, NULL
, NULL
,
562 ctxt
, NULL
, XML_FROM_DTD
, error
,
563 XML_ERR_ERROR
, NULL
, 0, (const char *) str1
,
564 (const char *) str2
, NULL
, 0, 0,
565 msg
, (const char *) str1
, (const char *) str2
);
571 * @ctxt: an XML parser context
572 * @error: the error number
573 * @msg: the error message
574 * @val: an integer value
576 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
/*
 * Raises an XML_ERR_FATAL error in the XML_FROM_PARSER domain whose
 * message format consumes one integer, then marks the context as not
 * well-formed.
 */
578 static void LIBXML_ATTR_FORMAT(3,0)
579 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
580 const char *msg
, int val
)
/* Guard condition: context present, SAX already disabled, parser at EOF. */
582 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
583 (ctxt
->instate
== XML_PARSER_EOF
))
/* val is passed twice: in an integer slot of the call and as the msg argument. */
587 __xmlRaiseError(NULL
, NULL
, NULL
,
588 ctxt
, NULL
, XML_FROM_PARSER
, error
, XML_ERR_FATAL
,
589 NULL
, 0, NULL
, NULL
, NULL
, val
, 0, msg
, val
);
/* Clear wellFormed; when recovery is off, also set disableSAX. */
591 ctxt
->wellFormed
= 0;
592 if (ctxt
->recovery
== 0)
593 ctxt
->disableSAX
= 1;
598 * xmlFatalErrMsgStrIntStr:
599 * @ctxt: an XML parser context
600 * @error: the error number
601 * @msg: the error message
602 * @str1: a string info
603 * @val: an integer value
604 * @str2: a string info
606 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
/*
 * Raises an XML_ERR_FATAL error in the XML_FROM_PARSER domain whose
 * message format consumes a string, an integer and a second string, in
 * that order, then marks the context as not well-formed.
 */
608 static void LIBXML_ATTR_FORMAT(3,0)
609 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
610 const char *msg
, const xmlChar
*str1
, int val
,
/* Guard condition: context present, SAX already disabled, parser at EOF. */
613 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
614 (ctxt
->instate
== XML_PARSER_EOF
))
/* str1, val and str2 fill both the fixed slots and the msg arguments. */
618 __xmlRaiseError(NULL
, NULL
, NULL
,
619 ctxt
, NULL
, XML_FROM_PARSER
, error
, XML_ERR_FATAL
,
620 NULL
, 0, (const char *) str1
, (const char *) str2
,
621 NULL
, val
, 0, msg
, str1
, val
, str2
);
/* Clear wellFormed; when recovery is off, also set disableSAX. */
623 ctxt
->wellFormed
= 0;
624 if (ctxt
->recovery
== 0)
625 ctxt
->disableSAX
= 1;
631 * @ctxt: an XML parser context
632 * @error: the error number
633 * @msg: the error message
634 * @val: a string value
636 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
/*
 * Raises an XML_ERR_FATAL error in the XML_FROM_PARSER domain whose
 * message is associated with one string value, then marks the context as
 * not well-formed.
 */
638 static void LIBXML_ATTR_FORMAT(3,0)
639 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
640 const char *msg
, const xmlChar
* val
)
/* Guard condition: context present, SAX already disabled, parser at EOF. */
642 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
643 (ctxt
->instate
== XML_PARSER_EOF
))
/* val occupies the first string slot of the call; msg is the format. */
647 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
,
648 XML_FROM_PARSER
, error
, XML_ERR_FATAL
,
649 NULL
, 0, (const char *) val
, NULL
, NULL
, 0, 0, msg
,
/* Clear wellFormed; when recovery is off, also set disableSAX. */
652 ctxt
->wellFormed
= 0;
653 if (ctxt
->recovery
== 0)
654 ctxt
->disableSAX
= 1;
660 * @ctxt: an XML parser context
661 * @error: the error number
662 * @msg: the error message
663 * @val: a string value
665 * Handle a non fatal parser error
/*
 * Raises a non-fatal (XML_ERR_ERROR level) message in the XML_FROM_PARSER
 * domain that is associated with one string value. Unlike the fatal
 * helpers, the visible code does not touch wellFormed or disableSAX.
 */
667 static void LIBXML_ATTR_FORMAT(3,0)
668 xmlErrMsgStr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
669 const char *msg
, const xmlChar
* val
)
/* Guard condition: context present, SAX already disabled, parser at EOF. */
671 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
672 (ctxt
->instate
== XML_PARSER_EOF
))
/* val occupies the first string slot of the call; msg is the format. */
676 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
,
677 XML_FROM_PARSER
, error
, XML_ERR_ERROR
,
678 NULL
, 0, (const char *) val
, NULL
, NULL
, 0, 0, msg
,
684 * @ctxt: an XML parser context
685 * @error: the error number
687 * @info1: extra information string
688 * @info2: extra information string
690 * Handle a namespace error: reported at XML_ERR_ERROR level and clears nsWellFormed
/*
 * Raises an XML_ERR_ERROR level message in the XML_FROM_NAMESPACE domain
 * with up to three informational strings, then clears nsWellFormed on the
 * context. The general wellFormed flag is not modified by the visible code.
 */
692 static void LIBXML_ATTR_FORMAT(3,0)
693 xmlNsErr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
695 const xmlChar
* info1
, const xmlChar
* info2
,
696 const xmlChar
* info3
)
/* Guard condition: context present, SAX already disabled, parser at EOF. */
698 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
699 (ctxt
->instate
== XML_PARSER_EOF
))
/* info1..info3 fill both the three string slots and the msg arguments. */
703 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_NAMESPACE
, error
,
704 XML_ERR_ERROR
, NULL
, 0, (const char *) info1
,
705 (const char *) info2
, (const char *) info3
, 0, 0, msg
,
706 info1
, info2
, info3
);
/* Record that namespace well-formedness has been violated. */
708 ctxt
->nsWellFormed
= 0;
713 * @ctxt: an XML parser context
714 * @error: the error number
716 * @info1: extra information string
717 * @info2: extra information string
719 * Handle a namespace warning error
/*
 * Raises an XML_ERR_WARNING level message in the XML_FROM_NAMESPACE domain
 * with up to three informational strings. No context flag is changed by
 * the visible code.
 */
721 static void LIBXML_ATTR_FORMAT(3,0)
722 xmlNsWarn(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
724 const xmlChar
* info1
, const xmlChar
* info2
,
725 const xmlChar
* info3
)
/* Guard condition: context present, SAX already disabled, parser at EOF. */
727 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
728 (ctxt
->instate
== XML_PARSER_EOF
))
/* info1..info3 fill both the three string slots and the msg arguments. */
730 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_NAMESPACE
, error
,
731 XML_ERR_WARNING
, NULL
, 0, (const char *) info1
,
732 (const char *) info2
, (const char *) info3
, 0, 0, msg
,
733 info1
, info2
, info3
);
/*
 * Adds val to the counter pointed to by dst. The visible test detects the
 * case where the sum would exceed ULONG_MAX; it is written as a
 * subtraction so the check itself cannot wrap around.
 * NOTE(review): the two outcomes of the test are not visible here;
 * presumably the counter is clamped to ULONG_MAX - confirm.
 */
737 xmlSaturatedAdd(unsigned long *dst
, unsigned long val
) {
738 if (val
> ULONG_MAX
- *dst
)
/*
 * Same overflow test as xmlSaturatedAdd. Despite the SizeT suffix, the
 * val parameter is declared unsigned long here.
 * NOTE(review): the only visible caller passes a pointer difference
 * (input->cur - input->base); on targets where that type is wider than
 * unsigned long the value would be narrowed on entry - confirm.
 */
745 xmlSaturatedAddSizeT(unsigned long *dst
, unsigned long val
) {
746 if (val
> ULONG_MAX
- *dst
)
753 * xmlParserEntityCheck:
754 * @ctxt: parser context
755 * @extra: sum of unexpanded entity sizes
757 * Check for non-linear entity expansion behaviour.
759 * In some cases like xmlStringDecodeEntities, this function is called
760 * for each, possibly nested entity and its unexpanded content length.
762 * In other cases like xmlParseReference, it's only called for each
763 * top-level entity with its unexpanded content length plus the sum of
764 * the unexpanded content lengths (plus fixed cost) of all nested
767 * Summing the unexpanded lengths also adds the length of the reference.
768 * This is by design. Taking the length of the entity name into account
769 * discourages attacks that try to waste CPU time with abusively long
770 * entity names. See test/recurse/lol6.xml for example. Each call also
771 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
774 * Returns 1 on error, 0 on success.
/*
 * Accounts for the cost of one entity expansion and raises
 * XML_ERR_ENTITY_LOOP when the accumulated expansion size is out of
 * proportion to the amount of input consumed.
 */
777 xmlParserEntityCheck(xmlParserCtxtPtr ctxt
, unsigned long extra
)
779 unsigned long consumed
;
780 xmlParserInputPtr input
= ctxt
->input
;
781 xmlEntityPtr entity
= input
->entity
;
784 * Compute total consumed bytes so far, including input streams of
/* Start from the count recorded for the parent inputs. */
787 consumed
= input
->parentConsumed
;
/*
 * The current input is counted as well when it has no entity attached, or
 * when it is an external parameter entity not yet flagged XML_ENT_PARSED:
 * its own consumed counter plus the offset of cur from base.
 */
788 if ((entity
== NULL
) ||
789 ((entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
) &&
790 ((entity
->flags
& XML_ENT_PARSED
) == 0))) {
791 xmlSaturatedAdd(&consumed
, input
->consumed
);
792 xmlSaturatedAddSizeT(&consumed
, input
->cur
- input
->base
);
/* The sizeentities counter of the context is added to the consumed total. */
794 xmlSaturatedAdd(&consumed
, ctxt
->sizeentities
);
797 * Add extra cost and some fixed cost.
/* sizeentcopy grows by extra plus XML_ENT_FIXED_COST on every call. */
799 xmlSaturatedAdd(&ctxt
->sizeentcopy
, extra
);
800 xmlSaturatedAdd(&ctxt
->sizeentcopy
, XML_ENT_FIXED_COST
);
803 * It's important to always use saturation arithmetic when tracking
804 * entity sizes to make the size checks reliable. If "sizeentcopy"
805 * overflows, we have to abort.
/*
 * Error condition: sizeentcopy is above XML_PARSER_ALLOWED_EXPANSION and
 * is either at ULONG_MAX or, divided by XML_PARSER_NON_LINEAR, still
 * larger than consumed.
 */
807 if ((ctxt
->sizeentcopy
> XML_PARSER_ALLOWED_EXPANSION
) &&
808 ((ctxt
->sizeentcopy
>= ULONG_MAX
) ||
809 (ctxt
->sizeentcopy
/ XML_PARSER_NON_LINEAR
> consumed
))) {
810 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_LOOP
,
811 "Maximum entity amplification factor exceeded");
819 /************************************************************************
821 * Library wide options *
823 ************************************************************************/
827 * @feature: the feature to be examined
829 * Examines if the library has been compiled with a given feature.
831 * Returns a non-zero value if the feature exists, otherwise zero.
832 * Returns zero (0) if the feature does not exist or an
833 * unknown feature is requested, non-zero otherwise.
/*
 * Feature query keyed on an xmlFeature value. Each feature is tied to a
 * compile-time macro test, so the answer is fixed when the library is
 * built.
 */
836 xmlHasFeature(xmlFeature feature
)
839 case XML_WITH_THREAD
:
840 #ifdef LIBXML_THREAD_ENABLED
846 #ifdef LIBXML_TREE_ENABLED
851 case XML_WITH_OUTPUT
:
852 #ifdef LIBXML_OUTPUT_ENABLED
858 #ifdef LIBXML_PUSH_ENABLED
863 case XML_WITH_READER
:
864 #ifdef LIBXML_READER_ENABLED
869 case XML_WITH_PATTERN
:
870 #ifdef LIBXML_PATTERN_ENABLED
875 case XML_WITH_WRITER
:
876 #ifdef LIBXML_WRITER_ENABLED
882 #ifdef LIBXML_SAX1_ENABLED
888 #ifdef LIBXML_FTP_ENABLED
894 #ifdef LIBXML_HTTP_ENABLED
900 #ifdef LIBXML_VALID_ENABLED
906 #ifdef LIBXML_HTML_ENABLED
911 case XML_WITH_LEGACY
:
912 #ifdef LIBXML_LEGACY_ENABLED
918 #ifdef LIBXML_C14N_ENABLED
923 case XML_WITH_CATALOG
:
924 #ifdef LIBXML_CATALOG_ENABLED
930 #ifdef LIBXML_XPATH_ENABLED
936 #ifdef LIBXML_XPTR_ENABLED
941 case XML_WITH_XINCLUDE
:
942 #ifdef LIBXML_XINCLUDE_ENABLED
948 #ifdef LIBXML_ICONV_ENABLED
953 case XML_WITH_ISO8859X
:
954 #ifdef LIBXML_ISO8859X_ENABLED
959 case XML_WITH_UNICODE
:
960 #ifdef LIBXML_UNICODE_ENABLED
965 case XML_WITH_REGEXP
:
966 #ifdef LIBXML_REGEXP_ENABLED
971 case XML_WITH_AUTOMATA
:
972 #ifdef LIBXML_AUTOMATA_ENABLED
978 #ifdef LIBXML_EXPR_ENABLED
983 case XML_WITH_SCHEMAS
:
984 #ifdef LIBXML_SCHEMAS_ENABLED
989 case XML_WITH_SCHEMATRON
:
990 #ifdef LIBXML_SCHEMATRON_ENABLED
995 case XML_WITH_MODULES
:
996 #ifdef LIBXML_MODULES_ENABLED
1001 case XML_WITH_DEBUG
:
1002 #ifdef LIBXML_DEBUG_ENABLED
/* This feature is keyed on DEBUG_MEMORY_LOCATION, not a LIBXML_ prefixed macro. */
1007 case XML_WITH_DEBUG_MEM
:
1008 #ifdef DEBUG_MEMORY_LOCATION
/* No macro test is visible for this feature. */
1013 case XML_WITH_DEBUG_RUN
:
1016 #ifdef LIBXML_ZLIB_ENABLED
1022 #ifdef LIBXML_LZMA_ENABLED
1028 #ifdef LIBXML_ICU_ENABLED
1039 /************************************************************************
1041 * SAX2 defaulted attributes handling *
1043 ************************************************************************/
1047 * @ctxt: an XML parser context
1049 * Do the SAX2 detection and specific initialization
/*
 * Inspects the SAX handler block for SAX2 capability and interns the three
 * well-known strings (xml, xmlns and the XML namespace URI) in the context
 * dictionary. A NULL context is ignored.
 */
1052 xmlDetectSAX2(xmlParserCtxtPtr ctxt
) {
1053 xmlSAXHandlerPtr sax
;
1055 /* Avoid unused variable warning if features are disabled. */
1058 if (ctxt
== NULL
) return;
1060 #ifdef LIBXML_SAX1_ENABLED
/*
 * SAX2 test: the handler block is tagged XML_SAX2_MAGIC and either provides
 * a namespace-aware start or end callback, or provides neither of the
 * plain startElement and endElement callbacks.
 */
1061 if ((sax
) && (sax
->initialized
== XML_SAX2_MAGIC
) &&
1062 ((sax
->startElementNs
!= NULL
) ||
1063 (sax
->endElementNs
!= NULL
) ||
1064 ((sax
->startElement
== NULL
) && (sax
->endElement
== NULL
))))
1068 #endif /* LIBXML_SAX1_ENABLED */
/* Each lookup passes the literal length explicitly (3, 5 and 36). */
1070 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
1071 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
1072 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
/* Any NULL result from the three lookups is reported as a memory error. */
1073 if ((ctxt
->str_xml
==NULL
) || (ctxt
->str_xmlns
==NULL
) ||
1074 (ctxt
->str_xml_ns
== NULL
)) {
1075 xmlErrMemory(ctxt
, NULL
);
/*
 * Per-element table of defaulted attributes. As filled in by
 * xmlAddDefAttrs, every attribute takes five consecutive slots of values:
 * local name, prefix, value start, value end (value + len) and a marker
 * for externally declared defaults. maxAttrs counts attributes, not slots.
 */
1079 typedef struct _xmlDefAttrs xmlDefAttrs
;
1080 typedef xmlDefAttrs
*xmlDefAttrsPtr
;
1081 struct _xmlDefAttrs
{
1082 int nbAttrs
; /* number of defaulted attributes on that element */
1083 int maxAttrs
; /* the size of the array */
1084 #if __STDC_VERSION__ >= 199901L
1085 /* Using a C99 flexible array member avoids UBSan errors. */
1086 const xmlChar
*values
[]; /* array of localname/prefix/values/external */
/* Alternative declaration with room for a single five-slot entry. */
1088 const xmlChar
*values
[5];
1093 * xmlAttrNormalizeSpace:
1094 * @src: the source string
1095 * @dst: the target string
1097 * Normalize the space in non CDATA attribute values:
1098 * If the attribute type is not CDATA, then the XML processor MUST further
1099 * process the normalized attribute value by discarding any leading and
1100 * trailing space (#x20) characters, and by replacing sequences of space
1101 * (#x20) characters by a single space (#x20) character.
1102 * Note that the size of dst needs to be at least that of src, and if one doesn't need
1103 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1104 * passing src as dst is just fine.
1106 * Returns a pointer to the normalized value (dst) or NULL if no conversion
/*
 * Space normalization for non-CDATA attribute values, reading from src and
 * writing to dst. Only the argument check and the two space-skipping loops
 * are documented here.
 */
1110 xmlAttrNormalizeSpace(const xmlChar
*src
, xmlChar
*dst
)
/* Both pointers are required. */
1112 if ((src
== NULL
) || (dst
== NULL
))
/* Advance src past a run of 0x20 characters. */
1115 while (*src
== 0x20) src
++;
/* Same skip again; presumably for runs inside the value - TODO confirm. */
1118 while (*src
== 0x20) src
++;
1132 * xmlAttrNormalizeSpace2:
1133 * @src: the source string
1135 * Normalize the space in non CDATA attribute values, a slightly more complex
1136 * front end to avoid allocation problems when running on attribute values
1137 * coming from the input.
1139 * Returns a pointer to the normalized value (dst) or NULL if no conversion
/*
 * Front end to xmlAttrNormalizeSpace for values of known length. It tracks
 * how many leading spaces to drop (remove_head) and whether a normalized
 * copy has to be allocated (need_realloc); *len is updated to the new
 * length on both visible paths.
 */
1142 static const xmlChar
*
1143 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt
, xmlChar
*src
, int *len
)
1146 int remove_head
= 0;
1147 int need_realloc
= 0;
/* All three arguments are required. */
1150 if ((ctxt
== NULL
) || (src
== NULL
) || (len
== NULL
))
/* Scan over leading 0x20 characters. */
1157 while (*cur
== 0x20) {
/* Detects a space that is followed by another space or by the end of the string. */
1164 if ((*cur
== 0x20) || (*cur
== 0)) {
/* Copy path: duplicate the text after the leading spaces, normalize the copy in place. */
1174 ret
= xmlStrndup(src
+ remove_head
, i
- remove_head
+ 1);
1176 xmlErrMemory(ctxt
, NULL
);
1179 xmlAttrNormalizeSpace(ret
, ret
);
/* The new length is measured on the normalized copy. */
1180 *len
= strlen((const char *)ret
);
/* In-place path: only leading spaces need to go, so shift the text left. */
1182 } else if (remove_head
) {
1183 *len
-= remove_head
;
/* Moves 1 + *len bytes; the extra byte is presumably the terminating 0. */
1184 memmove(src
, src
+ remove_head
, 1 + *len
);
1192 * @ctxt: an XML parser context
1193 * @fullname: the element fullname
1194 * @fullattr: the attribute fullname
1195 * @value: the attribute value
1197 * Add a defaulted attribute for an element
1200 xmlAddDefAttrs(xmlParserCtxtPtr ctxt
,
1201 const xmlChar
*fullname
,
1202 const xmlChar
*fullattr
,
1203 const xmlChar
*value
) {
1204 xmlDefAttrsPtr defaults
;
1206 const xmlChar
*name
;
1207 const xmlChar
*prefix
;
1210 * Allows detecting attribute redefinitions
1212 if (ctxt
->attsSpecial
!= NULL
) {
1213 if (xmlHashLookup2(ctxt
->attsSpecial
, fullname
, fullattr
) != NULL
)
1217 if (ctxt
->attsDefault
== NULL
) {
1218 ctxt
->attsDefault
= xmlHashCreateDict(10, ctxt
->dict
);
1219 if (ctxt
->attsDefault
== NULL
)
1224 * Split the element name into prefix:localname; the strings found
1225 * are within the DTD and thus not associated with namespace names.
1227 name
= xmlSplitQName3(fullname
, &len
);
1229 name
= xmlDictLookup(ctxt
->dict
, fullname
, -1);
1232 name
= xmlDictLookup(ctxt
->dict
, name
, -1);
1233 prefix
= xmlDictLookup(ctxt
->dict
, fullname
, len
);
1237 * make sure there is some storage
1239 defaults
= xmlHashLookup2(ctxt
->attsDefault
, name
, prefix
);
1240 if (defaults
== NULL
) {
1241 defaults
= (xmlDefAttrsPtr
) xmlMalloc(sizeof(xmlDefAttrs
) +
1242 (4 * 5) * sizeof(const xmlChar
*));
1243 if (defaults
== NULL
)
1245 defaults
->nbAttrs
= 0;
1246 defaults
->maxAttrs
= 4;
1247 if (xmlHashUpdateEntry2(ctxt
->attsDefault
, name
, prefix
,
1248 defaults
, NULL
) < 0) {
1252 } else if (defaults
->nbAttrs
>= defaults
->maxAttrs
) {
1253 xmlDefAttrsPtr temp
;
1255 temp
= (xmlDefAttrsPtr
) xmlRealloc(defaults
, sizeof(xmlDefAttrs
) +
1256 (2 * defaults
->maxAttrs
* 5) * sizeof(const xmlChar
*));
1260 defaults
->maxAttrs
*= 2;
1261 if (xmlHashUpdateEntry2(ctxt
->attsDefault
, name
, prefix
,
1262 defaults
, NULL
) < 0) {
1269 * Split the attribute name into prefix:localname; the strings found
1270 * are within the DTD and thus not associated with namespace names.
1272 name
= xmlSplitQName3(fullattr
, &len
);
1274 name
= xmlDictLookup(ctxt
->dict
, fullattr
, -1);
1277 name
= xmlDictLookup(ctxt
->dict
, name
, -1);
1278 prefix
= xmlDictLookup(ctxt
->dict
, fullattr
, len
);
1281 defaults
->values
[5 * defaults
->nbAttrs
] = name
;
1282 defaults
->values
[5 * defaults
->nbAttrs
+ 1] = prefix
;
1283 /* intern the string and precompute the end */
1284 len
= xmlStrlen(value
);
1285 value
= xmlDictLookup(ctxt
->dict
, value
, len
);
1288 defaults
->values
[5 * defaults
->nbAttrs
+ 2] = value
;
1289 defaults
->values
[5 * defaults
->nbAttrs
+ 3] = value
+ len
;
1291 defaults
->values
[5 * defaults
->nbAttrs
+ 4] = BAD_CAST
"external";
1293 defaults
->values
[5 * defaults
->nbAttrs
+ 4] = NULL
;
1294 defaults
->nbAttrs
++;
1299 xmlErrMemory(ctxt
, NULL
);
1304 * xmlAddSpecialAttr:
1305 * @ctxt: an XML parser context
1306 * @fullname: the element fullname
1307 * @fullattr: the attribute fullname
1308 * @type: the attribute type
1310 * Register this attribute type
/*
 * Records the declared type of an attribute in the attsSpecial hash of the
 * context, keyed by element full name and attribute full name.
 */
1313 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt
,
1314 const xmlChar
*fullname
,
1315 const xmlChar
*fullattr
,
/* The table is created on first use and shares the context dictionary. */
1318 if (ctxt
->attsSpecial
== NULL
) {
1319 ctxt
->attsSpecial
= xmlHashCreateDict(10, ctxt
->dict
);
1320 if (ctxt
->attsSpecial
== NULL
)
/* Checks whether this element and attribute pair is already registered. */
1324 if (xmlHashLookup2(ctxt
->attsSpecial
, fullname
, fullattr
) != NULL
)
/* The type is stored directly in the payload pointer, via ptrdiff_t. */
1327 xmlHashAddEntry2(ctxt
->attsSpecial
, fullname
, fullattr
,
1328 (void *) (ptrdiff_t) type
);
/* Memory error report; presumably the failure path for table creation - TODO confirm. */
1332 xmlErrMemory(ctxt
, NULL
);
1337 * xmlCleanSpecialAttrCallback:
1339 * Removes CDATA attributes from the special attribute table
/*
 * Hash scanner callback: removes the visited entry from attsSpecial when
 * its payload, read back as an integer, equals XML_ATTRIBUTE_CDATA. The
 * third key argument is unused.
 */
1342 xmlCleanSpecialAttrCallback(void *payload
, void *data
,
1343 const xmlChar
*fullname
, const xmlChar
*fullattr
,
1344 const xmlChar
*unused ATTRIBUTE_UNUSED
) {
/* data carries the parser context. */
1345 xmlParserCtxtPtr ctxt
= (xmlParserCtxtPtr
) data
;
/* The payload is the attribute type stored as a pointer-sized integer. */
1347 if (((ptrdiff_t) payload
) == XML_ATTRIBUTE_CDATA
) {
/* No deallocator is passed for the removed payload. */
1348 xmlHashRemoveEntry2(ctxt
->attsSpecial
, fullname
, fullattr
, NULL
);
1353 * xmlCleanSpecialAttr:
1354 * @ctxt: an XML parser context
1356 * Trim the list of attributes defined to remove all those of type
1357 * CDATA as they are not special. This call should be made after the DTD
1358 * has been parsed and before starting to parse the document root.
/*
 * Scans attsSpecial with xmlCleanSpecialAttrCallback and releases the
 * table when the scan leaves it empty.
 */
1361 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt
)
/* Tests for a missing table before the scan. */
1363 if (ctxt
->attsSpecial
== NULL
)
/* The context is handed to the callback as its data argument. */
1366 xmlHashScanFull(ctxt
->attsSpecial
, xmlCleanSpecialAttrCallback
, ctxt
);
/* An empty table is freed and the pointer reset to NULL. */
1368 if (xmlHashSize(ctxt
->attsSpecial
) == 0) {
1369 xmlHashFree(ctxt
->attsSpecial
, NULL
);
1370 ctxt
->attsSpecial
= NULL
;
/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * DEPRECATED: Internal function, do not use.
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The current REC references the successors of RFC 1766, currently 5646
 *
 * http://www.rfc-editor.org/rfc/rfc5646.txt
 * langtag       = language
 *                 ["-" script]
 *                 ["-" region]
 *                 *("-" variant)
 *                 *("-" extension)
 *                 ["-" privateuse]
 * language      = 2*3ALPHA            ; shortest ISO 639 code
 *                 ["-" extlang]       ; sometimes followed by
 *                                     ; extended language subtags
 *               / 4ALPHA              ; or reserved for future use
 *               / 5*8ALPHA            ; or registered language subtag
 *
 * extlang       = 3ALPHA              ; selected ISO 639 codes
 *                 *2("-" 3ALPHA)      ; permanently reserved
 *
 * script        = 4ALPHA              ; ISO 15924 code
 *
 * region        = 2ALPHA              ; ISO 3166-1 code
 *               / 3DIGIT              ; UN M.49 code
 *
 * variant       = 5*8alphanum         ; registered variants
 *               / (DIGIT 3alphanum)
 *
 * extension     = singleton 1*("-" (2*8alphanum))
 *
 *                                     ; Single alphanumerics
 *                                     ; "x" reserved for private use
 * singleton     = DIGIT               ; 0 - 9
 *               / %x41-57             ; A - W
 *               / %x59-5A             ; Y - Z
 *               / %x61-77             ; a - w
 *               / %x79-7A             ; y - z
 *
 * It sounds right to still allow irregular i-xxx IANA and user codes too.
 * The parser below doesn't try to cope with extension or privateuse
 * that could be added but that's not interoperable anyway.
 *
 * Implementation notes: every subtag is measured as the run of ASCII
 * letters between @cur (subtag start) and @nxt (first non-letter); its
 * length decides which production it matches. Only letters are scanned,
 * so the "alphanum" variants of the grammar above are accepted only when
 * they are purely alphabetic.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang, *nxt;

    if (cur == NULL)
        return (0);
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
        ((cur[0] == 'I') && (cur[1] == '-')) ||
        ((cur[0] == 'x') && (cur[1] == '-')) ||
        ((cur[0] == 'X') && (cur[1] == '-'))) {
        /*
         * Still allow IANA code and user code which were coming
         * from the previous version of the XML-1.0 specification
         * it's deprecated but we should not fail
         */
        cur += 2;
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
            cur++;
        /* valid only if the letters run up to the end of the string */
        return(cur[0] == 0);
    }
    /* measure the primary language subtag */
    nxt = cur;
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;
    if (nxt - cur >= 4) {
        /*
         * Reserved (4ALPHA) or registered (5*8ALPHA) language subtag:
         * accepted only when it is the whole string
         */
        if ((nxt - cur > 8) || (nxt[0] != 0))
            return(0);
        return(1);
    }
    if (nxt - cur < 2)
        return(0);
    /* we got an ISO 639 code */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have extlang or script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;
    /* dispatch on the subtag length */
    if (nxt - cur == 4)
        goto script;
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 3)
        return(0);
    /* we parsed an extlang */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 4)
        return(0);
    /* we parsed a script */
script:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;

    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 2)
        return(0);
    /* we parsed a region */
region:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can just have a variant */
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;

    if ((nxt - cur < 5) || (nxt - cur > 8))
        return(0);

    /* we parsed a variant */
variant:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);
    /* extensions and private use subtags not checked */
    return (1);

region_m49:
    /*
     * Reached with nxt[0] already known to be a digit: a UN M.49 region
     * needs two more digits. The && chain stops at the first non-digit,
     * so the string terminator is never read past.
     */
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
        nxt += 3;
        goto region;
    }
    return(0);
}
1571 /************************************************************************
1573 * Parser stacks related functions and macros *
1575 ************************************************************************/
1577 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt
,
1578 const xmlChar
** str
);
1583 * @ctxt: an XML parser context
1584 * @prefix: the namespace prefix or NULL
1585 * @URL: the namespace name
1587 * Pushes a new parser namespace on top of the ns stack
1589 * Returns -1 in case of error, -2 if the namespace should be discarded
1590 * and the index in the stack otherwise.
1593 nsPush(xmlParserCtxtPtr ctxt
, const xmlChar
*prefix
, const xmlChar
*URL
)
1595 if (ctxt
->options
& XML_PARSE_NSCLEAN
) {
1597 for (i
= ctxt
->nsNr
- 2;i
>= 0;i
-= 2) {
1598 if (ctxt
->nsTab
[i
] == prefix
) {
1600 if (ctxt
->nsTab
[i
+ 1] == URL
)
1602 /* out of scope keep it */
1607 if ((ctxt
->nsMax
== 0) || (ctxt
->nsTab
== NULL
)) {
1610 ctxt
->nsTab
= (const xmlChar
**)
1611 xmlMalloc(ctxt
->nsMax
* sizeof(xmlChar
*));
1612 if (ctxt
->nsTab
== NULL
) {
1613 xmlErrMemory(ctxt
, NULL
);
1617 } else if (ctxt
->nsNr
>= ctxt
->nsMax
) {
1618 const xmlChar
** tmp
;
1620 tmp
= (const xmlChar
**) xmlRealloc((char *) ctxt
->nsTab
,
1621 ctxt
->nsMax
* sizeof(ctxt
->nsTab
[0]));
1623 xmlErrMemory(ctxt
, NULL
);
1629 ctxt
->nsTab
[ctxt
->nsNr
++] = prefix
;
1630 ctxt
->nsTab
[ctxt
->nsNr
++] = URL
;
1631 return (ctxt
->nsNr
);
1635 * @ctxt: an XML parser context
1636 * @nr: the number to pop
1638 * Pops the top @nr parser prefix/namespace from the ns stack
1640 * Returns the number of namespaces removed
1643 nsPop(xmlParserCtxtPtr ctxt
, int nr
)
1647 if (ctxt
->nsTab
== NULL
) return(0);
1648 if (ctxt
->nsNr
< nr
) {
1649 xmlGenericError(xmlGenericErrorContext
, "Pbm popping %d NS\n", nr
);
1652 if (ctxt
->nsNr
<= 0)
1655 for (i
= 0;i
< nr
;i
++) {
1657 ctxt
->nsTab
[ctxt
->nsNr
] = NULL
;
1664 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt
, int nr
) {
1665 const xmlChar
**atts
;
1669 if (nr
+ 5 > ctxt
->maxatts
) {
1670 maxatts
= ctxt
->maxatts
== 0 ? 55 : (nr
+ 5) * 2;
1671 atts
= (const xmlChar
**) xmlMalloc(
1672 maxatts
* sizeof(const xmlChar
*));
1673 if (atts
== NULL
) goto mem_error
;
1674 attallocs
= (int *) xmlRealloc((void *) ctxt
->attallocs
,
1675 (maxatts
/ 5) * sizeof(int));
1676 if (attallocs
== NULL
) {
1680 if (ctxt
->maxatts
> 0)
1681 memcpy(atts
, ctxt
->atts
, ctxt
->maxatts
* sizeof(const xmlChar
*));
1682 xmlFree(ctxt
->atts
);
1684 ctxt
->attallocs
= attallocs
;
1685 ctxt
->maxatts
= maxatts
;
1687 return(ctxt
->maxatts
);
1689 xmlErrMemory(ctxt
, NULL
);
1695 * @ctxt: an XML parser context
1696 * @value: the parser input
1698 * Pushes a new parser input on top of the input stack
1700 * Returns -1 in case of error, the index in the stack otherwise
1703 inputPush(xmlParserCtxtPtr ctxt
, xmlParserInputPtr value
)
1705 if ((ctxt
== NULL
) || (value
== NULL
))
1707 if (ctxt
->inputNr
>= ctxt
->inputMax
) {
1708 size_t newSize
= ctxt
->inputMax
* 2;
1709 xmlParserInputPtr
*tmp
;
1711 tmp
= (xmlParserInputPtr
*) xmlRealloc(ctxt
->inputTab
,
1712 newSize
* sizeof(*tmp
));
1714 xmlErrMemory(ctxt
, NULL
);
1717 ctxt
->inputTab
= tmp
;
1718 ctxt
->inputMax
= newSize
;
1720 ctxt
->inputTab
[ctxt
->inputNr
] = value
;
1721 ctxt
->input
= value
;
1722 return (ctxt
->inputNr
++);
1726 * @ctxt: an XML parser context
1728 * Pops the top parser input from the input stack
1730 * Returns the input just removed
1733 inputPop(xmlParserCtxtPtr ctxt
)
1735 xmlParserInputPtr ret
;
1739 if (ctxt
->inputNr
<= 0)
1742 if (ctxt
->inputNr
> 0)
1743 ctxt
->input
= ctxt
->inputTab
[ctxt
->inputNr
- 1];
1746 ret
= ctxt
->inputTab
[ctxt
->inputNr
];
1747 ctxt
->inputTab
[ctxt
->inputNr
] = NULL
;
1752 * @ctxt: an XML parser context
1753 * @value: the element node
1755 * DEPRECATED: Internal function, do not use.
1757 * Pushes a new element node on top of the node stack
1759 * Returns -1 in case of error, the index in the stack otherwise
1762 nodePush(xmlParserCtxtPtr ctxt
, xmlNodePtr value
)
1764 if (ctxt
== NULL
) return(0);
1765 if (ctxt
->nodeNr
>= ctxt
->nodeMax
) {
1768 tmp
= (xmlNodePtr
*) xmlRealloc(ctxt
->nodeTab
,
1770 sizeof(ctxt
->nodeTab
[0]));
1772 xmlErrMemory(ctxt
, NULL
);
1775 ctxt
->nodeTab
= tmp
;
1778 if ((((unsigned int) ctxt
->nodeNr
) > xmlParserMaxDepth
) &&
1779 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
1780 xmlFatalErrMsgInt(ctxt
, XML_ERR_INTERNAL_ERROR
,
1781 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1783 xmlHaltParser(ctxt
);
1786 ctxt
->nodeTab
[ctxt
->nodeNr
] = value
;
1788 return (ctxt
->nodeNr
++);
1793 * @ctxt: an XML parser context
1795 * DEPRECATED: Internal function, do not use.
1797 * Pops the top element node from the node stack
1799 * Returns the node just removed
1802 nodePop(xmlParserCtxtPtr ctxt
)
1806 if (ctxt
== NULL
) return(NULL
);
1807 if (ctxt
->nodeNr
<= 0)
1810 if (ctxt
->nodeNr
> 0)
1811 ctxt
->node
= ctxt
->nodeTab
[ctxt
->nodeNr
- 1];
1814 ret
= ctxt
->nodeTab
[ctxt
->nodeNr
];
1815 ctxt
->nodeTab
[ctxt
->nodeNr
] = NULL
;
1821 * @ctxt: an XML parser context
1822 * @value: the element name
1823 * @prefix: the element prefix
1824 * @URI: the element namespace name
1825 * @line: the current line number for error messages
1826 * @nsNr: the number of namespaces pushed on the namespace table
1828 * Pushes a new element name/prefix/URL on top of the name stack
1830 * Returns -1 in case of error, the index in the stack otherwise
1833 nameNsPush(xmlParserCtxtPtr ctxt
, const xmlChar
* value
,
1834 const xmlChar
*prefix
, const xmlChar
*URI
, int line
, int nsNr
)
1838 if (ctxt
->nameNr
>= ctxt
->nameMax
) {
1839 const xmlChar
* *tmp
;
1842 tmp
= (const xmlChar
* *) xmlRealloc((xmlChar
* *)ctxt
->nameTab
,
1844 sizeof(ctxt
->nameTab
[0]));
1849 ctxt
->nameTab
= tmp
;
1850 tmp2
= (xmlStartTag
*) xmlRealloc((void * *)ctxt
->pushTab
,
1852 sizeof(ctxt
->pushTab
[0]));
1857 ctxt
->pushTab
= tmp2
;
1858 } else if (ctxt
->pushTab
== NULL
) {
1859 ctxt
->pushTab
= (xmlStartTag
*) xmlMalloc(ctxt
->nameMax
*
1860 sizeof(ctxt
->pushTab
[0]));
1861 if (ctxt
->pushTab
== NULL
)
1864 ctxt
->nameTab
[ctxt
->nameNr
] = value
;
1866 tag
= &ctxt
->pushTab
[ctxt
->nameNr
];
1867 tag
->prefix
= prefix
;
1871 return (ctxt
->nameNr
++);
1873 xmlErrMemory(ctxt
, NULL
);
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the top entry of the name stack; ctxt->name then designates the
 * entry below it, or NULL when the stack became empty.
 *
 * Returns the name just removed
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
1905 * @ctxt: an XML parser context
1906 * @value: the element name
1908 * DEPRECATED: Internal function, do not use.
1910 * Pushes a new element name on top of the name stack
1912 * Returns -1 in case of error, the index in the stack otherwise
1915 namePush(xmlParserCtxtPtr ctxt
, const xmlChar
* value
)
1917 if (ctxt
== NULL
) return (-1);
1919 if (ctxt
->nameNr
>= ctxt
->nameMax
) {
1920 const xmlChar
* *tmp
;
1921 tmp
= (const xmlChar
* *) xmlRealloc((xmlChar
* *)ctxt
->nameTab
,
1923 sizeof(ctxt
->nameTab
[0]));
1927 ctxt
->nameTab
= tmp
;
1930 ctxt
->nameTab
[ctxt
->nameNr
] = value
;
1932 return (ctxt
->nameNr
++);
1934 xmlErrMemory(ctxt
, NULL
);
1940 * @ctxt: an XML parser context
1942 * DEPRECATED: Internal function, do not use.
1944 * Pops the top element name from the name stack
1946 * Returns the name just removed
1949 namePop(xmlParserCtxtPtr ctxt
)
1953 if ((ctxt
== NULL
) || (ctxt
->nameNr
<= 0))
1956 if (ctxt
->nameNr
> 0)
1957 ctxt
->name
= ctxt
->nameTab
[ctxt
->nameNr
- 1];
1960 ret
= ctxt
->nameTab
[ctxt
->nameNr
];
1961 ctxt
->nameTab
[ctxt
->nameNr
] = NULL
;
1965 static int spacePush(xmlParserCtxtPtr ctxt
, int val
) {
1966 if (ctxt
->spaceNr
>= ctxt
->spaceMax
) {
1969 ctxt
->spaceMax
*= 2;
1970 tmp
= (int *) xmlRealloc(ctxt
->spaceTab
,
1971 ctxt
->spaceMax
* sizeof(ctxt
->spaceTab
[0]));
1973 xmlErrMemory(ctxt
, NULL
);
1977 ctxt
->spaceTab
= tmp
;
1979 ctxt
->spaceTab
[ctxt
->spaceNr
] = val
;
1980 ctxt
->space
= &ctxt
->spaceTab
[ctxt
->spaceNr
];
1981 return(ctxt
->spaceNr
++);
1984 static int spacePop(xmlParserCtxtPtr ctxt
) {
1986 if (ctxt
->spaceNr
<= 0) return(0);
1988 if (ctxt
->spaceNr
> 0)
1989 ctxt
->space
= &ctxt
->spaceTab
[ctxt
->spaceNr
- 1];
1991 ctxt
->space
= &ctxt
->spaceTab
[0];
1992 ret
= ctxt
->spaceTab
[ctxt
->spaceNr
];
1993 ctxt
->spaceTab
[ctxt
->spaceNr
] = -1;
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * All of them expand to code referencing a variable named `ctxt`, which
 * must be in scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
 *           defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

/* RAW and CUR currently expand to the same expression. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1..cn): true when the first n bytes at s equal c1..cn.
 * The comparisons short-circuit left to right.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )

/*
 * SKIP: advance cur and col by val (no line accounting), then call
 * xmlParserGrow() if the new current byte is 0.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * SKIPL: like SKIP but advances one byte at a time so that '\n' bytes
 * update line/col. Note that `val` is not parenthesized in the loop test.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<val; skipl++) {					\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * SHRINK and GROW are bare `if` statements ending in ';' and do nothing
 * when ctxt->progressive is non-zero. Since they are not wrapped in
 * do { } while (0), avoid using them as the body of an unbraced if/else.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlParserShrink(ctxt);

#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/* NEXT1: advance cur and col by one, growing if the new byte is 0. */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);						\
    }

/*
 * NEXTL: update line/col for the current character, then advance cur by
 * l bytes. Unlike SKIP/NEXT1 it never calls xmlParserGrow().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF: store v at b[i] directly when l == 1, otherwise through
 * xmlCopyCharMultiByte(); i is advanced by the number of bytes written.
 * This is an unbraced if/else without a trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = v;						\
    else i += xmlCopyCharMultiByte(&b[i],v)
/**
 * xmlSkipBlankChars:
 * @ctxt:  the XML parser context
 *
 * DEPRECATED: Internal function, do not use.
 *
 * Skip all blank characters found at that point in the input streams.
 * It pops up finished entities in the process if allowable at that point.
 *
 * Two paths exist: a fast byte scan used when only the main input is
 * stacked outside the DTD state (or in the START state), and a slower one
 * that also expands parameter-entity references and pops exhausted
 * inputs. The returned counter saturates at INT_MAX.
 *
 * Returns the number of space chars skipped
 */
int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
    int res = 0;

    /*
     * It's Okay to use CUR/NEXT here since all the blanks are on
     * the ASCII range.
     */
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
        (ctxt->instate == XML_PARSER_START)) {
	const xmlChar *cur;
	/*
	 * if we are in the document content, go really fast
	 */
	cur = ctxt->input->cur;
	while (IS_BLANK_CH(*cur)) {
	    if (*cur == '\n') {
		ctxt->input->line++; ctxt->input->col = 1;
	    } else {
		ctxt->input->col++;
	    }
	    cur++;
	    if (res < INT_MAX)
		res++;
	    if (*cur == 0) {
		/*
		 * Hit a zero byte: publish the position, let the input
		 * grow, then reload cur since the buffer may have moved.
		 */
		ctxt->input->cur = cur;
		xmlParserGrow(ctxt);
		cur = ctxt->input->cur;
	    }
	}
	ctxt->input->cur = cur;
    } else {
        /* PE references are expanded in external or nested inputs only */
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));

	while (ctxt->instate != XML_PARSER_EOF) {
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
		NEXT;
	    } else if (CUR == '%') {
                /*
                 * Need to handle support of entities branching here
                 */
	        if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
                    break;
	        xmlParsePEReference(ctxt);
            } else if (CUR == 0) {
                unsigned long consumed;
                xmlEntityPtr ent;

                /* the bottom input is never popped */
                if (ctxt->inputNr <= 1)
                    break;

                /* total bytes consumed from the input about to be popped */
                consumed = ctxt->input->consumed;
                xmlSaturatedAddSizeT(&consumed,
                                     ctxt->input->cur - ctxt->input->base);

                /*
                 * Add to sizeentities when parsing an external entity
                 * for the first time.
                 */
                ent = ctxt->input->entity;
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
                    ent->flags |= XML_ENT_PARSED;

                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
                }

                xmlParserEntityCheck(ctxt, consumed);

                xmlPopInput(ctxt);
            } else {
                break;
            }

            /*
             * Also increase the counter when entering or exiting a PERef.
             * The spec says: "When a parameter-entity reference is recognized
             * in the DTD and included, its replacement text MUST be enlarged
             * by the attachment of one leading and one following space (#x20)
             * character."
             */
	    if (res < INT_MAX)
		res++;
        }
    }
    return(res);
}
2208 /************************************************************************
2210 * Commodity functions to handle entities *
2212 ************************************************************************/
2216 * @ctxt: an XML parser context
2218 * xmlPopInput: the current input pointed by ctxt->input came to an end
2219 * pop it and return the next char.
2221 * Returns the current xmlChar in the parser context
2224 xmlPopInput(xmlParserCtxtPtr ctxt
) {
2225 xmlParserInputPtr input
;
2227 if ((ctxt
== NULL
) || (ctxt
->inputNr
<= 1)) return(0);
2228 if (xmlParserDebugEntities
)
2229 xmlGenericError(xmlGenericErrorContext
,
2230 "Popping input %d\n", ctxt
->inputNr
);
2231 if ((ctxt
->inputNr
> 1) && (ctxt
->inSubset
== 0) &&
2232 (ctxt
->instate
!= XML_PARSER_EOF
))
2233 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
2234 "Unfinished entity outside the DTD");
2235 input
= inputPop(ctxt
);
2236 if (input
->entity
!= NULL
)
2237 input
->entity
->flags
&= ~XML_ENT_EXPANDING
;
2238 xmlFreeInputStream(input
);
2239 if (*ctxt
->input
->cur
== 0)
2240 xmlParserGrow(ctxt
);
2246 * @ctxt: an XML parser context
2247 * @input: an XML parser input fragment (entity, XML fragment ...).
2249 * xmlPushInput: switch to a new input stream which is stacked on top
2250 * of the previous one(s).
2251 * Returns -1 in case of error or the index in the input stack
2254 xmlPushInput(xmlParserCtxtPtr ctxt
, xmlParserInputPtr input
) {
2256 if (input
== NULL
) return(-1);
2258 if (xmlParserDebugEntities
) {
2259 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
2260 xmlGenericError(xmlGenericErrorContext
,
2261 "%s(%d): ", ctxt
->input
->filename
,
2263 xmlGenericError(xmlGenericErrorContext
,
2264 "Pushing input %d : %.30s\n", ctxt
->inputNr
+1, input
->cur
);
2266 if (((ctxt
->inputNr
> 40) && ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
2267 (ctxt
->inputNr
> 100)) {
2268 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
2269 while (ctxt
->inputNr
> 1)
2270 xmlFreeInputStream(inputPop(ctxt
));
2273 ret
= inputPush(ctxt
, input
);
2274 if (ctxt
->instate
== XML_PARSER_EOF
)
/**
 * xmlParseCharRef:
 * @ctxt:  an XML parser context
 *
 * DEPRECATED: Internal function, don't use.
 *
 * Parse a numeric character reference. Always consumes '&'.
 *
 * [66] CharRef ::= '&#' [0-9]+ ';' |
 *                  '&#x' [0-9a-fA-F]+ ';'
 *
 * [ WFC: Legal Character ]
 * Characters referred to using character references must match the
 * production for Char.
 *
 * The accumulated value is clamped to 0x110000 while parsing, so overlong
 * digit strings cannot overflow and are reported as out of bounds.
 *
 * Returns the value parsed (as an int), 0 in case of error
 */
int
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
    int val = 0;
    int count = 0;

    /*
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
     */
    if ((RAW == '&') && (NXT(1) == '#') &&
        (NXT(2) == 'x')) {
	SKIP(3);
	GROW;
	while (RAW != ';') { /* loop blocked by count */
	    /* periodically refill the input and notice a halted parser */
	    if (count++ > 20) {
		count = 0;
		GROW;
                if (ctxt->instate == XML_PARSER_EOF)
                    return(0);
	    }
	    /*
	     * NOTE(review): hex letters are additionally gated on
	     * count < 20 while decimal digits are not; confirm whether
	     * this asymmetry is intended.
	     */
	    if ((RAW >= '0') && (RAW <= '9'))
	        val = val * 16 + (CUR - '0');
	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
	        val = val * 16 + (CUR - 'a') + 10;
	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
	        val = val * 16 + (CUR - 'A') + 10;
	    else {
		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
		val = 0;
		break;
	    }
	    /* saturate instead of overflowing */
	    if (val > 0x110000)
	        val = 0x110000;

	    NEXT;
	    count++;
	}
	if (RAW == ';') {
	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
	    ctxt->input->col++;
	    ctxt->input->cur++;
	}
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
	SKIP(2);
	GROW;
	while (RAW != ';') { /* loop blocked by count */
	    if (count++ > 20) {
		count = 0;
		GROW;
                if (ctxt->instate == XML_PARSER_EOF)
                    return(0);
	    }
	    if ((RAW >= '0') && (RAW <= '9'))
	        val = val * 10 + (CUR - '0');
	    else {
		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
		val = 0;
		break;
	    }
	    /* saturate instead of overflowing */
	    if (val > 0x110000)
	        val = 0x110000;

	    NEXT;
	    count++;
	}
	if (RAW == ';') {
	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
	    ctxt->input->col++;
	    ctxt->input->cur++;
	}
    } else {
        /* not a character reference: still consume the '&' */
        if (RAW == '&')
            SKIP(1);
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
    }

    /*
     * [ WFC: Legal Character ]
     * Characters referred to using character references must match the
     * production for Char.
     */
    if (val >= 0x110000) {
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
                "xmlParseCharRef: character reference out of bounds\n",
	        val);
    } else if (IS_CHAR(val)) {
        return(val);
    } else {
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
                          "xmlParseCharRef: invalid xmlChar value %d\n",
	                  val);
    }
    return(0);
}
2392 * xmlParseStringCharRef:
2393 * @ctxt: an XML parser context
2394 * @str: a pointer to an index in the string
2396 * parse Reference declarations, variant parsing from a string rather
2397 * than an an input flow.
2399 * [66] CharRef ::= '&#' [0-9]+ ';' |
2400 * '&#x' [0-9a-fA-F]+ ';'
2402 * [ WFC: Legal Character ]
2403 * Characters referred to using character references must match the
2404 * production for Char.
2406 * Returns the value parsed (as an int), 0 in case of error, str will be
2407 * updated to the current value of the index
2410 xmlParseStringCharRef(xmlParserCtxtPtr ctxt
, const xmlChar
**str
) {
2415 if ((str
== NULL
) || (*str
== NULL
)) return(0);
2418 if ((cur
== '&') && (ptr
[1] == '#') && (ptr
[2] == 'x')) {
2421 while (cur
!= ';') { /* Non input consuming loop */
2422 if ((cur
>= '0') && (cur
<= '9'))
2423 val
= val
* 16 + (cur
- '0');
2424 else if ((cur
>= 'a') && (cur
<= 'f'))
2425 val
= val
* 16 + (cur
- 'a') + 10;
2426 else if ((cur
>= 'A') && (cur
<= 'F'))
2427 val
= val
* 16 + (cur
- 'A') + 10;
2429 xmlFatalErr(ctxt
, XML_ERR_INVALID_HEX_CHARREF
, NULL
);
2441 } else if ((cur
== '&') && (ptr
[1] == '#')){
2444 while (cur
!= ';') { /* Non input consuming loops */
2445 if ((cur
>= '0') && (cur
<= '9'))
2446 val
= val
* 10 + (cur
- '0');
2448 xmlFatalErr(ctxt
, XML_ERR_INVALID_DEC_CHARREF
, NULL
);
2461 xmlFatalErr(ctxt
, XML_ERR_INVALID_CHARREF
, NULL
);
2467 * [ WFC: Legal Character ]
2468 * Characters referred to using character references must match the
2469 * production for Char.
2471 if (val
>= 0x110000) {
2472 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
2473 "xmlParseStringCharRef: character reference out of bounds\n",
2475 } else if (IS_CHAR(val
)) {
2478 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
2479 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2486 * xmlParserHandlePEReference:
2487 * @ctxt: the parser context
2489 * DEPRECATED: Internal function, do not use.
2491 * [69] PEReference ::= '%' Name ';'
2493 * [ WFC: No Recursion ]
2494 * A parsed entity must not contain a recursive
2495 * reference to itself, either directly or indirectly.
2497 * [ WFC: Entity Declared ]
2498 * In a document without any DTD, a document with only an internal DTD
2499 * subset which contains no parameter entity references, or a document
2500 * with "standalone='yes'", ... ... The declaration of a parameter
2501 * entity must precede any reference to it...
2503 * [ VC: Entity Declared ]
2504 * In a document with an external subset or external parameter entities
2505 * with "standalone='no'", ... ... The declaration of a parameter entity
2506 * must precede any reference to it...
2509 * Parameter-entity references may only appear in the DTD.
2510 * NOTE: misleading but this is handled.
2512 * A PEReference may have been detected in the current input stream
2513 * the handling is done accordingly to
2514 * http://www.w3.org/TR/REC-xml#entproc
2516 * - Included in literal in entity values
2517 * - Included as Parameter Entity reference within DTDs
2520 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt
) {
2521 switch(ctxt
->instate
) {
2522 case XML_PARSER_CDATA_SECTION
:
2524 case XML_PARSER_COMMENT
:
2526 case XML_PARSER_START_TAG
:
2528 case XML_PARSER_END_TAG
:
2530 case XML_PARSER_EOF
:
2531 xmlFatalErr(ctxt
, XML_ERR_PEREF_AT_EOF
, NULL
);
2533 case XML_PARSER_PROLOG
:
2534 case XML_PARSER_START
:
2535 case XML_PARSER_MISC
:
2536 xmlFatalErr(ctxt
, XML_ERR_PEREF_IN_PROLOG
, NULL
);
2538 case XML_PARSER_ENTITY_DECL
:
2539 case XML_PARSER_CONTENT
:
2540 case XML_PARSER_ATTRIBUTE_VALUE
:
2542 case XML_PARSER_SYSTEM_LITERAL
:
2543 case XML_PARSER_PUBLIC_LITERAL
:
2544 /* we just ignore it there */
2546 case XML_PARSER_EPILOG
:
2547 xmlFatalErr(ctxt
, XML_ERR_PEREF_IN_EPILOG
, NULL
);
2549 case XML_PARSER_ENTITY_VALUE
:
2551 * NOTE: in the case of entity values, we don't do the
2552 * substitution here since we need the literal
2553 * entity value to be able to save the internal
2554 * subset of the document.
2555 * This will be handled by xmlStringDecodeEntities
2558 case XML_PARSER_DTD
:
2560 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2561 * In the internal DTD subset, parameter-entity references
2562 * can occur only where markup declarations can occur, not
2563 * within markup declarations.
2564 * In that case this is handled in xmlParseMarkupDecl
2566 if ((ctxt
->external
== 0) && (ctxt
->inputNr
== 1))
2568 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2571 case XML_PARSER_IGNORE
:
2575 xmlParsePEReference(ctxt
);
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is buffer##_size * 2 + n; if that computation wraps around
 * (result smaller than the old size) or the reallocation fails, control
 * jumps to the mem_error label, which the enclosing function must define.
 * On failure buffer and buffer##_size are left unchanged.
 * Note that `n` is not parenthesized in the expansion.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + n;                            \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);	                \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
/**
 * xmlStringDecodeEntitiesInt:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len: the string length
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 * @check:  whether to perform entity checks
 *
 * Copies @str into a freshly allocated buffer, replacing character
 * references and, depending on @what, general and/or parameter entity
 * references by their (recursively decoded) content. Decoding stops at
 * the end of the string, at one of the end markers, or when the parser
 * reaches the EOF state.
 *
 * Returns the new zero-terminated buffer, or NULL on error (NULL @str,
 * nesting depth exceeded, invalid reference, entity loop, failed entity
 * check or allocation failure).
 */
static xmlChar *
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
		           int what, xmlChar end, xmlChar  end2, xmlChar end3,
                           int check) {
    xmlChar *buffer = NULL;
    size_t buffer_size = 0;
    size_t nbchars = 0;

    xmlChar *current = NULL;
    xmlChar *rep = NULL;
    const xmlChar *last;
    xmlEntityPtr ent;
    int c,l;

    if (str == NULL)
        return(NULL);
    last = str + len;

    /* bound the recursion through nested entity contents */
    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
	(ctxt->depth > 100)) {
	xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
                       "Maximum entity nesting depth exceeded");
	return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     */
    if (str < last)
	c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
           (c != end2) && (c != end3) &&
           (ctxt->instate != XML_PARSER_EOF)) {

	if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
	    /* character reference: the helper advances str itself */
	    int val = xmlParseStringCharRef(ctxt, &str);
	    if (val == 0)
                goto int_error;
	    COPY_BUF(0,buffer,nbchars,val);
	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
	    }
	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
	    if (xmlParserDebugEntities)
		xmlGenericError(xmlGenericErrorContext,
			"String decoding Entity Reference: %.30s\n",
			str);
	    ent = xmlParseStringEntityRef(ctxt, &str);
	    if ((ent != NULL) &&
		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
		/* predefined entity: copy its single character */
		if (ent->content != NULL) {
		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
		    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
		    }
		} else {
		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
			    "predefined entity has no content\n");
                    goto int_error;
		}
	    } else if ((ent != NULL) && (ent->content != NULL)) {
	        if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
                    goto int_error;

                /* the entity is already being expanded further up: loop */
                if (ent->flags & XML_ENT_EXPANDING) {
	            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
                    xmlHaltParser(ctxt);
                    ent->content[0] = 0;
                    goto int_error;
                }

                /* mark the entity for the duration of the recursive decode */
                ent->flags |= XML_ENT_EXPANDING;
		ctxt->depth++;
		rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
                        ent->length, what, 0, 0, 0, check);
		ctxt->depth--;
                ent->flags &= ~XML_ENT_EXPANDING;

		if (rep == NULL) {
                    /* truncate the content of the entity that failed */
                    ent->content[0] = 0;
                    goto int_error;
                }

                /* append the decoded replacement text */
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
	    } else if (ent != NULL) {
		/* entity without content: re-emit the reference as &name; */
		int i = xmlStrlen(ent->name);
		const xmlChar *cur = ent->name;

		buffer[nbchars++] = '&';
		if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
		}
		for (;i > 0;i--)
		    buffer[nbchars++] = *cur++;
		buffer[nbchars++] = ';';
	    }
	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
	    if (xmlParserDebugEntities)
		xmlGenericError(xmlGenericErrorContext,
			"String decoding PE Reference: %.30s\n", str);
	    ent = xmlParseStringPEReference(ctxt, &str);
	    if (ent != NULL) {
                if (ent->content == NULL) {
		    /*
		     * Note: external parsed entities will not be loaded,
		     * it is not required for a non-validating parser to
		     * complete external PEReferences coming from the
		     * internal subset
		     */
		    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
			((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
			(ctxt->validate != 0)) {
			xmlLoadEntityContent(ctxt, ent);
		    } else {
			xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
		  "not validating will not read content for PE entity %s\n",
		                      ent->name, NULL);
		    }
		}

	        if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
                    goto int_error;

                if (ent->flags & XML_ENT_EXPANDING) {
	            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
                    xmlHaltParser(ctxt);
                    if (ent->content != NULL)
                        ent->content[0] = 0;
                    goto int_error;
                }

                /*
                 * ent->content may still be NULL here; the recursive call
                 * then returns NULL and the error path below is taken.
                 */
                ent->flags |= XML_ENT_EXPANDING;
		ctxt->depth++;
		rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
                        ent->length, what, 0, 0, 0, check);
		ctxt->depth--;
                ent->flags &= ~XML_ENT_EXPANDING;

		if (rep == NULL) {
                    if (ent->content != NULL)
                        ent->content[0] = 0;
                    goto int_error;
                }
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
	    }
	} else {
	    /* ordinary character: copy it and step over its l bytes */
	    COPY_BUF(l,buffer,nbchars,c);
	    str += l;
	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
	    }
	}
	if (str < last)
	    c = CUR_SCHAR(str, l);
	else
	    c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

mem_error:
    /* growBuffer() and the initial allocation jump here */
    xmlErrMemory(ctxt, NULL);
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}
2803 * xmlStringLenDecodeEntities:
2804 * @ctxt: the parser context
2805 * @str: the input string
2806 * @len: the string length
2807 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2808 * @end: an end marker xmlChar, 0 if none
2809 * @end2: an end marker xmlChar, 0 if none
2810 * @end3: an end marker xmlChar, 0 if none
2812 * DEPRECATED: Internal function, don't use.
2814 * Takes an entity string content and processes it to perform the adequate substitutions.
2816 * [67] Reference ::= EntityRef | CharRef
2818 * [69] PEReference ::= '%' Name ';'
2820 * Returns A newly allocated string with the substitution done. The caller
2821 * must deallocate it !
2824 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int len
,
2825 int what
, xmlChar end
, xmlChar end2
,
2827 if ((ctxt
== NULL
) || (str
== NULL
) || (len
< 0))
2829 return(xmlStringDecodeEntitiesInt(ctxt
, str
, len
, what
,
2830 end
, end2
, end3
, 0));
2834 * xmlStringDecodeEntities:
2835 * @ctxt: the parser context
2836 * @str: the input string
2837 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2838 * @end: an end marker xmlChar, 0 if none
2839 * @end2: an end marker xmlChar, 0 if none
2840 * @end3: an end marker xmlChar, 0 if none
2842 * DEPRECATED: Internal function, don't use.
2844 * Takes an entity string content and processes it to perform the adequate substitutions.
2846 * [67] Reference ::= EntityRef | CharRef
2848 * [69] PEReference ::= '%' Name ';'
2850 * Returns A newly allocated string with the substitution done. The caller
2851 * must deallocate it !
2854 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int what
,
2855 xmlChar end
, xmlChar end2
, xmlChar end3
) {
2856 if ((ctxt
== NULL
) || (str
== NULL
)) return(NULL
);
2857 return(xmlStringDecodeEntitiesInt(ctxt
, str
, xmlStrlen(str
), what
,
2858 end
, end2
, end3
, 0));
2861 /************************************************************************
2863 * Commodity functions, cleanup needed ? *
2865 ************************************************************************/
2869 * @ctxt: an XML parser context
2871 * @len: the size of @str
2872 * @blank_chars: we know the chars are blanks
2874 * Is this a sequence of blank chars that one can ignore ?
2876 * Returns 1 if ignorable 0 otherwise.
2879 static int areBlanks(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int len
,
2882 xmlNodePtr lastChild
;
2885 * Don't spend time trying to differentiate them, the same callback is
2888 if (ctxt
->sax
->ignorableWhitespace
== ctxt
->sax
->characters
)
2892 * Check for xml:space value.
2894 if ((ctxt
->space
== NULL
) || (*(ctxt
->space
) == 1) ||
2895 (*(ctxt
->space
) == -2))
2899 * Check that the string is made of blanks
2901 if (blank_chars
== 0) {
2902 for (i
= 0;i
< len
;i
++)
2903 if (!(IS_BLANK_CH(str
[i
]))) return(0);
2907 * Look if the element is mixed content in the DTD if available
2909 if (ctxt
->node
== NULL
) return(0);
2910 if (ctxt
->myDoc
!= NULL
) {
2911 ret
= xmlIsMixedElement(ctxt
->myDoc
, ctxt
->node
->name
);
2912 if (ret
== 0) return(1);
2913 if (ret
== 1) return(0);
2917 * Otherwise, heuristic :-\
2919 if ((RAW
!= '<') && (RAW
!= 0xD)) return(0);
2920 if ((ctxt
->node
->children
== NULL
) &&
2921 (RAW
== '<') && (NXT(1) == '/')) return(0);
2923 lastChild
= xmlGetLastChild(ctxt
->node
);
2924 if (lastChild
== NULL
) {
2925 if ((ctxt
->node
->type
!= XML_ELEMENT_NODE
) &&
2926 (ctxt
->node
->content
!= NULL
)) return(0);
2927 } else if (xmlNodeIsText(lastChild
))
2929 else if ((ctxt
->node
->children
!= NULL
) &&
2930 (xmlNodeIsText(ctxt
->node
->children
)))
2935 /************************************************************************
2937 * Extra stuff for namespace support *
2938 * Relates to http://www.w3.org/TR/WD-xml-names *
2940 ************************************************************************/
2944 * @ctxt: an XML parser context
2945 * @name: a UTF-8 encoded XML qualified name string
2946 * @prefix: a xmlChar **
2948 * parse an UTF8 encoded XML qualified name string
2950 * [NS 5] QName ::= (Prefix ':')? LocalPart
2952 * [NS 6] Prefix ::= NCName
2954 * [NS 7] LocalPart ::= NCName
2956 * Returns the local part, and prefix is updated
2957 * to get the Prefix if any.
2961 xmlSplitQName(xmlParserCtxtPtr ctxt
, const xmlChar
*name
, xmlChar
**prefix
) {
2962 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
2963 xmlChar
*buffer
= NULL
;
2965 int max
= XML_MAX_NAMELEN
;
2966 xmlChar
*ret
= NULL
;
2967 const xmlChar
*cur
= name
;
2970 if (prefix
== NULL
) return(NULL
);
2973 if (cur
== NULL
) return(NULL
);
2975 #ifndef XML_XML_NAMESPACE
2976 /* xml: prefix is not really a namespace */
2977 if ((cur
[0] == 'x') && (cur
[1] == 'm') &&
2978 (cur
[2] == 'l') && (cur
[3] == ':'))
2979 return(xmlStrdup(name
));
2982 /* nasty but well-formed */
2984 return(xmlStrdup(name
));
2987 while ((c
!= 0) && (c
!= ':') && (len
< max
)) { /* tested bigname.xml */
2993 * Okay someone managed to make a huge name, so he's ready to pay
2994 * for the processing speed.
2998 buffer
= (xmlChar
*) xmlMallocAtomic(max
);
2999 if (buffer
== NULL
) {
3000 xmlErrMemory(ctxt
, NULL
);
3003 memcpy(buffer
, buf
, len
);
3004 while ((c
!= 0) && (c
!= ':')) { /* tested bigname.xml */
3005 if (len
+ 10 > max
) {
3009 tmp
= (xmlChar
*) xmlRealloc(buffer
, max
);
3012 xmlErrMemory(ctxt
, NULL
);
3023 if ((c
== ':') && (*cur
== 0)) {
3027 return(xmlStrdup(name
));
3031 ret
= xmlStrndup(buf
, len
);
3035 max
= XML_MAX_NAMELEN
;
3043 return(xmlStrndup(BAD_CAST
"", 0));
3048 * Check that the first character is proper to start
3051 if (!(((c
>= 0x61) && (c
<= 0x7A)) ||
3052 ((c
>= 0x41) && (c
<= 0x5A)) ||
3053 (c
== '_') || (c
== ':'))) {
3055 int first
= CUR_SCHAR(cur
, l
);
3057 if (!IS_LETTER(first
) && (first
!= '_')) {
3058 xmlFatalErrMsgStr(ctxt
, XML_NS_ERR_QNAME
,
3059 "Name %s is not XML Namespace compliant\n",
3065 while ((c
!= 0) && (len
< max
)) { /* tested bigname2.xml */
3071 * Okay someone managed to make a huge name, so he's ready to pay
3072 * for the processing speed.
3076 buffer
= (xmlChar
*) xmlMallocAtomic(max
);
3077 if (buffer
== NULL
) {
3078 xmlErrMemory(ctxt
, NULL
);
3081 memcpy(buffer
, buf
, len
);
3082 while (c
!= 0) { /* tested bigname2.xml */
3083 if (len
+ 10 > max
) {
3087 tmp
= (xmlChar
*) xmlRealloc(buffer
, max
);
3089 xmlErrMemory(ctxt
, NULL
);
3102 ret
= xmlStrndup(buf
, len
);
3111 /************************************************************************
3113 * The parser itself *
3114 * Relates to http://www.w3.org/TR/REC-xml *
3116 ************************************************************************/
3118 /************************************************************************
3120 * Routines to parse Name, NCName and NmToken *
3122 ************************************************************************/
3124 static unsigned long nbParseName
= 0;
3125 static unsigned long nbParseNmToken
= 0;
3126 static unsigned long nbParseNCName
= 0;
3127 static unsigned long nbParseNCNameComplex
= 0;
3128 static unsigned long nbParseNameComplex
= 0;
3129 static unsigned long nbParseStringName
= 0;
3133 * The two following functions are related to the change of accepted
3134 * characters for Name and NmToken in the Revision 5 of XML-1.0
3135 * They correspond to the modified production [4] and the new production [4a]
3136 * changes in that revision. Also note that the macros used for the
3137 * productions Letter, Digit, CombiningChar and Extender are not needed
3139 * We still keep compatibility to pre-revision5 parsing semantic if the
3140 * new XML_PARSE_OLD10 option is given to the parser.
3143 xmlIsNameStartChar(xmlParserCtxtPtr ctxt
, int c
) {
3144 if ((ctxt
->options
& XML_PARSE_OLD10
) == 0) {
3146 * Use the new checks of production [4] [4a] and [5] of the
3147 * Update 5 of XML-1.0
3149 if ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* accelerators */
3150 (((c
>= 'a') && (c
<= 'z')) ||
3151 ((c
>= 'A') && (c
<= 'Z')) ||
3152 (c
== '_') || (c
== ':') ||
3153 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3154 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3155 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3156 ((c
>= 0x370) && (c
<= 0x37D)) ||
3157 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3158 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3159 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3160 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3161 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3162 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3163 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3164 ((c
>= 0x10000) && (c
<= 0xEFFFF))))
3167 if (IS_LETTER(c
) || (c
== '_') || (c
== ':'))
3174 xmlIsNameChar(xmlParserCtxtPtr ctxt
, int c
) {
3175 if ((ctxt
->options
& XML_PARSE_OLD10
) == 0) {
3177 * Use the new checks of production [4] [4a] and [5] of the
3178 * Update 5 of XML-1.0
3180 if ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* accelerators */
3181 (((c
>= 'a') && (c
<= 'z')) ||
3182 ((c
>= 'A') && (c
<= 'Z')) ||
3183 ((c
>= '0') && (c
<= '9')) || /* !start */
3184 (c
== '_') || (c
== ':') ||
3185 (c
== '-') || (c
== '.') || (c
== 0xB7) || /* !start */
3186 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3187 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3188 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3189 ((c
>= 0x300) && (c
<= 0x36F)) || /* !start */
3190 ((c
>= 0x370) && (c
<= 0x37D)) ||
3191 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3192 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3193 ((c
>= 0x203F) && (c
<= 0x2040)) || /* !start */
3194 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3195 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3196 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3197 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3198 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3199 ((c
>= 0x10000) && (c
<= 0xEFFFF))))
3202 if ((IS_LETTER(c
)) || (IS_DIGIT(c
)) ||
3203 (c
== '.') || (c
== '-') ||
3204 (c
== '_') || (c
== ':') ||
3205 (IS_COMBINING(c
)) ||
3212 static xmlChar
* xmlParseAttValueInternal(xmlParserCtxtPtr ctxt
,
3213 int *len
, int *alloc
, int normalize
);
3215 static const xmlChar
*
3216 xmlParseNameComplex(xmlParserCtxtPtr ctxt
) {
3219 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3220 XML_MAX_TEXT_LENGTH
:
3221 XML_MAX_NAME_LENGTH
;
3224 nbParseNameComplex
++;
3228 * Handler for more complex cases
3231 if ((ctxt
->options
& XML_PARSE_OLD10
) == 0) {
3233 * Use the new checks of production [4] [4a] and [5] of the
3234 * Update 5 of XML-1.0
3236 if ((c
== ' ') || (c
== '>') || (c
== '/') || /* accelerators */
3237 (!(((c
>= 'a') && (c
<= 'z')) ||
3238 ((c
>= 'A') && (c
<= 'Z')) ||
3239 (c
== '_') || (c
== ':') ||
3240 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3241 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3242 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3243 ((c
>= 0x370) && (c
<= 0x37D)) ||
3244 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3245 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3246 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3247 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3248 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3249 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3250 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3251 ((c
>= 0x10000) && (c
<= 0xEFFFF))))) {
3257 while ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* accelerators */
3258 (((c
>= 'a') && (c
<= 'z')) ||
3259 ((c
>= 'A') && (c
<= 'Z')) ||
3260 ((c
>= '0') && (c
<= '9')) || /* !start */
3261 (c
== '_') || (c
== ':') ||
3262 (c
== '-') || (c
== '.') || (c
== 0xB7) || /* !start */
3263 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3264 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3265 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3266 ((c
>= 0x300) && (c
<= 0x36F)) || /* !start */
3267 ((c
>= 0x370) && (c
<= 0x37D)) ||
3268 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3269 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3270 ((c
>= 0x203F) && (c
<= 0x2040)) || /* !start */
3271 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3272 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3273 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3274 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3275 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3276 ((c
>= 0x10000) && (c
<= 0xEFFFF))
3278 if (len
<= INT_MAX
- l
)
3284 if ((c
== ' ') || (c
== '>') || (c
== '/') || /* accelerators */
3285 (!IS_LETTER(c
) && (c
!= '_') &&
3293 while ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* test bigname.xml */
3294 ((IS_LETTER(c
)) || (IS_DIGIT(c
)) ||
3295 (c
== '.') || (c
== '-') ||
3296 (c
== '_') || (c
== ':') ||
3297 (IS_COMBINING(c
)) ||
3298 (IS_EXTENDER(c
)))) {
3299 if (len
<= INT_MAX
- l
)
3305 if (ctxt
->instate
== XML_PARSER_EOF
)
3307 if (len
> maxLength
) {
3308 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "Name");
3311 if (ctxt
->input
->cur
- ctxt
->input
->base
< len
) {
3313 * There were a couple of bugs where PERefs led to a change
3314 * of the buffer. Check the buffer size to avoid passing an invalid
3315 * pointer to xmlDictLookup.
3317 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
3318 "unexpected change of input buffer");
3321 if ((*ctxt
->input
->cur
== '\n') && (ctxt
->input
->cur
[-1] == '\r'))
3322 return(xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
- (len
+ 1), len
));
3323 return(xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
- len
, len
));
3328 * @ctxt: an XML parser context
3330 * DEPRECATED: Internal function, don't use.
3332 * parse an XML name.
3334 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3335 * CombiningChar | Extender
3337 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3339 * [6] Names ::= Name (#x20 Name)*
3341 * Returns the Name parsed or NULL
3345 xmlParseName(xmlParserCtxtPtr ctxt
) {
3349 size_t maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3350 XML_MAX_TEXT_LENGTH
:
3351 XML_MAX_NAME_LENGTH
;
3354 if (ctxt
->instate
== XML_PARSER_EOF
)
3362 * Accelerator for simple ASCII names
3364 in
= ctxt
->input
->cur
;
3365 if (((*in
>= 0x61) && (*in
<= 0x7A)) ||
3366 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3367 (*in
== '_') || (*in
== ':')) {
3369 while (((*in
>= 0x61) && (*in
<= 0x7A)) ||
3370 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3371 ((*in
>= 0x30) && (*in
<= 0x39)) ||
3372 (*in
== '_') || (*in
== '-') ||
3373 (*in
== ':') || (*in
== '.'))
3375 if ((*in
> 0) && (*in
< 0x80)) {
3376 count
= in
- ctxt
->input
->cur
;
3377 if (count
> maxLength
) {
3378 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "Name");
3381 ret
= xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
, count
);
3382 ctxt
->input
->cur
= in
;
3383 ctxt
->input
->col
+= count
;
3385 xmlErrMemory(ctxt
, NULL
);
3389 /* accelerator for special cases */
3390 return(xmlParseNameComplex(ctxt
));
3393 static const xmlChar
*
3394 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt
) {
3397 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3398 XML_MAX_TEXT_LENGTH
:
3399 XML_MAX_NAME_LENGTH
;
3400 size_t startPosition
= 0;
3403 nbParseNCNameComplex
++;
3407 * Handler for more complex cases
3409 startPosition
= CUR_PTR
- BASE_PTR
;
3411 if ((c
== ' ') || (c
== '>') || (c
== '/') || /* accelerators */
3412 (!xmlIsNameStartChar(ctxt
, c
) || (c
== ':'))) {
3416 while ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* test bigname.xml */
3417 (xmlIsNameChar(ctxt
, c
) && (c
!= ':'))) {
3418 if (len
<= INT_MAX
- l
)
3423 if (ctxt
->instate
== XML_PARSER_EOF
)
3425 if (len
> maxLength
) {
3426 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NCName");
3429 return(xmlDictLookup(ctxt
->dict
, (BASE_PTR
+ startPosition
), len
));
3434 * @ctxt: an XML parser context
3435 * @len: length of the string parsed
3437 * parse an XML name.
3439 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3440 * CombiningChar | Extender
3442 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3444 * Returns the Name parsed or NULL
3447 static const xmlChar
*
3448 xmlParseNCName(xmlParserCtxtPtr ctxt
) {
3449 const xmlChar
*in
, *e
;
3452 size_t maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3453 XML_MAX_TEXT_LENGTH
:
3454 XML_MAX_NAME_LENGTH
;
3461 * Accelerator for simple ASCII names
3463 in
= ctxt
->input
->cur
;
3464 e
= ctxt
->input
->end
;
3465 if ((((*in
>= 0x61) && (*in
<= 0x7A)) ||
3466 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3467 (*in
== '_')) && (in
< e
)) {
3469 while ((((*in
>= 0x61) && (*in
<= 0x7A)) ||
3470 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3471 ((*in
>= 0x30) && (*in
<= 0x39)) ||
3472 (*in
== '_') || (*in
== '-') ||
3473 (*in
== '.')) && (in
< e
))
3477 if ((*in
> 0) && (*in
< 0x80)) {
3478 count
= in
- ctxt
->input
->cur
;
3479 if (count
> maxLength
) {
3480 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NCName");
3483 ret
= xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
, count
);
3484 ctxt
->input
->cur
= in
;
3485 ctxt
->input
->col
+= count
;
3487 xmlErrMemory(ctxt
, NULL
);
3493 return(xmlParseNCNameComplex(ctxt
));
3497 * xmlParseNameAndCompare:
3498 * @ctxt: an XML parser context
3500 * parse an XML name and compares for match
3501 * (specialized for endtag parsing)
3503 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3504 * and the name for mismatch
3507 static const xmlChar
*
3508 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt
, xmlChar
const *other
) {
3509 register const xmlChar
*cmp
= other
;
3510 register const xmlChar
*in
;
3514 if (ctxt
->instate
== XML_PARSER_EOF
)
3517 in
= ctxt
->input
->cur
;
3518 while (*in
!= 0 && *in
== *cmp
) {
3522 if (*cmp
== 0 && (*in
== '>' || IS_BLANK_CH (*in
))) {
3524 ctxt
->input
->col
+= in
- ctxt
->input
->cur
;
3525 ctxt
->input
->cur
= in
;
3526 return (const xmlChar
*) 1;
3528 /* failure (or end of input buffer), check with full function */
3529 ret
= xmlParseName (ctxt
);
3530 /* strings coming from the dictionary direct compare possible */
3532 return (const xmlChar
*) 1;
3538 * xmlParseStringName:
3539 * @ctxt: an XML parser context
3540 * @str: a pointer to the string pointer (IN/OUT)
3542 * parse an XML name.
3544 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3545 * CombiningChar | Extender
3547 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3549 * [6] Names ::= Name (#x20 Name)*
3551 * Returns the Name parsed or NULL. The @str pointer
3552 * is updated to the current location in the string.
3556 xmlParseStringName(xmlParserCtxtPtr ctxt
, const xmlChar
** str
) {
3557 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
3558 const xmlChar
*cur
= *str
;
3561 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3562 XML_MAX_TEXT_LENGTH
:
3563 XML_MAX_NAME_LENGTH
;
3566 nbParseStringName
++;
3569 c
= CUR_SCHAR(cur
, l
);
3570 if (!xmlIsNameStartChar(ctxt
, c
)) {
3574 COPY_BUF(l
,buf
,len
,c
);
3576 c
= CUR_SCHAR(cur
, l
);
3577 while (xmlIsNameChar(ctxt
, c
)) {
3578 COPY_BUF(l
,buf
,len
,c
);
3580 c
= CUR_SCHAR(cur
, l
);
3581 if (len
>= XML_MAX_NAMELEN
) { /* test bigentname.xml */
3583 * Okay someone managed to make a huge name, so he's ready to pay
3584 * for the processing speed.
3589 buffer
= (xmlChar
*) xmlMallocAtomic(max
);
3590 if (buffer
== NULL
) {
3591 xmlErrMemory(ctxt
, NULL
);
3594 memcpy(buffer
, buf
, len
);
3595 while (xmlIsNameChar(ctxt
, c
)) {
3596 if (len
+ 10 > max
) {
3600 tmp
= (xmlChar
*) xmlRealloc(buffer
, max
);
3602 xmlErrMemory(ctxt
, NULL
);
3608 COPY_BUF(l
,buffer
,len
,c
);
3610 c
= CUR_SCHAR(cur
, l
);
3611 if (len
> maxLength
) {
3612 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NCName");
3622 if (len
> maxLength
) {
3623 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NCName");
3627 return(xmlStrndup(buf
, len
));
3632 * @ctxt: an XML parser context
3634 * DEPRECATED: Internal function, don't use.
3636 * parse an XML Nmtoken.
3638 * [7] Nmtoken ::= (NameChar)+
3640 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3642 * Returns the Nmtoken parsed or NULL
3646 xmlParseNmtoken(xmlParserCtxtPtr ctxt
) {
3647 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
3650 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3651 XML_MAX_TEXT_LENGTH
:
3652 XML_MAX_NAME_LENGTH
;
3660 while (xmlIsNameChar(ctxt
, c
)) {
3661 COPY_BUF(l
,buf
,len
,c
);
3664 if (len
>= XML_MAX_NAMELEN
) {
3666 * Okay someone managed to make a huge token, so he's ready to pay
3667 * for the processing speed.
3672 buffer
= (xmlChar
*) xmlMallocAtomic(max
);
3673 if (buffer
== NULL
) {
3674 xmlErrMemory(ctxt
, NULL
);
3677 memcpy(buffer
, buf
, len
);
3678 while (xmlIsNameChar(ctxt
, c
)) {
3679 if (len
+ 10 > max
) {
3683 tmp
= (xmlChar
*) xmlRealloc(buffer
, max
);
3685 xmlErrMemory(ctxt
, NULL
);
3691 COPY_BUF(l
,buffer
,len
,c
);
3692 if (len
> maxLength
) {
3693 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NmToken");
3701 if (ctxt
->instate
== XML_PARSER_EOF
) {
3708 if (ctxt
->instate
== XML_PARSER_EOF
)
3712 if (len
> maxLength
) {
3713 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "NmToken");
3716 return(xmlStrndup(buf
, len
));
3720 * xmlParseEntityValue:
3721 * @ctxt: an XML parser context
3722 * @orig: if non-NULL store a copy of the original entity value
3724 * DEPRECATED: Internal function, don't use.
3726 * parse a value for ENTITY declarations
3728 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3729 * "'" ([^%&'] | PEReference | Reference)* "'"
3731 * Returns the EntityValue parsed with reference substituted or NULL
3735 xmlParseEntityValue(xmlParserCtxtPtr ctxt
, xmlChar
**orig
) {
3736 xmlChar
*buf
= NULL
;
3738 int size
= XML_PARSER_BUFFER_SIZE
;
3740 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3741 XML_MAX_HUGE_LENGTH
:
3742 XML_MAX_TEXT_LENGTH
;
3744 xmlChar
*ret
= NULL
;
3745 const xmlChar
*cur
= NULL
;
3746 xmlParserInputPtr input
;
3748 if (RAW
== '"') stop
= '"';
3749 else if (RAW
== '\'') stop
= '\'';
3751 xmlFatalErr(ctxt
, XML_ERR_ENTITY_NOT_STARTED
, NULL
);
3754 buf
= (xmlChar
*) xmlMallocAtomic(size
);
3756 xmlErrMemory(ctxt
, NULL
);
3761 * The content of the entity definition is copied in a buffer.
3764 ctxt
->instate
= XML_PARSER_ENTITY_VALUE
;
3765 input
= ctxt
->input
;
3767 if (ctxt
->instate
== XML_PARSER_EOF
)
3772 * NOTE: 4.4.5 Included in Literal
3773 * When a parameter entity reference appears in a literal entity
3774 * value, ... a single or double quote character in the replacement
3775 * text is always treated as a normal data character and will not
3776 * terminate the literal.
3777 * In practice it means we stop the loop only when back at parsing
3778 * the initial entity and the quote is found
3780 while (((IS_CHAR(c
)) && ((c
!= stop
) || /* checked */
3781 (ctxt
->input
!= input
))) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
3782 if (len
+ 5 >= size
) {
3786 tmp
= (xmlChar
*) xmlRealloc(buf
, size
);
3788 xmlErrMemory(ctxt
, NULL
);
3793 COPY_BUF(l
,buf
,len
,c
);
3803 if (len
> maxLength
) {
3804 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_NOT_FINISHED
,
3805 "entity value too long\n");
3810 if (ctxt
->instate
== XML_PARSER_EOF
)
3813 xmlFatalErr(ctxt
, XML_ERR_ENTITY_NOT_FINISHED
, NULL
);
3819 * Raise problem w.r.t. '&' and '%' being used in non-entities
3820 * reference constructs. Note Charref will be handled in
3821 * xmlStringDecodeEntities()
3824 while (*cur
!= 0) { /* non input consuming */
3825 if ((*cur
== '%') || ((*cur
== '&') && (cur
[1] != '#'))) {
3831 name
= xmlParseStringName(ctxt
, &cur
);
3836 if ((nameOk
== 0) || (*cur
!= ';')) {
3837 xmlFatalErrMsgInt(ctxt
, XML_ERR_ENTITY_CHAR_ERROR
,
3838 "EntityValue: '%c' forbidden except for entities references\n",
3842 if ((tmp
== '%') && (ctxt
->inSubset
== 1) &&
3843 (ctxt
->inputNr
== 1)) {
3844 xmlFatalErr(ctxt
, XML_ERR_ENTITY_PE_INTERNAL
, NULL
);
3854 * Then PEReference entities are substituted.
3856 * NOTE: 4.4.7 Bypassed
3857 * When a general entity reference appears in the EntityValue in
3858 * an entity declaration, it is bypassed and left as is.
3859 * so XML_SUBSTITUTE_REF is not set here.
3862 ret
= xmlStringDecodeEntitiesInt(ctxt
, buf
, len
, XML_SUBSTITUTE_PEREF
,
3863 0, 0, 0, /* check */ 1);
3878 * xmlParseAttValueComplex:
3879 * @ctxt: an XML parser context
3880 * @len: the resulting attribute len
3881 * @normalize: whether to apply the inner normalization
3883 * parse a value for an attribute, this is the fallback function
3884 * of xmlParseAttValue() when the attribute parsing requires handling
3885 * of non-ASCII characters, or normalization compaction.
3887 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3890 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt
, int *attlen
, int normalize
) {
3892 xmlChar
*buf
= NULL
;
3893 xmlChar
*rep
= NULL
;
3895 size_t buf_size
= 0;
3896 size_t maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
3897 XML_MAX_HUGE_LENGTH
:
3898 XML_MAX_TEXT_LENGTH
;
3899 int c
, l
, in_space
= 0;
3900 xmlChar
*current
= NULL
;
3903 if (NXT(0) == '"') {
3904 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
3907 } else if (NXT(0) == '\'') {
3909 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
3912 xmlFatalErr(ctxt
, XML_ERR_ATTRIBUTE_NOT_STARTED
, NULL
);
3917 * allocate a translation buffer.
3919 buf_size
= XML_PARSER_BUFFER_SIZE
;
3920 buf
= (xmlChar
*) xmlMallocAtomic(buf_size
);
3921 if (buf
== NULL
) goto mem_error
;
3924 * OK loop until we reach one of the ending char or a size limit.
3927 while (((NXT(0) != limit
) && /* checked */
3928 (IS_CHAR(c
)) && (c
!= '<')) &&
3929 (ctxt
->instate
!= XML_PARSER_EOF
)) {
3932 if (NXT(1) == '#') {
3933 int val
= xmlParseCharRef(ctxt
);
3936 if (ctxt
->replaceEntities
) {
3937 if (len
+ 10 > buf_size
) {
3938 growBuffer(buf
, 10);
3943 * The reparsing will be done in xmlStringGetNodeList()
3944 * called by the attribute() function in SAX.c
3946 if (len
+ 10 > buf_size
) {
3947 growBuffer(buf
, 10);
3955 } else if (val
!= 0) {
3956 if (len
+ 10 > buf_size
) {
3957 growBuffer(buf
, 10);
3959 len
+= xmlCopyChar(0, &buf
[len
], val
);
3962 ent
= xmlParseEntityRef(ctxt
);
3963 if ((ent
!= NULL
) &&
3964 (ent
->etype
== XML_INTERNAL_PREDEFINED_ENTITY
)) {
3965 if (len
+ 10 > buf_size
) {
3966 growBuffer(buf
, 10);
3968 if ((ctxt
->replaceEntities
== 0) &&
3969 (ent
->content
[0] == '&')) {
3976 buf
[len
++] = ent
->content
[0];
3978 } else if ((ent
!= NULL
) &&
3979 (ctxt
->replaceEntities
!= 0)) {
3980 if (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) {
3981 if (xmlParserEntityCheck(ctxt
, ent
->length
))
3985 rep
= xmlStringDecodeEntitiesInt(ctxt
, ent
->content
,
3986 ent
->length
, XML_SUBSTITUTE_REF
, 0, 0, 0,
3991 while (*current
!= 0) { /* non input consuming */
3992 if ((*current
== 0xD) || (*current
== 0xA) ||
3993 (*current
== 0x9)) {
3997 buf
[len
++] = *current
++;
3998 if (len
+ 10 > buf_size
) {
3999 growBuffer(buf
, 10);
4006 if (len
+ 10 > buf_size
) {
4007 growBuffer(buf
, 10);
4009 if (ent
->content
!= NULL
)
4010 buf
[len
++] = ent
->content
[0];
4012 } else if (ent
!= NULL
) {
4013 int i
= xmlStrlen(ent
->name
);
4014 const xmlChar
*cur
= ent
->name
;
4017 * We also check for recursion and amplification
4018 * when entities are not substituted. They're
4019 * often expanded later.
4021 if ((ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) &&
4022 (ent
->content
!= NULL
)) {
4023 if ((ent
->flags
& XML_ENT_CHECKED
) == 0) {
4024 unsigned long oldCopy
= ctxt
->sizeentcopy
;
4026 ctxt
->sizeentcopy
= ent
->length
;
4029 rep
= xmlStringDecodeEntitiesInt(ctxt
,
4030 ent
->content
, ent
->length
,
4031 XML_SUBSTITUTE_REF
, 0, 0, 0,
4036 * If we're parsing DTD content, the entity
4037 * might reference other entities which
4038 * weren't defined yet, so the check isn't
4041 if (ctxt
->inSubset
== 0) {
4042 ent
->flags
|= XML_ENT_CHECKED
;
4043 ent
->expandedSize
= ctxt
->sizeentcopy
;
4050 ent
->content
[0] = 0;
4053 if (xmlParserEntityCheck(ctxt
, oldCopy
))
4056 if (xmlParserEntityCheck(ctxt
, ent
->expandedSize
))
4062 * Just output the reference
4065 while (len
+ i
+ 10 > buf_size
) {
4066 growBuffer(buf
, i
+ 10);
4069 buf
[len
++] = *cur
++;
4074 if ((c
== 0x20) || (c
== 0xD) || (c
== 0xA) || (c
== 0x9)) {
4075 if ((len
!= 0) || (!normalize
)) {
4076 if ((!normalize
) || (!in_space
)) {
4077 COPY_BUF(l
,buf
,len
,0x20);
4078 while (len
+ 10 > buf_size
) {
4079 growBuffer(buf
, 10);
4086 COPY_BUF(l
,buf
,len
,c
);
4087 if (len
+ 10 > buf_size
) {
4088 growBuffer(buf
, 10);
4095 if (len
> maxLength
) {
4096 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
4097 "AttValue length too long\n");
4101 if (ctxt
->instate
== XML_PARSER_EOF
)
4104 if ((in_space
) && (normalize
)) {
4105 while ((len
> 0) && (buf
[len
- 1] == 0x20)) len
--;
4109 xmlFatalErr(ctxt
, XML_ERR_LT_IN_ATTRIBUTE
, NULL
);
4110 } else if (RAW
!= limit
) {
4111 if ((c
!= 0) && (!IS_CHAR(c
))) {
4112 xmlFatalErrMsg(ctxt
, XML_ERR_INVALID_CHAR
,
4113 "invalid character in attribute value\n");
4115 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
4116 "AttValue: ' expected\n");
4121 if (attlen
!= NULL
) *attlen
= len
;
4125 xmlErrMemory(ctxt
, NULL
);
4136 * @ctxt: an XML parser context
4138 * DEPRECATED: Internal function, don't use.
4140 * parse a value for an attribute
4141 * Note: the parser won't do substitution of entities here, this
4142 * will be handled later in xmlStringGetNodeList
4144 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4145 * "'" ([^<&'] | Reference)* "'"
4147 * 3.3.3 Attribute-Value Normalization:
4148 * Before the value of an attribute is passed to the application or
4149 * checked for validity, the XML processor must normalize it as follows:
4150 * - a character reference is processed by appending the referenced
4151 * character to the attribute value
4152 * - an entity reference is processed by recursively processing the
4153 * replacement text of the entity
4154 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4155 * appending #x20 to the normalized value, except that only a single
4156 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4157 * parsed entity or the literal entity value of an internal parsed entity
4158 * - other characters are processed by appending them to the normalized value
4159 * If the declared value is not CDATA, then the XML processor must further
4160 * process the normalized attribute value by discarding any leading and
4161 * trailing space (#x20) characters, and by replacing sequences of space
4162 * (#x20) characters by a single space (#x20) character.
4163 * All attributes for which no declaration has been read should be treated
4164 * by a non-validating parser as if declared CDATA.
4166 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4171 xmlParseAttValue(xmlParserCtxtPtr ctxt
) {
4172 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
)) return(NULL
);
4173 return(xmlParseAttValueInternal(ctxt
, NULL
, NULL
, 0));
4177 * xmlParseSystemLiteral:
4178 * @ctxt: an XML parser context
4180 * DEPRECATED: Internal function, don't use.
4182 * parse an XML Literal
4184 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4186 * Returns the SystemLiteral parsed or NULL
4190 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt
) {
4191 xmlChar
*buf
= NULL
;
4193 int size
= XML_PARSER_BUFFER_SIZE
;
4195 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
4196 XML_MAX_TEXT_LENGTH
:
4197 XML_MAX_NAME_LENGTH
;
4199 int state
= ctxt
->instate
;
4204 } else if (RAW
== '\'') {
4208 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_STARTED
, NULL
);
4212 buf
= (xmlChar
*) xmlMallocAtomic(size
);
4214 xmlErrMemory(ctxt
, NULL
);
4217 ctxt
->instate
= XML_PARSER_SYSTEM_LITERAL
;
4219 while ((IS_CHAR(cur
)) && (cur
!= stop
)) { /* checked */
4220 if (len
+ 5 >= size
) {
4224 tmp
= (xmlChar
*) xmlRealloc(buf
, size
);
4227 xmlErrMemory(ctxt
, NULL
);
4228 ctxt
->instate
= (xmlParserInputState
) state
;
4233 COPY_BUF(l
,buf
,len
,cur
);
4234 if (len
> maxLength
) {
4235 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "SystemLiteral");
4237 ctxt
->instate
= (xmlParserInputState
) state
;
4244 if (ctxt
->instate
== XML_PARSER_EOF
) {
4248 ctxt
->instate
= (xmlParserInputState
) state
;
4249 if (!IS_CHAR(cur
)) {
4250 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_FINISHED
, NULL
);
4258 * xmlParsePubidLiteral:
4259 * @ctxt: an XML parser context
4261 * DEPRECATED: Internal function, don't use.
4263 * parse an XML public literal
4265 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4267 * Returns the PubidLiteral parsed or NULL.
4271 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt
) {
4272 xmlChar
*buf
= NULL
;
4274 int size
= XML_PARSER_BUFFER_SIZE
;
4275 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
4276 XML_MAX_TEXT_LENGTH
:
4277 XML_MAX_NAME_LENGTH
;
4280 xmlParserInputState oldstate
= ctxt
->instate
;
4285 } else if (RAW
== '\'') {
4289 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_STARTED
, NULL
);
4292 buf
= (xmlChar
*) xmlMallocAtomic(size
);
4294 xmlErrMemory(ctxt
, NULL
);
4297 ctxt
->instate
= XML_PARSER_PUBLIC_LITERAL
;
4299 while ((IS_PUBIDCHAR_CH(cur
)) && (cur
!= stop
)) { /* checked */
4300 if (len
+ 1 >= size
) {
4304 tmp
= (xmlChar
*) xmlRealloc(buf
, size
);
4306 xmlErrMemory(ctxt
, NULL
);
4313 if (len
> maxLength
) {
4314 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "Public ID");
4322 if (ctxt
->instate
== XML_PARSER_EOF
) {
4327 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_FINISHED
, NULL
);
4331 ctxt
->instate
= oldstate
;
4335 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt
, int partial
);
4338 * used for the test in the inner loop of the char data testing
4340 static const unsigned char test_char_data
[256] = {
4341 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4342 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4343 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4344 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4345 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4346 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4347 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4348 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4349 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4350 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4351 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4352 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4353 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4354 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4355 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4356 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4357 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4358 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4359 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4360 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4361 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4362 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4363 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4364 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4365 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4366 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4367 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4368 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4369 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4370 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4371 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4372 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4376 * xmlParseCharDataInternal:
4377 * @ctxt: an XML parser context
4378 * @partial: buffer may contain partial UTF-8 sequences
4380 * Parse character data. Always makes progress if the first char isn't
4383 * The right angle bracket (>) may be represented using the string "&gt;",
4384 * and must, for compatibility, be escaped using "&gt;" or a character
4385 * reference when it appears in the string "]]>" in content, when that
4386 * string is not marking the end of a CDATA section.
4388 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4391 xmlParseCharDataInternal(xmlParserCtxtPtr ctxt
, int partial
) {
4394 int line
= ctxt
->input
->line
;
4395 int col
= ctxt
->input
->col
;
4400 * Accelerated common case where input doesn't need to be
4401 * modified before passing it to the handler.
4403 in
= ctxt
->input
->cur
;
4406 while (*in
== 0x20) { in
++; ctxt
->input
->col
++; }
4409 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4411 } while (*in
== 0xA);
4412 goto get_more_space
;
4415 nbchar
= in
- ctxt
->input
->cur
;
4417 const xmlChar
*tmp
= ctxt
->input
->cur
;
4418 ctxt
->input
->cur
= in
;
4420 if ((ctxt
->sax
!= NULL
) &&
4421 (ctxt
->sax
->ignorableWhitespace
!=
4422 ctxt
->sax
->characters
)) {
4423 if (areBlanks(ctxt
, tmp
, nbchar
, 1)) {
4424 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4425 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
4428 if (ctxt
->sax
->characters
!= NULL
)
4429 ctxt
->sax
->characters(ctxt
->userData
,
4431 if (*ctxt
->space
== -1)
4434 } else if ((ctxt
->sax
!= NULL
) &&
4435 (ctxt
->sax
->characters
!= NULL
)) {
4436 ctxt
->sax
->characters(ctxt
->userData
,
4444 ccol
= ctxt
->input
->col
;
4445 while (test_char_data
[*in
]) {
4449 ctxt
->input
->col
= ccol
;
4452 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4454 } while (*in
== 0xA);
4458 if ((in
[1] == ']') && (in
[2] == '>')) {
4459 xmlFatalErr(ctxt
, XML_ERR_MISPLACED_CDATA_END
, NULL
);
4460 if (ctxt
->instate
!= XML_PARSER_EOF
)
4461 ctxt
->input
->cur
= in
+ 1;
4468 nbchar
= in
- ctxt
->input
->cur
;
4470 if ((ctxt
->sax
!= NULL
) &&
4471 (ctxt
->sax
->ignorableWhitespace
!=
4472 ctxt
->sax
->characters
) &&
4473 (IS_BLANK_CH(*ctxt
->input
->cur
))) {
4474 const xmlChar
*tmp
= ctxt
->input
->cur
;
4475 ctxt
->input
->cur
= in
;
4477 if (areBlanks(ctxt
, tmp
, nbchar
, 0)) {
4478 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4479 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
4482 if (ctxt
->sax
->characters
!= NULL
)
4483 ctxt
->sax
->characters(ctxt
->userData
,
4485 if (*ctxt
->space
== -1)
4488 line
= ctxt
->input
->line
;
4489 col
= ctxt
->input
->col
;
4490 } else if (ctxt
->sax
!= NULL
) {
4491 if (ctxt
->sax
->characters
!= NULL
)
4492 ctxt
->sax
->characters(ctxt
->userData
,
4493 ctxt
->input
->cur
, nbchar
);
4494 line
= ctxt
->input
->line
;
4495 col
= ctxt
->input
->col
;
4498 ctxt
->input
->cur
= in
;
4502 ctxt
->input
->cur
= in
;
4504 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4505 continue; /* while */
4517 if (ctxt
->instate
== XML_PARSER_EOF
)
4519 in
= ctxt
->input
->cur
;
4520 } while (((*in
>= 0x20) && (*in
<= 0x7F)) ||
4521 (*in
== 0x09) || (*in
== 0x0a));
4522 ctxt
->input
->line
= line
;
4523 ctxt
->input
->col
= col
;
4524 xmlParseCharDataComplex(ctxt
, partial
);
4528 * xmlParseCharDataComplex:
4529 * @ctxt: an XML parser context
4530 * @partial: buffer may contain partial UTF-8 sequences
4532 * Always makes progress if the first char isn't '<' or '&'.
4534 * Parse a CharData section. This is the fallback function
4535 * of xmlParseCharData() when the parsing requires handling
4536 * of non-ASCII characters.
4539 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt
, int partial
) {
4540 xmlChar buf
[XML_PARSER_BIG_BUFFER_SIZE
+ 5];
4545 while ((cur
!= '<') && /* checked */
4547 (IS_CHAR(cur
))) /* test also done in xmlCurrentChar() */ {
4548 if ((cur
== ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4549 xmlFatalErr(ctxt
, XML_ERR_MISPLACED_CDATA_END
, NULL
);
4551 COPY_BUF(l
,buf
,nbchar
,cur
);
4552 /* move current position before possible calling of ctxt->sax->characters */
4554 if (nbchar
>= XML_PARSER_BIG_BUFFER_SIZE
) {
4558 * OK the segment is to be consumed as chars.
4560 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
4561 if (areBlanks(ctxt
, buf
, nbchar
, 0)) {
4562 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4563 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
4566 if (ctxt
->sax
->characters
!= NULL
)
4567 ctxt
->sax
->characters(ctxt
->userData
, buf
, nbchar
);
4568 if ((ctxt
->sax
->characters
!=
4569 ctxt
->sax
->ignorableWhitespace
) &&
4570 (*ctxt
->space
== -1))
4575 /* something really bad happened in the SAX callback */
4576 if (ctxt
->instate
!= XML_PARSER_CONTENT
)
4582 if (ctxt
->instate
== XML_PARSER_EOF
)
4587 * OK the segment is to be consumed as chars.
4589 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
4590 if (areBlanks(ctxt
, buf
, nbchar
, 0)) {
4591 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4592 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
, buf
, nbchar
);
4594 if (ctxt
->sax
->characters
!= NULL
)
4595 ctxt
->sax
->characters(ctxt
->userData
, buf
, nbchar
);
4596 if ((ctxt
->sax
->characters
!= ctxt
->sax
->ignorableWhitespace
) &&
4597 (*ctxt
->space
== -1))
4605 * - XML_PARSER_EOF or memory error. This is checked above.
4606 * - An actual 0 character.
4608 * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4610 if (ctxt
->input
->cur
< ctxt
->input
->end
) {
4611 if ((cur
== 0) && (CUR
!= 0)) {
4613 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4614 "Incomplete UTF-8 sequence starting with %02X\n", CUR
);
4617 } else if ((cur
!= '<') && (cur
!= '&')) {
4618 /* Generate the error and skip the offending character */
4619 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4620 "PCDATA invalid Char value %d\n", cur
);
4628 * @ctxt: an XML parser context
4631 * DEPRECATED: Internal function, don't use.
4634 xmlParseCharData(xmlParserCtxtPtr ctxt
, ATTRIBUTE_UNUSED
int cdata
) {
4635 xmlParseCharDataInternal(ctxt
, 0);
4639 * xmlParseExternalID:
4640 * @ctxt: an XML parser context
4641 * @publicID: a xmlChar** receiving PubidLiteral
4642 * @strict: indicate whether we should restrict parsing to only
4643 * production [75], see NOTE below
4645 * DEPRECATED: Internal function, don't use.
4647 * Parse an External ID or a Public ID
4649 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4650 * 'PUBLIC' S PubidLiteral S SystemLiteral
4652 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4653 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4655 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4657 * Returns the SystemLiteral. In the 'PUBLIC' case publicID also
4658 * receives the PubidLiteral; if strict is off
4659 * it is possible to return NULL and have publicID set.
4663 xmlParseExternalID(xmlParserCtxtPtr ctxt
, xmlChar
**publicID
, int strict
) {
4664 xmlChar
*URI
= NULL
;
4667 if (CMP6(CUR_PTR
, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4669 if (SKIP_BLANKS
== 0) {
4670 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4671 "Space required after 'SYSTEM'\n");
4673 URI
= xmlParseSystemLiteral(ctxt
);
4675 xmlFatalErr(ctxt
, XML_ERR_URI_REQUIRED
, NULL
);
4677 } else if (CMP6(CUR_PTR
, 'P', 'U', 'B', 'L', 'I', 'C')) {
4679 if (SKIP_BLANKS
== 0) {
4680 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4681 "Space required after 'PUBLIC'\n");
4683 *publicID
= xmlParsePubidLiteral(ctxt
);
4684 if (*publicID
== NULL
) {
4685 xmlFatalErr(ctxt
, XML_ERR_PUBID_REQUIRED
, NULL
);
4689 * We don't handle [83] so "S SystemLiteral" is required.
4691 if (SKIP_BLANKS
== 0) {
4692 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4693 "Space required after the Public Identifier\n");
4697 * We handle [83] so we return immediately, if
4698 * "S SystemLiteral" is not detected. We skip blanks if no
4699 * system literal was found, but this is harmless since we must
4700 * be at the end of a NotationDecl.
4702 if (SKIP_BLANKS
== 0) return(NULL
);
4703 if ((CUR
!= '\'') && (CUR
!= '"')) return(NULL
);
4705 URI
= xmlParseSystemLiteral(ctxt
);
4707 xmlFatalErr(ctxt
, XML_ERR_URI_REQUIRED
, NULL
);
4714 * xmlParseCommentComplex:
4715 * @ctxt: an XML parser context
4716 * @buf: the already parsed part of the buffer
4717 * @len: number of bytes in the buffer
4718 * @size: allocated size of the buffer
4720 * Skip an XML (SGML) comment <!-- .... -->
4721 * The spec says that "For compatibility, the string "--" (double-hyphen)
4722 * must not occur within comments. "
4723 * This is the slow routine in case the accelerator for ascii didn't work
4725 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4728 xmlParseCommentComplex(xmlParserCtxtPtr ctxt
, xmlChar
*buf
,
4729 size_t len
, size_t size
) {
4733 size_t maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
4734 XML_MAX_HUGE_LENGTH
:
4735 XML_MAX_TEXT_LENGTH
;
4738 inputid
= ctxt
->input
->id
;
4742 size
= XML_PARSER_BUFFER_SIZE
;
4743 buf
= (xmlChar
*) xmlMallocAtomic(size
);
4745 xmlErrMemory(ctxt
, NULL
);
4751 goto not_terminated
;
4753 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4754 "xmlParseComment: invalid xmlChar value %d\n",
4762 goto not_terminated
;
4764 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4765 "xmlParseComment: invalid xmlChar value %d\n",
4773 goto not_terminated
;
4774 while (IS_CHAR(cur
) && /* checked */
4776 (r
!= '-') || (q
!= '-'))) {
4777 if ((r
== '-') && (q
== '-')) {
4778 xmlFatalErr(ctxt
, XML_ERR_HYPHEN_IN_COMMENT
, NULL
);
4780 if (len
+ 5 >= size
) {
4784 new_size
= size
* 2;
4785 new_buf
= (xmlChar
*) xmlRealloc(buf
, new_size
);
4786 if (new_buf
== NULL
) {
4788 xmlErrMemory(ctxt
, NULL
);
4794 COPY_BUF(ql
,buf
,len
,q
);
4795 if (len
> maxLength
) {
4796 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4797 "Comment too big found", NULL
);
4812 if (ctxt
->instate
== XML_PARSER_EOF
) {
4817 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4818 "Comment not terminated \n<!--%.50s\n", buf
);
4819 } else if (!IS_CHAR(cur
)) {
4820 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4821 "xmlParseComment: invalid xmlChar value %d\n",
4824 if (inputid
!= ctxt
->input
->id
) {
4825 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
4826 "Comment doesn't start and stop in the same"
4830 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->comment
!= NULL
) &&
4831 (!ctxt
->disableSAX
))
4832 ctxt
->sax
->comment(ctxt
->userData
, buf
);
4837 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4838 "Comment not terminated\n", NULL
);
4845 * @ctxt: an XML parser context
4847 * DEPRECATED: Internal function, don't use.
4849 * Parse an XML (SGML) comment. Always consumes '<!'.
4851 * The spec says that "For compatibility, the string "--" (double-hyphen)
4852 * must not occur within comments. "
4854 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4857 xmlParseComment(xmlParserCtxtPtr ctxt
) {
4858 xmlChar
*buf
= NULL
;
4859 size_t size
= XML_PARSER_BUFFER_SIZE
;
4861 size_t maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
4862 XML_MAX_HUGE_LENGTH
:
4863 XML_MAX_TEXT_LENGTH
;
4864 xmlParserInputState state
;
4871 * Check that there is a comment right here.
4873 if ((RAW
!= '<') || (NXT(1) != '!'))
4876 if ((RAW
!= '-') || (NXT(1) != '-'))
4878 state
= ctxt
->instate
;
4879 ctxt
->instate
= XML_PARSER_COMMENT
;
4880 inputid
= ctxt
->input
->id
;
4885 * Accelerated common case where input doesn't need to be
4886 * modified before passing it to the handler.
4888 in
= ctxt
->input
->cur
;
4892 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4894 } while (*in
== 0xA);
4897 ccol
= ctxt
->input
->col
;
4898 while (((*in
> '-') && (*in
<= 0x7F)) ||
4899 ((*in
>= 0x20) && (*in
< '-')) ||
4904 ctxt
->input
->col
= ccol
;
4907 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4909 } while (*in
== 0xA);
4912 nbchar
= in
- ctxt
->input
->cur
;
4914 * save current set of data
4917 if ((ctxt
->sax
!= NULL
) &&
4918 (ctxt
->sax
->comment
!= NULL
)) {
4920 if ((*in
== '-') && (in
[1] == '-'))
4923 size
= XML_PARSER_BUFFER_SIZE
+ nbchar
;
4924 buf
= (xmlChar
*) xmlMallocAtomic(size
);
4926 xmlErrMemory(ctxt
, NULL
);
4927 ctxt
->instate
= state
;
4931 } else if (len
+ nbchar
+ 1 >= size
) {
4933 size
+= len
+ nbchar
+ XML_PARSER_BUFFER_SIZE
;
4934 new_buf
= (xmlChar
*) xmlRealloc(buf
, size
);
4935 if (new_buf
== NULL
) {
4937 xmlErrMemory(ctxt
, NULL
);
4938 ctxt
->instate
= state
;
4943 memcpy(&buf
[len
], ctxt
->input
->cur
, nbchar
);
4948 if (len
> maxLength
) {
4949 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4950 "Comment too big found", NULL
);
4954 ctxt
->input
->cur
= in
;
4957 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4962 ctxt
->input
->cur
= in
;
4964 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4971 if (ctxt
->instate
== XML_PARSER_EOF
) {
4975 in
= ctxt
->input
->cur
;
4979 if (ctxt
->input
->id
!= inputid
) {
4980 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
4981 "comment doesn't start and stop in the"
4985 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->comment
!= NULL
) &&
4986 (!ctxt
->disableSAX
)) {
4988 ctxt
->sax
->comment(ctxt
->userData
, buf
);
4990 ctxt
->sax
->comment(ctxt
->userData
, BAD_CAST
"");
4994 if (ctxt
->instate
!= XML_PARSER_EOF
)
4995 ctxt
->instate
= state
;
4999 xmlFatalErrMsgStr(ctxt
, XML_ERR_HYPHEN_IN_COMMENT
,
5000 "Double hyphen within comment: "
5004 xmlFatalErrMsgStr(ctxt
, XML_ERR_HYPHEN_IN_COMMENT
,
5005 "Double hyphen within comment\n", NULL
);
5006 if (ctxt
->instate
== XML_PARSER_EOF
) {
5017 } while (((*in
>= 0x20) && (*in
<= 0x7F)) || (*in
== 0x09) || (*in
== 0x0a));
5018 xmlParseCommentComplex(ctxt
, buf
, len
, size
);
5019 ctxt
->instate
= state
;
5026 * @ctxt: an XML parser context
5028 * DEPRECATED: Internal function, don't use.
5030 * parse the name of a PI
5032 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5034 * Returns the PITarget name or NULL
5038 xmlParsePITarget(xmlParserCtxtPtr ctxt
) {
5039 const xmlChar
*name
;
5041 name
= xmlParseName(ctxt
);
5042 if ((name
!= NULL
) &&
5043 ((name
[0] == 'x') || (name
[0] == 'X')) &&
5044 ((name
[1] == 'm') || (name
[1] == 'M')) &&
5045 ((name
[2] == 'l') || (name
[2] == 'L'))) {
5047 if ((name
[0] == 'x') && (name
[1] == 'm') &&
5048 (name
[2] == 'l') && (name
[3] == 0)) {
5049 xmlFatalErrMsg(ctxt
, XML_ERR_RESERVED_XML_NAME
,
5050 "XML declaration allowed only at the start of the document\n");
5052 } else if (name
[3] == 0) {
5053 xmlFatalErr(ctxt
, XML_ERR_RESERVED_XML_NAME
, NULL
);
5057 if (xmlW3CPIs
[i
] == NULL
) break;
5058 if (xmlStrEqual(name
, (const xmlChar
*)xmlW3CPIs
[i
]))
5061 xmlWarningMsg(ctxt
, XML_ERR_RESERVED_XML_NAME
,
5062 "xmlParsePITarget: invalid name prefix 'xml'\n",
5065 if ((name
!= NULL
) && (xmlStrchr(name
, ':') != NULL
)) {
5066 xmlNsErr(ctxt
, XML_NS_ERR_COLON
,
5067 "colons are forbidden from PI names '%s'\n", name
, NULL
, NULL
);
5072 #ifdef LIBXML_CATALOG_ENABLED
5074 * xmlParseCatalogPI:
5075 * @ctxt: an XML parser context
5076 * @catalog: the PI value string
5078 * parse an XML Catalog Processing Instruction.
5080 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5082 * Occurs only if allowed by the user and if happening in the Misc
5083 * part of the document before any doctype information
5084 * This will add the given catalog to the parsing context in order
5085 * to be used if there is a resolution need further down in the document
5089 xmlParseCatalogPI(xmlParserCtxtPtr ctxt
, const xmlChar
*catalog
) {
5090 xmlChar
*URL
= NULL
;
5091 const xmlChar
*tmp
, *base
;
5095 while (IS_BLANK_CH(*tmp
)) tmp
++;
5096 if (xmlStrncmp(tmp
, BAD_CAST
"catalog", 7))
5099 while (IS_BLANK_CH(*tmp
)) tmp
++;
5104 while (IS_BLANK_CH(*tmp
)) tmp
++;
5106 if ((marker
!= '\'') && (marker
!= '"'))
5110 while ((*tmp
!= 0) && (*tmp
!= marker
)) tmp
++;
5113 URL
= xmlStrndup(base
, tmp
- base
);
5115 while (IS_BLANK_CH(*tmp
)) tmp
++;
5120 ctxt
->catalogs
= xmlCatalogAddLocal(ctxt
->catalogs
, URL
);
5126 xmlWarningMsg(ctxt
, XML_WAR_CATALOG_PI
,
5127 "Catalog PI syntax error: %s\n",
5136 * @ctxt: an XML parser context
5138 * DEPRECATED: Internal function, don't use.
5140 * parse an XML Processing Instruction.
5142 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5144 * The processing is transferred to SAX once parsed.
5148 xmlParsePI(xmlParserCtxtPtr ctxt
) {
5149 xmlChar
*buf
= NULL
;
5151 size_t size
= XML_PARSER_BUFFER_SIZE
;
5152 size_t maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
5153 XML_MAX_HUGE_LENGTH
:
5154 XML_MAX_TEXT_LENGTH
;
5156 const xmlChar
*target
;
5157 xmlParserInputState state
;
5159 if ((RAW
== '<') && (NXT(1) == '?')) {
5160 int inputid
= ctxt
->input
->id
;
5161 state
= ctxt
->instate
;
5162 ctxt
->instate
= XML_PARSER_PI
;
5164 * this is a Processing Instruction.
5169 * Parse the target name and check for special support like
5172 target
= xmlParsePITarget(ctxt
);
5173 if (target
!= NULL
) {
5174 if ((RAW
== '?') && (NXT(1) == '>')) {
5175 if (inputid
!= ctxt
->input
->id
) {
5176 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5177 "PI declaration doesn't start and stop in"
5178 " the same entity\n");
5185 if ((ctxt
->sax
) && (!ctxt
->disableSAX
) &&
5186 (ctxt
->sax
->processingInstruction
!= NULL
))
5187 ctxt
->sax
->processingInstruction(ctxt
->userData
,
5189 if (ctxt
->instate
!= XML_PARSER_EOF
)
5190 ctxt
->instate
= state
;
5193 buf
= (xmlChar
*) xmlMallocAtomic(size
);
5195 xmlErrMemory(ctxt
, NULL
);
5196 ctxt
->instate
= state
;
5199 if (SKIP_BLANKS
== 0) {
5200 xmlFatalErrMsgStr(ctxt
, XML_ERR_SPACE_REQUIRED
,
5201 "ParsePI: PI %s space expected\n", target
);
5204 while (IS_CHAR(cur
) && /* checked */
5205 ((cur
!= '?') || (NXT(1) != '>'))) {
5206 if (len
+ 5 >= size
) {
5208 size_t new_size
= size
* 2;
5209 tmp
= (xmlChar
*) xmlRealloc(buf
, new_size
);
5211 xmlErrMemory(ctxt
, NULL
);
5213 ctxt
->instate
= state
;
5219 COPY_BUF(l
,buf
,len
,cur
);
5220 if (len
> maxLength
) {
5221 xmlFatalErrMsgStr(ctxt
, XML_ERR_PI_NOT_FINISHED
,
5222 "PI %s too big found", target
);
5224 ctxt
->instate
= state
;
5231 if (ctxt
->instate
== XML_PARSER_EOF
) {
5236 xmlFatalErrMsgStr(ctxt
, XML_ERR_PI_NOT_FINISHED
,
5237 "ParsePI: PI %s never end ...\n", target
);
5239 if (inputid
!= ctxt
->input
->id
) {
5240 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5241 "PI declaration doesn't start and stop in"
5242 " the same entity\n");
5246 #ifdef LIBXML_CATALOG_ENABLED
5247 if (((state
== XML_PARSER_MISC
) ||
5248 (state
== XML_PARSER_START
)) &&
5249 (xmlStrEqual(target
, XML_CATALOG_PI
))) {
5250 xmlCatalogAllow allow
= xmlCatalogGetDefaults();
5251 if ((allow
== XML_CATA_ALLOW_DOCUMENT
) ||
5252 (allow
== XML_CATA_ALLOW_ALL
))
5253 xmlParseCatalogPI(ctxt
, buf
);
5261 if ((ctxt
->sax
) && (!ctxt
->disableSAX
) &&
5262 (ctxt
->sax
->processingInstruction
!= NULL
))
5263 ctxt
->sax
->processingInstruction(ctxt
->userData
,
5268 xmlFatalErr(ctxt
, XML_ERR_PI_NOT_STARTED
, NULL
);
5270 if (ctxt
->instate
!= XML_PARSER_EOF
)
5271 ctxt
->instate
= state
;
5276 * xmlParseNotationDecl:
5277 * @ctxt: an XML parser context
5279 * DEPRECATED: Internal function, don't use.
5281 * Parse a notation declaration. Always consumes '<!'.
5283 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5285 * Hence there are actually 3 choices:
5286 * 'PUBLIC' S PubidLiteral
5287 * 'PUBLIC' S PubidLiteral S SystemLiteral
5288 * and 'SYSTEM' S SystemLiteral
5290 * See the NOTE on xmlParseExternalID().
5294 xmlParseNotationDecl(xmlParserCtxtPtr ctxt
) {
5295 const xmlChar
*name
;
5299 if ((CUR
!= '<') || (NXT(1) != '!'))
5303 if (CMP8(CUR_PTR
, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5304 int inputid
= ctxt
->input
->id
;
5306 if (SKIP_BLANKS
== 0) {
5307 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5308 "Space required after '<!NOTATION'\n");
5312 name
= xmlParseName(ctxt
);
5314 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_STARTED
, NULL
);
5317 if (xmlStrchr(name
, ':') != NULL
) {
5318 xmlNsErr(ctxt
, XML_NS_ERR_COLON
,
5319 "colons are forbidden from notation names '%s'\n",
5322 if (SKIP_BLANKS
== 0) {
5323 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5324 "Space required after the NOTATION name'\n");
5331 Systemid
= xmlParseExternalID(ctxt
, &Pubid
, 0);
5335 if (inputid
!= ctxt
->input
->id
) {
5336 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5337 "Notation declaration doesn't start and stop"
5338 " in the same entity\n");
5341 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
5342 (ctxt
->sax
->notationDecl
!= NULL
))
5343 ctxt
->sax
->notationDecl(ctxt
->userData
, name
, Pubid
, Systemid
);
5345 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_FINISHED
, NULL
);
5347 if (Systemid
!= NULL
) xmlFree(Systemid
);
5348 if (Pubid
!= NULL
) xmlFree(Pubid
);
5353 * xmlParseEntityDecl:
5354 * @ctxt: an XML parser context
5356 * DEPRECATED: Internal function, don't use.
5358 * Parse an entity declaration. Always consumes '<!'.
5360 * [70] EntityDecl ::= GEDecl | PEDecl
5362 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5364 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5366 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5368 * [74] PEDef ::= EntityValue | ExternalID
5370 * [76] NDataDecl ::= S 'NDATA' S Name
5372 * [ VC: Notation Declared ]
5373 * The Name must match the declared name of a notation.
5377 xmlParseEntityDecl(xmlParserCtxtPtr ctxt
) {
5378 const xmlChar
*name
= NULL
;
5379 xmlChar
*value
= NULL
;
5380 xmlChar
*URI
= NULL
, *literal
= NULL
;
5381 const xmlChar
*ndata
= NULL
;
5382 int isParameter
= 0;
5383 xmlChar
*orig
= NULL
;
5385 if ((CUR
!= '<') || (NXT(1) != '!'))
5389 /* GROW; done in the caller */
5390 if (CMP6(CUR_PTR
, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5391 int inputid
= ctxt
->input
->id
;
5393 if (SKIP_BLANKS
== 0) {
5394 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5395 "Space required after '<!ENTITY'\n");
5400 if (SKIP_BLANKS
== 0) {
5401 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5402 "Space required after '%%'\n");
5407 name
= xmlParseName(ctxt
);
5409 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5410 "xmlParseEntityDecl: no name\n");
5413 if (xmlStrchr(name
, ':') != NULL
) {
5414 xmlNsErr(ctxt
, XML_NS_ERR_COLON
,
5415 "colons are forbidden from entities names '%s'\n",
5418 if (SKIP_BLANKS
== 0) {
5419 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5420 "Space required after the entity name\n");
5423 ctxt
->instate
= XML_PARSER_ENTITY_DECL
;
5425 * handle the various cases of definitions...
5428 if ((RAW
== '"') || (RAW
== '\'')) {
5429 value
= xmlParseEntityValue(ctxt
, &orig
);
5431 if ((ctxt
->sax
!= NULL
) &&
5432 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
5433 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5434 XML_INTERNAL_PARAMETER_ENTITY
,
5438 URI
= xmlParseExternalID(ctxt
, &literal
, 1);
5439 if ((URI
== NULL
) && (literal
== NULL
)) {
5440 xmlFatalErr(ctxt
, XML_ERR_VALUE_REQUIRED
, NULL
);
5445 uri
= xmlParseURI((const char *) URI
);
5447 xmlErrMsgStr(ctxt
, XML_ERR_INVALID_URI
,
5448 "Invalid URI: %s\n", URI
);
5450 * This really ought to be a well-formedness error
5451 * but the XML Core WG decided otherwise, cf. issue
5452 * E26 of the XML errata.
5455 if (uri
->fragment
!= NULL
) {
5457 * Okay this is foolish to block those but not
5460 xmlFatalErr(ctxt
, XML_ERR_URI_FRAGMENT
, NULL
);
5462 if ((ctxt
->sax
!= NULL
) &&
5463 (!ctxt
->disableSAX
) &&
5464 (ctxt
->sax
->entityDecl
!= NULL
))
5465 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5466 XML_EXTERNAL_PARAMETER_ENTITY
,
5467 literal
, URI
, NULL
);
5474 if ((RAW
== '"') || (RAW
== '\'')) {
5475 value
= xmlParseEntityValue(ctxt
, &orig
);
5476 if ((ctxt
->sax
!= NULL
) &&
5477 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
5478 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5479 XML_INTERNAL_GENERAL_ENTITY
,
5482 * For expat compatibility in SAX mode.
5484 if ((ctxt
->myDoc
== NULL
) ||
5485 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
))) {
5486 if (ctxt
->myDoc
== NULL
) {
5487 ctxt
->myDoc
= xmlNewDoc(SAX_COMPAT_MODE
);
5488 if (ctxt
->myDoc
== NULL
) {
5489 xmlErrMemory(ctxt
, "New Doc failed");
5492 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
5494 if (ctxt
->myDoc
->intSubset
== NULL
)
5495 ctxt
->myDoc
->intSubset
= xmlNewDtd(ctxt
->myDoc
,
5496 BAD_CAST
"fake", NULL
, NULL
);
5498 xmlSAX2EntityDecl(ctxt
, name
, XML_INTERNAL_GENERAL_ENTITY
,
5502 URI
= xmlParseExternalID(ctxt
, &literal
, 1);
5503 if ((URI
== NULL
) && (literal
== NULL
)) {
5504 xmlFatalErr(ctxt
, XML_ERR_VALUE_REQUIRED
, NULL
);
5509 uri
= xmlParseURI((const char *)URI
);
5511 xmlErrMsgStr(ctxt
, XML_ERR_INVALID_URI
,
5512 "Invalid URI: %s\n", URI
);
5514 * This really ought to be a well-formedness error
5515 * but the XML Core WG decided otherwise, cf. issue
5516 * E26 of the XML errata.
5519 if (uri
->fragment
!= NULL
) {
5521 * Okay this is foolish to block those but not
5524 xmlFatalErr(ctxt
, XML_ERR_URI_FRAGMENT
, NULL
);
5529 if ((RAW
!= '>') && (SKIP_BLANKS
== 0)) {
5530 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5531 "Space required before 'NDATA'\n");
5533 if (CMP5(CUR_PTR
, 'N', 'D', 'A', 'T', 'A')) {
5535 if (SKIP_BLANKS
== 0) {
5536 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5537 "Space required after 'NDATA'\n");
5539 ndata
= xmlParseName(ctxt
);
5540 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
5541 (ctxt
->sax
->unparsedEntityDecl
!= NULL
))
5542 ctxt
->sax
->unparsedEntityDecl(ctxt
->userData
, name
,
5543 literal
, URI
, ndata
);
5545 if ((ctxt
->sax
!= NULL
) &&
5546 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
5547 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5548 XML_EXTERNAL_GENERAL_PARSED_ENTITY
,
5549 literal
, URI
, NULL
);
5551 * For expat compatibility in SAX mode.
5552 * assuming the entity replacement was asked for
5554 if ((ctxt
->replaceEntities
!= 0) &&
5555 ((ctxt
->myDoc
== NULL
) ||
5556 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
)))) {
5557 if (ctxt
->myDoc
== NULL
) {
5558 ctxt
->myDoc
= xmlNewDoc(SAX_COMPAT_MODE
);
5559 if (ctxt
->myDoc
== NULL
) {
5560 xmlErrMemory(ctxt
, "New Doc failed");
5563 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
5566 if (ctxt
->myDoc
->intSubset
== NULL
)
5567 ctxt
->myDoc
->intSubset
= xmlNewDtd(ctxt
->myDoc
,
5568 BAD_CAST
"fake", NULL
, NULL
);
5569 xmlSAX2EntityDecl(ctxt
, name
,
5570 XML_EXTERNAL_GENERAL_PARSED_ENTITY
,
5571 literal
, URI
, NULL
);
5576 if (ctxt
->instate
== XML_PARSER_EOF
)
5580 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_NOT_FINISHED
,
5581 "xmlParseEntityDecl: entity %s not terminated\n", name
);
5582 xmlHaltParser(ctxt
);
5584 if (inputid
!= ctxt
->input
->id
) {
5585 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5586 "Entity declaration doesn't start and stop in"
5587 " the same entity\n");
5593 * Ugly mechanism to save the raw entity value.
5595 xmlEntityPtr cur
= NULL
;
5598 if ((ctxt
->sax
!= NULL
) &&
5599 (ctxt
->sax
->getParameterEntity
!= NULL
))
5600 cur
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
5602 if ((ctxt
->sax
!= NULL
) &&
5603 (ctxt
->sax
->getEntity
!= NULL
))
5604 cur
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
5605 if ((cur
== NULL
) && (ctxt
->userData
==ctxt
)) {
5606 cur
= xmlSAX2GetEntity(ctxt
, name
);
5609 if ((cur
!= NULL
) && (cur
->orig
== NULL
)) {
5616 if (value
!= NULL
) xmlFree(value
);
5617 if (URI
!= NULL
) xmlFree(URI
);
5618 if (literal
!= NULL
) xmlFree(literal
);
5619 if (orig
!= NULL
) xmlFree(orig
);
5624 * xmlParseDefaultDecl:
5625 * @ctxt: an XML parser context
5626 * @value: Receive a possible fixed default value for the attribute
5628 * DEPRECATED: Internal function, don't use.
5630 * Parse an attribute default declaration
5632 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5634 * [ VC: Required Attribute ]
5635 * if the default declaration is the keyword #REQUIRED, then the
5636 * attribute must be specified for all elements of the type in the
5637 * attribute-list declaration.
5639 * [ VC: Attribute Default Legal ]
5640 * The declared default value must meet the lexical constraints of
5641 * the declared attribute type c.f. xmlValidateAttributeDecl()
5643 * [ VC: Fixed Attribute Default ]
5644 * if an attribute has a default value declared with the #FIXED
5645 * keyword, instances of that attribute must match the default value.
5647 * [ WFC: No < in Attribute Values ]
5648 * handled in xmlParseAttValue()
5650 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5651 * or XML_ATTRIBUTE_FIXED.
5655 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt
, xmlChar
**value
) {
5660 if (CMP9(CUR_PTR
, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5662 return(XML_ATTRIBUTE_REQUIRED
);
5664 if (CMP8(CUR_PTR
, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5666 return(XML_ATTRIBUTE_IMPLIED
);
5668 val
= XML_ATTRIBUTE_NONE
;
5669 if (CMP6(CUR_PTR
, '#', 'F', 'I', 'X', 'E', 'D')) {
5671 val
= XML_ATTRIBUTE_FIXED
;
5672 if (SKIP_BLANKS
== 0) {
5673 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5674 "Space required after '#FIXED'\n");
5677 ret
= xmlParseAttValue(ctxt
);
5678 ctxt
->instate
= XML_PARSER_DTD
;
5680 xmlFatalErrMsg(ctxt
, (xmlParserErrors
)ctxt
->errNo
,
5681 "Attribute default value declaration error\n");
5688 * xmlParseNotationType:
5689 * @ctxt: an XML parser context
5691 * DEPRECATED: Internal function, don't use.
5693 * parse a Notation attribute type.
5695 * Note: the leading 'NOTATION' S part has already been parsed...
5697 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5699 * [ VC: Notation Attributes ]
5700 * Values of this type must match one of the notation names included
5701 * in the declaration; all notation names in the declaration must be declared.
5703 * Returns: the notation attribute tree built while parsing
5707 xmlParseNotationType(xmlParserCtxtPtr ctxt
) {
5708 const xmlChar
*name
;
5709 xmlEnumerationPtr ret
= NULL
, last
= NULL
, cur
, tmp
;
5712 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_STARTED
, NULL
);
5718 name
= xmlParseName(ctxt
);
5720 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5721 "Name expected in NOTATION declaration\n");
5722 xmlFreeEnumeration(ret
);
5726 while (tmp
!= NULL
) {
5727 if (xmlStrEqual(name
, tmp
->name
)) {
5728 xmlValidityError(ctxt
, XML_DTD_DUP_TOKEN
,
5729 "standalone: attribute notation value token %s duplicated\n",
5731 if (!xmlDictOwns(ctxt
->dict
, name
))
5732 xmlFree((xmlChar
*) name
);
5738 cur
= xmlCreateEnumeration(name
);
5740 xmlFreeEnumeration(ret
);
5743 if (last
== NULL
) ret
= last
= cur
;
5750 } while (RAW
== '|');
5752 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_FINISHED
, NULL
);
5753 xmlFreeEnumeration(ret
);
5761 * xmlParseEnumerationType:
5762 * @ctxt: an XML parser context
5764 * DEPRECATED: Internal function, don't use.
5766 * parse an Enumeration attribute type.
5768 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5770 * [ VC: Enumeration ]
5771 * Values of this type must match one of the Nmtoken tokens in
5774 * Returns: the enumeration attribute tree built while parsing
5778 xmlParseEnumerationType(xmlParserCtxtPtr ctxt
) {
5780 xmlEnumerationPtr ret
= NULL
, last
= NULL
, cur
, tmp
;
5783 xmlFatalErr(ctxt
, XML_ERR_ATTLIST_NOT_STARTED
, NULL
);
5789 name
= xmlParseNmtoken(ctxt
);
5791 xmlFatalErr(ctxt
, XML_ERR_NMTOKEN_REQUIRED
, NULL
);
5795 while (tmp
!= NULL
) {
5796 if (xmlStrEqual(name
, tmp
->name
)) {
5797 xmlValidityError(ctxt
, XML_DTD_DUP_TOKEN
,
5798 "standalone: attribute enumeration value token %s duplicated\n",
5800 if (!xmlDictOwns(ctxt
->dict
, name
))
5807 cur
= xmlCreateEnumeration(name
);
5808 if (!xmlDictOwns(ctxt
->dict
, name
))
5811 xmlFreeEnumeration(ret
);
5814 if (last
== NULL
) ret
= last
= cur
;
5821 } while (RAW
== '|');
5823 xmlFatalErr(ctxt
, XML_ERR_ATTLIST_NOT_FINISHED
, NULL
);
5831 * xmlParseEnumeratedType:
5832 * @ctxt: an XML parser context
5833 * @tree: the enumeration tree built while parsing
5835 * DEPRECATED: Internal function, don't use.
5837 * parse an Enumerated attribute type.
5839 * [57] EnumeratedType ::= NotationType | Enumeration
5841 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5844 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5848 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt
, xmlEnumerationPtr
*tree
) {
5849 if (CMP8(CUR_PTR
, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5851 if (SKIP_BLANKS
== 0) {
5852 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5853 "Space required after 'NOTATION'\n");
5856 *tree
= xmlParseNotationType(ctxt
);
5857 if (*tree
== NULL
) return(0);
5858 return(XML_ATTRIBUTE_NOTATION
);
5860 *tree
= xmlParseEnumerationType(ctxt
);
5861 if (*tree
== NULL
) return(0);
5862 return(XML_ATTRIBUTE_ENUMERATION
);
5866 * xmlParseAttributeType:
5867 * @ctxt: an XML parser context
5868 * @tree: the enumeration tree built while parsing
5870 * DEPRECATED: Internal function, don't use.
5872 * parse the attribute type of an attribute definition
5874 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5876 * [55] StringType ::= 'CDATA'
5878 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5879 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5881 * Validity constraints for attribute values syntax are checked in
5882 * xmlValidateAttributeValue()
5885 * Values of type ID must match the Name production. A name must not
5886 * appear more than once in an XML document as a value of this type;
5887 * i.e., ID values must uniquely identify the elements which bear them.
5889 * [ VC: One ID per Element Type ]
5890 * No element type may have more than one ID attribute specified.
5892 * [ VC: ID Attribute Default ]
5893 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5896 * Values of type IDREF must match the Name production, and values
5897 * of type IDREFS must match Names; each IDREF Name must match the value
5898 * of an ID attribute on some element in the XML document; i.e. IDREF
5899 * values must match the value of some ID attribute.
5901 * [ VC: Entity Name ]
5902 * Values of type ENTITY must match the Name production, values
5903 * of type ENTITIES must match Names; each Entity Name must match the
5904 * name of an unparsed entity declared in the DTD.
5906 * [ VC: Name Token ]
5907 * Values of type NMTOKEN must match the Nmtoken production; values
5908 * of type NMTOKENS must match Nmtokens.
5910 * Returns the attribute type
5913 xmlParseAttributeType(xmlParserCtxtPtr ctxt
, xmlEnumerationPtr
*tree
) {
5914 if (CMP5(CUR_PTR
, 'C', 'D', 'A', 'T', 'A')) {
5916 return(XML_ATTRIBUTE_CDATA
);
5917 } else if (CMP6(CUR_PTR
, 'I', 'D', 'R', 'E', 'F', 'S')) {
5919 return(XML_ATTRIBUTE_IDREFS
);
5920 } else if (CMP5(CUR_PTR
, 'I', 'D', 'R', 'E', 'F')) {
5922 return(XML_ATTRIBUTE_IDREF
);
5923 } else if ((RAW
== 'I') && (NXT(1) == 'D')) {
5925 return(XML_ATTRIBUTE_ID
);
5926 } else if (CMP6(CUR_PTR
, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5928 return(XML_ATTRIBUTE_ENTITY
);
5929 } else if (CMP8(CUR_PTR
, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5931 return(XML_ATTRIBUTE_ENTITIES
);
5932 } else if (CMP8(CUR_PTR
, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5934 return(XML_ATTRIBUTE_NMTOKENS
);
5935 } else if (CMP7(CUR_PTR
, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5937 return(XML_ATTRIBUTE_NMTOKEN
);
5939 return(xmlParseEnumeratedType(ctxt
, tree
));
5943 * xmlParseAttributeListDecl:
5944 * @ctxt: an XML parser context
5946 * DEPRECATED: Internal function, don't use.
5948 * Parse an attribute list declaration for an element. Always consumes '<!'.
5950 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5952 * [53] AttDef ::= S Name S AttType S DefaultDecl
5956 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt
) {
5957 const xmlChar
*elemName
;
5958 const xmlChar
*attrName
;
5959 xmlEnumerationPtr tree
;
5961 if ((CUR
!= '<') || (NXT(1) != '!'))
5965 if (CMP7(CUR_PTR
, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5966 int inputid
= ctxt
->input
->id
;
5969 if (SKIP_BLANKS
== 0) {
5970 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5971 "Space required after '<!ATTLIST'\n");
5973 elemName
= xmlParseName(ctxt
);
5974 if (elemName
== NULL
) {
5975 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5976 "ATTLIST: no name for Element\n");
5981 while ((RAW
!= '>') && (ctxt
->instate
!= XML_PARSER_EOF
)) {
5984 xmlChar
*defaultValue
= NULL
;
5988 attrName
= xmlParseName(ctxt
);
5989 if (attrName
== NULL
) {
5990 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5991 "ATTLIST: no name for Attribute\n");
5995 if (SKIP_BLANKS
== 0) {
5996 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5997 "Space required after the attribute name\n");
6001 type
= xmlParseAttributeType(ctxt
, &tree
);
6007 if (SKIP_BLANKS
== 0) {
6008 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6009 "Space required after the attribute type\n");
6011 xmlFreeEnumeration(tree
);
6015 def
= xmlParseDefaultDecl(ctxt
, &defaultValue
);
6017 if (defaultValue
!= NULL
)
6018 xmlFree(defaultValue
);
6020 xmlFreeEnumeration(tree
);
6023 if ((type
!= XML_ATTRIBUTE_CDATA
) && (defaultValue
!= NULL
))
6024 xmlAttrNormalizeSpace(defaultValue
, defaultValue
);
6028 if (SKIP_BLANKS
== 0) {
6029 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6030 "Space required after the attribute default value\n");
6031 if (defaultValue
!= NULL
)
6032 xmlFree(defaultValue
);
6034 xmlFreeEnumeration(tree
);
6038 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
6039 (ctxt
->sax
->attributeDecl
!= NULL
))
6040 ctxt
->sax
->attributeDecl(ctxt
->userData
, elemName
, attrName
,
6041 type
, def
, defaultValue
, tree
);
6042 else if (tree
!= NULL
)
6043 xmlFreeEnumeration(tree
);
6045 if ((ctxt
->sax2
) && (defaultValue
!= NULL
) &&
6046 (def
!= XML_ATTRIBUTE_IMPLIED
) &&
6047 (def
!= XML_ATTRIBUTE_REQUIRED
)) {
6048 xmlAddDefAttrs(ctxt
, elemName
, attrName
, defaultValue
);
6051 xmlAddSpecialAttr(ctxt
, elemName
, attrName
, type
);
6053 if (defaultValue
!= NULL
)
6054 xmlFree(defaultValue
);
6058 if (inputid
!= ctxt
->input
->id
) {
6059 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6060 "Attribute list declaration doesn't start and"
6061 " stop in the same entity\n");
6069 * xmlParseElementMixedContentDecl:
6070 * @ctxt: an XML parser context
6071 * @inputchk: the input used for the current entity, needed for boundary checks
6073 * DEPRECATED: Internal function, don't use.
6075 * parse the declaration for a Mixed Element content
6076 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6078 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6079 * '(' S? '#PCDATA' S? ')'
6081 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6083 * [ VC: No Duplicate Types ]
6084 * The same name must not appear more than once in a single
6085 * mixed-content declaration.
6087 * returns: the list of the xmlElementContentPtr describing the element choices
6089 xmlElementContentPtr
6090 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt
, int inputchk
) {
6091 xmlElementContentPtr ret
= NULL
, cur
= NULL
, n
;
6092 const xmlChar
*elem
= NULL
;
6095 if (CMP7(CUR_PTR
, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6099 if (ctxt
->input
->id
!= inputchk
) {
6100 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6101 "Element content declaration doesn't start and"
6102 " stop in the same entity\n");
6105 ret
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_PCDATA
);
6109 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6114 if ((RAW
== '(') || (RAW
== '|')) {
6115 ret
= cur
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_PCDATA
);
6116 if (ret
== NULL
) return(NULL
);
6118 while ((RAW
== '|') && (ctxt
->instate
!= XML_PARSER_EOF
)) {
6121 ret
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_OR
);
6123 xmlFreeDocElementContent(ctxt
->myDoc
, cur
);
6131 n
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_OR
);
6133 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6136 n
->c1
= xmlNewDocElementContent(ctxt
->myDoc
, elem
, XML_ELEMENT_CONTENT_ELEMENT
);
6145 elem
= xmlParseName(ctxt
);
6147 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
6148 "xmlParseElementMixedContentDecl : Name expected\n");
6149 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6155 if ((RAW
== ')') && (NXT(1) == '*')) {
6157 cur
->c2
= xmlNewDocElementContent(ctxt
->myDoc
, elem
,
6158 XML_ELEMENT_CONTENT_ELEMENT
);
6159 if (cur
->c2
!= NULL
)
6160 cur
->c2
->parent
= cur
;
6163 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6164 if (ctxt
->input
->id
!= inputchk
) {
6165 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6166 "Element content declaration doesn't start and"
6167 " stop in the same entity\n");
6171 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6172 xmlFatalErr(ctxt
, XML_ERR_MIXED_NOT_STARTED
, NULL
);
6177 xmlFatalErr(ctxt
, XML_ERR_PCDATA_REQUIRED
, NULL
);
6183 * xmlParseElementChildrenContentDeclPriv:
6184 * @ctxt: an XML parser context
6185 * @inputchk: the input used for the current entity, needed for boundary checks
6186 * @depth: the level of recursion
6188 * parse the declaration for an element children content model
6189 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6192 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6194 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6196 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6198 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6200 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6201 * TODO Parameter-entity replacement text must be properly nested
6202 * with parenthesized groups. That is to say, if either of the
6203 * opening or closing parentheses in a choice, seq, or Mixed
6204 * construct is contained in the replacement text for a parameter
6205 * entity, both must be contained in the same replacement text. For
6206 * interoperability, if a parameter-entity reference appears in a
6207 * choice, seq, or Mixed construct, its replacement text should not
6208 * be empty, and neither the first nor last non-blank character of
6209 * the replacement text should be a connector (| or ,).
6211 * Returns the tree of xmlElementContentPtr describing the element
6214 static xmlElementContentPtr
6215 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt
, int inputchk
,
6217 xmlElementContentPtr ret
= NULL
, cur
= NULL
, last
= NULL
, op
= NULL
;
6218 const xmlChar
*elem
;
6221 if (((depth
> 128) && ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
6223 xmlFatalErrMsgInt(ctxt
, XML_ERR_ELEMCONTENT_NOT_FINISHED
,
6224 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6231 int inputid
= ctxt
->input
->id
;
6233 /* Recurse on first child */
6236 cur
= ret
= xmlParseElementChildrenContentDeclPriv(ctxt
, inputid
,
6243 elem
= xmlParseName(ctxt
);
6245 xmlFatalErr(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
, NULL
);
6248 cur
= ret
= xmlNewDocElementContent(ctxt
->myDoc
, elem
, XML_ELEMENT_CONTENT_ELEMENT
);
6250 xmlErrMemory(ctxt
, NULL
);
6255 cur
->ocur
= XML_ELEMENT_CONTENT_OPT
;
6257 } else if (RAW
== '*') {
6258 cur
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6260 } else if (RAW
== '+') {
6261 cur
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
6264 cur
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6269 while ((RAW
!= ')') && (ctxt
->instate
!= XML_PARSER_EOF
)) {
6271 * Each loop we parse one separator and one element.
6274 if (type
== 0) type
= CUR
;
6277 * Detect "Name | Name , Name" error
6279 else if (type
!= CUR
) {
6280 xmlFatalErrMsgInt(ctxt
, XML_ERR_SEPARATOR_REQUIRED
,
6281 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6283 if ((last
!= NULL
) && (last
!= ret
))
6284 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6286 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6291 op
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_SEQ
);
6293 if ((last
!= NULL
) && (last
!= ret
))
6294 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6295 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6313 } else if (RAW
== '|') {
6314 if (type
== 0) type
= CUR
;
6317 * Detect "Name , Name | Name" error
6319 else if (type
!= CUR
) {
6320 xmlFatalErrMsgInt(ctxt
, XML_ERR_SEPARATOR_REQUIRED
,
6321 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6323 if ((last
!= NULL
) && (last
!= ret
))
6324 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6326 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6331 op
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_OR
);
6333 if ((last
!= NULL
) && (last
!= ret
))
6334 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6336 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6355 xmlFatalErr(ctxt
, XML_ERR_ELEMCONTENT_NOT_FINISHED
, NULL
);
6356 if ((last
!= NULL
) && (last
!= ret
))
6357 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
6359 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6366 int inputid
= ctxt
->input
->id
;
6367 /* Recurse on second child */
6370 last
= xmlParseElementChildrenContentDeclPriv(ctxt
, inputid
,
6374 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6379 elem
= xmlParseName(ctxt
);
6381 xmlFatalErr(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
, NULL
);
6383 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6386 last
= xmlNewDocElementContent(ctxt
->myDoc
, elem
, XML_ELEMENT_CONTENT_ELEMENT
);
6389 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6393 last
->ocur
= XML_ELEMENT_CONTENT_OPT
;
6395 } else if (RAW
== '*') {
6396 last
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6398 } else if (RAW
== '+') {
6399 last
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
6402 last
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6408 if ((cur
!= NULL
) && (last
!= NULL
)) {
6413 if (ctxt
->input
->id
!= inputchk
) {
6414 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6415 "Element content declaration doesn't start and stop in"
6416 " the same entity\n");
6421 if ((ret
->ocur
== XML_ELEMENT_CONTENT_PLUS
) ||
6422 (ret
->ocur
== XML_ELEMENT_CONTENT_MULT
))
6423 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6425 ret
->ocur
= XML_ELEMENT_CONTENT_OPT
;
6428 } else if (RAW
== '*') {
6430 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6433 * Some normalization:
6434 * (a | b* | c?)* == (a | b | c)*
6436 while ((cur
!= NULL
) && (cur
->type
== XML_ELEMENT_CONTENT_OR
)) {
6437 if ((cur
->c1
!= NULL
) &&
6438 ((cur
->c1
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6439 (cur
->c1
->ocur
== XML_ELEMENT_CONTENT_MULT
)))
6440 cur
->c1
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6441 if ((cur
->c2
!= NULL
) &&
6442 ((cur
->c2
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6443 (cur
->c2
->ocur
== XML_ELEMENT_CONTENT_MULT
)))
6444 cur
->c2
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6449 } else if (RAW
== '+') {
6453 if ((ret
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6454 (ret
->ocur
== XML_ELEMENT_CONTENT_MULT
))
6455 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6457 ret
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
6459 * Some normalization:
6460 * (a | b*)+ == (a | b)*
6461 * (a | b?)+ == (a | b)*
6463 while ((cur
!= NULL
) && (cur
->type
== XML_ELEMENT_CONTENT_OR
)) {
6464 if ((cur
->c1
!= NULL
) &&
6465 ((cur
->c1
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6466 (cur
->c1
->ocur
== XML_ELEMENT_CONTENT_MULT
))) {
6467 cur
->c1
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6470 if ((cur
->c2
!= NULL
) &&
6471 ((cur
->c2
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6472 (cur
->c2
->ocur
== XML_ELEMENT_CONTENT_MULT
))) {
6473 cur
->c2
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6479 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6487 * xmlParseElementChildrenContentDecl:
6488 * @ctxt: an XML parser context
6489 * @inputchk: the input used for the current entity, needed for boundary checks
6491 * DEPRECATED: Internal function, don't use.
6493 * parse the declaration for an element children content model
6494 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6496 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6498 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6500 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6502 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6504 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6505 * TODO Parameter-entity replacement text must be properly nested
6506 * with parenthesized groups. That is to say, if either of the
6507 * opening or closing parentheses in a choice, seq, or Mixed
6508 * construct is contained in the replacement text for a parameter
6509 * entity, both must be contained in the same replacement text. For
6510 * interoperability, if a parameter-entity reference appears in a
6511 * choice, seq, or Mixed construct, its replacement text should not
6512 * be empty, and neither the first nor last non-blank character of
6513 * the replacement text should be a connector (| or ,).
6515 * Returns the tree of xmlElementContentPtr describing the element
6518 xmlElementContentPtr
6519 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt
, int inputchk
) {
6520 /* stub left for API/ABI compat */
6521 return(xmlParseElementChildrenContentDeclPriv(ctxt
, inputchk
, 1));
6525 * xmlParseElementContentDecl:
6526 * @ctxt: an XML parser context
6527 * @name: the name of the element being defined.
6528 * @result: the Element Content pointer will be stored here if any
6530 * DEPRECATED: Internal function, don't use.
6532 * parse the declaration for an Element content either Mixed or Children,
6533 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6535 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6537 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6541 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt
, const xmlChar
*name
,
6542 xmlElementContentPtr
*result
) {
6544 xmlElementContentPtr tree
= NULL
;
6545 int inputid
= ctxt
->input
->id
;
6551 xmlFatalErrMsgStr(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
,
6552 "xmlParseElementContentDecl : %s '(' expected\n", name
);
6557 if (ctxt
->instate
== XML_PARSER_EOF
)
6560 if (CMP7(CUR_PTR
, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6561 tree
= xmlParseElementMixedContentDecl(ctxt
, inputid
);
6562 res
= XML_ELEMENT_TYPE_MIXED
;
6564 tree
= xmlParseElementChildrenContentDeclPriv(ctxt
, inputid
, 1);
6565 res
= XML_ELEMENT_TYPE_ELEMENT
;
6573 * xmlParseElementDecl:
6574 * @ctxt: an XML parser context
6576 * DEPRECATED: Internal function, don't use.
6578 * Parse an element declaration. Always consumes '<!'.
6580 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6582 * [ VC: Unique Element Type Declaration ]
6583 * No element type may be declared more than once
6585 * Returns the type of the element, or -1 in case of error
6588 xmlParseElementDecl(xmlParserCtxtPtr ctxt
) {
6589 const xmlChar
*name
;
6591 xmlElementContentPtr content
= NULL
;
6593 if ((CUR
!= '<') || (NXT(1) != '!'))
6597 /* GROW; done in the caller */
6598 if (CMP7(CUR_PTR
, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6599 int inputid
= ctxt
->input
->id
;
6602 if (SKIP_BLANKS
== 0) {
6603 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6604 "Space required after 'ELEMENT'\n");
6607 name
= xmlParseName(ctxt
);
6609 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
6610 "xmlParseElementDecl: no name for Element\n");
6613 if (SKIP_BLANKS
== 0) {
6614 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6615 "Space required after the element name\n");
6617 if (CMP5(CUR_PTR
, 'E', 'M', 'P', 'T', 'Y')) {
6620 * Element must always be empty.
6622 ret
= XML_ELEMENT_TYPE_EMPTY
;
6623 } else if ((RAW
== 'A') && (NXT(1) == 'N') &&
6627 * Element is a generic container.
6629 ret
= XML_ELEMENT_TYPE_ANY
;
6630 } else if (RAW
== '(') {
6631 ret
= xmlParseElementContentDecl(ctxt
, name
, &content
);
6634 * [ WFC: PEs in Internal Subset ] error handling.
6636 if ((RAW
== '%') && (ctxt
->external
== 0) &&
6637 (ctxt
->inputNr
== 1)) {
6638 xmlFatalErrMsg(ctxt
, XML_ERR_PEREF_IN_INT_SUBSET
,
6639 "PEReference: forbidden within markup decl in internal subset\n");
6641 xmlFatalErrMsg(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
,
6642 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6650 xmlFatalErr(ctxt
, XML_ERR_GT_REQUIRED
, NULL
);
6651 if (content
!= NULL
) {
6652 xmlFreeDocElementContent(ctxt
->myDoc
, content
);
6655 if (inputid
!= ctxt
->input
->id
) {
6656 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6657 "Element declaration doesn't start and stop in"
6658 " the same entity\n");
6662 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
6663 (ctxt
->sax
->elementDecl
!= NULL
)) {
6664 if (content
!= NULL
)
6665 content
->parent
= NULL
;
6666 ctxt
->sax
->elementDecl(ctxt
->userData
, name
, ret
,
6668 if ((content
!= NULL
) && (content
->parent
== NULL
)) {
6670 * this is a trick: if xmlAddElementDecl is called,
6671 * instead of copying the full tree it is plugged directly
6672 * if called from the parser. Avoid duplicating the
6673 * interfaces or change the API/ABI
6675 xmlFreeDocElementContent(ctxt
->myDoc
, content
);
6677 } else if (content
!= NULL
) {
6678 xmlFreeDocElementContent(ctxt
->myDoc
, content
);
6686 * xmlParseConditionalSections
6687 * @ctxt: an XML parser context
6689 * Parse a conditional section. Always consumes '<!['.
6691 * [61] conditionalSect ::= includeSect | ignoreSect
6692 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6693 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6694 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6695 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6699 xmlParseConditionalSections(xmlParserCtxtPtr ctxt
) {
6700 int *inputIds
= NULL
;
6701 size_t inputIdsSize
= 0;
6704 while (ctxt
->instate
!= XML_PARSER_EOF
) {
6705 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6706 int id
= ctxt
->input
->id
;
6711 if (CMP7(CUR_PTR
, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6715 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_INVALID
, NULL
);
6716 xmlHaltParser(ctxt
);
6719 if (ctxt
->input
->id
!= id
) {
6720 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6721 "All markup of the conditional section is"
6722 " not in the same entity\n");
6726 if (inputIdsSize
<= depth
) {
6729 inputIdsSize
= (inputIdsSize
== 0 ? 4 : inputIdsSize
* 2);
6730 tmp
= (int *) xmlRealloc(inputIds
,
6731 inputIdsSize
* sizeof(int));
6733 xmlErrMemory(ctxt
, NULL
);
6738 inputIds
[depth
] = id
;
6740 } else if (CMP6(CUR_PTR
, 'I', 'G', 'N', 'O', 'R', 'E')) {
6741 size_t ignoreDepth
= 0;
6746 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_INVALID
, NULL
);
6747 xmlHaltParser(ctxt
);
6750 if (ctxt
->input
->id
!= id
) {
6751 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6752 "All markup of the conditional section is"
6753 " not in the same entity\n");
6758 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6761 /* Check for integer overflow */
6762 if (ignoreDepth
== 0) {
6763 xmlErrMemory(ctxt
, NULL
);
6766 } else if ((RAW
== ']') && (NXT(1) == ']') &&
6768 if (ignoreDepth
== 0)
6778 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_NOT_FINISHED
, NULL
);
6781 if (ctxt
->input
->id
!= id
) {
6782 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6783 "All markup of the conditional section is"
6784 " not in the same entity\n");
6788 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_INVALID_KEYWORD
, NULL
);
6789 xmlHaltParser(ctxt
);
6792 } else if ((depth
> 0) &&
6793 (RAW
== ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6795 if (ctxt
->input
->id
!= inputIds
[depth
]) {
6796 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6797 "All markup of the conditional section is not"
6798 " in the same entity\n");
6801 } else if ((RAW
== '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6802 xmlParseMarkupDecl(ctxt
);
6804 xmlFatalErr(ctxt
, XML_ERR_EXT_SUBSET_NOT_FINISHED
, NULL
);
6805 xmlHaltParser(ctxt
);
6822 * xmlParseMarkupDecl:
6823 * @ctxt: an XML parser context
6825 * DEPRECATED: Internal function, don't use.
6827 * Parse markup declarations. Always consumes '<!' or '<?'.
6829 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6830 * NotationDecl | PI | Comment
6832 * [ VC: Proper Declaration/PE Nesting ]
6833 * Parameter-entity replacement text must be properly nested with
6834 * markup declarations. That is to say, if either the first character
6835 * or the last character of a markup declaration (markupdecl above) is
6836 * contained in the replacement text for a parameter-entity reference,
6837 * both must be contained in the same replacement text.
6839 * [ WFC: PEs in Internal Subset ]
6840 * In the internal DTD subset, parameter-entity references can occur
6841 * only where markup declarations can occur, not within markup declarations.
6842 * (This does not apply to references that occur in external parameter
6843 * entities or to the external subset.)
6846 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt
) {
6849 if (NXT(1) == '!') {
6853 xmlParseElementDecl(ctxt
);
6854 else if (NXT(3) == 'N')
6855 xmlParseEntityDecl(ctxt
);
6860 xmlParseAttributeListDecl(ctxt
);
6863 xmlParseNotationDecl(ctxt
);
6866 xmlParseComment(ctxt
);
6869 /* there is an error but it will be detected later */
6873 } else if (NXT(1) == '?') {
6879 * detect requirement to exit there and act accordingly
6880 * and avoid having instate overridden later on
6882 if (ctxt
->instate
== XML_PARSER_EOF
)
6885 ctxt
->instate
= XML_PARSER_DTD
;
6890 * @ctxt: an XML parser context
6892 * DEPRECATED: Internal function, don't use.
6894 * parse an XML declaration header for external entities
6896 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6900 xmlParseTextDecl(xmlParserCtxtPtr ctxt
) {
6902 const xmlChar
*encoding
;
6906 * We know that '<?xml' is here.
6908 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6911 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_STARTED
, NULL
);
6915 /* Avoid expansion of parameter entities when skipping blanks. */
6916 oldstate
= ctxt
->instate
;
6917 ctxt
->instate
= XML_PARSER_START
;
6919 if (SKIP_BLANKS
== 0) {
6920 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6921 "Space needed after '<?xml'\n");
6925 * We may have the VersionInfo here.
6927 version
= xmlParseVersionInfo(ctxt
);
6928 if (version
== NULL
)
6929 version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
6931 if (SKIP_BLANKS
== 0) {
6932 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6933 "Space needed here\n");
6936 ctxt
->input
->version
= version
;
6939 * We must have the encoding declaration
6941 encoding
= xmlParseEncodingDecl(ctxt
);
6942 if (ctxt
->instate
== XML_PARSER_EOF
)
6944 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
6946 * The XML REC instructs us to stop parsing right here
6948 ctxt
->instate
= oldstate
;
6951 if ((encoding
== NULL
) && (ctxt
->errNo
== XML_ERR_OK
)) {
6952 xmlFatalErrMsg(ctxt
, XML_ERR_MISSING_ENCODING
,
6953 "Missing encoding in text declaration\n");
6957 if ((RAW
== '?') && (NXT(1) == '>')) {
6959 } else if (RAW
== '>') {
6960 /* Deprecated old WD ... */
6961 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
6966 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
6967 while ((c
= CUR
) != 0) {
6974 ctxt
->instate
= oldstate
;
6978 * xmlParseExternalSubset:
6979 * @ctxt: an XML parser context
6980 * @ExternalID: the external identifier
6981 * @SystemID: the system identifier (or URL)
6983 * parse Markup declarations from an external subset
6985 * [30] extSubset ::= textDecl? extSubsetDecl
6987 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6990 xmlParseExternalSubset(xmlParserCtxtPtr ctxt
, const xmlChar
*ExternalID
,
6991 const xmlChar
*SystemID
) {
6992 xmlDetectSAX2(ctxt
);
6995 if ((ctxt
->encoding
== NULL
) &&
6996 (ctxt
->input
->end
- ctxt
->input
->cur
>= 4)) {
6998 xmlCharEncoding enc
;
7004 enc
= xmlDetectCharEncoding(start
, 4);
7005 if (enc
!= XML_CHAR_ENCODING_NONE
)
7006 xmlSwitchEncoding(ctxt
, enc
);
7009 if (CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) {
7010 xmlParseTextDecl(ctxt
);
7011 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
7013 * The XML REC instructs us to stop parsing right here
7015 xmlHaltParser(ctxt
);
7019 if (ctxt
->myDoc
== NULL
) {
7020 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
7021 if (ctxt
->myDoc
== NULL
) {
7022 xmlErrMemory(ctxt
, "New Doc failed");
7025 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
7027 if ((ctxt
->myDoc
!= NULL
) && (ctxt
->myDoc
->intSubset
== NULL
))
7028 xmlCreateIntSubset(ctxt
->myDoc
, NULL
, ExternalID
, SystemID
);
7030 ctxt
->instate
= XML_PARSER_DTD
;
7033 while ((ctxt
->instate
!= XML_PARSER_EOF
) && (RAW
!= 0)) {
7035 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7036 xmlParseConditionalSections(ctxt
);
7037 } else if ((RAW
== '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7038 xmlParseMarkupDecl(ctxt
);
7040 xmlFatalErr(ctxt
, XML_ERR_EXT_SUBSET_NOT_FINISHED
, NULL
);
7041 xmlHaltParser(ctxt
);
7049 xmlFatalErr(ctxt
, XML_ERR_EXT_SUBSET_NOT_FINISHED
, NULL
);
7055 * xmlParseReference:
7056 * @ctxt: an XML parser context
7058 * DEPRECATED: Internal function, don't use.
7060 * parse and handle entity references in content; depending on the SAX
7061 * interface, this may end up in a call to characters() if this is a
7062 * CharRef, a predefined entity, if there is no reference() callback,
7063 * or if the parser was asked to switch to that mode.
7065 * Always consumes '&'.
7067 * [67] Reference ::= EntityRef | CharRef
7070 xmlParseReference(xmlParserCtxtPtr ctxt
) {
7074 xmlNodePtr list
= NULL
;
7075 xmlParserErrors ret
= XML_ERR_OK
;
7082 * Simple case of a CharRef
7084 if (NXT(1) == '#') {
7088 int value
= xmlParseCharRef(ctxt
);
7092 if (ctxt
->charset
!= XML_CHAR_ENCODING_UTF8
) {
7094 * So we are using non-UTF-8 buffers
7095 * Check that the char fit on 8bits, if not
7096 * generate a CharRef.
7098 if (value
<= 0xFF) {
7101 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
7102 (!ctxt
->disableSAX
))
7103 ctxt
->sax
->characters(ctxt
->userData
, out
, 1);
7105 if ((hex
== 'x') || (hex
== 'X'))
7106 snprintf((char *)out
, sizeof(out
), "#x%X", value
);
7108 snprintf((char *)out
, sizeof(out
), "#%d", value
);
7109 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
7110 (!ctxt
->disableSAX
))
7111 ctxt
->sax
->reference(ctxt
->userData
, out
);
7115 * Just encode the value in UTF-8
7117 COPY_BUF(0 ,out
, i
, value
);
7119 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
7120 (!ctxt
->disableSAX
))
7121 ctxt
->sax
->characters(ctxt
->userData
, out
, i
);
7127 * We are seeing an entity reference
7129 ent
= xmlParseEntityRef(ctxt
);
7130 if (ent
== NULL
) return;
7131 if (!ctxt
->wellFormed
)
7133 was_checked
= ent
->flags
& XML_ENT_PARSED
;
7135 /* special case of predefined entities */
7136 if ((ent
->name
== NULL
) ||
7137 (ent
->etype
== XML_INTERNAL_PREDEFINED_ENTITY
)) {
7139 if (val
== NULL
) return;
7141 * inline the entity.
7143 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
7144 (!ctxt
->disableSAX
))
7145 ctxt
->sax
->characters(ctxt
->userData
, val
, xmlStrlen(val
));
7150 * The first reference to the entity trigger a parsing phase
7151 * where the ent->children is filled with the result from
7153 * Note: external parsed entities will not be loaded, it is not
7154 * required for a non-validating parser, unless the parsing option
7155 * of validating, or substituting entities were given. Doing so is
7156 * far more secure as the parser will only process data coming from
7157 * the document entity by default.
7159 if (((ent
->flags
& XML_ENT_PARSED
) == 0) &&
7160 ((ent
->etype
!= XML_EXTERNAL_GENERAL_PARSED_ENTITY
) ||
7161 (ctxt
->options
& (XML_PARSE_NOENT
| XML_PARSE_DTDVALID
)))) {
7162 unsigned long oldsizeentcopy
= ctxt
->sizeentcopy
;
7165 * This is a bit hackish but this seems the best
7166 * way to make sure both SAX and DOM entity support
7170 if (ctxt
->userData
== ctxt
)
7173 user_data
= ctxt
->userData
;
7175 /* Avoid overflow as much as possible */
7176 ctxt
->sizeentcopy
= 0;
7178 if (ent
->flags
& XML_ENT_EXPANDING
) {
7179 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
7180 xmlHaltParser(ctxt
);
7184 ent
->flags
|= XML_ENT_EXPANDING
;
7187 * Check that this entity is well formed
7188 * 4.3.2: An internal general parsed entity is well-formed
7189 * if its replacement text matches the production labeled
7192 if (ent
->etype
== XML_INTERNAL_GENERAL_ENTITY
) {
7194 ret
= xmlParseBalancedChunkMemoryInternal(ctxt
, ent
->content
,
7198 } else if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
) {
7200 ret
= xmlParseExternalEntityPrivate(ctxt
->myDoc
, ctxt
, ctxt
->sax
,
7201 user_data
, ctxt
->depth
, ent
->URI
,
7202 ent
->ExternalID
, &list
);
7205 ret
= XML_ERR_ENTITY_PE_INTERNAL
;
7206 xmlErrMsgStr(ctxt
, XML_ERR_INTERNAL_ERROR
,
7207 "invalid entity type found\n", NULL
);
7210 ent
->flags
&= ~XML_ENT_EXPANDING
;
7211 ent
->flags
|= XML_ENT_PARSED
| XML_ENT_CHECKED
;
7212 ent
->expandedSize
= ctxt
->sizeentcopy
;
7213 if (ret
== XML_ERR_ENTITY_LOOP
) {
7214 xmlHaltParser(ctxt
);
7215 xmlFreeNodeList(list
);
7218 if (xmlParserEntityCheck(ctxt
, oldsizeentcopy
)) {
7219 xmlFreeNodeList(list
);
7223 if ((ret
== XML_ERR_OK
) && (list
!= NULL
)) {
7224 ent
->children
= list
;
7226 * Prune it directly in the generated document
7227 * except for single text nodes.
7229 if ((ctxt
->replaceEntities
== 0) ||
7230 (ctxt
->parseMode
== XML_PARSE_READER
) ||
7231 ((list
->type
== XML_TEXT_NODE
) &&
7232 (list
->next
== NULL
))) {
7234 while (list
!= NULL
) {
7235 list
->parent
= (xmlNodePtr
) ent
;
7236 if (list
->doc
!= ent
->doc
)
7237 xmlSetTreeDoc(list
, ent
->doc
);
7238 if (list
->next
== NULL
)
7245 while (list
!= NULL
) {
7246 list
->parent
= (xmlNodePtr
) ctxt
->node
;
7247 list
->doc
= ctxt
->myDoc
;
7248 if (list
->next
== NULL
)
7252 list
= ent
->children
;
7253 #ifdef LIBXML_LEGACY_ENABLED
7254 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
7255 xmlAddEntityReference(ent
, list
, NULL
);
7256 #endif /* LIBXML_LEGACY_ENABLED */
7258 } else if ((ret
!= XML_ERR_OK
) &&
7259 (ret
!= XML_WAR_UNDECLARED_ENTITY
)) {
7260 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7261 "Entity '%s' failed to parse\n", ent
->name
);
7262 if (ent
->content
!= NULL
)
7263 ent
->content
[0] = 0;
7264 } else if (list
!= NULL
) {
7265 xmlFreeNodeList(list
);
7269 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7274 * Now that the entity content has been gathered
7275 * provide it to the application, this can take different forms based
7276 * on the parsing modes.
7278 if (ent
->children
== NULL
) {
7280 * Probably running in SAX mode and the callbacks don't
7281 * build the entity content. So unless we already went
7282 * though parsing for first checking go though the entity
7283 * content to generate callbacks associated to the entity
7285 if (was_checked
!= 0) {
7288 * This is a bit hackish but this seems the best
7289 * way to make sure both SAX and DOM entity support
7292 if (ctxt
->userData
== ctxt
)
7295 user_data
= ctxt
->userData
;
7297 if (ent
->etype
== XML_INTERNAL_GENERAL_ENTITY
) {
7299 ret
= xmlParseBalancedChunkMemoryInternal(ctxt
,
7300 ent
->content
, user_data
, NULL
);
7302 } else if (ent
->etype
==
7303 XML_EXTERNAL_GENERAL_PARSED_ENTITY
) {
7304 unsigned long oldsizeentities
= ctxt
->sizeentities
;
7307 ret
= xmlParseExternalEntityPrivate(ctxt
->myDoc
, ctxt
,
7308 ctxt
->sax
, user_data
, ctxt
->depth
,
7309 ent
->URI
, ent
->ExternalID
, NULL
);
7312 /* Undo the change to sizeentities */
7313 ctxt
->sizeentities
= oldsizeentities
;
7315 ret
= XML_ERR_ENTITY_PE_INTERNAL
;
7316 xmlErrMsgStr(ctxt
, XML_ERR_INTERNAL_ERROR
,
7317 "invalid entity type found\n", NULL
);
7319 if (ret
== XML_ERR_ENTITY_LOOP
) {
7320 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
7323 if (xmlParserEntityCheck(ctxt
, 0))
7326 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
7327 (ctxt
->replaceEntities
== 0) && (!ctxt
->disableSAX
)) {
7329 * Entity reference callback comes second, it's somewhat
7330 * superfluous but a compatibility to historical behaviour
7332 ctxt
->sax
->reference(ctxt
->userData
, ent
->name
);
7338 * We also check for amplification if entities aren't substituted.
7339 * They might be expanded later.
7341 if ((was_checked
!= 0) &&
7342 (xmlParserEntityCheck(ctxt
, ent
->expandedSize
)))
7346 * If we didn't get any children for the entity being built
7348 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
7349 (ctxt
->replaceEntities
== 0) && (!ctxt
->disableSAX
)) {
7353 ctxt
->sax
->reference(ctxt
->userData
, ent
->name
);
7357 if (ctxt
->replaceEntities
) {
7359 * There is a problem on the handling of _private for entities
7360 * (bug 155816): Should we copy the content of the field from
7361 * the entity (possibly overwriting some value set by the user
7362 * when a copy is created), should we leave it alone, or should
7363 * we try to take care of different situations? The problem
7364 * is exacerbated by the usage of this field by the xmlReader.
7365 * To fix this bug, we look at _private on the created node
7366 * and, if it's NULL, we copy in whatever was in the entity.
7367 * If it's not NULL we leave it alone. This is somewhat of a
7368 * hack - maybe we should have further tests to determine
7371 if (ctxt
->node
!= NULL
) {
7373 * Seems we are generating the DOM content, do
7374 * a simple tree copy for all references except the first
7375 * In the first occurrence list contains the replacement.
7377 if (((list
== NULL
) && (ent
->owner
== 0)) ||
7378 (ctxt
->parseMode
== XML_PARSE_READER
)) {
7379 xmlNodePtr nw
= NULL
, cur
, firstChild
= NULL
;
7382 * when operating on a reader, the entities definitions
7383 * are always owning the entities subtree.
7384 if (ctxt->parseMode == XML_PARSE_READER)
7388 cur
= ent
->children
;
7389 while (cur
!= NULL
) {
7390 nw
= xmlDocCopyNode(cur
, ctxt
->myDoc
, 1);
7392 if (nw
->_private
== NULL
)
7393 nw
->_private
= cur
->_private
;
7394 if (firstChild
== NULL
){
7397 nw
= xmlAddChild(ctxt
->node
, nw
);
7399 if (cur
== ent
->last
) {
7401 * needed to detect some strange empty
7402 * node cases in the reader tests
7404 if ((ctxt
->parseMode
== XML_PARSE_READER
) &&
7406 (nw
->type
== XML_ELEMENT_NODE
) &&
7407 (nw
->children
== NULL
))
7414 #ifdef LIBXML_LEGACY_ENABLED
7415 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
7416 xmlAddEntityReference(ent
, firstChild
, nw
);
7417 #endif /* LIBXML_LEGACY_ENABLED */
7418 } else if ((list
== NULL
) || (ctxt
->inputNr
> 0)) {
7419 xmlNodePtr nw
= NULL
, cur
, next
, last
,
7423 * Copy the entity child list and make it the new
7424 * entity child list. The goal is to make sure any
7425 * ID or REF referenced will be the one from the
7426 * document content and not the entity copy.
7428 cur
= ent
->children
;
7429 ent
->children
= NULL
;
7432 while (cur
!= NULL
) {
7436 nw
= xmlDocCopyNode(cur
, ctxt
->myDoc
, 1);
7438 if (nw
->_private
== NULL
)
7439 nw
->_private
= cur
->_private
;
7440 if (firstChild
== NULL
){
7443 xmlAddChild((xmlNodePtr
) ent
, nw
);
7445 xmlAddChild(ctxt
->node
, cur
);
7450 if (ent
->owner
== 0)
7452 #ifdef LIBXML_LEGACY_ENABLED
7453 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
7454 xmlAddEntityReference(ent
, firstChild
, nw
);
7455 #endif /* LIBXML_LEGACY_ENABLED */
7457 const xmlChar
*nbktext
;
7460 * the name change is to avoid coalescing of the
7461 * node with a possible previous text one which
7462 * would make ent->children a dangling pointer
7464 nbktext
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"nbktext",
7466 if (ent
->children
->type
== XML_TEXT_NODE
)
7467 ent
->children
->name
= nbktext
;
7468 if ((ent
->last
!= ent
->children
) &&
7469 (ent
->last
->type
== XML_TEXT_NODE
))
7470 ent
->last
->name
= nbktext
;
7471 xmlAddChildList(ctxt
->node
, ent
->children
);
7475 * This is to avoid a nasty side effect, see
7476 * characters() in SAX.c
7486 * xmlParseEntityRef:
7487 * @ctxt: an XML parser context
7489 * DEPRECATED: Internal function, don't use.
7491 * Parse an entitiy reference. Always consumes '&'.
7493 * [68] EntityRef ::= '&' Name ';'
7495 * [ WFC: Entity Declared ]
7496 * In a document without any DTD, a document with only an internal DTD
7497 * subset which contains no parameter entity references, or a document
7498 * with "standalone='yes'", the Name given in the entity reference
7499 * must match that in an entity declaration, except that well-formed
7500 * documents need not declare any of the following entities: amp, lt,
7501 * gt, apos, quot. The declaration of a parameter entity must precede
7502 * any reference to it. Similarly, the declaration of a general entity
7503 * must precede any reference to it which appears in a default value in an
7504 * attribute-list declaration. Note that if entities are declared in the
7505 * external subset or in external parameter entities, a non-validating
7506 * processor is not obligated to read and process their declarations;
7507 * for such documents, the rule that an entity must be declared is a
7508 * well-formedness constraint only if standalone='yes'.
7510 * [ WFC: Parsed Entity ]
7511 * An entity reference must not contain the name of an unparsed entity
7513 * Returns the xmlEntityPtr if found, or NULL otherwise.
7516 xmlParseEntityRef(xmlParserCtxtPtr ctxt
) {
7517 const xmlChar
*name
;
7518 xmlEntityPtr ent
= NULL
;
7521 if (ctxt
->instate
== XML_PARSER_EOF
)
7527 name
= xmlParseName(ctxt
);
7529 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
7530 "xmlParseEntityRef: no name\n");
7534 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
7540 * Predefined entities override any extra definition
7542 if ((ctxt
->options
& XML_PARSE_OLDSAX
) == 0) {
7543 ent
= xmlGetPredefinedEntity(name
);
7549 * Ask first SAX for entity resolution, otherwise try the
7550 * entities which may have stored in the parser context.
7552 if (ctxt
->sax
!= NULL
) {
7553 if (ctxt
->sax
->getEntity
!= NULL
)
7554 ent
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
7555 if ((ctxt
->wellFormed
== 1 ) && (ent
== NULL
) &&
7556 (ctxt
->options
& XML_PARSE_OLDSAX
))
7557 ent
= xmlGetPredefinedEntity(name
);
7558 if ((ctxt
->wellFormed
== 1 ) && (ent
== NULL
) &&
7559 (ctxt
->userData
==ctxt
)) {
7560 ent
= xmlSAX2GetEntity(ctxt
, name
);
7563 if (ctxt
->instate
== XML_PARSER_EOF
)
7566 * [ WFC: Entity Declared ]
7567 * In a document without any DTD, a document with only an
7568 * internal DTD subset which contains no parameter entity
7569 * references, or a document with "standalone='yes'", the
7570 * Name given in the entity reference must match that in an
7571 * entity declaration, except that well-formed documents
7572 * need not declare any of the following entities: amp, lt,
7574 * The declaration of a parameter entity must precede any
7576 * Similarly, the declaration of a general entity must
7577 * precede any reference to it which appears in a default
7578 * value in an attribute-list declaration. Note that if
7579 * entities are declared in the external subset or in
7580 * external parameter entities, a non-validating processor
7581 * is not obligated to read and process their declarations;
7582 * for such documents, the rule that an entity must be
7583 * declared is a well-formedness constraint only if
7587 if ((ctxt
->standalone
== 1) ||
7588 ((ctxt
->hasExternalSubset
== 0) &&
7589 (ctxt
->hasPErefs
== 0))) {
7590 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7591 "Entity '%s' not defined\n", name
);
7593 xmlErrMsgStr(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7594 "Entity '%s' not defined\n", name
);
7595 if ((ctxt
->inSubset
== 0) &&
7596 (ctxt
->sax
!= NULL
) &&
7597 (ctxt
->sax
->reference
!= NULL
)) {
7598 ctxt
->sax
->reference(ctxt
->userData
, name
);
7605 * [ WFC: Parsed Entity ]
7606 * An entity reference must not contain the name of an
7609 else if (ent
->etype
== XML_EXTERNAL_GENERAL_UNPARSED_ENTITY
) {
7610 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNPARSED_ENTITY
,
7611 "Entity reference to unparsed entity %s\n", name
);
7615 * [ WFC: No External Entity References ]
7616 * Attribute values cannot contain direct or indirect
7617 * entity references to external entities.
7619 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7620 (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) {
7621 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_EXTERNAL
,
7622 "Attribute references external entity '%s'\n", name
);
7625 * [ WFC: No < in Attribute Values ]
7626 * The replacement text of any entity referred to directly or
7627 * indirectly in an attribute value (other than "<") must
7630 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7631 (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
)) {
7632 if ((ent
->flags
& XML_ENT_CHECKED_LT
) == 0) {
7633 if ((ent
->content
!= NULL
) && (xmlStrchr(ent
->content
, '<')))
7634 ent
->flags
|= XML_ENT_CONTAINS_LT
;
7635 ent
->flags
|= XML_ENT_CHECKED_LT
;
7637 if (ent
->flags
& XML_ENT_CONTAINS_LT
)
7638 xmlFatalErrMsgStr(ctxt
, XML_ERR_LT_IN_ATTRIBUTE
,
7639 "'<' in entity '%s' is not allowed in attributes "
7644 * Internal check, no parameter entities here ...
7647 switch (ent
->etype
) {
7648 case XML_INTERNAL_PARAMETER_ENTITY
:
7649 case XML_EXTERNAL_PARAMETER_ENTITY
:
7650 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_PARAMETER
,
7651 "Attempt to reference the parameter entity '%s'\n",
7660 * [ WFC: No Recursion ]
7661 * A parsed entity must not contain a recursive reference
7662 * to itself, either directly or indirectly.
7663 * Done somewhere else
7669 * xmlParseStringEntityRef:
7670 * @ctxt: an XML parser context
7671 * @str: a pointer to an index in the string
7673 * parse ENTITY references declarations, but this version parses it from
7676 * [68] EntityRef ::= '&' Name ';'
7678 * [ WFC: Entity Declared ]
7679 * In a document without any DTD, a document with only an internal DTD
7680 * subset which contains no parameter entity references, or a document
7681 * with "standalone='yes'", the Name given in the entity reference
7682 * must match that in an entity declaration, except that well-formed
7683 * documents need not declare any of the following entities: amp, lt,
7684 * gt, apos, quot. The declaration of a parameter entity must precede
7685 * any reference to it. Similarly, the declaration of a general entity
7686 * must precede any reference to it which appears in a default value in an
7687 * attribute-list declaration. Note that if entities are declared in the
7688 * external subset or in external parameter entities, a non-validating
7689 * processor is not obligated to read and process their declarations;
7690 * for such documents, the rule that an entity must be declared is a
7691 * well-formedness constraint only if standalone='yes'.
7693 * [ WFC: Parsed Entity ]
7694 * An entity reference must not contain the name of an unparsed entity
7696 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7697 * is updated to the current location in the string.
7700 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt
, const xmlChar
** str
) {
7704 xmlEntityPtr ent
= NULL
;
7706 if ((str
== NULL
) || (*str
== NULL
))
7714 name
= xmlParseStringName(ctxt
, &ptr
);
7716 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
7717 "xmlParseStringEntityRef: no name\n");
7722 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
7731 * Predefined entities override any extra definition
7733 if ((ctxt
->options
& XML_PARSE_OLDSAX
) == 0) {
7734 ent
= xmlGetPredefinedEntity(name
);
7743 * Ask first SAX for entity resolution, otherwise try the
7744 * entities which may have stored in the parser context.
7746 if (ctxt
->sax
!= NULL
) {
7747 if (ctxt
->sax
->getEntity
!= NULL
)
7748 ent
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
7749 if ((ent
== NULL
) && (ctxt
->options
& XML_PARSE_OLDSAX
))
7750 ent
= xmlGetPredefinedEntity(name
);
7751 if ((ent
== NULL
) && (ctxt
->userData
==ctxt
)) {
7752 ent
= xmlSAX2GetEntity(ctxt
, name
);
7755 if (ctxt
->instate
== XML_PARSER_EOF
) {
7761 * [ WFC: Entity Declared ]
7762 * In a document without any DTD, a document with only an
7763 * internal DTD subset which contains no parameter entity
7764 * references, or a document with "standalone='yes'", the
7765 * Name given in the entity reference must match that in an
7766 * entity declaration, except that well-formed documents
7767 * need not declare any of the following entities: amp, lt,
7769 * The declaration of a parameter entity must precede any
7771 * Similarly, the declaration of a general entity must
7772 * precede any reference to it which appears in a default
7773 * value in an attribute-list declaration. Note that if
7774 * entities are declared in the external subset or in
7775 * external parameter entities, a non-validating processor
7776 * is not obligated to read and process their declarations;
7777 * for such documents, the rule that an entity must be
7778 * declared is a well-formedness constraint only if
7782 if ((ctxt
->standalone
== 1) ||
7783 ((ctxt
->hasExternalSubset
== 0) &&
7784 (ctxt
->hasPErefs
== 0))) {
7785 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7786 "Entity '%s' not defined\n", name
);
7788 xmlErrMsgStr(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7789 "Entity '%s' not defined\n",
7792 /* TODO ? check regressions ctxt->valid = 0; */
7796 * [ WFC: Parsed Entity ]
7797 * An entity reference must not contain the name of an
7800 else if (ent
->etype
== XML_EXTERNAL_GENERAL_UNPARSED_ENTITY
) {
7801 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNPARSED_ENTITY
,
7802 "Entity reference to unparsed entity %s\n", name
);
7806 * [ WFC: No External Entity References ]
7807 * Attribute values cannot contain direct or indirect
7808 * entity references to external entities.
7810 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7811 (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) {
7812 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_EXTERNAL
,
7813 "Attribute references external entity '%s'\n", name
);
7816 * [ WFC: No < in Attribute Values ]
7817 * The replacement text of any entity referred to directly or
7818 * indirectly in an attribute value (other than "<") must
7821 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7822 (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
)) {
7823 if ((ent
->flags
& XML_ENT_CHECKED_LT
) == 0) {
7824 if ((ent
->content
!= NULL
) && (xmlStrchr(ent
->content
, '<')))
7825 ent
->flags
|= XML_ENT_CONTAINS_LT
;
7826 ent
->flags
|= XML_ENT_CHECKED_LT
;
7828 if (ent
->flags
& XML_ENT_CONTAINS_LT
)
7829 xmlFatalErrMsgStr(ctxt
, XML_ERR_LT_IN_ATTRIBUTE
,
7830 "'<' in entity '%s' is not allowed in attributes "
7835 * Internal check, no parameter entities here ...
7838 switch (ent
->etype
) {
7839 case XML_INTERNAL_PARAMETER_ENTITY
:
7840 case XML_EXTERNAL_PARAMETER_ENTITY
:
7841 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_PARAMETER
,
7842 "Attempt to reference the parameter entity '%s'\n",
7851 * [ WFC: No Recursion ]
7852 * A parsed entity must not contain a recursive reference
7853 * to itself, either directly or indirectly.
7854 * Done somewhere else
7863 * xmlParsePEReference:
7864 * @ctxt: an XML parser context
7866 * DEPRECATED: Internal function, don't use.
7868 * Parse a parameter entity reference. Always consumes '%'.
7870 * The entity content is handled directly by pushing it's content as
7871 * a new input stream.
7873 * [69] PEReference ::= '%' Name ';'
7875 * [ WFC: No Recursion ]
7876 * A parsed entity must not contain a recursive
7877 * reference to itself, either directly or indirectly.
7879 * [ WFC: Entity Declared ]
7880 * In a document without any DTD, a document with only an internal DTD
7881 * subset which contains no parameter entity references, or a document
7882 * with "standalone='yes'", ... ... The declaration of a parameter
7883 * entity must precede any reference to it...
7885 * [ VC: Entity Declared ]
7886 * In a document with an external subset or external parameter entities
7887 * with "standalone='no'", ... ... The declaration of a parameter entity
7888 * must precede any reference to it...
7891 * Parameter-entity references may only appear in the DTD.
7892 * NOTE: misleading but this is handled.
7895 xmlParsePEReference(xmlParserCtxtPtr ctxt
)
7897 const xmlChar
*name
;
7898 xmlEntityPtr entity
= NULL
;
7899 xmlParserInputPtr input
;
7904 name
= xmlParseName(ctxt
);
7906 xmlFatalErrMsg(ctxt
, XML_ERR_PEREF_NO_NAME
, "PEReference: no name\n");
7909 if (xmlParserDebugEntities
)
7910 xmlGenericError(xmlGenericErrorContext
,
7911 "PEReference: %s\n", name
);
7913 xmlFatalErr(ctxt
, XML_ERR_PEREF_SEMICOL_MISSING
, NULL
);
7920 * Request the entity from SAX
7922 if ((ctxt
->sax
!= NULL
) &&
7923 (ctxt
->sax
->getParameterEntity
!= NULL
))
7924 entity
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
7925 if (ctxt
->instate
== XML_PARSER_EOF
)
7927 if (entity
== NULL
) {
7929 * [ WFC: Entity Declared ]
7930 * In a document without any DTD, a document with only an
7931 * internal DTD subset which contains no parameter entity
7932 * references, or a document with "standalone='yes'", ...
7933 * ... The declaration of a parameter entity must precede
7934 * any reference to it...
7936 if ((ctxt
->standalone
== 1) ||
7937 ((ctxt
->hasExternalSubset
== 0) &&
7938 (ctxt
->hasPErefs
== 0))) {
7939 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7940 "PEReference: %%%s; not found\n",
7944 * [ VC: Entity Declared ]
7945 * In a document with an external subset or external
7946 * parameter entities with "standalone='no'", ...
7947 * ... The declaration of a parameter entity must
7948 * precede any reference to it...
7950 if ((ctxt
->validate
) && (ctxt
->vctxt
.error
!= NULL
)) {
7951 xmlValidityError(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7952 "PEReference: %%%s; not found\n",
7955 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7956 "PEReference: %%%s; not found\n",
7962 * Internal checking in case the entity quest barfed
7964 if ((entity
->etype
!= XML_INTERNAL_PARAMETER_ENTITY
) &&
7965 (entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
)) {
7966 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7967 "Internal: %%%s; is not a parameter entity\n",
7971 xmlCharEncoding enc
;
7972 unsigned long parentConsumed
;
7973 xmlEntityPtr oldEnt
;
7975 if ((entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
) &&
7976 ((ctxt
->options
& XML_PARSE_NOENT
) == 0) &&
7977 ((ctxt
->options
& XML_PARSE_DTDVALID
) == 0) &&
7978 ((ctxt
->options
& XML_PARSE_DTDLOAD
) == 0) &&
7979 ((ctxt
->options
& XML_PARSE_DTDATTR
) == 0) &&
7980 (ctxt
->replaceEntities
== 0) &&
7981 (ctxt
->validate
== 0))
7984 if (entity
->flags
& XML_ENT_EXPANDING
) {
7985 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
7986 xmlHaltParser(ctxt
);
7990 /* Must be computed from old input before pushing new input. */
7991 parentConsumed
= ctxt
->input
->parentConsumed
;
7992 oldEnt
= ctxt
->input
->entity
;
7993 if ((oldEnt
== NULL
) ||
7994 ((oldEnt
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
) &&
7995 ((oldEnt
->flags
& XML_ENT_PARSED
) == 0))) {
7996 xmlSaturatedAdd(&parentConsumed
, ctxt
->input
->consumed
);
7997 xmlSaturatedAddSizeT(&parentConsumed
,
7998 ctxt
->input
->cur
- ctxt
->input
->base
);
8001 input
= xmlNewEntityInputStream(ctxt
, entity
);
8002 if (xmlPushInput(ctxt
, input
) < 0) {
8003 xmlFreeInputStream(input
);
8007 entity
->flags
|= XML_ENT_EXPANDING
;
8009 input
->parentConsumed
= parentConsumed
;
8011 if (entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
) {
8013 * Get the 4 first bytes and decode the charset
8014 * if enc != XML_CHAR_ENCODING_NONE
8015 * plug some encoding conversion routines.
8016 * Note that, since we may have some non-UTF8
8017 * encoding (like UTF16, bug 135229), the 'length'
8018 * is not known, but we can calculate based upon
8019 * the amount of data in the buffer.
8022 if (ctxt
->instate
== XML_PARSER_EOF
)
8024 if ((ctxt
->input
->end
- ctxt
->input
->cur
)>=4) {
8029 enc
= xmlDetectCharEncoding(start
, 4);
8030 if (enc
!= XML_CHAR_ENCODING_NONE
) {
8031 xmlSwitchEncoding(ctxt
, enc
);
8035 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) &&
8036 (IS_BLANK_CH(NXT(5)))) {
8037 xmlParseTextDecl(ctxt
);
8042 ctxt
->hasPErefs
= 1;
8046 * xmlLoadEntityContent:
8047 * @ctxt: an XML parser context
8048 * @entity: an unloaded system entity
8050 * Load the original content of the given system entity from the
8051 * ExternalID/SystemID given. This is to be used for Included in Literal
8052 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8054 * Returns 0 in case of success and -1 in case of failure
8057 xmlLoadEntityContent(xmlParserCtxtPtr ctxt
, xmlEntityPtr entity
) {
8058 xmlParserInputPtr input
;
8062 if ((ctxt
== NULL
) || (entity
== NULL
) ||
8063 ((entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
) &&
8064 (entity
->etype
!= XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) ||
8065 (entity
->content
!= NULL
)) {
8066 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
8067 "xmlLoadEntityContent parameter error");
8071 if (xmlParserDebugEntities
)
8072 xmlGenericError(xmlGenericErrorContext
,
8073 "Reading %s entity content input\n", entity
->name
);
8075 buf
= xmlBufferCreate();
8077 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
8078 "xmlLoadEntityContent parameter error");
8081 xmlBufferSetAllocationScheme(buf
, XML_BUFFER_ALLOC_DOUBLEIT
);
8083 input
= xmlNewEntityInputStream(ctxt
, entity
);
8084 if (input
== NULL
) {
8085 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
8086 "xmlLoadEntityContent input error");
8092 * Push the entity as the current input, read char by char
8093 * saving to the buffer until the end of the entity or an error
8095 if (xmlPushInput(ctxt
, input
) < 0) {
8097 xmlFreeInputStream(input
);
8103 while ((ctxt
->input
== input
) && (ctxt
->input
->cur
< ctxt
->input
->end
) &&
8105 xmlBufferAdd(buf
, ctxt
->input
->cur
, l
);
8109 if (ctxt
->instate
== XML_PARSER_EOF
) {
8114 if ((ctxt
->input
== input
) && (ctxt
->input
->cur
>= ctxt
->input
->end
)) {
8115 xmlSaturatedAdd(&ctxt
->sizeentities
, ctxt
->input
->consumed
);
8117 } else if (!IS_CHAR(c
)) {
8118 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
8119 "xmlLoadEntityContent: invalid char value %d\n",
8124 entity
->content
= buf
->content
;
8125 entity
->length
= buf
->use
;
8126 buf
->content
= NULL
;
8133 * xmlParseStringPEReference:
8134 * @ctxt: an XML parser context
8135 * @str: a pointer to an index in the string
8137 * parse PEReference declarations
8139 * [69] PEReference ::= '%' Name ';'
8141 * [ WFC: No Recursion ]
8142 * A parsed entity must not contain a recursive
8143 * reference to itself, either directly or indirectly.
8145 * [ WFC: Entity Declared ]
8146 * In a document without any DTD, a document with only an internal DTD
8147 * subset which contains no parameter entity references, or a document
8148 * with "standalone='yes'", ... ... The declaration of a parameter
8149 * entity must precede any reference to it...
8151 * [ VC: Entity Declared ]
8152 * In a document with an external subset or external parameter entities
8153 * with "standalone='no'", ... ... The declaration of a parameter entity
8154 * must precede any reference to it...
8157 * Parameter-entity references may only appear in the DTD.
8158 * NOTE: misleading but this is handled.
8160 * Returns the string of the entity content.
8161 * str is updated to the current value of the index
8164 xmlParseStringPEReference(xmlParserCtxtPtr ctxt
, const xmlChar
**str
) {
8168 xmlEntityPtr entity
= NULL
;
8170 if ((str
== NULL
) || (*str
== NULL
)) return(NULL
);
8176 name
= xmlParseStringName(ctxt
, &ptr
);
8178 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8179 "xmlParseStringPEReference: no name\n");
8185 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
8193 * Request the entity from SAX
8195 if ((ctxt
->sax
!= NULL
) &&
8196 (ctxt
->sax
->getParameterEntity
!= NULL
))
8197 entity
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
8198 if (ctxt
->instate
== XML_PARSER_EOF
) {
8203 if (entity
== NULL
) {
8205 * [ WFC: Entity Declared ]
8206 * In a document without any DTD, a document with only an
8207 * internal DTD subset which contains no parameter entity
8208 * references, or a document with "standalone='yes'", ...
8209 * ... The declaration of a parameter entity must precede
8210 * any reference to it...
8212 if ((ctxt
->standalone
== 1) ||
8213 ((ctxt
->hasExternalSubset
== 0) && (ctxt
->hasPErefs
== 0))) {
8214 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
8215 "PEReference: %%%s; not found\n", name
);
8218 * [ VC: Entity Declared ]
8219 * In a document with an external subset or external
8220 * parameter entities with "standalone='no'", ...
8221 * ... The declaration of a parameter entity must
8222 * precede any reference to it...
8224 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
8225 "PEReference: %%%s; not found\n",
8231 * Internal checking in case the entity quest barfed
8233 if ((entity
->etype
!= XML_INTERNAL_PARAMETER_ENTITY
) &&
8234 (entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
)) {
8235 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
8236 "%%%s; is not a parameter entity\n",
8240 ctxt
->hasPErefs
= 1;
8247 * xmlParseDocTypeDecl:
8248 * @ctxt: an XML parser context
8250 * DEPRECATED: Internal function, don't use.
8252 * parse a DOCTYPE declaration
8254 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8255 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8257 * [ VC: Root Element Type ]
8258 * The Name in the document type declaration must match the element
8259 * type of the root element.
8263 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt
) {
/*
 * Parses the DOCTYPE name and the optional external identifier, records
 * them on the context (intSubName, extSubURI, extSubSystem and the
 * hasExternalSubset flag) and reports them through the SAX
 * internalSubset callback when SAX is enabled.
 */
8264 const xmlChar
*name
= NULL
;
8265 xmlChar
*ExternalID
= NULL
;
8266 xmlChar
*URI
= NULL
;
8269 * We know that '<!DOCTYPE' has been detected.
8276 * Parse the DOCTYPE name.
8278 name
= xmlParseName(ctxt
);
/* NOTE(review): the guard for this error is not visible here; presumably it fires when name is NULL. */
8280 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8281 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8283 ctxt
->intSubName
= name
;
8288 * Check for SystemID and ExternalID
8290 URI
= xmlParseExternalID(ctxt
, &ExternalID
, 1);
/* Either identifier being non-NULL marks the document as having an external subset. */
8292 if ((URI
!= NULL
) || (ExternalID
!= NULL
)) {
8293 ctxt
->hasExternalSubset
= 1;
/* The returned URI is kept in extSubURI; the value written through &ExternalID is kept in extSubSystem. */
8295 ctxt
->extSubURI
= URI
;
8296 ctxt
->extSubSystem
= ExternalID
;
8301 * Create and update the internal subset.
/* The callback is only invoked when a SAX handler with internalSubset exists and disableSAX is not set. */
8303 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->internalSubset
!= NULL
) &&
8304 (!ctxt
->disableSAX
))
8305 ctxt
->sax
->internalSubset(ctxt
->userData
, name
, ExternalID
, URI
);
/* The parser state is re-checked after the callback, which may have stopped parsing. */
8306 if (ctxt
->instate
== XML_PARSER_EOF
)
8310 * Is there any internal subset declarations ?
8311 * they are handled separately in xmlParseInternalSubset()
8317 * We should be at the end of the DOCTYPE declaration.
8320 xmlFatalErr(ctxt
, XML_ERR_DOCTYPE_NOT_FINISHED
, NULL
);
8326 * xmlParseInternalSubset:
8327 * @ctxt: an XML parser context
8329 * parse the internal subset declaration
8331 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8335 xmlParseInternalSubset(xmlParserCtxtPtr ctxt
) {
/*
 * Parses the declarations of the internal subset. The parser state is
 * switched to XML_PARSER_DTD and each construct is dispatched to
 * xmlParseConditionalSections, xmlParseMarkupDecl or
 * xmlParsePEReference depending on the characters at the cursor.
 */
8337 * Is there any DTD definition ?
/* Input stack depth at entry; inputs pushed above this level must be consumed before ']' ends the loop. */
8340 int baseInputNr
= ctxt
->inputNr
;
8341 ctxt
->instate
= XML_PARSER_DTD
;
8344 * Parse the succession of Markup declarations and
8346 * Subsequence (markupdecl | PEReference | S)*
/* Loop until ']' is seen at the original input depth, or the parser reaches XML_PARSER_EOF. */
8349 while (((RAW
!= ']') || (ctxt
->inputNr
> baseInputNr
)) &&
8350 (ctxt
->instate
!= XML_PARSER_EOF
)) {
8353 * Conditional sections are allowed from external entities included
8354 * by PE References in the internal subset.
/* "<![" is only treated as a conditional section when more than one input is stacked and the current input has a filename. */
8356 if ((ctxt
->inputNr
> 1) && (ctxt
->input
->filename
!= NULL
) &&
8357 (RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8358 xmlParseConditionalSections(ctxt
);
8359 } else if ((RAW
== '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8360 xmlParseMarkupDecl(ctxt
);
8361 } else if (RAW
== '%') {
8362 xmlParsePEReference(ctxt
);
/* NOTE(review): the branch line introducing this error is not visible; it reports an internal error and halts the parser. */
8364 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
8365 "xmlParseInternalSubset: error detected in"
8366 " Markup declaration\n");
8367 xmlHaltParser(ctxt
);
8381 * We should be at the end of the DOCTYPE declaration.
8384 xmlFatalErr(ctxt
, XML_ERR_DOCTYPE_NOT_FINISHED
, NULL
);
8390 #ifdef LIBXML_SAX1_ENABLED
8392 * xmlParseAttribute:
8393 * @ctxt: an XML parser context
8394 * @value: a xmlChar ** used to store the value of the attribute
8396 * DEPRECATED: Internal function, don't use.
8398 * parse an attribute
8400 * [41] Attribute ::= Name Eq AttValue
8402 * [ WFC: No External Entity References ]
8403 * Attribute values cannot contain direct or indirect entity references
8404 * to external entities.
8406 * [ WFC: No < in Attribute Values ]
8407 * The replacement text of any entity referred to directly or indirectly in
8408 * an attribute value (other than "<") must not contain a <.
8410 * [ VC: Attribute Value Type ]
8411 * The attribute must have been declared; the value must be of the type
8414 * [25] Eq ::= S? '=' S?
8418 * [NS 11] Attribute ::= QName Eq AttValue
8420 * Also the case QName == xmlns:??? is handled independently as a namespace
8423 * Returns the attribute name, and the value in *value.
8427 xmlParseAttribute(xmlParserCtxtPtr ctxt
, xmlChar
**value
) {
/*
 * SAX1 attribute parser: reads the attribute name with xmlParseName and
 * the value with xmlParseAttValue, then applies the xml:lang and
 * xml:space checks below.
 */
8428 const xmlChar
*name
;
8433 name
= xmlParseName(ctxt
);
/* NOTE(review): the condition guarding this error is not visible here; presumably name == NULL. */
8435 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8436 "error parsing attribute name\n");
8447 val
= xmlParseAttValue(ctxt
);
/* The state is put back to XML_PARSER_CONTENT once the value has been read. */
8448 ctxt
->instate
= XML_PARSER_CONTENT
;
8450 xmlFatalErrMsgStr(ctxt
, XML_ERR_ATTRIBUTE_WITHOUT_VALUE
,
8451 "Specification mandates value for attribute %s\n", name
);
8456 * Check that xml:lang conforms to the specification
8457 * No more registered as an error, just generate a warning now
8458 * since this was deprecated in XML second edition
/* The xml:lang check only runs in pedantic mode and produces a warning, not an error. */
8460 if ((ctxt
->pedantic
) && (xmlStrEqual(name
, BAD_CAST
"xml:lang"))) {
8461 if (!xmlCheckLanguageID(val
)) {
8462 xmlWarningMsg(ctxt
, XML_WAR_LANG_VALUE
,
8463 "Malformed value for xml:lang : %s\n",
8469 * Check that xml:space conforms to the specification
/* Only "default" and "preserve" are recognised; the actions taken for them are on lines not visible here. */
8471 if (xmlStrEqual(name
, BAD_CAST
"xml:space")) {
8472 if (xmlStrEqual(val
, BAD_CAST
"default"))
8474 else if (xmlStrEqual(val
, BAD_CAST
"preserve"))
8477 xmlWarningMsg(ctxt
, XML_WAR_SPACE_VALUE
,
8478 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8489 * @ctxt: an XML parser context
8491 * DEPRECATED: Internal function, don't use.
8493 * Parse a start tag. Always consumes '<'.
8495 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8497 * [ WFC: Unique Att Spec ]
8498 * No attribute name may appear more than once in the same start-tag or
8499 * empty-element tag.
8501 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8503 * [ WFC: Unique Att Spec ]
8504 * No attribute name may appear more than once in the same start-tag or
8505 * empty-element tag.
8509 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8511 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8513 * Returns the element name parsed
8517 xmlParseStartTag(xmlParserCtxtPtr ctxt
) {
/*
 * SAX1 start-tag parser: reads the element name, collects attributes
 * into the context's atts array as consecutive name/value slots
 * terminated by NULL entries, then calls sax->startElement.
 */
8518 const xmlChar
*name
;
8519 const xmlChar
*attname
;
/* Local copies of the context's attribute array and its capacity; the capacity is written back after (re)allocation. */
8521 const xmlChar
**atts
= ctxt
->atts
;
8523 int maxatts
= ctxt
->maxatts
;
8526 if (RAW
!= '<') return(NULL
);
8529 name
= xmlParseName(ctxt
);
8531 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8532 "xmlParseStartTag: invalid element name\n");
8537 * Now parse the attributes, it ends up with the ending
/* Attribute loop: stops at '>' or "/>", on a character rejected by IS_BYTE_CHAR, or when the parser reaches XML_PARSER_EOF. */
8544 while (((RAW
!= '>') &&
8545 ((RAW
!= '/') || (NXT(1) != '>')) &&
8546 (IS_BYTE_CHAR(RAW
))) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
8547 attname
= xmlParseAttribute(ctxt
, &attvalue
);
8548 if (attname
== NULL
) {
8549 xmlFatalErrMsg(ctxt
, XML_ERR_INTERNAL_ERROR
,
8550 "xmlParseStartTag: problem parsing attributes\n");
8553 if (attvalue
!= NULL
) {
8555 * [ WFC: Unique Att Spec ]
8556 * No attribute name may appear more than once in the same
8557 * start-tag or empty-element tag.
/* Names sit at even indexes of atts, hence the step of 2; names are compared with xmlStrEqual. */
8559 for (i
= 0; i
< nbatts
;i
+= 2) {
8560 if (xmlStrEqual(atts
[i
], attname
)) {
8561 xmlErrAttributeDup(ctxt
, NULL
, attname
);
8567 * Add the pair to atts
8570 maxatts
= 22; /* allow for 10 attrs by default */
8571 atts
= (const xmlChar
**)
8572 xmlMalloc(maxatts
* sizeof(xmlChar
*));
8574 xmlErrMemory(ctxt
, NULL
);
8575 if (attvalue
!= NULL
)
8580 ctxt
->maxatts
= maxatts
;
/* Growth is triggered when the new pair plus the two NULL terminators would not fit. */
8581 } else if (nbatts
+ 4 > maxatts
) {
8585 n
= (const xmlChar
**) xmlRealloc((void *) atts
,
8586 maxatts
* sizeof(const xmlChar
*));
8588 xmlErrMemory(ctxt
, NULL
);
8589 if (attvalue
!= NULL
)
8595 ctxt
->maxatts
= maxatts
;
/* Store name then value, and keep the array terminated by two NULL slots. */
8597 atts
[nbatts
++] = attname
;
8598 atts
[nbatts
++] = attvalue
;
8599 atts
[nbatts
] = NULL
;
8600 atts
[nbatts
+ 1] = NULL
;
8602 if (attvalue
!= NULL
)
8609 if ((RAW
== '>') || (((RAW
== '/') && (NXT(1) == '>'))))
/* At least one blank is required between attributes. */
8611 if (SKIP_BLANKS
== 0) {
8612 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
8613 "attributes construct error\n");
8620 * SAX: Start of Element !
8622 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->startElement
!= NULL
) &&
8623 (!ctxt
->disableSAX
)) {
/* NOTE(review): the condition choosing between atts and NULL is not visible here. */
8625 ctxt
->sax
->startElement(ctxt
->userData
, name
, atts
);
8627 ctxt
->sax
->startElement(ctxt
->userData
, name
, NULL
);
8631 /* Free only the content strings */
/* Values sit at odd indexes; the names at even indexes are not freed here. */
8632 for (i
= 1;i
< nbatts
;i
+=2)
8633 if (atts
[i
] != NULL
)
8634 xmlFree((xmlChar
*) atts
[i
]);
8641 * @ctxt: an XML parser context
8642 * @line: line of the start tag
8643 * @nsNr: number of namespaces on the start tag
8645 * Parse an end tag. Always consumes '</'.
8647 * [42] ETag ::= '</' Name S? '>'
8651 * [NS 9] ETag ::= '</' QName S? '>'
8655 xmlParseEndTag1(xmlParserCtxtPtr ctxt
, int line
) {
/*
 * SAX1 end-tag parser: expects "</", compares the tag name against
 * ctxt->name, requires a closing '>' and then calls sax->endElement.
 * The line argument is only used in the mismatch error message.
 */
8656 const xmlChar
*name
;
8659 if ((RAW
!= '<') || (NXT(1) != '/')) {
8660 xmlFatalErrMsg(ctxt
, XML_ERR_LTSLASH_REQUIRED
,
8661 "xmlParseEndTag: '</' not found\n");
/* The name is parsed and compared against the currently open element in one step. */
8666 name
= xmlParseNameAndCompare(ctxt
,ctxt
->name
);
8669 * We should definitely be at the ending "S? '>'" part
8673 if ((!IS_BYTE_CHAR(RAW
)) || (RAW
!= '>')) {
8674 xmlFatalErr(ctxt
, XML_ERR_GT_REQUIRED
, NULL
);
8679 * [ WFC: Element Type Match ]
8680 * The Name in an element's end-tag must match the element type in the
/* Any result other than the sentinel (xmlChar *)1 is reported as a mismatch; NULL is shown as "unparsable". */
8684 if (name
!= (xmlChar
*)1) {
8685 if (name
== NULL
) name
= BAD_CAST
"unparsable";
8686 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NAME_MISMATCH
,
8687 "Opening and ending tag mismatch: %s line %d and %s\n",
8688 ctxt
->name
, line
, name
);
/* endElement is reported with ctxt->name (the open element), not with the name read from the input. */
8694 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElement
!= NULL
) &&
8695 (!ctxt
->disableSAX
))
8696 ctxt
->sax
->endElement(ctxt
->userData
, ctxt
->name
);
8705 * @ctxt: an XML parser context
8707 * DEPRECATED: Internal function, don't use.
8709 * parse an end of tag
8711 * [42] ETag ::= '</' Name S? '>'
8715 * [NS 9] ETag ::= '</' QName S? '>'
8719 xmlParseEndTag(xmlParserCtxtPtr ctxt
) {
/* Thin wrapper: delegates to xmlParseEndTag1, passing 0 as the start-tag line. */
8720 xmlParseEndTag1(ctxt
, 0);
8722 #endif /* LIBXML_SAX1_ENABLED */
8724 /************************************************************************
8726 * SAX 2 specific operations *
8728 ************************************************************************/
8732 * @ctxt: an XML parser context
8733 * @prefix: the prefix to lookup
8735 * Lookup the namespace name for the @prefix (which can be NULL)
8736 * The prefix must come from the @ctxt->dict dictionary
8738 * Returns the namespace name or NULL if not bound
/*
 * Looks up the namespace bound to prefix. ctxt->nsTab is read as
 * consecutive pairs, with the prefix at index i and its namespace name
 * at index i + 1. Prefixes are compared by pointer, not by content.
 */
8740 static const xmlChar
*
8741 xmlGetNamespace(xmlParserCtxtPtr ctxt
, const xmlChar
*prefix
) {
/* The "xml" prefix is answered directly with the context's str_xml_ns. */
8744 if (prefix
== ctxt
->str_xml
) return(ctxt
->str_xml_ns
);
/* Scan from the most recently added pair down to the first one. */
8745 for (i
= ctxt
->nsNr
- 2;i
>= 0;i
-=2)
8746 if (ctxt
->nsTab
[i
] == prefix
) {
/* NOTE(review): special case for a NULL prefix bound to an empty string; the statement it guards is not visible here. */
8747 if ((prefix
== NULL
) && (*ctxt
->nsTab
[i
+ 1] == 0))
8749 return(ctxt
->nsTab
[i
+ 1]);
8756 * @ctxt: an XML parser context
8757 * @prefix: pointer to store the prefix part
8759 * parse an XML Namespace QName
8761 * [6] QName ::= (Prefix ':')? LocalPart
8762 * [7] Prefix ::= NCName
8763 * [8] LocalPart ::= NCName
8765 * Returns the Name parsed or NULL
/*
 * Parses a QName. xmlParseNCName is tried first; the fallbacks below
 * use xmlParseName / xmlParseNmtoken and report XML_NS_ERR_QNAME.
 * Names rebuilt with xmlBuildQName are interned with xmlDictLookup and
 * the temporary buffer is freed.
 * NOTE(review): several controlling lines are not visible here, so the
 * exact conditions of each fallback must be checked against the full file.
 */
8768 static const xmlChar
*
8769 xmlParseQName(xmlParserCtxtPtr ctxt
, const xmlChar
**prefix
) {
8770 const xmlChar
*l
, *p
;
8773 if (ctxt
->instate
== XML_PARSER_EOF
)
8776 l
= xmlParseNCName(ctxt
);
/* Fallback: retry with xmlParseName and report the QName as invalid. */
8779 l
= xmlParseName(ctxt
);
8781 xmlNsErr(ctxt
, XML_NS_ERR_QNAME
,
8782 "Failed to parse QName '%s'\n", l
, NULL
, NULL
);
8792 l
= xmlParseNCName(ctxt
);
8796 if (ctxt
->instate
== XML_PARSER_EOF
)
8798 xmlNsErr(ctxt
, XML_NS_ERR_QNAME
,
8799 "Failed to parse QName '%s:'\n", p
, NULL
, NULL
);
8800 l
= xmlParseNmtoken(ctxt
);
8802 if (ctxt
->instate
== XML_PARSER_EOF
)
/* Rebuild a combined name from p (and l when available), intern it, then release the temporary. */
8804 tmp
= xmlBuildQName(BAD_CAST
"", p
, NULL
, 0);
8806 tmp
= xmlBuildQName(l
, p
, NULL
, 0);
8809 p
= xmlDictLookup(ctxt
->dict
, tmp
, -1);
8810 if (tmp
!= NULL
) xmlFree(tmp
);
8817 xmlNsErr(ctxt
, XML_NS_ERR_QNAME
,
8818 "Failed to parse QName '%s:%s:'\n", p
, l
, NULL
);
/* The remainder is read with xmlParseName and appended to l through xmlBuildQName before interning. */
8820 tmp
= (xmlChar
*) xmlParseName(ctxt
);
8822 tmp
= xmlBuildQName(tmp
, l
, NULL
, 0);
8823 l
= xmlDictLookup(ctxt
->dict
, tmp
, -1);
8824 if (tmp
!= NULL
) xmlFree(tmp
);
8828 if (ctxt
->instate
== XML_PARSER_EOF
)
8830 tmp
= xmlBuildQName(BAD_CAST
"", l
, NULL
, 0);
8831 l
= xmlDictLookup(ctxt
->dict
, tmp
, -1);
8832 if (tmp
!= NULL
) xmlFree(tmp
);
8843 * xmlParseQNameAndCompare:
8844 * @ctxt: an XML parser context
8845 * @name: the localname
8846 * @prefix: the prefix, if any.
8848 * parse an XML name and compares for match
8849 * (specialized for endtag parsing)
8851 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8852 * and the name for mismatch
/*
 * End-tag helper: compares the bytes at the input cursor against
 * "prefix:name" without running a full QName parse. On a match the
 * cursor and column are advanced and the sentinel (const xmlChar *) 1
 * is returned. Otherwise xmlParseQName is used and the results are
 * compared by pointer.
 */
8855 static const xmlChar
*
8856 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt
, xmlChar
const *name
,
8857 xmlChar
const *prefix
) {
8861 const xmlChar
*prefix2
;
/* Without a prefix the plain name comparison is used instead. */
8863 if (prefix
== NULL
) return(xmlParseNameAndCompare(ctxt
, name
));
8866 in
= ctxt
->input
->cur
;
/* First pass: walk in and cmp together while the bytes agree. */
8869 while (*in
!= 0 && *in
== *cmp
) {
/* The compared string was fully consumed and the input continues with ':'. */
8873 if ((*cmp
== 0) && (*in
== ':')) {
8876 while (*in
!= 0 && *in
== *cmp
) {
/* Success requires the compared string to end exactly where the input has '>' or a blank. */
8880 if (*cmp
== 0 && (*in
== '>' || IS_BLANK_CH (*in
))) {
8882 ctxt
->input
->col
+= in
- ctxt
->input
->cur
;
8883 ctxt
->input
->cur
= in
;
8884 return((const xmlChar
*) 1);
8888 * all strings coms from the dictionary, equality can be done directly
/* Slow path: full QName parse, then pointer comparison of local name and prefix. */
8890 ret
= xmlParseQName (ctxt
, &prefix2
);
8891 if ((ret
== name
) && (prefix
== prefix2
))
8892 return((const xmlChar
*) 1);
8897 * xmlParseAttValueInternal:
8898 * @ctxt: an XML parser context
8899 * @len: attribute len result
8900 * @alloc: whether the attribute was reallocated as a new string
8901 * @normalize: if 1 then further non-CDATA normalization must be done
8903 * parse a value for an attribute.
8904 * NOTE: if no normalization is needed, the routine will return pointers
8905 * directly from the data buffer.
8907 * 3.3.3 Attribute-Value Normalization:
8908 * Before the value of an attribute is passed to the application or
8909 * checked for validity, the XML processor must normalize it as follows:
8910 * - a character reference is processed by appending the referenced
8911 * character to the attribute value
8912 * - an entity reference is processed by recursively processing the
8913 * replacement text of the entity
8914 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8915 * appending #x20 to the normalized value, except that only a single
8916 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8917 * parsed entity or the literal entity value of an internal parsed entity
8918 * - other characters are processed by appending them to the normalized value
8919 * If the declared value is not CDATA, then the XML processor must further
8920 * process the normalized attribute value by discarding any leading and
8921 * trailing space (#x20) characters, and by replacing sequences of space
8922 * (#x20) characters by a single space (#x20) character.
8923 * All attributes for which no declaration has been read should be treated
8924 * by a non-validating parser as if declared CDATA.
8926 * Returns the AttValue parsed or NULL. The value has to be freed by the
8927 * caller if it was copied, this can be detected by val[*len] == 0.
/*
 * Helper macro for xmlParseAttValueInternal. It remembers
 * ctxt->input->base, checks for XML_PARSER_EOF, shifts 'start' by the
 * base delta when the input buffer has moved, and reloads 'end' from
 * ctxt->input->end.
 * NOTE(review): some continuation lines (including whatever makes the
 * buffer move) are not visible here; confirm against the full file.
 */
8930 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8931 const xmlChar *oldbase = ctxt->input->base;\
8933 if (ctxt->instate == XML_PARSER_EOF)\
8935 if (oldbase != ctxt->input->base) {\
8936 ptrdiff_t delta = ctxt->input->base - oldbase;\
8937 start = start + delta;\
8940 end = ctxt->input->end;
/*
 * Fast path for attribute values. The scanning loops below accept only
 * bytes in 0x20..0x7f other than '&' and '<'; when scanning stops on
 * anything other than the closing quote, control goes to need_complex
 * and xmlParseAttValueComplex takes over. On the fast path the result
 * is either a pointer into the input buffer (*alloc = 0) or an
 * xmlStrndup copy (*alloc = 1).
 */
8943 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt
, int *len
, int *alloc
,
8947 const xmlChar
*in
= NULL
, *start
, *end
, *last
;
8948 xmlChar
*ret
= NULL
;
/* Length cap depends on the XML_PARSE_HUGE option. */
8950 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
8951 XML_MAX_HUGE_LENGTH
:
8952 XML_MAX_TEXT_LENGTH
;
8955 in
= (xmlChar
*) CUR_PTR
;
/* Line and column are copied into locals here and written back to ctxt->input near the end. */
8956 line
= ctxt
->input
->line
;
8957 col
= ctxt
->input
->col
;
/* The value must start with a double or single quote. */
8958 if (*in
!= '"' && *in
!= '\'') {
8959 xmlFatalErr(ctxt
, XML_ERR_ATTRIBUTE_NOT_STARTED
, NULL
);
8962 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
8965 * try to handle in this routine the most common case where no
8966 * allocation of a new string is required and where content is
8971 end
= ctxt
->input
->end
;
8974 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt
, in
, start
, end
)
8978 * Skip any leading spaces
8980 while ((in
< end
) && (*in
!= limit
) &&
8981 ((*in
== 0x20) || (*in
== 0x9) ||
8982 (*in
== 0xA) || (*in
== 0xD))) {
8991 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt
, in
, start
, end
)
8992 if ((in
- start
) > maxLength
) {
8993 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
8994 "AttValue length too long\n");
8999 while ((in
< end
) && (*in
!= limit
) && (*in
>= 0x20) &&
9000 (*in
<= 0x7f) && (*in
!= '&') && (*in
!= '<')) {
/* Leave the loop as soon as two consecutive spaces are seen. */
9002 if ((*in
++ == 0x20) && (*in
== 0x20)) break;
9004 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt
, in
, start
, end
)
9005 if ((in
- start
) > maxLength
) {
9006 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9007 "AttValue length too long\n");
9014 * skip the trailing blanks
/* NOTE(review): last[-1] is read before the (last > start) bound is tested. */
9016 while ((last
[-1] == 0x20) && (last
> start
)) last
--;
9017 while ((in
< end
) && (*in
!= limit
) &&
9018 ((*in
== 0x20) || (*in
== 0x9) ||
9019 (*in
== 0xA) || (*in
== 0xD))) {
/* Same rebasing as the GROW macro, written out because 'last' must be shifted as well. */
9027 const xmlChar
*oldbase
= ctxt
->input
->base
;
9029 if (ctxt
->instate
== XML_PARSER_EOF
)
9031 if (oldbase
!= ctxt
->input
->base
) {
9032 ptrdiff_t delta
= ctxt
->input
->base
- oldbase
;
9033 start
= start
+ delta
;
9035 last
= last
+ delta
;
9037 end
= ctxt
->input
->end
;
9038 if ((in
- start
) > maxLength
) {
9039 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9040 "AttValue length too long\n");
9045 if ((in
- start
) > maxLength
) {
9046 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9047 "AttValue length too long\n");
/* Anything other than the closing quote at this point needs the complex parser. */
9050 if (*in
!= limit
) goto need_complex
;
9052 while ((in
< end
) && (*in
!= limit
) && (*in
>= 0x20) &&
9053 (*in
<= 0x7f) && (*in
!= '&') && (*in
!= '<')) {
9057 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt
, in
, start
, end
)
9058 if ((in
- start
) > maxLength
) {
9059 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9060 "AttValue length too long\n");
9066 if ((in
- start
) > maxLength
) {
9067 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
9068 "AttValue length too long\n");
9071 if (*in
!= limit
) goto need_complex
;
/* No-copy result: ret points at 'start' inside the input buffer and *len is last - start. */
9076 if (alloc
) *alloc
= 0;
9077 *len
= last
- start
;
9078 ret
= (xmlChar
*) start
;
/* Copy result: ret is an xmlStrndup of the same range and *alloc is set to 1. */
9080 if (alloc
) *alloc
= 1;
9081 ret
= xmlStrndup(start
, last
- start
);
9084 ctxt
->input
->line
= line
;
9085 ctxt
->input
->col
= col
;
/* Complex path: the result always comes from xmlParseAttValueComplex, so *alloc is set to 1. */
9088 if (alloc
) *alloc
= 1;
9089 return xmlParseAttValueComplex(ctxt
, len
, normalize
);
9093 * xmlParseAttribute2:
9094 * @ctxt: an XML parser context
9095 * @pref: the element prefix
9096 * @elem: the element name
9097 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9098 * @value: a xmlChar ** used to store the value of the attribute
9099 * @len: an int * to save the length of the attribute
9100 * @alloc: an int * to indicate if the attribute was allocated
9102 * parse an attribute in the new SAX2 framework.
9104 * Returns the attribute name, and the value in *value.
9107 static const xmlChar
*
9108 xmlParseAttribute2(xmlParserCtxtPtr ctxt
,
9109 const xmlChar
* pref
, const xmlChar
* elem
,
9110 const xmlChar
** prefix
, xmlChar
** value
,
9111 int *len
, int *alloc
)
/*
 * SAX2 attribute parser: reads the QName with xmlParseQName and the
 * value with xmlParseAttValueInternal, looks the attribute up in
 * ctxt->attsSpecial when that table exists, and checks lang/space
 * when the attribute prefix is the "xml" prefix.
 */
9113 const xmlChar
*name
;
9114 xmlChar
*val
, *internal_val
= NULL
;
9119 name
= xmlParseQName(ctxt
, prefix
);
9121 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
9122 "error parsing attribute name\n");
9127 * get the type if needed
/* The hash value is stored as a pointer-sized integer and cast back to int; the lookup key is (pref, elem, *prefix, name). */
9129 if (ctxt
->attsSpecial
!= NULL
) {
9132 type
= (int) (ptrdiff_t) xmlHashQLookup2(ctxt
->attsSpecial
,
9133 pref
, elem
, *prefix
, name
);
9145 val
= xmlParseAttValueInternal(ctxt
, len
, alloc
, normalize
);
9150 * Sometimes a second normalisation pass for spaces is needed
9151 * but that only happens if charrefs or entities references
9152 * have been used in the attribute value, i.e. the attribute
9153 * value have been extracted in an allocated string already.
9156 const xmlChar
*val2
;
/* val is replaced only when the normaliser returned a different, non-NULL string. */
9158 val2
= xmlAttrNormalizeSpace2(ctxt
, val
, len
);
9159 if ((val2
!= NULL
) && (val2
!= val
)) {
9161 val
= (xmlChar
*) val2
;
9165 ctxt
->instate
= XML_PARSER_CONTENT
;
9167 xmlFatalErrMsgStr(ctxt
, XML_ERR_ATTRIBUTE_WITHOUT_VALUE
,
9168 "Specification mandates value for attribute %s\n",
/* The prefix is compared by pointer against the context's str_xml. */
9173 if (*prefix
== ctxt
->str_xml
) {
9175 * Check that xml:lang conforms to the specification
9176 * No more registered as an error, just generate a warning now
9177 * since this was deprecated in XML second edition
9179 if ((ctxt
->pedantic
) && (xmlStrEqual(name
, BAD_CAST
"lang"))) {
/* The checks run on a copy made with xmlStrndup(val, *len). */
9180 internal_val
= xmlStrndup(val
, *len
);
9181 if (!xmlCheckLanguageID(internal_val
)) {
9182 xmlWarningMsg(ctxt
, XML_WAR_LANG_VALUE
,
9183 "Malformed value for xml:lang : %s\n",
9184 internal_val
, NULL
);
9189 * Check that xml:space conforms to the specification
9191 if (xmlStrEqual(name
, BAD_CAST
"space")) {
9192 internal_val
= xmlStrndup(val
, *len
);
9193 if (xmlStrEqual(internal_val
, BAD_CAST
"default"))
9195 else if (xmlStrEqual(internal_val
, BAD_CAST
"preserve"))
9198 xmlWarningMsg(ctxt
, XML_WAR_SPACE_VALUE
,
9199 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9200 internal_val
, NULL
);
/* The temporary copy is released here. */
9204 xmlFree(internal_val
);
9212 * xmlParseStartTag2:
9213 * @ctxt: an XML parser context
9215 * Parse a start tag. Always consumes '<'.
9217 * This routine is called when running SAX2 parsing
9219 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9221 * [ WFC: Unique Att Spec ]
9222 * No attribute name may appear more than once in the same start-tag or
9223 * empty-element tag.
9225 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9227 * [ WFC: Unique Att Spec ]
9228 * No attribute name may appear more than once in the same start-tag or
9229 * empty-element tag.
9233 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9235 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9237 * Returns the element name parsed
/*
 * SAX2 start-tag parser. It reads the element QName, then each
 * attribute: namespace declarations (xmlns / xmlns:*) are interned with
 * xmlDictLookup and pushed with nsPush, other attributes are stored in
 * atts in groups of five slots (name, prefix, namespace URI, value,
 * value end). Defaulted attributes from ctxt->attsDefault are then
 * appended, prefixes are resolved with xmlGetNamespace, duplicates are
 * reported and sax->startElementNs is called.
 */
9240 static const xmlChar
*
9241 xmlParseStartTag2(xmlParserCtxtPtr ctxt
, const xmlChar
**pref
,
9242 const xmlChar
**URI
, int *tlen
) {
9243 const xmlChar
*localname
;
9244 const xmlChar
*prefix
;
9245 const xmlChar
*attname
;
9246 const xmlChar
*aprefix
;
9247 const xmlChar
*nsname
;
9249 const xmlChar
**atts
= ctxt
->atts
;
9250 int maxatts
= ctxt
->maxatts
;
9251 int nratts
, nbatts
, nbdef
, inputid
;
9252 int i
, j
, nbNs
, attval
;
9254 int nsNr
= ctxt
->nsNr
;
9256 if (RAW
!= '<') return(NULL
);
/* Remember the cursor offset from the buffer base and the input id; both are used again further down. */
9259 cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
9260 inputid
= ctxt
->input
->id
;
9266 /* Forget any namespaces added during an earlier parse of this element. */
9269 localname
= xmlParseQName(ctxt
, &prefix
);
9270 if (localname
== NULL
) {
9271 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
9272 "StartTag: invalid element name\n");
/* *tlen receives the number of bytes consumed since the offset saved in cur. */
9275 *tlen
= ctxt
->input
->cur
- ctxt
->input
->base
- cur
;
9278 * Now parse the attributes, it ends up with the ending
9285 while (((RAW
!= '>') &&
9286 ((RAW
!= '/') || (NXT(1) != '>')) &&
9287 (IS_BYTE_CHAR(RAW
))) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
9288 int len
= -1, alloc
= 0;
9290 attname
= xmlParseAttribute2(ctxt
, prefix
, localname
,
9291 &aprefix
, &attvalue
, &len
, &alloc
);
9292 if (attname
== NULL
) {
9293 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
9294 "xmlParseStartTag: problem parsing attributes\n");
9297 if (attvalue
== NULL
)
/* len stays at -1 when the callee did not set it; fall back to xmlStrlen. */
9299 if (len
< 0) len
= xmlStrlen(attvalue
);
/* Default namespace declaration: attribute named "xmlns" without a prefix (pointer comparison with str_xmlns). */
9301 if ((attname
== ctxt
->str_xmlns
) && (aprefix
== NULL
)) {
9302 const xmlChar
*URL
= xmlDictLookup(ctxt
->dict
, attvalue
, len
);
9306 xmlErrMemory(ctxt
, "dictionary allocation failure");
9307 if ((attvalue
!= NULL
) && (alloc
!= 0))
9313 uri
= xmlParseURI((const char *) URL
);
9315 xmlNsErr(ctxt
, XML_WAR_NS_URI
,
9316 "xmlns: '%s' is not a valid URI\n",
9319 if (uri
->scheme
== NULL
) {
9320 xmlNsWarn(ctxt
, XML_WAR_NS_URI_RELATIVE
,
9321 "xmlns: URI %s is not absolute\n",
9326 if (URL
== ctxt
->str_xml_ns
) {
9327 if (attname
!= ctxt
->str_xml
) {
9328 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9329 "xml namespace URI cannot be the default namespace\n",
9336 BAD_CAST
"http://www.w3.org/2000/xmlns/"))) {
9337 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9338 "reuse of the xmlns namespace name is forbidden\n",
9344 * check that it's not a defined namespace
/* Only the nbNs pairs pushed for this tag are inspected, counting back from the top of nsTab. */
9346 for (j
= 1;j
<= nbNs
;j
++)
9347 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == NULL
)
9350 xmlErrAttributeDup(ctxt
, NULL
, attname
);
9352 if (nsPush(ctxt
, NULL
, URL
) > 0) nbNs
++;
/* Prefixed namespace declaration: the attribute prefix is "xmlns" and attname is the prefix being bound. */
9354 } else if (aprefix
== ctxt
->str_xmlns
) {
9355 const xmlChar
*URL
= xmlDictLookup(ctxt
->dict
, attvalue
, len
);
9358 if (attname
== ctxt
->str_xml
) {
9359 if (URL
!= ctxt
->str_xml_ns
) {
9360 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9361 "xml namespace prefix mapped to wrong URI\n",
9365 * Do not keep a namespace definition node
9369 if (URL
== ctxt
->str_xml_ns
) {
9370 if (attname
!= ctxt
->str_xml
) {
9371 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9372 "xml namespace URI mapped to wrong prefix\n",
9377 if (attname
== ctxt
->str_xmlns
) {
9378 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9379 "redefinition of the xmlns prefix is forbidden\n",
9385 BAD_CAST
"http://www.w3.org/2000/xmlns/"))) {
9386 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9387 "reuse of the xmlns namespace name is forbidden\n",
9391 if ((URL
== NULL
) || (URL
[0] == 0)) {
9392 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
9393 "xmlns:%s: Empty XML namespace is not allowed\n",
9394 attname
, NULL
, NULL
);
9397 uri
= xmlParseURI((const char *) URL
);
9399 xmlNsErr(ctxt
, XML_WAR_NS_URI
,
9400 "xmlns:%s: '%s' is not a valid URI\n",
9401 attname
, URL
, NULL
);
/* Unlike the default-namespace branch above, the relative-URI warning here is gated on pedantic mode. */
9403 if ((ctxt
->pedantic
) && (uri
->scheme
== NULL
)) {
9404 xmlNsWarn(ctxt
, XML_WAR_NS_URI_RELATIVE
,
9405 "xmlns:%s: URI %s is not absolute\n",
9406 attname
, URL
, NULL
);
9413 * check that it's not a defined namespace
9415 for (j
= 1;j
<= nbNs
;j
++)
9416 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == attname
)
9419 xmlErrAttributeDup(ctxt
, aprefix
, attname
);
9421 if (nsPush(ctxt
, attname
, URL
) > 0) nbNs
++;
9425 * Add the pair to atts
/* Make sure five more slots fit; the local atts/maxatts copies are refreshed after growing. */
9427 if ((atts
== NULL
) || (nbatts
+ 5 > maxatts
)) {
9428 if (xmlCtxtGrowAttrs(ctxt
, nbatts
+ 5) < 0) {
9431 maxatts
= ctxt
->maxatts
;
/* attallocs records, per attribute, whether its value was allocated; the cleanup loop at the end reads it. */
9434 ctxt
->attallocs
[nratts
++] = alloc
;
9435 atts
[nbatts
++] = attname
;
9436 atts
[nbatts
++] = aprefix
;
9438 * The namespace URI field is used temporarily to point at the
9439 * base of the current input buffer for non-alloced attributes.
9440 * When the input buffer is reallocated, all the pointers become
9441 * invalid, but they can be reconstructed later.
9444 atts
[nbatts
++] = NULL
;
9446 atts
[nbatts
++] = ctxt
->input
->base
;
9447 atts
[nbatts
++] = attvalue
;
9449 atts
[nbatts
++] = attvalue
;
9451 * tag if some deallocation is needed
9453 if (alloc
!= 0) attval
= 1;
9454 attvalue
= NULL
; /* moved into atts */
9458 if ((attvalue
!= NULL
) && (alloc
!= 0)) {
9464 if (ctxt
->instate
== XML_PARSER_EOF
)
9466 if ((RAW
== '>') || (((RAW
== '/') && (NXT(1) == '>'))))
9468 if (SKIP_BLANKS
== 0) {
9469 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
9470 "attributes construct error\n");
/* The tag must end in the same input it started in. */
9476 if (ctxt
->input
->id
!= inputid
) {
9477 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
9478 "Unexpected change of input\n");
9483 /* Reconstruct attribute value pointers. */
/* A non-NULL slot 2 holds the buffer base saved when the value was stored; slots 3 and 4 are rebased onto the current base. */
9484 for (i
= 0, j
= 0; j
< nratts
; i
+= 5, j
++) {
9485 if (atts
[i
+2] != NULL
) {
9487 * Arithmetic on dangling pointers is technically undefined
9488 * behavior, but well...
9490 const xmlChar
*old
= atts
[i
+2];
9491 atts
[i
+2] = NULL
; /* Reset repurposed namespace URI */
9492 atts
[i
+3] = ctxt
->input
->base
+ (atts
[i
+3] - old
); /* value */
9493 atts
[i
+4] = ctxt
->input
->base
+ (atts
[i
+4] - old
); /* valuend */
9498 * The attributes defaulting
9500 if (ctxt
->attsDefault
!= NULL
) {
9501 xmlDefAttrsPtr defaults
;
9503 defaults
= xmlHashLookup2(ctxt
->attsDefault
, localname
, prefix
);
9504 if (defaults
!= NULL
) {
/* defaults->values is read in groups of five per attribute; slots 0 and 1 are the name and prefix. */
9505 for (i
= 0;i
< defaults
->nbAttrs
;i
++) {
9506 attname
= defaults
->values
[5 * i
];
9507 aprefix
= defaults
->values
[5 * i
+ 1];
9510 * special work for namespaces defaulted defs
9512 if ((attname
== ctxt
->str_xmlns
) && (aprefix
== NULL
)) {
9514 * check that it's not a defined namespace
9516 for (j
= 1;j
<= nbNs
;j
++)
9517 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == NULL
)
/* j <= nbNs here means the scan above stopped early on a match, so this default is skipped. */
9519 if (j
<= nbNs
) continue;
9521 nsname
= xmlGetNamespace(ctxt
, NULL
);
/* Push the defaulted binding only when it differs from what is currently in scope. */
9522 if (nsname
!= defaults
->values
[5 * i
+ 2]) {
9523 if (nsPush(ctxt
, NULL
,
9524 defaults
->values
[5 * i
+ 2]) > 0)
9527 } else if (aprefix
== ctxt
->str_xmlns
) {
9529 * check that it's not a defined namespace
9531 for (j
= 1;j
<= nbNs
;j
++)
9532 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == attname
)
9534 if (j
<= nbNs
) continue;
9536 nsname
= xmlGetNamespace(ctxt
, attname
);
9537 if (nsname
!= defaults
->values
[5 * i
+ 2]) {
9538 if (nsPush(ctxt
, attname
,
9539 defaults
->values
[5 * i
+ 2]) > 0)
9544 * check that it's not a defined attribute
/* An attribute already present with the same name and prefix pointers wins over the default. */
9546 for (j
= 0;j
< nbatts
;j
+=5) {
9547 if ((attname
== atts
[j
]) && (aprefix
== atts
[j
+1]))
9550 if (j
< nbatts
) continue;
9552 if ((atts
== NULL
) || (nbatts
+ 5 > maxatts
)) {
9553 if (xmlCtxtGrowAttrs(ctxt
, nbatts
+ 5) < 0) {
9557 maxatts
= ctxt
->maxatts
;
9560 atts
[nbatts
++] = attname
;
9561 atts
[nbatts
++] = aprefix
;
9562 if (aprefix
== NULL
)
9563 atts
[nbatts
++] = NULL
;
9565 atts
[nbatts
++] = xmlGetNamespace(ctxt
, aprefix
);
9566 atts
[nbatts
++] = defaults
->values
[5 * i
+ 2];
9567 atts
[nbatts
++] = defaults
->values
[5 * i
+ 3];
/* In a standalone document a non-NULL fifth slot triggers the validity error below. */
9568 if ((ctxt
->standalone
== 1) &&
9569 (defaults
->values
[5 * i
+ 4] != NULL
)) {
9570 xmlValidityError(ctxt
, XML_DTD_STANDALONE_DEFAULTED
,
9571 "standalone: attribute %s on %s defaulted from external subset\n",
9572 attname
, localname
);
9581 * The attributes checkings
9583 for (i
= 0; i
< nbatts
;i
+= 5) {
9585 * The default namespace does not apply to attribute names.
/* Only prefixed attributes get a namespace lookup; the result is stored in slot 2. */
9587 if (atts
[i
+ 1] != NULL
) {
9588 nsname
= xmlGetNamespace(ctxt
, atts
[i
+ 1]);
9589 if (nsname
== NULL
) {
9590 xmlNsErr(ctxt
, XML_NS_ERR_UNDEFINED_NAMESPACE
,
9591 "Namespace prefix %s for %s on %s is not defined\n",
9592 atts
[i
+ 1], atts
[i
], localname
);
9594 atts
[i
+ 2] = nsname
;
9598 * [ WFC: Unique Att Spec ]
9599 * No attribute name may appear more than once in the same
9600 * start-tag or empty-element tag.
9601 * As extended by the Namespace in XML REC.
/* Compare against every earlier attribute: same name and prefix is a duplicate; same name and namespace is a redefinition. */
9603 for (j
= 0; j
< i
;j
+= 5) {
9604 if (atts
[i
] == atts
[j
]) {
9605 if (atts
[i
+1] == atts
[j
+1]) {
9606 xmlErrAttributeDup(ctxt
, atts
[i
+1], atts
[i
]);
9609 if ((nsname
!= NULL
) && (atts
[j
+ 2] == nsname
)) {
9610 xmlNsErr(ctxt
, XML_NS_ERR_ATTRIBUTE_REDEFINED
,
9611 "Namespaced Attribute %s in '%s' redefined\n",
9612 atts
[i
], nsname
, NULL
);
/* Resolve the element's own prefix; a prefixed element with no binding is an error. */
9619 nsname
= xmlGetNamespace(ctxt
, prefix
);
9620 if ((prefix
!= NULL
) && (nsname
== NULL
)) {
9621 xmlNsErr(ctxt
, XML_NS_ERR_UNDEFINED_NAMESPACE
,
9622 "Namespace prefix %s on %s is not defined\n",
9623 prefix
, localname
, NULL
);
9629 * SAX: Start of Element !
9631 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->startElementNs
!= NULL
) &&
9632 (!ctxt
->disableSAX
)) {
/* First form passes the nbNs pairs at the top of nsTab; second form passes no namespace array. The selecting condition is not visible here. */
9634 ctxt
->sax
->startElementNs(ctxt
->userData
, localname
, prefix
,
9635 nsname
, nbNs
, &ctxt
->nsTab
[ctxt
->nsNr
- 2 * nbNs
],
9636 nbatts
/ 5, nbdef
, atts
);
9638 ctxt
->sax
->startElementNs(ctxt
->userData
, localname
, prefix
,
9639 nsname
, 0, NULL
, nbatts
/ 5, nbdef
, atts
);
9644 * Free up attribute allocated strings if needed
/* i walks the value slot (index 3 of each group); only values flagged in attallocs are freed. */
9647 for (i
= 3,j
= 0; j
< nratts
;i
+= 5,j
++)
9648 if ((ctxt
->attallocs
[j
] != 0) && (atts
[i
] != NULL
))
9649 xmlFree((xmlChar
*) atts
[i
]);
9657 * @ctxt: an XML parser context
9658 * @tag: the corresponding start tag; its prefix, line and nsNr
9659 * fields are the ones read by this function
9661 * Parse an end tag. Always consumes '</'.
9663 * [42] ETag ::= '</' Name S? '>'
9667 * [NS 9] ETag ::= '</' QName S? '>'
9671 xmlParseEndTag2(xmlParserCtxtPtr ctxt
, const xmlStartTag
*tag
) {
/*
 * SAX2 end-tag parser: expects "</", compares the tag name (and prefix
 * when the start tag had one) against the open element, requires '>',
 * calls sax->endElementNs and pops the namespaces recorded in tag->nsNr.
 */
9672 const xmlChar
*name
;
9675 if ((RAW
!= '<') || (NXT(1) != '/')) {
9676 xmlFatalErr(ctxt
, XML_ERR_LTSLASH_REQUIRED
, NULL
);
/* Unprefixed start tags use the plain name comparison, prefixed ones the QName comparison. */
9681 if (tag
->prefix
== NULL
)
9682 name
= xmlParseNameAndCompare(ctxt
, ctxt
->name
);
9684 name
= xmlParseQNameAndCompare(ctxt
, ctxt
->name
, tag
->prefix
);
9687 * We should definitely be at the ending "S? '>'" part
9690 if (ctxt
->instate
== XML_PARSER_EOF
)
9693 if ((!IS_BYTE_CHAR(RAW
)) || (RAW
!= '>')) {
9694 xmlFatalErr(ctxt
, XML_ERR_GT_REQUIRED
, NULL
);
9699 * [ WFC: Element Type Match ]
9700 * The Name in an element's end-tag must match the element type in the
/* Any result other than the sentinel (xmlChar *)1 is a mismatch; the start tag's line comes from tag->line. */
9704 if (name
!= (xmlChar
*)1) {
9705 if (name
== NULL
) name
= BAD_CAST
"unparsable";
9706 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NAME_MISMATCH
,
9707 "Opening and ending tag mismatch: %s line %d and %s\n",
9708 ctxt
->name
, tag
->line
, name
);
9714 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElementNs
!= NULL
) &&
9715 (!ctxt
->disableSAX
))
9716 ctxt
->sax
->endElementNs(ctxt
->userData
, ctxt
->name
, tag
->prefix
,
/* Drop the namespace bindings recorded for the start tag. */
9721 nsPop(ctxt
, tag
->nsNr
);
9726 * @ctxt: an XML parser context
9728 * DEPRECATED: Internal function, don't use.
9730 * Parse escaped pure raw content. Always consumes '<!['.
9732 * [18] CDSect ::= CDStart CData CDEnd
9734 * [19] CDStart ::= '<![CDATA['
9736 * [20] CData ::= (Char* - (Char* ']]>' Char*))
9738 * [21] CDEnd ::= ']]>'
9741 xmlParseCDSect(xmlParserCtxtPtr ctxt
) {
9742 xmlChar
*buf
= NULL
;
9744 int size
= XML_PARSER_BUFFER_SIZE
;
9748 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
9749 XML_MAX_HUGE_LENGTH
:
9750 XML_MAX_TEXT_LENGTH
;
9752 if ((CUR
!= '<') || (NXT(1) != '!') || (NXT(2) != '['))
9756 if (!CMP6(CUR_PTR
, 'C', 'D', 'A', 'T', 'A', '['))
9760 ctxt
->instate
= XML_PARSER_CDATA_SECTION
;
9763 xmlFatalErr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
, NULL
);
9769 xmlFatalErr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
, NULL
);
9774 buf
= (xmlChar
*) xmlMallocAtomic(size
);
9776 xmlErrMemory(ctxt
, NULL
);
9779 while (IS_CHAR(cur
) &&
9780 ((r
!= ']') || (s
!= ']') || (cur
!= '>'))) {
9781 if (len
+ 5 >= size
) {
9784 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* 2);
9786 xmlErrMemory(ctxt
, NULL
);
9792 COPY_BUF(rl
,buf
,len
,r
);
9793 if (len
> maxLength
) {
9794 xmlFatalErrMsg(ctxt
, XML_ERR_CDATA_NOT_FINISHED
,
9795 "CData section too big found\n");
9806 if (ctxt
->instate
== XML_PARSER_EOF
) {
9811 xmlFatalErrMsgStr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
,
9812 "CData section not finished\n%.50s\n", buf
);
9818 * OK the buffer is to be consumed as cdata.
9820 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
9821 if (ctxt
->sax
->cdataBlock
!= NULL
)
9822 ctxt
->sax
->cdataBlock(ctxt
->userData
, buf
, len
);
9823 else if (ctxt
->sax
->characters
!= NULL
)
9824 ctxt
->sax
->characters(ctxt
->userData
, buf
, len
);
9828 if (ctxt
->instate
!= XML_PARSER_EOF
)
9829 ctxt
->instate
= XML_PARSER_CONTENT
;
9834 * xmlParseContentInternal:
9835 * @ctxt: an XML parser context
9837 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9838 * unexpected EOF to the caller.
9842 xmlParseContentInternal(xmlParserCtxtPtr ctxt
) {
9843 int nameNr
= ctxt
->nameNr
;
9846 while ((RAW
!= 0) &&
9847 (ctxt
->instate
!= XML_PARSER_EOF
)) {
9848 const xmlChar
*cur
= ctxt
->input
->cur
;
9851 * First case : a Processing Instruction.
9853 if ((*cur
== '<') && (cur
[1] == '?')) {
9858 * Second case : a CDSection
9860 /* 2.6.0 test was *cur not RAW */
9861 else if (CMP9(CUR_PTR
, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9862 xmlParseCDSect(ctxt
);
9866 * Third case : a comment
9868 else if ((*cur
== '<') && (NXT(1) == '!') &&
9869 (NXT(2) == '-') && (NXT(3) == '-')) {
9870 xmlParseComment(ctxt
);
9871 ctxt
->instate
= XML_PARSER_CONTENT
;
9875 * Fourth case : a sub-element.
9877 else if (*cur
== '<') {
9878 if (NXT(1) == '/') {
9879 if (ctxt
->nameNr
<= nameNr
)
9881 xmlParseElementEnd(ctxt
);
9883 xmlParseElementStart(ctxt
);
9888 * Fifth case : a reference. If it has not been resolved,
9889 * parsing returns its Name, create the node
9892 else if (*cur
== '&') {
9893 xmlParseReference(ctxt
);
9897 * Last case, text. Note that References are handled directly.
9900 xmlParseCharDataInternal(ctxt
, 0);
9910 * @ctxt: an XML parser context
9912 * Parse a content sequence. Stops at EOF or '</'.
9914 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9918 xmlParseContent(xmlParserCtxtPtr ctxt
) {
9919 int nameNr
= ctxt
->nameNr
;
9921 xmlParseContentInternal(ctxt
);
9923 if ((ctxt
->instate
!= XML_PARSER_EOF
) && (ctxt
->nameNr
> nameNr
)) {
9924 const xmlChar
*name
= ctxt
->nameTab
[ctxt
->nameNr
- 1];
9925 int line
= ctxt
->pushTab
[ctxt
->nameNr
- 1].line
;
9926 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NOT_FINISHED
,
9927 "Premature end of data in tag %s line %d\n",
9934 * @ctxt: an XML parser context
9936 * DEPRECATED: Internal function, don't use.
9938 * parse an XML element
9940 * [39] element ::= EmptyElemTag | STag content ETag
9942 * [ WFC: Element Type Match ]
9943 * The Name in an element's end-tag must match the element type in the
9949 xmlParseElement(xmlParserCtxtPtr ctxt
) {
9950 if (xmlParseElementStart(ctxt
) != 0)
9953 xmlParseContentInternal(ctxt
);
9954 if (ctxt
->instate
== XML_PARSER_EOF
)
9958 const xmlChar
*name
= ctxt
->nameTab
[ctxt
->nameNr
- 1];
9959 int line
= ctxt
->pushTab
[ctxt
->nameNr
- 1].line
;
9960 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NOT_FINISHED
,
9961 "Premature end of data in tag %s line %d\n",
9966 xmlParseElementEnd(ctxt
);
9970 * xmlParseElementStart:
9971 * @ctxt: an XML parser context
9973 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9974 * opening tag was parsed, 1 if an empty element was parsed.
9976 * Always consumes '<'.
9979 xmlParseElementStart(xmlParserCtxtPtr ctxt
) {
9980 const xmlChar
*name
;
9981 const xmlChar
*prefix
= NULL
;
9982 const xmlChar
*URI
= NULL
;
9983 xmlParserNodeInfo node_info
;
9986 int nsNr
= ctxt
->nsNr
;
9988 if (((unsigned int) ctxt
->nameNr
> xmlParserMaxDepth
) &&
9989 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
9990 xmlFatalErrMsgInt(ctxt
, XML_ERR_INTERNAL_ERROR
,
9991 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9993 xmlHaltParser(ctxt
);
9997 /* Capture start position */
9998 if (ctxt
->record_info
) {
9999 node_info
.begin_pos
= ctxt
->input
->consumed
+
10000 (CUR_PTR
- ctxt
->input
->base
);
10001 node_info
.begin_line
= ctxt
->input
->line
;
10004 if (ctxt
->spaceNr
== 0)
10005 spacePush(ctxt
, -1);
10006 else if (*ctxt
->space
== -2)
10007 spacePush(ctxt
, -1);
10009 spacePush(ctxt
, *ctxt
->space
);
10011 line
= ctxt
->input
->line
;
10012 #ifdef LIBXML_SAX1_ENABLED
10014 #endif /* LIBXML_SAX1_ENABLED */
10015 name
= xmlParseStartTag2(ctxt
, &prefix
, &URI
, &tlen
);
10016 #ifdef LIBXML_SAX1_ENABLED
10018 name
= xmlParseStartTag(ctxt
);
10019 #endif /* LIBXML_SAX1_ENABLED */
10020 if (ctxt
->instate
== XML_PARSER_EOF
)
10022 if (name
== NULL
) {
10026 nameNsPush(ctxt
, name
, prefix
, URI
, line
, ctxt
->nsNr
- nsNr
);
10029 #ifdef LIBXML_VALID_ENABLED
10031 * [ VC: Root Element Type ]
10032 * The Name in the document type declaration must match the element
10033 * type of the root element.
10035 if (ctxt
->validate
&& ctxt
->wellFormed
&& ctxt
->myDoc
&&
10036 ctxt
->node
&& (ctxt
->node
== ctxt
->myDoc
->children
))
10037 ctxt
->valid
&= xmlValidateRoot(&ctxt
->vctxt
, ctxt
->myDoc
);
10038 #endif /* LIBXML_VALID_ENABLED */
10041 * Check for an Empty Element.
10043 if ((RAW
== '/') && (NXT(1) == '>')) {
10046 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElementNs
!= NULL
) &&
10047 (!ctxt
->disableSAX
))
10048 ctxt
->sax
->endElementNs(ctxt
->userData
, name
, prefix
, URI
);
10049 #ifdef LIBXML_SAX1_ENABLED
10051 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElement
!= NULL
) &&
10052 (!ctxt
->disableSAX
))
10053 ctxt
->sax
->endElement(ctxt
->userData
, name
);
10054 #endif /* LIBXML_SAX1_ENABLED */
10058 if (nsNr
!= ctxt
->nsNr
)
10059 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
10060 if (cur
!= NULL
&& ctxt
->record_info
) {
10061 node_info
.node
= cur
;
10062 node_info
.end_pos
= ctxt
->input
->consumed
+
10063 (CUR_PTR
- ctxt
->input
->base
);
10064 node_info
.end_line
= ctxt
->input
->line
;
10065 xmlParserAddNodeInfo(ctxt
, &node_info
);
10071 if (cur
!= NULL
&& ctxt
->record_info
) {
10072 node_info
.node
= cur
;
10073 node_info
.end_pos
= 0;
10074 node_info
.end_line
= 0;
10075 xmlParserAddNodeInfo(ctxt
, &node_info
);
10078 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_GT_REQUIRED
,
10079 "Couldn't find end of Start Tag %s line %d\n",
10083 * end of parsing of this node.
10088 if (nsNr
!= ctxt
->nsNr
)
10089 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
10097 * xmlParseElementEnd:
10098 * @ctxt: an XML parser context
10100 * Parse the end of an XML element. Always consumes '</'.
10103 xmlParseElementEnd(xmlParserCtxtPtr ctxt
) {
10104 xmlNodePtr cur
= ctxt
->node
;
10106 if (ctxt
->nameNr
<= 0) {
10107 if ((RAW
== '<') && (NXT(1) == '/'))
10113 * parse the end of tag: '</' should be here.
10116 xmlParseEndTag2(ctxt
, &ctxt
->pushTab
[ctxt
->nameNr
- 1]);
10119 #ifdef LIBXML_SAX1_ENABLED
10121 xmlParseEndTag1(ctxt
, 0);
10122 #endif /* LIBXML_SAX1_ENABLED */
10125 * Capture end position
10127 if (cur
!= NULL
&& ctxt
->record_info
) {
10128 xmlParserNodeInfoPtr node_info
;
10130 node_info
= (xmlParserNodeInfoPtr
) xmlParserFindNodeInfo(ctxt
, cur
);
10131 if (node_info
!= NULL
) {
10132 node_info
->end_pos
= ctxt
->input
->consumed
+
10133 (CUR_PTR
- ctxt
->input
->base
);
10134 node_info
->end_line
= ctxt
->input
->line
;
10140 * xmlParseVersionNum:
10141 * @ctxt: an XML parser context
10143 * DEPRECATED: Internal function, don't use.
10145 * parse the XML version value.
10147 * [26] VersionNum ::= '1.' [0-9]+
10149 * In practice allow [0-9].[0-9]+ at that level
10151 * Returns the string giving the XML version number, or NULL
10154 xmlParseVersionNum(xmlParserCtxtPtr ctxt
) {
10155 xmlChar
*buf
= NULL
;
10160 buf
= (xmlChar
*) xmlMallocAtomic(size
);
10162 xmlErrMemory(ctxt
, NULL
);
10166 if (!((cur
>= '0') && (cur
<= '9'))) {
10180 while ((cur
>= '0') && (cur
<= '9')) {
10181 if (len
+ 1 >= size
) {
10185 tmp
= (xmlChar
*) xmlRealloc(buf
, size
);
10188 xmlErrMemory(ctxt
, NULL
);
10202 * xmlParseVersionInfo:
10203 * @ctxt: an XML parser context
10205 * DEPRECATED: Internal function, don't use.
10207 * parse the XML version.
10209 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10211 * [25] Eq ::= S? '=' S?
10213 * Returns the version string, e.g. "1.0"
10217 xmlParseVersionInfo(xmlParserCtxtPtr ctxt
) {
10218 xmlChar
*version
= NULL
;
10220 if (CMP7(CUR_PTR
, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10224 xmlFatalErr(ctxt
, XML_ERR_EQUAL_REQUIRED
, NULL
);
10231 version
= xmlParseVersionNum(ctxt
);
10233 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10236 } else if (RAW
== '\''){
10238 version
= xmlParseVersionNum(ctxt
);
10240 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10244 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_STARTED
, NULL
);
10252 * @ctxt: an XML parser context
10254 * DEPRECATED: Internal function, don't use.
10256 * parse the XML encoding name
10258 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10260 * Returns the encoding name value or NULL
10263 xmlParseEncName(xmlParserCtxtPtr ctxt
) {
10264 xmlChar
*buf
= NULL
;
10267 int maxLength
= (ctxt
->options
& XML_PARSE_HUGE
) ?
10268 XML_MAX_TEXT_LENGTH
:
10269 XML_MAX_NAME_LENGTH
;
10273 if (((cur
>= 'a') && (cur
<= 'z')) ||
10274 ((cur
>= 'A') && (cur
<= 'Z'))) {
10275 buf
= (xmlChar
*) xmlMallocAtomic(size
);
10277 xmlErrMemory(ctxt
, NULL
);
10284 while (((cur
>= 'a') && (cur
<= 'z')) ||
10285 ((cur
>= 'A') && (cur
<= 'Z')) ||
10286 ((cur
>= '0') && (cur
<= '9')) ||
10287 (cur
== '.') || (cur
== '_') ||
10289 if (len
+ 1 >= size
) {
10293 tmp
= (xmlChar
*) xmlRealloc(buf
, size
);
10295 xmlErrMemory(ctxt
, NULL
);
10302 if (len
> maxLength
) {
10303 xmlFatalErr(ctxt
, XML_ERR_NAME_TOO_LONG
, "EncName");
10312 xmlFatalErr(ctxt
, XML_ERR_ENCODING_NAME
, NULL
);
10318 * xmlParseEncodingDecl:
10319 * @ctxt: an XML parser context
10321 * DEPRECATED: Internal function, don't use.
10323 * parse the XML encoding declaration
10325 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10327 * This sets up the conversion filters.
10329 * Returns the encoding value or NULL
10333 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt
) {
10334 xmlChar
*encoding
= NULL
;
10337 if (CMP8(CUR_PTR
, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10341 xmlFatalErr(ctxt
, XML_ERR_EQUAL_REQUIRED
, NULL
);
10348 encoding
= xmlParseEncName(ctxt
);
10350 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10351 xmlFree((xmlChar
*) encoding
);
10355 } else if (RAW
== '\''){
10357 encoding
= xmlParseEncName(ctxt
);
10359 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10360 xmlFree((xmlChar
*) encoding
);
10365 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_STARTED
, NULL
);
10369 * Non standard parsing, allowing the user to ignore encoding
10371 if (ctxt
->options
& XML_PARSE_IGNORE_ENC
) {
10372 xmlFree((xmlChar
*) encoding
);
10377 * The UTF-16 encoding switch has already taken place at this stage;
10378 * moreover, the little-endian/big-endian selection is already done.
10380 if ((encoding
!= NULL
) &&
10381 ((!xmlStrcasecmp(encoding
, BAD_CAST
"UTF-16")) ||
10382 (!xmlStrcasecmp(encoding
, BAD_CAST
"UTF16")))) {
10384 * If no encoding was passed to the parser, the declaration claims
10385 * UTF-16, and no decoder is present (i.e. the document is
10386 * apparently UTF-8 compatible), then raise an encoding mismatch
10387 * fatal error.
10389 if ((ctxt
->encoding
== NULL
) &&
10390 (ctxt
->input
->buf
!= NULL
) &&
10391 (ctxt
->input
->buf
->encoder
== NULL
)) {
10392 xmlFatalErrMsg(ctxt
, XML_ERR_INVALID_ENCODING
,
10393 "Document labelled UTF-16 but has UTF-8 content\n");
10395 if (ctxt
->encoding
!= NULL
)
10396 xmlFree((xmlChar
*) ctxt
->encoding
);
10397 ctxt
->encoding
= encoding
;
10400 * UTF-8 encoding is handled natively
10402 else if ((encoding
!= NULL
) &&
10403 ((!xmlStrcasecmp(encoding
, BAD_CAST
"UTF-8")) ||
10404 (!xmlStrcasecmp(encoding
, BAD_CAST
"UTF8")))) {
10405 /* TODO: Check for encoding mismatch. */
10406 if (ctxt
->encoding
!= NULL
)
10407 xmlFree((xmlChar
*) ctxt
->encoding
);
10408 ctxt
->encoding
= encoding
;
10410 else if (encoding
!= NULL
) {
10411 xmlCharEncodingHandlerPtr handler
;
10413 if (ctxt
->input
->encoding
!= NULL
)
10414 xmlFree((xmlChar
*) ctxt
->input
->encoding
);
10415 ctxt
->input
->encoding
= encoding
;
10417 handler
= xmlFindCharEncodingHandler((const char *) encoding
);
10418 if (handler
!= NULL
) {
10419 if (xmlSwitchToEncoding(ctxt
, handler
) < 0) {
10420 /* failed to convert */
10421 ctxt
->errNo
= XML_ERR_UNSUPPORTED_ENCODING
;
10425 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNSUPPORTED_ENCODING
,
10426 "Unsupported encoding %s\n", encoding
);
10436 * @ctxt: an XML parser context
10438 * DEPRECATED: Internal function, don't use.
10440 * parse the XML standalone declaration
10442 * [32] SDDecl ::= S 'standalone' Eq
10443 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10445 * [ VC: Standalone Document Declaration ]
10446 * TODO The standalone document declaration must have the value "no"
10447 * if any external markup declarations contain declarations of:
10448 * - attributes with default values, if elements to which these
10449 * attributes apply appear in the document without specifications
10450 * of values for these attributes, or
10451 * - entities (other than amp, lt, gt, apos, quot), if references
10452 * to those entities appear in the document, or
10453 * - attributes with values subject to normalization, where the
10454 * attribute appears in the document with a value which will change
10455 * as a result of normalization, or
10456 * - element types with element content, if white space occurs directly
10457 * within any instance of those types.
10460 * 1 if standalone="yes"
10461 * 0 if standalone="no"
10462 * -2 if standalone attribute is missing or invalid
10463 * (A standalone value of -2 means that the XML declaration was found,
10464 * but no value was specified for the standalone attribute).
10468 xmlParseSDDecl(xmlParserCtxtPtr ctxt
) {
10469 int standalone
= -2;
10472 if (CMP10(CUR_PTR
, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10476 xmlFatalErr(ctxt
, XML_ERR_EQUAL_REQUIRED
, NULL
);
10477 return(standalone
);
10483 if ((RAW
== 'n') && (NXT(1) == 'o')) {
10486 } else if ((RAW
== 'y') && (NXT(1) == 'e') &&
10491 xmlFatalErr(ctxt
, XML_ERR_STANDALONE_VALUE
, NULL
);
10494 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10497 } else if (RAW
== '"'){
10499 if ((RAW
== 'n') && (NXT(1) == 'o')) {
10502 } else if ((RAW
== 'y') && (NXT(1) == 'e') &&
10507 xmlFatalErr(ctxt
, XML_ERR_STANDALONE_VALUE
, NULL
);
10510 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
10514 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_STARTED
, NULL
);
10517 return(standalone
);
10522 * @ctxt: an XML parser context
10524 * DEPRECATED: Internal function, don't use.
10526 * parse an XML declaration header
10528 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10532 xmlParseXMLDecl(xmlParserCtxtPtr ctxt
) {
10536 * This value for standalone indicates that the document has an
10537 * XML declaration but it does not have a standalone attribute.
10538 * It will be overwritten later if a standalone attribute is found.
10540 ctxt
->input
->standalone
= -2;
10543 * We know that '<?xml' is here.
10547 if (!IS_BLANK_CH(RAW
)) {
10548 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
10549 "Blank needed after '<?xml'\n");
10554 * We must have the VersionInfo here.
10556 version
= xmlParseVersionInfo(ctxt
);
10557 if (version
== NULL
) {
10558 xmlFatalErr(ctxt
, XML_ERR_VERSION_MISSING
, NULL
);
10560 if (!xmlStrEqual(version
, (const xmlChar
*) XML_DEFAULT_VERSION
)) {
10562 * Changed here for XML-1.0 5th edition
10564 if (ctxt
->options
& XML_PARSE_OLD10
) {
10565 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNKNOWN_VERSION
,
10566 "Unsupported version '%s'\n",
10569 if ((version
[0] == '1') && ((version
[1] == '.'))) {
10570 xmlWarningMsg(ctxt
, XML_WAR_UNKNOWN_VERSION
,
10571 "Unsupported version '%s'\n",
10574 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNKNOWN_VERSION
,
10575 "Unsupported version '%s'\n",
10580 if (ctxt
->version
!= NULL
)
10581 xmlFree((void *) ctxt
->version
);
10582 ctxt
->version
= version
;
10586 * We may have the encoding declaration
10588 if (!IS_BLANK_CH(RAW
)) {
10589 if ((RAW
== '?') && (NXT(1) == '>')) {
10593 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
, "Blank needed here\n");
10595 xmlParseEncodingDecl(ctxt
);
10596 if ((ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) ||
10597 (ctxt
->instate
== XML_PARSER_EOF
)) {
10599 * The XML REC instructs us to stop parsing right here
10605 * We may have the standalone status.
10607 if ((ctxt
->input
->encoding
!= NULL
) && (!IS_BLANK_CH(RAW
))) {
10608 if ((RAW
== '?') && (NXT(1) == '>')) {
10612 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
, "Blank needed here\n");
10616 * We can grow the input buffer freely at that point
10621 ctxt
->input
->standalone
= xmlParseSDDecl(ctxt
);
10624 if ((RAW
== '?') && (NXT(1) == '>')) {
10626 } else if (RAW
== '>') {
10627 /* Deprecated old WD ... */
10628 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
10633 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
10634 while ((c
= CUR
) != 0) {
10644 * @ctxt: an XML parser context
10646 * DEPRECATED: Internal function, don't use.
10648 * parse an XML Misc* optional field.
10650 * [27] Misc ::= Comment | PI | S
10654 xmlParseMisc(xmlParserCtxtPtr ctxt
) {
10655 while (ctxt
->instate
!= XML_PARSER_EOF
) {
10658 if ((RAW
== '<') && (NXT(1) == '?')) {
10660 } else if (CMP4(CUR_PTR
, '<', '!', '-', '-')) {
10661 xmlParseComment(ctxt
);
10669 * xmlParseDocument:
10670 * @ctxt: an XML parser context
10672 * parse an XML document (and build a tree if using the standard SAX
10675 * [1] document ::= prolog element Misc*
10677 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10679 * Returns 0, or -1 in case of error. The parser context is augmented
10680 * as a result of the parsing.
10684 xmlParseDocument(xmlParserCtxtPtr ctxt
) {
10686 xmlCharEncoding enc
;
10690 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
))
10696 * SAX: detecting the level.
10698 xmlDetectSAX2(ctxt
);
10701 * SAX: beginning of the document processing.
10703 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
10704 ctxt
->sax
->setDocumentLocator(ctxt
->userData
, &xmlDefaultSAXLocator
);
10705 if (ctxt
->instate
== XML_PARSER_EOF
)
10708 if ((ctxt
->encoding
== NULL
) &&
10709 ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4)) {
10711 * Get the 4 first bytes and decode the charset
10712 * if enc != XML_CHAR_ENCODING_NONE
10713 * plug some encoding conversion routines.
10719 enc
= xmlDetectCharEncoding(&start
[0], 4);
10720 if (enc
!= XML_CHAR_ENCODING_NONE
) {
10721 xmlSwitchEncoding(ctxt
, enc
);
10727 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
10732 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10735 * Note that we will switch encoding on the fly.
10737 xmlParseXMLDecl(ctxt
);
10738 if ((ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) ||
10739 (ctxt
->instate
== XML_PARSER_EOF
)) {
10741 * The XML REC instructs us to stop parsing right here
10745 ctxt
->standalone
= ctxt
->input
->standalone
;
10748 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
10750 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) && (!ctxt
->disableSAX
))
10751 ctxt
->sax
->startDocument(ctxt
->userData
);
10752 if (ctxt
->instate
== XML_PARSER_EOF
)
10754 if ((ctxt
->myDoc
!= NULL
) && (ctxt
->input
!= NULL
) &&
10755 (ctxt
->input
->buf
!= NULL
) && (ctxt
->input
->buf
->compressed
>= 0)) {
10756 ctxt
->myDoc
->compression
= ctxt
->input
->buf
->compressed
;
10760 * The Misc part of the Prolog
10762 xmlParseMisc(ctxt
);
10765 * Then possibly doc type declaration(s) and more Misc
10766 * (doctypedecl Misc*)?
10769 if (CMP9(CUR_PTR
, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10771 ctxt
->inSubset
= 1;
10772 xmlParseDocTypeDecl(ctxt
);
10774 ctxt
->instate
= XML_PARSER_DTD
;
10775 xmlParseInternalSubset(ctxt
);
10776 if (ctxt
->instate
== XML_PARSER_EOF
)
10781 * Create and update the external subset.
10783 ctxt
->inSubset
= 2;
10784 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->externalSubset
!= NULL
) &&
10785 (!ctxt
->disableSAX
))
10786 ctxt
->sax
->externalSubset(ctxt
->userData
, ctxt
->intSubName
,
10787 ctxt
->extSubSystem
, ctxt
->extSubURI
);
10788 if (ctxt
->instate
== XML_PARSER_EOF
)
10790 ctxt
->inSubset
= 0;
10792 xmlCleanSpecialAttr(ctxt
);
10794 ctxt
->instate
= XML_PARSER_PROLOG
;
10795 xmlParseMisc(ctxt
);
10799 * Time to start parsing the tree itself
10803 xmlFatalErrMsg(ctxt
, XML_ERR_DOCUMENT_EMPTY
,
10804 "Start tag expected, '<' not found\n");
10806 ctxt
->instate
= XML_PARSER_CONTENT
;
10807 xmlParseElement(ctxt
);
10808 ctxt
->instate
= XML_PARSER_EPILOG
;
10812 * The Misc part at the end
10814 xmlParseMisc(ctxt
);
10817 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
10819 ctxt
->instate
= XML_PARSER_EOF
;
10823 * SAX: end of the document processing.
10825 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
10826 ctxt
->sax
->endDocument(ctxt
->userData
);
10829 * Remove locally kept entity definitions if the tree was not built
10831 if ((ctxt
->myDoc
!= NULL
) &&
10832 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
))) {
10833 xmlFreeDoc(ctxt
->myDoc
);
10834 ctxt
->myDoc
= NULL
;
10837 if ((ctxt
->wellFormed
) && (ctxt
->myDoc
!= NULL
)) {
10838 ctxt
->myDoc
->properties
|= XML_DOC_WELLFORMED
;
10840 ctxt
->myDoc
->properties
|= XML_DOC_DTDVALID
;
10841 if (ctxt
->nsWellFormed
)
10842 ctxt
->myDoc
->properties
|= XML_DOC_NSVALID
;
10843 if (ctxt
->options
& XML_PARSE_OLD10
)
10844 ctxt
->myDoc
->properties
|= XML_DOC_OLD10
;
10846 if (! ctxt
->wellFormed
) {
10854 * xmlParseExtParsedEnt:
10855 * @ctxt: an XML parser context
10857 * parse a general parsed entity
10858 * An external general parsed entity is well-formed if it matches the
10859 * production labeled extParsedEnt.
10861 * [78] extParsedEnt ::= TextDecl? content
10863 * Returns 0, or -1 in case of error. The parser context is augmented
10864 * as a result of the parsing.
10868 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt
) {
10870 xmlCharEncoding enc
;
10872 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
))
10875 xmlDetectSAX2(ctxt
);
10880 * SAX: beginning of the document processing.
10882 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
10883 ctxt
->sax
->setDocumentLocator(ctxt
->userData
, &xmlDefaultSAXLocator
);
10886 * Get the 4 first bytes and decode the charset
10887 * if enc != XML_CHAR_ENCODING_NONE
10888 * plug some encoding conversion routines.
10890 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
10895 enc
= xmlDetectCharEncoding(start
, 4);
10896 if (enc
!= XML_CHAR_ENCODING_NONE
) {
10897 xmlSwitchEncoding(ctxt
, enc
);
10903 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
10907 * Check for the XMLDecl in the Prolog.
10910 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10913 * Note that we will switch encoding on the fly.
10915 xmlParseXMLDecl(ctxt
);
10916 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
10918 * The XML REC instructs us to stop parsing right here
10924 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
10926 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) && (!ctxt
->disableSAX
))
10927 ctxt
->sax
->startDocument(ctxt
->userData
);
10928 if (ctxt
->instate
== XML_PARSER_EOF
)
10932 * Doing validity checking on chunk doesn't make sense
10934 ctxt
->instate
= XML_PARSER_CONTENT
;
10935 ctxt
->validate
= 0;
10936 ctxt
->loadsubset
= 0;
10939 xmlParseContent(ctxt
);
10940 if (ctxt
->instate
== XML_PARSER_EOF
)
10943 if ((RAW
== '<') && (NXT(1) == '/')) {
10944 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
10945 } else if (RAW
!= 0) {
10946 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
10950 * SAX: end of the document processing.
10952 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
10953 ctxt
->sax
->endDocument(ctxt
->userData
);
10955 if (! ctxt
->wellFormed
) return(-1);
10959 #ifdef LIBXML_PUSH_ENABLED
10960 /************************************************************************
10962 * Progressive parsing interfaces *
10964 ************************************************************************/
10967 * xmlParseLookupChar:
10968 * @ctxt: an XML parser context
10971 * Check whether the input buffer contains a character.
10974 xmlParseLookupChar(xmlParserCtxtPtr ctxt
, int c
) {
10975 const xmlChar
*cur
;
10977 if (ctxt
->checkIndex
== 0) {
10978 cur
= ctxt
->input
->cur
+ 1;
10980 cur
= ctxt
->input
->cur
+ ctxt
->checkIndex
;
10983 if (memchr(cur
, c
, ctxt
->input
->end
- cur
) == NULL
) {
10984 size_t index
= ctxt
->input
->end
- ctxt
->input
->cur
;
10986 if (index
> LONG_MAX
) {
10987 ctxt
->checkIndex
= 0;
10990 ctxt
->checkIndex
= index
;
10993 ctxt
->checkIndex
= 0;
10999 * xmlParseLookupString:
11000 * @ctxt: an XML parser context
11001 * @startDelta: delta to apply at the start
11003 * @strLen: length of string
11005 * Check whether the input buffer contains a string.
11007 static const xmlChar
*
11008 xmlParseLookupString(xmlParserCtxtPtr ctxt
, size_t startDelta
,
11009 const char *str
, size_t strLen
) {
11010 const xmlChar
*cur
, *term
;
11012 if (ctxt
->checkIndex
== 0) {
11013 cur
= ctxt
->input
->cur
+ startDelta
;
11015 cur
= ctxt
->input
->cur
+ ctxt
->checkIndex
;
11018 term
= BAD_CAST
strstr((const char *) cur
, str
);
11019 if (term
== NULL
) {
11020 const xmlChar
*end
= ctxt
->input
->end
;
11023 /* Rescan (strLen - 1) characters. */
11024 if ((size_t) (end
- cur
) < strLen
)
11028 index
= end
- ctxt
->input
->cur
;
11029 if (index
> LONG_MAX
) {
11030 ctxt
->checkIndex
= 0;
11031 return(ctxt
->input
->end
- strLen
);
11033 ctxt
->checkIndex
= index
;
11035 ctxt
->checkIndex
= 0;
11042 * xmlParseLookupCharData:
11043 * @ctxt: an XML parser context
11045 * Check whether the input buffer contains terminated char data.
11048 xmlParseLookupCharData(xmlParserCtxtPtr ctxt
) {
11049 const xmlChar
*cur
= ctxt
->input
->cur
+ ctxt
->checkIndex
;
11050 const xmlChar
*end
= ctxt
->input
->end
;
11053 while (cur
< end
) {
11054 if ((*cur
== '<') || (*cur
== '&')) {
11055 ctxt
->checkIndex
= 0;
11061 index
= cur
- ctxt
->input
->cur
;
11062 if (index
> LONG_MAX
) {
11063 ctxt
->checkIndex
= 0;
11066 ctxt
->checkIndex
= index
;
11071 * xmlParseLookupGt:
11072 * @ctxt: an XML parser context
11074 * Check whether there's enough data in the input buffer to finish parsing
11075 * a start tag. This has to take quotes into account.
11078 xmlParseLookupGt(xmlParserCtxtPtr ctxt
) {
11079 const xmlChar
*cur
;
11080 const xmlChar
*end
= ctxt
->input
->end
;
11081 int state
= ctxt
->endCheckState
;
11084 if (ctxt
->checkIndex
== 0)
11085 cur
= ctxt
->input
->cur
+ 1;
11087 cur
= ctxt
->input
->cur
+ ctxt
->checkIndex
;
11089 while (cur
< end
) {
11093 } else if (*cur
== '\'' || *cur
== '"') {
11095 } else if (*cur
== '>') {
11096 ctxt
->checkIndex
= 0;
11097 ctxt
->endCheckState
= 0;
11103 index
= cur
- ctxt
->input
->cur
;
11104 if (index
> LONG_MAX
) {
11105 ctxt
->checkIndex
= 0;
11106 ctxt
->endCheckState
= 0;
11109 ctxt
->checkIndex
= index
;
11110 ctxt
->endCheckState
= state
;
11115 * xmlParseLookupInternalSubset:
11116 * @ctxt: an XML parser context
11118 * Check whether there's enough data in the input buffer to finish parsing
11119 * the internal subset.
11122 xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt
) {
11124 * Sorry, but progressive parsing of the internal subset is not
11125 * supported. We first check that the full content of the internal
11126 * subset is available and parsing is launched only at that point.
11127 * Internal subset ends with "']' S? '>'" in an unescaped section and
11128 * not in a ']]>' sequence which are conditional sections.
11130 const xmlChar
*cur
, *start
;
11131 const xmlChar
*end
= ctxt
->input
->end
;
11132 int state
= ctxt
->endCheckState
;
11135 if (ctxt
->checkIndex
== 0) {
11136 cur
= ctxt
->input
->cur
+ 1;
11138 cur
= ctxt
->input
->cur
+ ctxt
->checkIndex
;
11142 while (cur
< end
) {
11143 if (state
== '-') {
11144 if ((*cur
== '-') &&
11153 else if (state
== ']') {
11155 ctxt
->checkIndex
= 0;
11156 ctxt
->endCheckState
= 0;
11159 if (IS_BLANK_CH(*cur
)) {
11161 } else if (*cur
!= ']') {
11167 else if (state
== ' ') {
11169 ctxt
->checkIndex
= 0;
11170 ctxt
->endCheckState
= 0;
11173 if (!IS_BLANK_CH(*cur
)) {
11179 else if (state
!= 0) {
11180 if (*cur
== state
) {
11185 else if (*cur
== '<') {
11186 if ((cur
[1] == '!') &&
11191 /* Don't treat <!--> as comment */
11196 else if ((*cur
== '"') || (*cur
== '\'') || (*cur
== ']')) {
11204 * Rescan the three last characters to detect "<!--" and "-->"
11205 * split across chunks.
11207 if ((state
== 0) || (state
== '-')) {
11208 if (cur
- start
< 3)
11213 index
= cur
- ctxt
->input
->cur
;
11214 if (index
> LONG_MAX
) {
11215 ctxt
->checkIndex
= 0;
11216 ctxt
->endCheckState
= 0;
11219 ctxt
->checkIndex
= index
;
11220 ctxt
->endCheckState
= state
;
11225 * xmlCheckCdataPush:
11226 * @cur: pointer to the block of characters
11227 * @len: length of the block in bytes
11228 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11230 * Check that the block of characters is okay as CData content [20]
11232 * Returns the number of bytes to pass if okay, a negative index where an
11233 * UTF-8 error occurred otherwise
11236 xmlCheckCdataPush(const xmlChar
*utf
, int len
, int complete
) {
11241 if ((utf
== NULL
) || (len
<= 0))
11244 for (ix
= 0; ix
< len
;) { /* string is 0-terminated */
11246 if ((c
& 0x80) == 0x00) { /* 1-byte code, starts with 0 */
11249 else if ((c
== 0xA) || (c
== 0xD) || (c
== 0x9))
11253 } else if ((c
& 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11254 if (ix
+ 2 > len
) return(complete
? -ix
: ix
);
11255 if ((utf
[ix
+1] & 0xc0 ) != 0x80)
11257 codepoint
= (utf
[ix
] & 0x1f) << 6;
11258 codepoint
|= utf
[ix
+1] & 0x3f;
11259 if (!xmlIsCharQ(codepoint
))
11262 } else if ((c
& 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11263 if (ix
+ 3 > len
) return(complete
? -ix
: ix
);
11264 if (((utf
[ix
+1] & 0xc0) != 0x80) ||
11265 ((utf
[ix
+2] & 0xc0) != 0x80))
11267 codepoint
= (utf
[ix
] & 0xf) << 12;
11268 codepoint
|= (utf
[ix
+1] & 0x3f) << 6;
11269 codepoint
|= utf
[ix
+2] & 0x3f;
11270 if (!xmlIsCharQ(codepoint
))
11273 } else if ((c
& 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11274 if (ix
+ 4 > len
) return(complete
? -ix
: ix
);
11275 if (((utf
[ix
+1] & 0xc0) != 0x80) ||
11276 ((utf
[ix
+2] & 0xc0) != 0x80) ||
11277 ((utf
[ix
+3] & 0xc0) != 0x80))
11279 codepoint
= (utf
[ix
] & 0x7) << 18;
11280 codepoint
|= (utf
[ix
+1] & 0x3f) << 12;
11281 codepoint
|= (utf
[ix
+2] & 0x3f) << 6;
11282 codepoint
|= utf
[ix
+3] & 0x3f;
11283 if (!xmlIsCharQ(codepoint
))
11286 } else /* unknown encoding */
11293 * xmlParseTryOrFinish:
11294 * @ctxt: an XML parser context
11295 * @terminate: last chunk indicator
11297 * Try to progress on parsing
11299 * Returns zero if no parsing was possible
11302 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt
, int terminate
) {
11308 if (ctxt
->input
== NULL
)
11312 switch (ctxt
->instate
) {
11313 case XML_PARSER_EOF
:
11314 xmlGenericError(xmlGenericErrorContext
,
11315 "PP: try EOF\n"); break;
11316 case XML_PARSER_START
:
11317 xmlGenericError(xmlGenericErrorContext
,
11318 "PP: try START\n"); break;
11319 case XML_PARSER_MISC
:
11320 xmlGenericError(xmlGenericErrorContext
,
11321 "PP: try MISC\n");break;
11322 case XML_PARSER_COMMENT
:
11323 xmlGenericError(xmlGenericErrorContext
,
11324 "PP: try COMMENT\n");break;
11325 case XML_PARSER_PROLOG
:
11326 xmlGenericError(xmlGenericErrorContext
,
11327 "PP: try PROLOG\n");break;
11328 case XML_PARSER_START_TAG
:
11329 xmlGenericError(xmlGenericErrorContext
,
11330 "PP: try START_TAG\n");break;
11331 case XML_PARSER_CONTENT
:
11332 xmlGenericError(xmlGenericErrorContext
,
11333 "PP: try CONTENT\n");break;
11334 case XML_PARSER_CDATA_SECTION
:
11335 xmlGenericError(xmlGenericErrorContext
,
11336 "PP: try CDATA_SECTION\n");break;
11337 case XML_PARSER_END_TAG
:
11338 xmlGenericError(xmlGenericErrorContext
,
11339 "PP: try END_TAG\n");break;
11340 case XML_PARSER_ENTITY_DECL
:
11341 xmlGenericError(xmlGenericErrorContext
,
11342 "PP: try ENTITY_DECL\n");break;
11343 case XML_PARSER_ENTITY_VALUE
:
11344 xmlGenericError(xmlGenericErrorContext
,
11345 "PP: try ENTITY_VALUE\n");break;
11346 case XML_PARSER_ATTRIBUTE_VALUE
:
11347 xmlGenericError(xmlGenericErrorContext
,
11348 "PP: try ATTRIBUTE_VALUE\n");break;
11349 case XML_PARSER_DTD
:
11350 xmlGenericError(xmlGenericErrorContext
,
11351 "PP: try DTD\n");break;
11352 case XML_PARSER_EPILOG
:
11353 xmlGenericError(xmlGenericErrorContext
,
11354 "PP: try EPILOG\n");break;
11355 case XML_PARSER_PI
:
11356 xmlGenericError(xmlGenericErrorContext
,
11357 "PP: try PI\n");break;
11358 case XML_PARSER_IGNORE
:
11359 xmlGenericError(xmlGenericErrorContext
,
11360 "PP: try IGNORE\n");break;
11364 if ((ctxt
->input
!= NULL
) &&
11365 (ctxt
->input
->cur
- ctxt
->input
->base
> 4096)) {
11366 xmlParserShrink(ctxt
);
11369 while (ctxt
->instate
!= XML_PARSER_EOF
) {
11370 if ((ctxt
->errNo
!= XML_ERR_OK
) && (ctxt
->disableSAX
== 1))
11373 if (ctxt
->input
== NULL
) break;
11374 if (ctxt
->input
->buf
!= NULL
) {
11376 * If we are operating on converted input, try to flush
11377 * remaining chars to avoid them stalling in the non-converted
11380 if ((ctxt
->input
->buf
->raw
!= NULL
) &&
11381 (xmlBufIsEmpty(ctxt
->input
->buf
->raw
) == 0)) {
11382 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
,
11384 size_t current
= ctxt
->input
->cur
- ctxt
->input
->base
;
11386 xmlParserInputBufferPush(ctxt
->input
->buf
, 0, "");
11387 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
,
11391 avail
= ctxt
->input
->end
- ctxt
->input
->cur
;
11394 switch (ctxt
->instate
) {
11395 case XML_PARSER_EOF
:
11397 * Document parsing is done !
11400 case XML_PARSER_START
:
11401 if (ctxt
->charset
== XML_CHAR_ENCODING_NONE
) {
11403 xmlCharEncoding enc
;
11406 * Very first chars read from the document flow.
11412 * Get the 4 first bytes and decode the charset
11413 * if enc != XML_CHAR_ENCODING_NONE
11414 * plug some encoding conversion routines,
11415 * else xmlSwitchEncoding will set to (default)
11422 enc
= xmlDetectCharEncoding(start
, 4);
11424 * We need more bytes to detect EBCDIC code pages.
11425 * See xmlDetectEBCDIC.
11427 if ((enc
== XML_CHAR_ENCODING_EBCDIC
) &&
11428 (!terminate
) && (avail
< 200))
11430 xmlSwitchEncoding(ctxt
, enc
);
11436 cur
= ctxt
->input
->cur
[0];
11437 next
= ctxt
->input
->cur
[1];
11439 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
11440 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
11441 &xmlDefaultSAXLocator
);
11442 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
11443 xmlHaltParser(ctxt
);
11445 xmlGenericError(xmlGenericErrorContext
,
11446 "PP: entering EOF\n");
11448 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11449 ctxt
->sax
->endDocument(ctxt
->userData
);
11452 if ((cur
== '<') && (next
== '?')) {
11453 /* PI or XML decl */
11454 if (avail
< 5) goto done
;
11455 if ((!terminate
) &&
11456 (!xmlParseLookupString(ctxt
, 2, "?>", 2)))
11458 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
11459 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
11460 &xmlDefaultSAXLocator
);
11461 if ((ctxt
->input
->cur
[2] == 'x') &&
11462 (ctxt
->input
->cur
[3] == 'm') &&
11463 (ctxt
->input
->cur
[4] == 'l') &&
11464 (IS_BLANK_CH(ctxt
->input
->cur
[5]))) {
11467 xmlGenericError(xmlGenericErrorContext
,
11468 "PP: Parsing XML Decl\n");
11470 xmlParseXMLDecl(ctxt
);
11471 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
11473 * The XML REC instructs us to stop parsing right
11476 xmlHaltParser(ctxt
);
11479 ctxt
->standalone
= ctxt
->input
->standalone
;
11480 if ((ctxt
->encoding
== NULL
) &&
11481 (ctxt
->input
->encoding
!= NULL
))
11482 ctxt
->encoding
= xmlStrdup(ctxt
->input
->encoding
);
11483 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
11484 (!ctxt
->disableSAX
))
11485 ctxt
->sax
->startDocument(ctxt
->userData
);
11486 ctxt
->instate
= XML_PARSER_MISC
;
11488 xmlGenericError(xmlGenericErrorContext
,
11489 "PP: entering MISC\n");
11492 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
11493 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
11494 (!ctxt
->disableSAX
))
11495 ctxt
->sax
->startDocument(ctxt
->userData
);
11496 ctxt
->instate
= XML_PARSER_MISC
;
11498 xmlGenericError(xmlGenericErrorContext
,
11499 "PP: entering MISC\n");
11503 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
11504 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
11505 &xmlDefaultSAXLocator
);
11506 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
11507 if (ctxt
->version
== NULL
) {
11508 xmlErrMemory(ctxt
, NULL
);
11511 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
11512 (!ctxt
->disableSAX
))
11513 ctxt
->sax
->startDocument(ctxt
->userData
);
11514 ctxt
->instate
= XML_PARSER_MISC
;
11516 xmlGenericError(xmlGenericErrorContext
,
11517 "PP: entering MISC\n");
11521 case XML_PARSER_START_TAG
: {
11522 const xmlChar
*name
;
11523 const xmlChar
*prefix
= NULL
;
11524 const xmlChar
*URI
= NULL
;
11525 int line
= ctxt
->input
->line
;
11526 int nsNr
= ctxt
->nsNr
;
11528 if ((avail
< 2) && (ctxt
->inputNr
== 1))
11530 cur
= ctxt
->input
->cur
[0];
11532 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
11533 xmlHaltParser(ctxt
);
11534 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11535 ctxt
->sax
->endDocument(ctxt
->userData
);
11538 if ((!terminate
) && (!xmlParseLookupGt(ctxt
)))
11540 if (ctxt
->spaceNr
== 0)
11541 spacePush(ctxt
, -1);
11542 else if (*ctxt
->space
== -2)
11543 spacePush(ctxt
, -1);
11545 spacePush(ctxt
, *ctxt
->space
);
11546 #ifdef LIBXML_SAX1_ENABLED
11548 #endif /* LIBXML_SAX1_ENABLED */
11549 name
= xmlParseStartTag2(ctxt
, &prefix
, &URI
, &tlen
);
11550 #ifdef LIBXML_SAX1_ENABLED
11552 name
= xmlParseStartTag(ctxt
);
11553 #endif /* LIBXML_SAX1_ENABLED */
11554 if (ctxt
->instate
== XML_PARSER_EOF
)
11556 if (name
== NULL
) {
11558 xmlHaltParser(ctxt
);
11559 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11560 ctxt
->sax
->endDocument(ctxt
->userData
);
11563 #ifdef LIBXML_VALID_ENABLED
11565 * [ VC: Root Element Type ]
11566 * The Name in the document type declaration must match
11567 * the element type of the root element.
11569 if (ctxt
->validate
&& ctxt
->wellFormed
&& ctxt
->myDoc
&&
11570 ctxt
->node
&& (ctxt
->node
== ctxt
->myDoc
->children
))
11571 ctxt
->valid
&= xmlValidateRoot(&ctxt
->vctxt
, ctxt
->myDoc
);
11572 #endif /* LIBXML_VALID_ENABLED */
11575 * Check for an Empty Element.
11577 if ((RAW
== '/') && (NXT(1) == '>')) {
11581 if ((ctxt
->sax
!= NULL
) &&
11582 (ctxt
->sax
->endElementNs
!= NULL
) &&
11583 (!ctxt
->disableSAX
))
11584 ctxt
->sax
->endElementNs(ctxt
->userData
, name
,
11586 if (ctxt
->nsNr
- nsNr
> 0)
11587 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
11588 #ifdef LIBXML_SAX1_ENABLED
11590 if ((ctxt
->sax
!= NULL
) &&
11591 (ctxt
->sax
->endElement
!= NULL
) &&
11592 (!ctxt
->disableSAX
))
11593 ctxt
->sax
->endElement(ctxt
->userData
, name
);
11594 #endif /* LIBXML_SAX1_ENABLED */
11596 if (ctxt
->instate
== XML_PARSER_EOF
)
11599 if (ctxt
->nameNr
== 0) {
11600 ctxt
->instate
= XML_PARSER_EPILOG
;
11602 ctxt
->instate
= XML_PARSER_CONTENT
;
11609 xmlFatalErrMsgStr(ctxt
, XML_ERR_GT_REQUIRED
,
11610 "Couldn't find end of Start Tag %s\n",
11615 nameNsPush(ctxt
, name
, prefix
, URI
, line
, ctxt
->nsNr
- nsNr
);
11617 ctxt
->instate
= XML_PARSER_CONTENT
;
11620 case XML_PARSER_CONTENT
: {
11621 if ((avail
< 2) && (ctxt
->inputNr
== 1))
11623 cur
= ctxt
->input
->cur
[0];
11624 next
= ctxt
->input
->cur
[1];
11626 if ((cur
== '<') && (next
== '/')) {
11627 ctxt
->instate
= XML_PARSER_END_TAG
;
11629 } else if ((cur
== '<') && (next
== '?')) {
11630 if ((!terminate
) &&
11631 (!xmlParseLookupString(ctxt
, 2, "?>", 2)))
11634 ctxt
->instate
= XML_PARSER_CONTENT
;
11635 } else if ((cur
== '<') && (next
!= '!')) {
11636 ctxt
->instate
= XML_PARSER_START_TAG
;
11638 } else if ((cur
== '<') && (next
== '!') &&
11639 (ctxt
->input
->cur
[2] == '-') &&
11640 (ctxt
->input
->cur
[3] == '-')) {
11641 if ((!terminate
) &&
11642 (!xmlParseLookupString(ctxt
, 4, "-->", 3)))
11644 xmlParseComment(ctxt
);
11645 ctxt
->instate
= XML_PARSER_CONTENT
;
11646 } else if ((cur
== '<') && (ctxt
->input
->cur
[1] == '!') &&
11647 (ctxt
->input
->cur
[2] == '[') &&
11648 (ctxt
->input
->cur
[3] == 'C') &&
11649 (ctxt
->input
->cur
[4] == 'D') &&
11650 (ctxt
->input
->cur
[5] == 'A') &&
11651 (ctxt
->input
->cur
[6] == 'T') &&
11652 (ctxt
->input
->cur
[7] == 'A') &&
11653 (ctxt
->input
->cur
[8] == '[')) {
11655 ctxt
->instate
= XML_PARSER_CDATA_SECTION
;
11657 } else if ((cur
== '<') && (next
== '!') &&
11660 } else if (cur
== '<') {
11661 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
11662 "detected an error in element content\n");
11664 } else if (cur
== '&') {
11665 if ((!terminate
) && (!xmlParseLookupChar(ctxt
, ';')))
11667 xmlParseReference(ctxt
);
11669 /* TODO Avoid the extra copy, handle directly !!! */
11671 * Goal of the following test is:
11672 * - minimize calls to the SAX 'character' callback
11673 * when they are mergeable
11674 * - handle a problem for isBlank when we only parse
11675 * a sequence of blank chars and the next one is
11676 * not available to check against '<' presence.
11677 * - tries to homogenize the differences in SAX
11678 * callbacks between the push and pull versions
11681 if ((ctxt
->inputNr
== 1) &&
11682 (avail
< XML_PARSER_BIG_BUFFER_SIZE
)) {
11683 if ((!terminate
) && (!xmlParseLookupCharData(ctxt
)))
11686 ctxt
->checkIndex
= 0;
11687 xmlParseCharDataInternal(ctxt
, !terminate
);
11691 case XML_PARSER_END_TAG
:
11694 if ((!terminate
) && (!xmlParseLookupChar(ctxt
, '>')))
11697 xmlParseEndTag2(ctxt
, &ctxt
->pushTab
[ctxt
->nameNr
- 1]);
11700 #ifdef LIBXML_SAX1_ENABLED
11702 xmlParseEndTag1(ctxt
, 0);
11703 #endif /* LIBXML_SAX1_ENABLED */
11704 if (ctxt
->instate
== XML_PARSER_EOF
) {
11706 } else if (ctxt
->nameNr
== 0) {
11707 ctxt
->instate
= XML_PARSER_EPILOG
;
11709 ctxt
->instate
= XML_PARSER_CONTENT
;
11712 case XML_PARSER_CDATA_SECTION
: {
11714 * Push mode needs the SAX callback for
11715 * cdataBlock to merge back contiguous callbacks.
11717 const xmlChar
*term
;
11721 * Don't call xmlParseLookupString. If 'terminate'
11722 * is set, checkIndex is invalid.
11724 term
= BAD_CAST
strstr((const char *) ctxt
->input
->cur
,
11727 term
= xmlParseLookupString(ctxt
, 0, "]]>", 3);
11730 if (term
== NULL
) {
11734 /* Unfinished CDATA section */
11735 size
= ctxt
->input
->end
- ctxt
->input
->cur
;
11737 if (avail
< XML_PARSER_BIG_BUFFER_SIZE
+ 2)
11739 ctxt
->checkIndex
= 0;
11740 /* XXX: Why don't we pass the full buffer? */
11741 size
= XML_PARSER_BIG_BUFFER_SIZE
;
11743 tmp
= xmlCheckCdataPush(ctxt
->input
->cur
, size
, 0);
11746 ctxt
->input
->cur
+= tmp
;
11747 goto encoding_error
;
11749 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
11750 if (ctxt
->sax
->cdataBlock
!= NULL
)
11751 ctxt
->sax
->cdataBlock(ctxt
->userData
,
11752 ctxt
->input
->cur
, tmp
);
11753 else if (ctxt
->sax
->characters
!= NULL
)
11754 ctxt
->sax
->characters(ctxt
->userData
,
11755 ctxt
->input
->cur
, tmp
);
11757 if (ctxt
->instate
== XML_PARSER_EOF
)
11761 int base
= term
- CUR_PTR
;
11764 tmp
= xmlCheckCdataPush(ctxt
->input
->cur
, base
, 1);
11765 if ((tmp
< 0) || (tmp
!= base
)) {
11767 ctxt
->input
->cur
+= tmp
;
11768 goto encoding_error
;
11770 if ((ctxt
->sax
!= NULL
) && (base
== 0) &&
11771 (ctxt
->sax
->cdataBlock
!= NULL
) &&
11772 (!ctxt
->disableSAX
)) {
11774 * Special case to provide identical behaviour
11775 * between pull and push parsers on empty CDATA
11778 if ((ctxt
->input
->cur
- ctxt
->input
->base
>= 9) &&
11779 (!strncmp((const char *)&ctxt
->input
->cur
[-9],
11781 ctxt
->sax
->cdataBlock(ctxt
->userData
,
11783 } else if ((ctxt
->sax
!= NULL
) && (base
> 0) &&
11784 (!ctxt
->disableSAX
)) {
11785 if (ctxt
->sax
->cdataBlock
!= NULL
)
11786 ctxt
->sax
->cdataBlock(ctxt
->userData
,
11787 ctxt
->input
->cur
, base
);
11788 else if (ctxt
->sax
->characters
!= NULL
)
11789 ctxt
->sax
->characters(ctxt
->userData
,
11790 ctxt
->input
->cur
, base
);
11792 if (ctxt
->instate
== XML_PARSER_EOF
)
11795 ctxt
->instate
= XML_PARSER_CONTENT
;
11797 xmlGenericError(xmlGenericErrorContext
,
11798 "PP: entering CONTENT\n");
11803 case XML_PARSER_MISC
:
11804 case XML_PARSER_PROLOG
:
11805 case XML_PARSER_EPILOG
:
11807 avail
= ctxt
->input
->end
- ctxt
->input
->cur
;
11810 cur
= ctxt
->input
->cur
[0];
11811 next
= ctxt
->input
->cur
[1];
11812 if ((cur
== '<') && (next
== '?')) {
11813 if ((!terminate
) &&
11814 (!xmlParseLookupString(ctxt
, 2, "?>", 2)))
11817 xmlGenericError(xmlGenericErrorContext
,
11818 "PP: Parsing PI\n");
11821 if (ctxt
->instate
== XML_PARSER_EOF
)
11823 } else if ((cur
== '<') && (next
== '!') &&
11824 (ctxt
->input
->cur
[2] == '-') &&
11825 (ctxt
->input
->cur
[3] == '-')) {
11826 if ((!terminate
) &&
11827 (!xmlParseLookupString(ctxt
, 4, "-->", 3)))
11830 xmlGenericError(xmlGenericErrorContext
,
11831 "PP: Parsing Comment\n");
11833 xmlParseComment(ctxt
);
11834 if (ctxt
->instate
== XML_PARSER_EOF
)
11836 } else if ((ctxt
->instate
== XML_PARSER_MISC
) &&
11837 (cur
== '<') && (next
== '!') &&
11838 (ctxt
->input
->cur
[2] == 'D') &&
11839 (ctxt
->input
->cur
[3] == 'O') &&
11840 (ctxt
->input
->cur
[4] == 'C') &&
11841 (ctxt
->input
->cur
[5] == 'T') &&
11842 (ctxt
->input
->cur
[6] == 'Y') &&
11843 (ctxt
->input
->cur
[7] == 'P') &&
11844 (ctxt
->input
->cur
[8] == 'E')) {
11845 if ((!terminate
) && (!xmlParseLookupGt(ctxt
)))
11848 xmlGenericError(xmlGenericErrorContext
,
11849 "PP: Parsing internal subset\n");
11851 ctxt
->inSubset
= 1;
11852 xmlParseDocTypeDecl(ctxt
);
11853 if (ctxt
->instate
== XML_PARSER_EOF
)
11856 ctxt
->instate
= XML_PARSER_DTD
;
11858 xmlGenericError(xmlGenericErrorContext
,
11859 "PP: entering DTD\n");
11863 * Create and update the external subset.
11865 ctxt
->inSubset
= 2;
11866 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
11867 (ctxt
->sax
->externalSubset
!= NULL
))
11868 ctxt
->sax
->externalSubset(ctxt
->userData
,
11869 ctxt
->intSubName
, ctxt
->extSubSystem
,
11871 ctxt
->inSubset
= 0;
11872 xmlCleanSpecialAttr(ctxt
);
11873 ctxt
->instate
= XML_PARSER_PROLOG
;
11875 xmlGenericError(xmlGenericErrorContext
,
11876 "PP: entering PROLOG\n");
11879 } else if ((cur
== '<') && (next
== '!') &&
11881 (ctxt
->instate
== XML_PARSER_MISC
? 9 : 4))) {
11883 } else if (ctxt
->instate
== XML_PARSER_EPILOG
) {
11884 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
11885 xmlHaltParser(ctxt
);
11887 xmlGenericError(xmlGenericErrorContext
,
11888 "PP: entering EOF\n");
11890 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11891 ctxt
->sax
->endDocument(ctxt
->userData
);
11894 ctxt
->instate
= XML_PARSER_START_TAG
;
11896 xmlGenericError(xmlGenericErrorContext
,
11897 "PP: entering START_TAG\n");
11901 case XML_PARSER_DTD
: {
11902 if ((!terminate
) && (!xmlParseLookupInternalSubset(ctxt
)))
11904 xmlParseInternalSubset(ctxt
);
11905 if (ctxt
->instate
== XML_PARSER_EOF
)
11907 ctxt
->inSubset
= 2;
11908 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
11909 (ctxt
->sax
->externalSubset
!= NULL
))
11910 ctxt
->sax
->externalSubset(ctxt
->userData
, ctxt
->intSubName
,
11911 ctxt
->extSubSystem
, ctxt
->extSubURI
);
11912 ctxt
->inSubset
= 0;
11913 xmlCleanSpecialAttr(ctxt
);
11914 if (ctxt
->instate
== XML_PARSER_EOF
)
11916 ctxt
->instate
= XML_PARSER_PROLOG
;
11918 xmlGenericError(xmlGenericErrorContext
,
11919 "PP: entering PROLOG\n");
11923 case XML_PARSER_COMMENT
:
11924 xmlGenericError(xmlGenericErrorContext
,
11925 "PP: internal error, state == COMMENT\n");
11926 ctxt
->instate
= XML_PARSER_CONTENT
;
11928 xmlGenericError(xmlGenericErrorContext
,
11929 "PP: entering CONTENT\n");
11932 case XML_PARSER_IGNORE
:
11933 xmlGenericError(xmlGenericErrorContext
,
11934 "PP: internal error, state == IGNORE");
11935 ctxt
->instate
= XML_PARSER_DTD
;
11937 xmlGenericError(xmlGenericErrorContext
,
11938 "PP: entering DTD\n");
11941 case XML_PARSER_PI
:
11942 xmlGenericError(xmlGenericErrorContext
,
11943 "PP: internal error, state == PI\n");
11944 ctxt
->instate
= XML_PARSER_CONTENT
;
11946 xmlGenericError(xmlGenericErrorContext
,
11947 "PP: entering CONTENT\n");
11950 case XML_PARSER_ENTITY_DECL
:
11951 xmlGenericError(xmlGenericErrorContext
,
11952 "PP: internal error, state == ENTITY_DECL\n");
11953 ctxt
->instate
= XML_PARSER_DTD
;
11955 xmlGenericError(xmlGenericErrorContext
,
11956 "PP: entering DTD\n");
11959 case XML_PARSER_ENTITY_VALUE
:
11960 xmlGenericError(xmlGenericErrorContext
,
11961 "PP: internal error, state == ENTITY_VALUE\n");
11962 ctxt
->instate
= XML_PARSER_CONTENT
;
11964 xmlGenericError(xmlGenericErrorContext
,
11965 "PP: entering DTD\n");
11968 case XML_PARSER_ATTRIBUTE_VALUE
:
11969 xmlGenericError(xmlGenericErrorContext
,
11970 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11971 ctxt
->instate
= XML_PARSER_START_TAG
;
11973 xmlGenericError(xmlGenericErrorContext
,
11974 "PP: entering START_TAG\n");
11977 case XML_PARSER_SYSTEM_LITERAL
:
11978 xmlGenericError(xmlGenericErrorContext
,
11979 "PP: internal error, state == SYSTEM_LITERAL\n");
11980 ctxt
->instate
= XML_PARSER_START_TAG
;
11982 xmlGenericError(xmlGenericErrorContext
,
11983 "PP: entering START_TAG\n");
11986 case XML_PARSER_PUBLIC_LITERAL
:
11987 xmlGenericError(xmlGenericErrorContext
,
11988 "PP: internal error, state == PUBLIC_LITERAL\n");
11989 ctxt
->instate
= XML_PARSER_START_TAG
;
11991 xmlGenericError(xmlGenericErrorContext
,
11992 "PP: entering START_TAG\n");
11999 xmlGenericError(xmlGenericErrorContext
, "PP: done %d\n", ret
);
12003 if (ctxt
->input
->end
- ctxt
->input
->cur
< 4) {
12004 __xmlErrEncoding(ctxt
, XML_ERR_INVALID_CHAR
,
12005 "Input is not proper UTF-8, indicate encoding !\n",
12010 snprintf(buffer
, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12011 ctxt
->input
->cur
[0], ctxt
->input
->cur
[1],
12012 ctxt
->input
->cur
[2], ctxt
->input
->cur
[3]);
12013 __xmlErrEncoding(ctxt
, XML_ERR_INVALID_CHAR
,
12014 "Input is not proper UTF-8, indicate encoding !\n%s",
12015 BAD_CAST buffer
, NULL
);
12022 * @ctxt: an XML parser context
12023 * @chunk: a char array
12024 * @size: the size in bytes of the chunk
12025 * @terminate: last chunk indicator
12027 * Parse a Chunk of memory
12029 * Returns zero if no error, the xmlParserErrors otherwise.
12032 xmlParseChunk(xmlParserCtxtPtr ctxt
, const char *chunk
, int size
,
12037 return(XML_ERR_INTERNAL_ERROR
);
12038 if ((ctxt
->errNo
!= XML_ERR_OK
) && (ctxt
->disableSAX
== 1))
12039 return(ctxt
->errNo
);
12040 if (ctxt
->instate
== XML_PARSER_EOF
)
12042 if (ctxt
->input
== NULL
)
12045 ctxt
->progressive
= 1;
12046 if (ctxt
->instate
== XML_PARSER_START
)
12047 xmlDetectSAX2(ctxt
);
12048 if ((size
> 0) && (chunk
!= NULL
) && (!terminate
) &&
12049 (chunk
[size
- 1] == '\r')) {
12054 if ((size
> 0) && (chunk
!= NULL
) && (ctxt
->input
!= NULL
) &&
12055 (ctxt
->input
->buf
!= NULL
) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
12056 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
, ctxt
->input
);
12057 size_t cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
12060 res
= xmlParserInputBufferPush(ctxt
->input
->buf
, size
, chunk
);
12061 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
, base
, cur
);
12063 ctxt
->errNo
= XML_PARSER_EOF
;
12064 xmlHaltParser(ctxt
);
12065 return (XML_PARSER_EOF
);
12068 xmlGenericError(xmlGenericErrorContext
, "PP: pushed %d\n", size
);
12071 } else if (ctxt
->instate
!= XML_PARSER_EOF
) {
12072 if ((ctxt
->input
!= NULL
) && ctxt
->input
->buf
!= NULL
) {
12073 xmlParserInputBufferPtr in
= ctxt
->input
->buf
;
12074 if ((in
->encoder
!= NULL
) && (in
->buffer
!= NULL
) &&
12075 (in
->raw
!= NULL
)) {
12077 size_t base
= xmlBufGetInputBase(in
->buffer
, ctxt
->input
);
12078 size_t current
= ctxt
->input
->cur
- ctxt
->input
->base
;
12080 nbchars
= xmlCharEncInput(in
, terminate
);
12081 xmlBufSetInputBaseCur(in
->buffer
, ctxt
->input
, base
, current
);
12084 xmlGenericError(xmlGenericErrorContext
,
12085 "xmlParseChunk: encoder error\n");
12086 xmlHaltParser(ctxt
);
12087 return(XML_ERR_INVALID_ENCODING
);
12093 xmlParseTryOrFinish(ctxt
, terminate
);
12094 if (ctxt
->instate
== XML_PARSER_EOF
)
12095 return(ctxt
->errNo
);
12097 if ((ctxt
->input
!= NULL
) &&
12098 (((ctxt
->input
->end
- ctxt
->input
->cur
) > XML_MAX_LOOKUP_LIMIT
) ||
12099 ((ctxt
->input
->cur
- ctxt
->input
->base
) > XML_MAX_LOOKUP_LIMIT
)) &&
12100 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
12101 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
, "Huge input lookup");
12102 xmlHaltParser(ctxt
);
12104 if ((ctxt
->errNo
!= XML_ERR_OK
) && (ctxt
->disableSAX
== 1))
12105 return(ctxt
->errNo
);
12107 if ((end_in_lf
== 1) && (ctxt
->input
!= NULL
) &&
12108 (ctxt
->input
->buf
!= NULL
)) {
12109 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
,
12111 size_t current
= ctxt
->input
->cur
- ctxt
->input
->base
;
12113 xmlParserInputBufferPush(ctxt
->input
->buf
, 1, "\r");
12115 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
,
12120 * Check for termination
12122 if ((ctxt
->instate
!= XML_PARSER_EOF
) &&
12123 (ctxt
->instate
!= XML_PARSER_EPILOG
)) {
12124 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
12126 if ((ctxt
->instate
== XML_PARSER_EPILOG
) &&
12127 (ctxt
->input
->cur
< ctxt
->input
->end
)) {
12128 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
12130 if (ctxt
->instate
!= XML_PARSER_EOF
) {
12131 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
12132 ctxt
->sax
->endDocument(ctxt
->userData
);
12134 ctxt
->instate
= XML_PARSER_EOF
;
12136 if (ctxt
->wellFormed
== 0)
12137 return((xmlParserErrors
) ctxt
->errNo
);
12142 /************************************************************************
12144 * I/O front end functions to the parser *
12146 ************************************************************************/
12149 * xmlCreatePushParserCtxt:
12150 * @sax: a SAX handler
12151 * @user_data: The user data returned on SAX callbacks
12152 * @chunk: a pointer to an array of chars
12153 * @size: number of chars in the array
12154 * @filename: an optional file name or URI
12156 * Create a parser context for using the XML parser in push mode.
12157 * If @chunk is non-NULL and @size is non-zero, the data is used to detect
12158 * the encoding. The remaining characters will be parsed so they
12159 * don't need to be fed in again through xmlParseChunk.
12160 * To allow content encoding detection, @size should be >= 4
12161 * The value of @filename is used for fetching external entities
12162 * and error/warning reports.
12164 * Returns the new parser context or NULL
12168 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax
, void *user_data
,
12169 const char *chunk
, int size
, const char *filename
) {
12170 xmlParserCtxtPtr ctxt
;
12171 xmlParserInputPtr inputStream
;
12172 xmlParserInputBufferPtr buf
;
12174 buf
= xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE
);
12175 if (buf
== NULL
) return(NULL
);
12177 ctxt
= xmlNewSAXParserCtxt(sax
, user_data
);
12178 if (ctxt
== NULL
) {
12179 xmlErrMemory(NULL
, "creating parser: out of memory\n");
12180 xmlFreeParserInputBuffer(buf
);
12183 ctxt
->dictNames
= 1;
12184 if (filename
== NULL
) {
12185 ctxt
->directory
= NULL
;
12187 ctxt
->directory
= xmlParserGetDirectory(filename
);
12190 inputStream
= xmlNewInputStream(ctxt
);
12191 if (inputStream
== NULL
) {
12192 xmlFreeParserCtxt(ctxt
);
12193 xmlFreeParserInputBuffer(buf
);
12197 if (filename
== NULL
)
12198 inputStream
->filename
= NULL
;
12200 inputStream
->filename
= (char *)
12201 xmlCanonicPath((const xmlChar
*) filename
);
12202 if (inputStream
->filename
== NULL
) {
12203 xmlFreeInputStream(inputStream
);
12204 xmlFreeParserCtxt(ctxt
);
12205 xmlFreeParserInputBuffer(buf
);
12209 inputStream
->buf
= buf
;
12210 xmlBufResetInput(inputStream
->buf
->buffer
, inputStream
);
12211 inputPush(ctxt
, inputStream
);
12214 * If the caller didn't provide an initial 'chunk' for determining
12215 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12216 * that it can be automatically determined later
12218 ctxt
->charset
= XML_CHAR_ENCODING_NONE
;
12220 if ((size
!= 0) && (chunk
!= NULL
) &&
12221 (ctxt
->input
!= NULL
) && (ctxt
->input
->buf
!= NULL
)) {
12222 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
, ctxt
->input
);
12223 size_t cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
12225 xmlParserInputBufferPush(ctxt
->input
->buf
, size
, chunk
);
12227 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
, base
, cur
);
12229 xmlGenericError(xmlGenericErrorContext
, "PP: pushed %d\n", size
);
12235 #endif /* LIBXML_PUSH_ENABLED */
12239 * @ctxt: an XML parser context
12241 * Blocks further parser processing
12244 xmlStopParser(xmlParserCtxtPtr ctxt
) {
12247 xmlHaltParser(ctxt
);
12248 ctxt
->errNo
= XML_ERR_USER_STOP
;
12252 * xmlCreateIOParserCtxt:
12253 * @sax: a SAX handler
12254 * @user_data: The user data returned on SAX callbacks
12255 * @ioread: an I/O read function
12256 * @ioclose: an I/O close function
12257 * @ioctx: an I/O handler
12258 * @enc: the charset encoding if known
12260 * Create a parser context for using the XML parser with an existing
12263 * Returns the new parser context or NULL
12266 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax
, void *user_data
,
12267 xmlInputReadCallback ioread
, xmlInputCloseCallback ioclose
,
12268 void *ioctx
, xmlCharEncoding enc
) {
12269 xmlParserCtxtPtr ctxt
;
12270 xmlParserInputPtr inputStream
;
12271 xmlParserInputBufferPtr buf
;
12273 if (ioread
== NULL
) return(NULL
);
12275 buf
= xmlParserInputBufferCreateIO(ioread
, ioclose
, ioctx
, enc
);
12277 if (ioclose
!= NULL
)
12282 ctxt
= xmlNewSAXParserCtxt(sax
, user_data
);
12283 if (ctxt
== NULL
) {
12284 xmlFreeParserInputBuffer(buf
);
12288 inputStream
= xmlNewIOInputStream(ctxt
, buf
, enc
);
12289 if (inputStream
== NULL
) {
12290 xmlFreeParserCtxt(ctxt
);
12293 inputPush(ctxt
, inputStream
);
12298 #ifdef LIBXML_VALID_ENABLED
12299 /************************************************************************
12301 * Front ends when parsing a DTD *
12303 ************************************************************************/
12307 * @sax: the SAX handler block or NULL
12308 * @input: an Input Buffer
12309 * @enc: the charset encoding if known
12311 * Load and parse a DTD
12313 * Returns the resulting xmlDtdPtr or NULL in case of error.
12314 * @input will be freed by the function in any case.
12318 xmlIOParseDTD(xmlSAXHandlerPtr sax
, xmlParserInputBufferPtr input
,
12319 xmlCharEncoding enc
) {
12320 xmlDtdPtr ret
= NULL
;
12321 xmlParserCtxtPtr ctxt
;
12322 xmlParserInputPtr pinput
= NULL
;
12328 ctxt
= xmlNewSAXParserCtxt(sax
, NULL
);
12329 if (ctxt
== NULL
) {
12330 xmlFreeParserInputBuffer(input
);
12334 /* We are loading a DTD */
12335 ctxt
->options
|= XML_PARSE_DTDLOAD
;
12337 xmlDetectSAX2(ctxt
);
12340 * generate a parser input from the I/O handler
12343 pinput
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
12344 if (pinput
== NULL
) {
12345 xmlFreeParserInputBuffer(input
);
12346 xmlFreeParserCtxt(ctxt
);
12351 * plug some encoding conversion routines here.
12353 if (xmlPushInput(ctxt
, pinput
) < 0) {
12354 xmlFreeParserCtxt(ctxt
);
12357 if (enc
!= XML_CHAR_ENCODING_NONE
) {
12358 xmlSwitchEncoding(ctxt
, enc
);
12361 pinput
->filename
= NULL
;
12364 pinput
->base
= ctxt
->input
->cur
;
12365 pinput
->cur
= ctxt
->input
->cur
;
12366 pinput
->free
= NULL
;
12369 * let's parse that entity knowing it's an external subset.
12371 ctxt
->inSubset
= 2;
12372 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
12373 if (ctxt
->myDoc
== NULL
) {
12374 xmlErrMemory(ctxt
, "New Doc failed");
12377 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
12378 ctxt
->myDoc
->extSubset
= xmlNewDtd(ctxt
->myDoc
, BAD_CAST
"none",
12379 BAD_CAST
"none", BAD_CAST
"none");
12381 if ((enc
== XML_CHAR_ENCODING_NONE
) &&
12382 ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4)) {
12384 * Get the 4 first bytes and decode the charset
12385 * if enc != XML_CHAR_ENCODING_NONE
12386 * plug some encoding conversion routines.
12392 enc
= xmlDetectCharEncoding(start
, 4);
12393 if (enc
!= XML_CHAR_ENCODING_NONE
) {
12394 xmlSwitchEncoding(ctxt
, enc
);
12398 xmlParseExternalSubset(ctxt
, BAD_CAST
"none", BAD_CAST
"none");
12400 if (ctxt
->myDoc
!= NULL
) {
12401 if (ctxt
->wellFormed
) {
12402 ret
= ctxt
->myDoc
->extSubset
;
12403 ctxt
->myDoc
->extSubset
= NULL
;
12408 tmp
= ret
->children
;
12409 while (tmp
!= NULL
) {
12417 xmlFreeDoc(ctxt
->myDoc
);
12418 ctxt
->myDoc
= NULL
;
12420 xmlFreeParserCtxt(ctxt
);
12427 * @sax: the SAX handler block
12428 * @ExternalID: a NAME* containing the External ID of the DTD
12429 * @SystemID: a NAME* containing the URL to the DTD
12431 * DEPRECATED: Don't use.
12433 * Load and parse an external subset.
12435 * Returns the resulting xmlDtdPtr or NULL in case of error.
12439 xmlSAXParseDTD(xmlSAXHandlerPtr sax
, const xmlChar
*ExternalID
,
12440 const xmlChar
*SystemID
) {
12441 xmlDtdPtr ret
= NULL
;
12442 xmlParserCtxtPtr ctxt
;
12443 xmlParserInputPtr input
= NULL
;
12444 xmlCharEncoding enc
;
12445 xmlChar
* systemIdCanonic
;
12447 if ((ExternalID
== NULL
) && (SystemID
== NULL
)) return(NULL
);
12449 ctxt
= xmlNewSAXParserCtxt(sax
, NULL
);
12450 if (ctxt
== NULL
) {
12454 /* We are loading a DTD */
12455 ctxt
->options
|= XML_PARSE_DTDLOAD
;
12458 * Canonicalise the system ID
12460 systemIdCanonic
= xmlCanonicPath(SystemID
);
12461 if ((SystemID
!= NULL
) && (systemIdCanonic
== NULL
)) {
12462 xmlFreeParserCtxt(ctxt
);
12467 * Ask the entity resolver to load the external subset
12470 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->resolveEntity
!= NULL
))
12471 input
= ctxt
->sax
->resolveEntity(ctxt
->userData
, ExternalID
,
12473 if (input
== NULL
) {
12474 xmlFreeParserCtxt(ctxt
);
12475 if (systemIdCanonic
!= NULL
)
12476 xmlFree(systemIdCanonic
);
12481 * plug some encoding conversion routines here.
12483 if (xmlPushInput(ctxt
, input
) < 0) {
12484 xmlFreeParserCtxt(ctxt
);
12485 if (systemIdCanonic
!= NULL
)
12486 xmlFree(systemIdCanonic
);
12489 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
12490 enc
= xmlDetectCharEncoding(ctxt
->input
->cur
, 4);
12491 xmlSwitchEncoding(ctxt
, enc
);
12494 if (input
->filename
== NULL
)
12495 input
->filename
= (char *) systemIdCanonic
;
12497 xmlFree(systemIdCanonic
);
12500 input
->base
= ctxt
->input
->cur
;
12501 input
->cur
= ctxt
->input
->cur
;
12502 input
->free
= NULL
;
12505 * let's parse that entity knowing it's an external subset.
12507 ctxt
->inSubset
= 2;
12508 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
12509 if (ctxt
->myDoc
== NULL
) {
12510 xmlErrMemory(ctxt
, "New Doc failed");
12511 xmlFreeParserCtxt(ctxt
);
12514 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
12515 ctxt
->myDoc
->extSubset
= xmlNewDtd(ctxt
->myDoc
, BAD_CAST
"none",
12516 ExternalID
, SystemID
);
12517 xmlParseExternalSubset(ctxt
, ExternalID
, SystemID
);
12519 if (ctxt
->myDoc
!= NULL
) {
12520 if (ctxt
->wellFormed
) {
12521 ret
= ctxt
->myDoc
->extSubset
;
12522 ctxt
->myDoc
->extSubset
= NULL
;
12527 tmp
= ret
->children
;
12528 while (tmp
!= NULL
) {
12536 xmlFreeDoc(ctxt
->myDoc
);
12537 ctxt
->myDoc
= NULL
;
12539 xmlFreeParserCtxt(ctxt
);
12547 * @ExternalID: a NAME* containing the External ID of the DTD
12548 * @SystemID: a NAME* containing the URL to the DTD
12550 * Load and parse an external subset.
12552 * Returns the resulting xmlDtdPtr or NULL in case of error.
/*
 * xmlParseDTD: convenience wrapper that forwards ExternalID and SystemID to
 * xmlSAXParseDTD with a NULL SAX handler and returns its result unchanged.
 */
12556 xmlParseDTD(const xmlChar
*ExternalID
, const xmlChar
*SystemID
) {
12557 return(xmlSAXParseDTD(NULL
, ExternalID
, SystemID
));
12559 #endif /* LIBXML_VALID_ENABLED */
12561 /************************************************************************
12563 * Front ends when parsing an Entity *
12565 ************************************************************************/
12568 * xmlParseCtxtExternalEntity:
12569 * @ctx: the existing parsing context
12570 * @URL: the URL for the entity to load
12571 * @ID: the System ID for the entity to load
12572 * @lst: the return value for the set of parsed nodes
12574 * Parse an external general entity within an existing parsing context
12575 * An external general parsed entity is well-formed if it matches the
12576 * production labeled extParsedEnt.
12578 * [78] extParsedEnt ::= TextDecl? content
12580 * Returns 0 if the entity is well formed, -1 in case of args problem and
12581 * the parser error code otherwise
/*
 * xmlParseCtxtExternalEntity: parse an external general entity on behalf of
 * an existing parser context.
 *
 * ctx: the existing context; NULL is rejected with -1.
 * URL, ID, lst: forwarded to the private worker (the trailing arguments of
 *      that call are on a line not shown in this listing).
 *
 * The work is delegated to xmlParseExternalEntityPrivate with the document
 * and SAX handler of ctx and a depth of ctx->depth + 1.
 */
12585 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx
, const xmlChar
*URL
,
12586 const xmlChar
*ID
, xmlNodePtr
*lst
) {
12589 if (ctx
== NULL
) return(-1);
12591 * If the user provided their own SAX callbacks, then reuse the
12592 * userData callback field, otherwise the expected setup in a
12593 * DOM builder is to have userData == ctxt
/*
 * Only one of the two userData assignments is visible here; the one taken
 * when ctx->userData == ctx is on a line this listing omits.
 */
12595 if (ctx
->userData
== ctx
)
12598 userData
= ctx
->userData
;
12599 return xmlParseExternalEntityPrivate(ctx
->myDoc
, ctx
, ctx
->sax
,
12600 userData
, ctx
->depth
+ 1,
12605 * xmlParseExternalEntityPrivate:
12606 * @doc: the document the chunk pertains to
12607 * @oldctxt: the previous parser context if available
12608 * @sax: the SAX handler block (possibly NULL)
12609 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12610 * @depth: Used for loop detection, use 0
12611 * @URL: the URL for the entity to load
12612 * @ID: the System ID for the entity to load
12613 * @list: the return value for the set of parsed nodes
12615 * Private version of xmlParseExternalEntity()
12617 * Returns 0 if the entity is well formed, -1 in case of args problem and
12618 * the parser error code otherwise
/*
 * xmlParseExternalEntityPrivate: parse the external entity named by URL/ID
 * in a fresh parser context and collect the resulting nodes.
 *
 * doc:       document the chunk pertains to; its DTD subsets and dictionary
 *            are shared with, and its URL copied to, a temporary document.
 * oldctxt:   parent parser context, or NULL.
 * sax, user_data: passed to xmlCreateEntityParserCtxtInternal.
 * depth:     nesting depth, stored in the new context.
 * URL, ID:   location of the entity; both NULL is rejected with
 *            XML_ERR_INTERNAL_ERROR.
 * list:      when non-NULL, the nodes parsed under the temporary pseudoroot
 *            element are detached from it.
 *
 * Visible results: XML_ERR_ENTITY_LOOP when the depth limit trips,
 * XML_ERR_INTERNAL_ERROR on argument or allocation problems,
 * XML_WAR_UNDECLARED_ENTITY when no context can be created, and the errNo
 * of the new context when the content is not well formed. ret starts as
 * XML_ERR_OK.
 *
 * With a non-NULL oldctxt the new context copies its settings and shares its
 * dictionary, attribute default tables and node_seq. On the way out the
 * error counters, validity state, node_seq and entity size accounting are
 * handed back to oldctxt and the shared pointers are cleared before the new
 * context is freed.
 *
 * NOTE(review): this listing omits several source lines (declarations,
 * closing braces, else keywords, the final return); remarks about branch
 * structure are inferred from the lines that are shown.
 */
12621 static xmlParserErrors
12622 xmlParseExternalEntityPrivate(xmlDocPtr doc
, xmlParserCtxtPtr oldctxt
,
12623 xmlSAXHandlerPtr sax
,
12624 void *user_data
, int depth
, const xmlChar
*URL
,
12625 const xmlChar
*ID
, xmlNodePtr
*list
) {
12626 xmlParserCtxtPtr ctxt
;
12628 xmlNodePtr newRoot
;
12629 xmlParserErrors ret
= XML_ERR_OK
;
12631 xmlCharEncoding enc
;
/*
 * Nesting deeper than 40 is refused unless the parent context has
 * XML_PARSE_HUGE set. The second half of this condition is on a line this
 * listing omits.
 */
12633 if (((depth
> 40) &&
12634 ((oldctxt
== NULL
) || (oldctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
12636 xmlFatalErrMsg(oldctxt
, XML_ERR_ENTITY_LOOP
,
12637 "Maximum entity nesting depth exceeded");
12638 return(XML_ERR_ENTITY_LOOP
);
12643 if ((URL
== NULL
) && (ID
== NULL
))
12644 return(XML_ERR_INTERNAL_ERROR
);
12646 return(XML_ERR_INTERNAL_ERROR
);
12648 ctxt
= xmlCreateEntityParserCtxtInternal(sax
, user_data
, URL
, ID
, NULL
,
12650 if (ctxt
== NULL
) return(XML_WAR_UNDECLARED_ENTITY
);
/* Continue the error and warning counts of the parent context. */
12651 if (oldctxt
!= NULL
) {
12652 ctxt
->nbErrors
= oldctxt
->nbErrors
;
12653 ctxt
->nbWarnings
= oldctxt
->nbWarnings
;
12655 xmlDetectSAX2(ctxt
);
12657 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
12658 if (newDoc
== NULL
) {
12659 xmlFreeParserCtxt(ctxt
);
12660 return(XML_ERR_INTERNAL_ERROR
);
12662 newDoc
->properties
= XML_DOC_INTERNAL
;
/*
 * The temporary document shares the DTD subsets and the dictionary of doc;
 * the dictionary gets an extra reference.
 */
12664 newDoc
->intSubset
= doc
->intSubset
;
12665 newDoc
->extSubset
= doc
->extSubset
;
12667 newDoc
->dict
= doc
->dict
;
12668 xmlDictReference(newDoc
->dict
);
12670 if (doc
->URL
!= NULL
) {
12671 newDoc
->URL
= xmlStrdup(doc
->URL
);
/*
 * If the pseudoroot cannot be created, the shared subsets are cleared
 * before the temporary document is freed.
 */
12674 newRoot
= xmlNewDocNode(newDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
12675 if (newRoot
== NULL
) {
12677 xmlFreeParserCtxt(ctxt
);
12678 newDoc
->intSubset
= NULL
;
12679 newDoc
->extSubset
= NULL
;
12680 xmlFreeDoc(newDoc
);
12681 return(XML_ERR_INTERNAL_ERROR
);
12683 xmlAddChild((xmlNodePtr
) newDoc
, newRoot
);
12684 nodePush(ctxt
, newDoc
->children
);
12686 ctxt
->myDoc
= newDoc
;
/* The pseudoroot is attributed to the caller document, not to newDoc. */
12689 newRoot
->doc
= doc
;
12693 * Get the 4 first bytes and decode the charset
12694 * if enc != XML_CHAR_ENCODING_NONE
12695 * plug some encoding conversion routines.
12698 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
12703 enc
= xmlDetectCharEncoding(start
, 4);
12704 if (enc
!= XML_CHAR_ENCODING_NONE
) {
12705 xmlSwitchEncoding(ctxt
, enc
);
12710 * Parse a possible text declaration first
/*
 * NOTE(review): the version comparison a few lines below reads
 * oldctxt->version without a NULL test, although oldctxt is tested against
 * NULL elsewhere in this function and xmlParseExternalEntity passes NULL for
 * it. An entity that starts with a text declaration would then dereference
 * a NULL pointer. Confirm and guard.
 */
12712 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12713 xmlParseTextDecl(ctxt
);
12715 * An XML-1.0 document can't reference an entity not XML-1.0
12717 if ((xmlStrEqual(oldctxt
->version
, BAD_CAST
"1.0")) &&
12718 (!xmlStrEqual(ctxt
->input
->version
, BAD_CAST
"1.0"))) {
12719 xmlFatalErrMsg(ctxt
, XML_ERR_VERSION_MISMATCH
,
12720 "Version mismatch between document and entity\n");
12724 ctxt
->instate
= XML_PARSER_CONTENT
;
12725 ctxt
->depth
= depth
;
12726 if (oldctxt
!= NULL
) {
12727 ctxt
->_private
= oldctxt
->_private
;
12728 ctxt
->loadsubset
= oldctxt
->loadsubset
;
12729 ctxt
->validate
= oldctxt
->validate
;
12730 ctxt
->valid
= oldctxt
->valid
;
12731 ctxt
->replaceEntities
= oldctxt
->replaceEntities
;
12732 if (oldctxt
->validate
) {
12733 ctxt
->vctxt
.error
= oldctxt
->vctxt
.error
;
12734 ctxt
->vctxt
.warning
= oldctxt
->vctxt
.warning
;
12735 ctxt
->vctxt
.userData
= oldctxt
->vctxt
.userData
;
12736 ctxt
->vctxt
.flags
= oldctxt
->vctxt
.flags
;
12738 ctxt
->external
= oldctxt
->external
;
12739 if (ctxt
->dict
) xmlDictFree(ctxt
->dict
);
12740 ctxt
->dict
= oldctxt
->dict
;
12741 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
12742 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
12743 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
12744 ctxt
->dictNames
= oldctxt
->dictNames
;
12745 ctxt
->attsDefault
= oldctxt
->attsDefault
;
12746 ctxt
->attsSpecial
= oldctxt
->attsSpecial
;
12747 ctxt
->linenumbers
= oldctxt
->linenumbers
;
12748 ctxt
->record_info
= oldctxt
->record_info
;
12749 ctxt
->node_seq
.maximum
= oldctxt
->node_seq
.maximum
;
12750 ctxt
->node_seq
.length
= oldctxt
->node_seq
.length
;
12751 ctxt
->node_seq
.buffer
= oldctxt
->node_seq
.buffer
;
12754 * Doing validity checking on chunk without context
12755 * doesn't make sense
12757 ctxt
->_private
= NULL
;
12758 ctxt
->validate
= 0;
12759 ctxt
->external
= 2;
12760 ctxt
->loadsubset
= 0;
/*
 * Parse the entity content, then report leftover input and a node stack
 * that did not return to the pseudoroot.
 */
12763 xmlParseContent(ctxt
);
12765 if ((RAW
== '<') && (NXT(1) == '/')) {
12766 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
12767 } else if (RAW
!= 0) {
12768 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
12770 if (ctxt
->node
!= newDoc
->children
) {
12771 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
/* A failed parse is reported through ret and mirrored into oldctxt. */
12774 if (!ctxt
->wellFormed
) {
12775 ret
= (xmlParserErrors
)ctxt
->errNo
;
12776 if (oldctxt
!= NULL
) {
12777 oldctxt
->errNo
= ctxt
->errNo
;
12778 oldctxt
->wellFormed
= 0;
12779 xmlCopyError(&ctxt
->lastError
, &oldctxt
->lastError
);
12782 if (list
!= NULL
) {
12786 * Return the newly created nodeset after unlinking it from
12787 * they pseudo parent.
12789 cur
= newDoc
->children
->children
;
12791 while (cur
!= NULL
) {
12792 cur
->parent
= NULL
;
12795 newDoc
->children
->children
= NULL
;
12801 * Also record the size of the entity parsed
12803 if (ctxt
->input
!= NULL
&& oldctxt
!= NULL
) {
12804 unsigned long consumed
= ctxt
->input
->consumed
;
12806 xmlSaturatedAddSizeT(&consumed
, ctxt
->input
->cur
- ctxt
->input
->base
);
12808 xmlSaturatedAdd(&oldctxt
->sizeentities
, consumed
);
12809 xmlSaturatedAdd(&oldctxt
->sizeentities
, ctxt
->sizeentities
);
12811 xmlSaturatedAdd(&oldctxt
->sizeentcopy
, consumed
);
12812 xmlSaturatedAdd(&oldctxt
->sizeentcopy
, ctxt
->sizeentcopy
);
/*
 * Clear the table pointers shared with oldctxt and hand counters, validity
 * state and node_seq back to it before the new context is freed.
 */
12815 if (oldctxt
!= NULL
) {
12817 ctxt
->attsDefault
= NULL
;
12818 ctxt
->attsSpecial
= NULL
;
12819 oldctxt
->nbErrors
= ctxt
->nbErrors
;
12820 oldctxt
->nbWarnings
= ctxt
->nbWarnings
;
12821 oldctxt
->validate
= ctxt
->validate
;
12822 oldctxt
->valid
= ctxt
->valid
;
12823 oldctxt
->node_seq
.maximum
= ctxt
->node_seq
.maximum
;
12824 oldctxt
->node_seq
.length
= ctxt
->node_seq
.length
;
12825 oldctxt
->node_seq
.buffer
= ctxt
->node_seq
.buffer
;
12827 ctxt
->node_seq
.maximum
= 0;
12828 ctxt
->node_seq
.length
= 0;
12829 ctxt
->node_seq
.buffer
= NULL
;
12830 xmlFreeParserCtxt(ctxt
);
/* The subsets came from doc, so they are unhooked before xmlFreeDoc. */
12831 newDoc
->intSubset
= NULL
;
12832 newDoc
->extSubset
= NULL
;
12833 xmlFreeDoc(newDoc
);
12838 #ifdef LIBXML_SAX1_ENABLED
12840 * xmlParseExternalEntity:
12841 * @doc: the document the chunk pertains to
12842 * @sax: the SAX handler block (possibly NULL)
12843 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12844 * @depth: Used for loop detection, use 0
12845 * @URL: the URL for the entity to load
12846 * @ID: the System ID for the entity to load
12847 * @lst: the return value for the set of parsed nodes
12849 * Parse an external general entity
12850 * An external general parsed entity is well-formed if it matches the
12851 * production labeled extParsedEnt.
12853 * [78] extParsedEnt ::= TextDecl? content
12855 * Returns 0 if the entity is well formed, -1 in case of args problem and
12856 * the parser error code otherwise
/*
 * xmlParseExternalEntity: public front end that forwards its arguments to
 * xmlParseExternalEntityPrivate, passing NULL as the parent parser context.
 * The trailing arguments of the call are on a line this listing omits.
 */
12860 xmlParseExternalEntity(xmlDocPtr doc
, xmlSAXHandlerPtr sax
, void *user_data
,
12861 int depth
, const xmlChar
*URL
, const xmlChar
*ID
, xmlNodePtr
*lst
) {
12862 return(xmlParseExternalEntityPrivate(doc
, NULL
, sax
, user_data
, depth
, URL
,
12867 * xmlParseBalancedChunkMemory:
12868 * @doc: the document the chunk pertains to (must not be NULL)
12869 * @sax: the SAX handler block (possibly NULL)
12870 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12871 * @depth: Used for loop detection, use 0
12872 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12873 * @lst: the return value for the set of parsed nodes
12875 * Parse a well-balanced chunk of an XML document
12876 * called by the parser
12877 * The allowed sequence for the Well Balanced Chunk is the one defined by
12878 * the content production in the XML grammar:
12880 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12882 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12883 * the parser error code otherwise
/*
 * xmlParseBalancedChunkMemory: forwards every argument to
 * xmlParseBalancedChunkMemoryRecover with the recover flag set to 0 and
 * returns its result unchanged.
 */
12887 xmlParseBalancedChunkMemory(xmlDocPtr doc
, xmlSAXHandlerPtr sax
,
12888 void *user_data
, int depth
, const xmlChar
*string
, xmlNodePtr
*lst
) {
12889 return xmlParseBalancedChunkMemoryRecover( doc
, sax
, user_data
,
12890 depth
, string
, lst
, 0 );
12892 #endif /* LIBXML_SAX1_ENABLED */
12895 * xmlParseBalancedChunkMemoryInternal:
12896 * @oldctxt: the existing parsing context
12897 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12898 * @user_data: the user data field for the parser context
12899 * @lst: the return value for the set of parsed nodes
12902 * Parse a well-balanced chunk of an XML document
12903 * called by the parser
12904 * The allowed sequence for the Well Balanced Chunk is the one defined by
12905 * the content production in the XML grammar:
12907 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12909 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12910 * error code otherwise
12912 * In case recover is set to 1, the nodelist will not be empty even if
12913 * the parsed chunk is not well balanced.
/*
 * xmlParseBalancedChunkMemoryInternal: parse a string as balanced content on
 * behalf of an existing parser context.
 *
 * oldctxt:   parent context; it is dereferenced unconditionally, so it must
 *            not be NULL.
 * string:    zero terminated input; NULL yields XML_ERR_INTERNAL_ERROR.
 * user_data: stored as userData of the new context when non-NULL; the other
 *            visible assignment stores the new context itself (presumably
 *            in an else branch whose keyword line is not shown).
 * lst:       when non-NULL and ret is XML_ERR_OK, the nodes parsed under the
 *            temporary pseudoroot element are detached from it.
 *
 * Visible results: XML_ERR_ENTITY_LOOP when the depth limit trips,
 * XML_ERR_INTERNAL_ERROR on a NULL string or failed allocation,
 * XML_WAR_UNDECLARED_ENTITY when no context can be created, and the errNo of
 * the new context when the content is not well formed.
 *
 * NOTE(review): this listing omits several source lines (declarations,
 * closing braces, else keywords, the final return); remarks about branch
 * structure are inferred from the lines that are shown.
 */
12915 static xmlParserErrors
12916 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt
,
12917 const xmlChar
*string
, void *user_data
, xmlNodePtr
*lst
) {
12918 xmlParserCtxtPtr ctxt
;
12919 xmlDocPtr newDoc
= NULL
;
12920 xmlNodePtr newRoot
;
12921 xmlSAXHandlerPtr oldsax
= NULL
;
12922 xmlNodePtr content
= NULL
;
12923 xmlNodePtr last
= NULL
;
12925 xmlParserErrors ret
= XML_ERR_OK
;
/*
 * Depth above 40 is refused unless XML_PARSE_HUGE is set; depth above 100
 * is always refused.
 */
12930 if (((oldctxt
->depth
> 40) && ((oldctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
12931 (oldctxt
->depth
> 100)) {
12932 xmlFatalErrMsg(oldctxt
, XML_ERR_ENTITY_LOOP
,
12933 "Maximum entity nesting depth exceeded");
12934 return(XML_ERR_ENTITY_LOOP
);
12940 if (string
== NULL
)
12941 return(XML_ERR_INTERNAL_ERROR
);
12943 size
= xmlStrlen(string
);
12945 ctxt
= xmlCreateMemoryParserCtxt((char *) string
, size
);
12946 if (ctxt
== NULL
) return(XML_WAR_UNDECLARED_ENTITY
);
12947 ctxt
->nbErrors
= oldctxt
->nbErrors
;
12948 ctxt
->nbWarnings
= oldctxt
->nbWarnings
;
12949 if (user_data
!= NULL
)
12950 ctxt
->userData
= user_data
;
12952 ctxt
->userData
= ctxt
;
/*
 * Replace the dictionary of the new context with the one of the parent and
 * look the well known names up again in it.
 */
12953 if (ctxt
->dict
!= NULL
) xmlDictFree(ctxt
->dict
);
12954 ctxt
->dict
= oldctxt
->dict
;
12955 ctxt
->input_id
= oldctxt
->input_id
;
12956 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
12957 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
12958 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
12961 /* propagate namespaces down the entity */
12962 for (i
= 0;i
< oldctxt
->nsNr
;i
+= 2) {
12963 nsPush(ctxt
, oldctxt
->nsTab
[i
], oldctxt
->nsTab
[i
+1]);
/*
 * Borrow the SAX handler of the parent. The handler the context was created
 * with is kept in oldsax and put back before the context is freed.
 */
12967 oldsax
= ctxt
->sax
;
12968 ctxt
->sax
= oldctxt
->sax
;
12969 xmlDetectSAX2(ctxt
);
12970 ctxt
->replaceEntities
= oldctxt
->replaceEntities
;
12971 ctxt
->options
= oldctxt
->options
;
12973 ctxt
->_private
= oldctxt
->_private
;
/*
 * Without a parent document a temporary one is created; otherwise the parent
 * document is reused and its current children are remembered in content and
 * last.
 */
12974 if (oldctxt
->myDoc
== NULL
) {
12975 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
12976 if (newDoc
== NULL
) {
12977 ctxt
->sax
= oldsax
;
12979 xmlFreeParserCtxt(ctxt
);
12980 return(XML_ERR_INTERNAL_ERROR
);
12982 newDoc
->properties
= XML_DOC_INTERNAL
;
12983 newDoc
->dict
= ctxt
->dict
;
12984 xmlDictReference(newDoc
->dict
);
12985 ctxt
->myDoc
= newDoc
;
12987 ctxt
->myDoc
= oldctxt
->myDoc
;
12988 content
= ctxt
->myDoc
->children
;
12989 last
= ctxt
->myDoc
->last
;
12991 newRoot
= xmlNewDocNode(ctxt
->myDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
12992 if (newRoot
== NULL
) {
12993 ctxt
->sax
= oldsax
;
12995 xmlFreeParserCtxt(ctxt
);
12996 if (newDoc
!= NULL
) {
12997 xmlFreeDoc(newDoc
);
12999 return(XML_ERR_INTERNAL_ERROR
);
/*
 * The pseudoroot becomes the only child of the document while parsing; the
 * saved content and last pointers are written back further down.
 */
13001 ctxt
->myDoc
->children
= NULL
;
13002 ctxt
->myDoc
->last
= NULL
;
13003 xmlAddChild((xmlNodePtr
) ctxt
->myDoc
, newRoot
);
13004 nodePush(ctxt
, ctxt
->myDoc
->children
);
13005 ctxt
->instate
= XML_PARSER_CONTENT
;
13006 ctxt
->depth
= oldctxt
->depth
;
/* Validation is switched off for the chunk itself. */
13008 ctxt
->validate
= 0;
13009 ctxt
->loadsubset
= oldctxt
->loadsubset
;
13010 if ((oldctxt
->validate
) || (oldctxt
->replaceEntities
!= 0)) {
13012 * ID/IDREF registration will be done in xmlValidateElement below
13014 ctxt
->loadsubset
|= XML_SKIP_IDS
;
13016 ctxt
->dictNames
= oldctxt
->dictNames
;
13017 ctxt
->attsDefault
= oldctxt
->attsDefault
;
13018 ctxt
->attsSpecial
= oldctxt
->attsSpecial
;
/*
 * Parse, then report leftover input and a node stack that did not return to
 * the pseudoroot.
 */
13020 xmlParseContent(ctxt
);
13021 if ((RAW
== '<') && (NXT(1) == '/')) {
13022 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13023 } else if (RAW
!= 0) {
13024 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
13026 if (ctxt
->node
!= ctxt
->myDoc
->children
) {
13027 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13030 if (!ctxt
->wellFormed
) {
13031 ret
= (xmlParserErrors
)ctxt
->errNo
;
13032 oldctxt
->errNo
= ctxt
->errNo
;
13033 oldctxt
->wellFormed
= 0;
13034 xmlCopyError(&ctxt
->lastError
, &oldctxt
->lastError
);
13039 if ((lst
!= NULL
) && (ret
== XML_ERR_OK
)) {
13043 * Return the newly created nodeset after unlinking it from
13044 * they pseudo parent.
13046 cur
= ctxt
->myDoc
->children
->children
;
13048 while (cur
!= NULL
) {
13049 #ifdef LIBXML_VALID_ENABLED
13050 if ((oldctxt
->validate
) && (oldctxt
->wellFormed
) &&
13051 (oldctxt
->myDoc
) && (oldctxt
->myDoc
->intSubset
) &&
13052 (cur
->type
== XML_ELEMENT_NODE
)) {
13053 oldctxt
->valid
&= xmlValidateElement(&oldctxt
->vctxt
,
13054 oldctxt
->myDoc
, cur
);
13056 #endif /* LIBXML_VALID_ENABLED */
13057 cur
->parent
= NULL
;
13060 ctxt
->myDoc
->children
->children
= NULL
;
/* Drop the pseudoroot and put the saved children of the document back. */
13062 if (ctxt
->myDoc
!= NULL
) {
13063 xmlFreeNode(ctxt
->myDoc
->children
);
13064 ctxt
->myDoc
->children
= content
;
13065 ctxt
->myDoc
->last
= last
;
13069 * Also record the size of the entity parsed
/*
 * NOTE(review): oldctxt has already been dereferenced many times above, so
 * the NULL test in the condition below cannot fail.
 */
13071 if (ctxt
->input
!= NULL
&& oldctxt
!= NULL
) {
13072 unsigned long consumed
= ctxt
->input
->consumed
;
13074 xmlSaturatedAddSizeT(&consumed
, ctxt
->input
->cur
- ctxt
->input
->base
);
13076 xmlSaturatedAdd(&oldctxt
->sizeentcopy
, consumed
);
13077 xmlSaturatedAdd(&oldctxt
->sizeentcopy
, ctxt
->sizeentcopy
);
/*
 * Hand the counters back, restore the original SAX handler and clear the
 * table pointers shared with oldctxt before the context is freed.
 */
13080 oldctxt
->nbErrors
= ctxt
->nbErrors
;
13081 oldctxt
->nbWarnings
= ctxt
->nbWarnings
;
13082 ctxt
->sax
= oldsax
;
13084 ctxt
->attsDefault
= NULL
;
13085 ctxt
->attsSpecial
= NULL
;
13086 xmlFreeParserCtxt(ctxt
);
13087 if (newDoc
!= NULL
) {
13088 xmlFreeDoc(newDoc
);
13095 * xmlParseInNodeContext:
13096 * @node: the context node
13097 * @data: the input string
13098 * @datalen: the input string length in bytes
13099 * @options: a combination of xmlParserOption
13100 * @lst: the return value for the set of parsed nodes
13102 * Parse a well-balanced chunk of an XML document
13103 * within the context (DTD, namespaces, etc ...) of the given node.
13105 * The allowed sequence for the data is a Well Balanced Chunk defined by
13106 * the content production in the XML grammar:
13108 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13110 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13111 * error code otherwise
/*
 * xmlParseInNodeContext: parse a buffer as content located at a given node.
 *
 * node:    context node; must be one of the node types accepted by the
 *          switch below.
 * data:    input buffer, must not be NULL.
 * datalen: length of data in bytes, must not be negative.
 * options: parser options, extended by this function in some cases.
 * lst:     must not be NULL; on a failed parse the list it points to is
 *          released with xmlFreeNodeList.
 *
 * Visible results: XML_ERR_INTERNAL_ERROR for bad arguments or an unusable
 * node, XML_ERR_NO_MEMORY when allocation fails,
 * XML_ERR_UNSUPPORTED_ENCODING when no handler exists for the document
 * encoding, and the errNo of the context (or XML_ERR_INTERNAL_ERROR when
 * errNo is 0) when the content is not well formed.
 *
 * NOTE(review): this listing omits several source lines (declarations,
 * closing braces, else keywords, assignments to *lst, the final return);
 * remarks about branch structure are inferred from the lines that are shown.
 */
13114 xmlParseInNodeContext(xmlNodePtr node
, const char *data
, int datalen
,
13115 int options
, xmlNodePtr
*lst
) {
13117 xmlParserCtxtPtr ctxt
;
13118 xmlDocPtr doc
= NULL
;
13119 xmlNodePtr fake
, cur
;
13122 xmlParserErrors ret
= XML_ERR_OK
;
13125 * check all input parameters, grab the document
13127 if ((lst
== NULL
) || (node
== NULL
) || (data
== NULL
) || (datalen
< 0))
13128 return(XML_ERR_INTERNAL_ERROR
);
13129 switch (node
->type
) {
13130 case XML_ELEMENT_NODE
:
13131 case XML_ATTRIBUTE_NODE
:
13132 case XML_TEXT_NODE
:
13133 case XML_CDATA_SECTION_NODE
:
13134 case XML_ENTITY_REF_NODE
:
13136 case XML_COMMENT_NODE
:
13137 case XML_DOCUMENT_NODE
:
13138 case XML_HTML_DOCUMENT_NODE
:
13141 return(XML_ERR_INTERNAL_ERROR
);
/* Climb to the nearest enclosing element or document node. */
13144 while ((node
!= NULL
) && (node
->type
!= XML_ELEMENT_NODE
) &&
13145 (node
->type
!= XML_DOCUMENT_NODE
) &&
13146 (node
->type
!= XML_HTML_DOCUMENT_NODE
))
13147 node
= node
->parent
;
13149 return(XML_ERR_INTERNAL_ERROR
);
13150 if (node
->type
== XML_ELEMENT_NODE
)
13153 doc
= (xmlDocPtr
) node
;
13155 return(XML_ERR_INTERNAL_ERROR
);
13158 * allocate a context and set-up everything not related to the
13159 * node position in the tree
/* The document type selects between an XML and an HTML memory context. */
13161 if (doc
->type
== XML_DOCUMENT_NODE
)
13162 ctxt
= xmlCreateMemoryParserCtxt((char *) data
, datalen
);
13163 #ifdef LIBXML_HTML_ENABLED
13164 else if (doc
->type
== XML_HTML_DOCUMENT_NODE
) {
13165 ctxt
= htmlCreateMemoryParserCtxt((char *) data
, datalen
);
13167 * When parsing in context, it makes no sense to add implied
13168 * elements like html/body/etc...
13170 options
|= HTML_PARSE_NOIMPLIED
;
13174 return(XML_ERR_INTERNAL_ERROR
);
13177 return(XML_ERR_NO_MEMORY
);
13180 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13181 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13182 * we must wait until the last moment to free the original one.
13184 if (doc
->dict
!= NULL
) {
13185 if (ctxt
->dict
!= NULL
)
13186 xmlDictFree(ctxt
->dict
);
13187 ctxt
->dict
= doc
->dict
;
13189 options
|= XML_PARSE_NODICT
;
/*
 * Parse the data in the encoding recorded on the document.
 * NOTE(review): no release of ctxt is visible before the
 * XML_ERR_UNSUPPORTED_ENCODING return below; this looks like a leak of the
 * parser context on that path. Confirm against the complete file.
 */
13191 if (doc
->encoding
!= NULL
) {
13192 xmlCharEncodingHandlerPtr hdlr
;
13194 if (ctxt
->encoding
!= NULL
)
13195 xmlFree((xmlChar
*) ctxt
->encoding
);
13196 ctxt
->encoding
= xmlStrdup((const xmlChar
*) doc
->encoding
);
13198 hdlr
= xmlFindCharEncodingHandler((const char *) doc
->encoding
);
13199 if (hdlr
!= NULL
) {
13200 xmlSwitchToEncoding(ctxt
, hdlr
);
13202 return(XML_ERR_UNSUPPORTED_ENCODING
);
13206 xmlCtxtUseOptionsInternal(ctxt
, options
, NULL
);
13207 xmlDetectSAX2(ctxt
);
13209 /* parsing in context, i.e. as within existing content */
13210 ctxt
->input_id
= 2;
13211 ctxt
->instate
= XML_PARSER_CONTENT
;
/*
 * A placeholder comment node is appended to node; it is unlinked again
 * with xmlUnlinkNode once parsing is over.
 */
13213 fake
= xmlNewDocComment(node
->doc
, NULL
);
13214 if (fake
== NULL
) {
13215 xmlFreeParserCtxt(ctxt
);
13216 return(XML_ERR_NO_MEMORY
);
13218 xmlAddChild(node
, fake
);
13220 if (node
->type
== XML_ELEMENT_NODE
) {
13221 nodePush(ctxt
, node
);
13223 * initialize the SAX2 namespaces stack
/* Push each declared namespace whose prefix is not already known. */
13226 while ((cur
!= NULL
) && (cur
->type
== XML_ELEMENT_NODE
)) {
13227 xmlNsPtr ns
= cur
->nsDef
;
13228 const xmlChar
*iprefix
, *ihref
;
13230 while (ns
!= NULL
) {
13232 iprefix
= xmlDictLookup(ctxt
->dict
, ns
->prefix
, -1);
13233 ihref
= xmlDictLookup(ctxt
->dict
, ns
->href
, -1);
13235 iprefix
= ns
->prefix
;
13239 if (xmlGetNamespace(ctxt
, iprefix
) == NULL
) {
13240 nsPush(ctxt
, iprefix
, ihref
);
13249 if ((ctxt
->validate
) || (ctxt
->replaceEntities
!= 0)) {
13251 * ID/IDREF registration will be done in xmlValidateElement below
13253 ctxt
->loadsubset
|= XML_SKIP_IDS
;
13256 #ifdef LIBXML_HTML_ENABLED
13257 if (doc
->type
== XML_HTML_DOCUMENT_NODE
)
13258 __htmlParseContent(ctxt
);
13261 xmlParseContent(ctxt
);
/*
 * Report leftover input and a node stack that did not return to the
 * context node.
 */
13264 if ((RAW
== '<') && (NXT(1) == '/')) {
13265 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13266 } else if (RAW
!= 0) {
13267 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
13269 if ((ctxt
->node
!= NULL
) && (ctxt
->node
!= node
)) {
13270 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13271 ctxt
->wellFormed
= 0;
/* A failed parse without a recorded errNo maps to XML_ERR_INTERNAL_ERROR. */
13274 if (!ctxt
->wellFormed
) {
13275 if (ctxt
->errNo
== 0)
13276 ret
= XML_ERR_INTERNAL_ERROR
;
13278 ret
= (xmlParserErrors
)ctxt
->errNo
;
13284 * Return the newly created nodeset after unlinking it from
13285 * the pseudo sibling.
13298 while (cur
!= NULL
) {
13299 cur
->parent
= NULL
;
13303 xmlUnlinkNode(fake
);
/* On failure the node list collected in *lst is released. */
13307 if (ret
!= XML_ERR_OK
) {
13308 xmlFreeNodeList(*lst
);
13312 if (doc
->dict
!= NULL
)
13314 xmlFreeParserCtxt(ctxt
);
13318 return(XML_ERR_INTERNAL_ERROR
);
13322 #ifdef LIBXML_SAX1_ENABLED
13324 * xmlParseBalancedChunkMemoryRecover:
13325 * @doc: the document the chunk pertains to (must not be NULL)
13326 * @sax: the SAX handler block (possibly NULL)
13327 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13328 * @depth: Used for loop detection, use 0
13329 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13330 * @lst: the return value for the set of parsed nodes
13331 * @recover: return nodes even if the data is broken (use 0)
13334 * Parse a well-balanced chunk of an XML document
13335 * called by the parser
13336 * The allowed sequence for the Well Balanced Chunk is the one defined by
13337 * the content production in the XML grammar:
13339 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13341 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13342 * the parser error code otherwise
13344 * In case recover is set to 1, the nodelist will not be empty even if
13345 * the parsed chunk is not well balanced, assuming the parsing succeeded to
/*
 * xmlParseBalancedChunkMemoryRecover: parse a zero terminated string as
 * balanced content in a fresh memory parser context and collect the nodes.
 *
 * doc:       document the chunk pertains to; its DTD subsets, dictionary
 *            and oldNs list are shared with a temporary document.
 * sax, user_data: SAX handler block and callback data; a non-NULL user_data
 *            replaces the default userData, which is the context itself.
 * depth:     stored in the new context.
 * string:    input text.
 * lst:       when non-NULL, and either ret is 0 or recover is 1, the parsed
 *            nodes are moved to doc with xmlSetTreeDoc and detached from the
 *            temporary pseudoroot element.
 *
 * After parsing, leftover input or an unbalanced node stack is reported via
 * xmlFatalErr. During cleanup the original SAX handler is put back and the
 * shared intSubset, extSubset and oldNs pointers are cleared before the
 * temporary document is freed.
 *
 * NOTE(review): this listing omits several source lines (the last parameter
 * of the signature, declarations, closing braces, else keywords, several
 * returns); remarks about branch structure are inferred from the lines that
 * are shown.
 */
13349 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc
, xmlSAXHandlerPtr sax
,
13350 void *user_data
, int depth
, const xmlChar
*string
, xmlNodePtr
*lst
,
13352 xmlParserCtxtPtr ctxt
;
13354 xmlSAXHandlerPtr oldsax
= NULL
;
13355 xmlNodePtr content
, newRoot
;
13360 return(XML_ERR_ENTITY_LOOP
);
13366 if (string
== NULL
)
13369 size
= xmlStrlen(string
);
13371 ctxt
= xmlCreateMemoryParserCtxt((char *) string
, size
);
13372 if (ctxt
== NULL
) return(-1);
13373 ctxt
->userData
= ctxt
;
/* Remember the handler the context was created with; it is restored later. */
13375 oldsax
= ctxt
->sax
;
13377 if (user_data
!= NULL
)
13378 ctxt
->userData
= user_data
;
13380 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
13381 if (newDoc
== NULL
) {
13382 xmlFreeParserCtxt(ctxt
);
13385 newDoc
->properties
= XML_DOC_INTERNAL
;
/*
 * When doc has a dictionary the context drops its own and takes a reference
 * on the one of doc. The XML_PARSE_NODICT call further down is presumably
 * the alternative branch (its else line is not shown).
 */
13386 if ((doc
!= NULL
) && (doc
->dict
!= NULL
)) {
13387 xmlDictFree(ctxt
->dict
);
13388 ctxt
->dict
= doc
->dict
;
13389 xmlDictReference(ctxt
->dict
);
13390 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
13391 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
13392 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
13393 ctxt
->dictNames
= 1;
13395 xmlCtxtUseOptionsInternal(ctxt
, XML_PARSE_NODICT
, NULL
);
13397 /* doc == NULL is only supported for historic reasons */
13399 newDoc
->intSubset
= doc
->intSubset
;
13400 newDoc
->extSubset
= doc
->extSubset
;
/*
 * If the pseudoroot cannot be created, the original handler is restored and
 * the shared subsets are cleared before the temporary document is freed.
 */
13402 newRoot
= xmlNewDocNode(newDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
13403 if (newRoot
== NULL
) {
13405 ctxt
->sax
= oldsax
;
13406 xmlFreeParserCtxt(ctxt
);
13407 newDoc
->intSubset
= NULL
;
13408 newDoc
->extSubset
= NULL
;
13409 xmlFreeDoc(newDoc
);
13412 xmlAddChild((xmlNodePtr
) newDoc
, newRoot
);
13413 nodePush(ctxt
, newRoot
);
13414 /* doc == NULL is only supported for historic reasons */
13416 ctxt
->myDoc
= newDoc
;
13418 ctxt
->myDoc
= newDoc
;
13419 newDoc
->children
->doc
= doc
;
13420 /* Ensure that doc has XML spec namespace */
13421 xmlSearchNsByHref(doc
, (xmlNodePtr
)doc
, XML_XML_NAMESPACE
);
13422 newDoc
->oldNs
= doc
->oldNs
;
13424 ctxt
->instate
= XML_PARSER_CONTENT
;
13425 ctxt
->input_id
= 2;
13426 ctxt
->depth
= depth
;
13429 * Doing validity checking on chunk doesn't make sense
13431 ctxt
->validate
= 0;
13432 ctxt
->loadsubset
= 0;
13433 xmlDetectSAX2(ctxt
);
13435 if ( doc
!= NULL
){
13436 content
= doc
->children
;
13437 doc
->children
= NULL
;
13438 xmlParseContent(ctxt
);
13439 doc
->children
= content
;
13442 xmlParseContent(ctxt
);
13444 if ((RAW
== '<') && (NXT(1) == '/')) {
13445 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13446 } else if (RAW
!= 0) {
13447 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
13449 if (ctxt
->node
!= newDoc
->children
) {
13450 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13453 if (!ctxt
->wellFormed
) {
13454 if (ctxt
->errNo
== 0)
13462 if ((lst
!= NULL
) && ((ret
== 0) || (recover
== 1))) {
13466 * Return the newly created nodeset after unlinking it from
13467 * they pseudo parent.
13469 cur
= newDoc
->children
->children
;
13471 while (cur
!= NULL
) {
13472 xmlSetTreeDoc(cur
, doc
);
13473 cur
->parent
= NULL
;
13476 newDoc
->children
->children
= NULL
;
13480 ctxt
->sax
= oldsax
;
13481 xmlFreeParserCtxt(ctxt
);
13482 newDoc
->intSubset
= NULL
;
13483 newDoc
->extSubset
= NULL
;
13484 /* This leaks the namespace list if doc == NULL */
13485 newDoc
->oldNs
= NULL
;
13486 xmlFreeDoc(newDoc
);
13492 * xmlSAXParseEntity:
13493 * @sax: the SAX handler block
13494 * @filename: the filename
13496 * DEPRECATED: Don't use.
13498 * Parse an XML external entity out of context and build a tree.
13499 * It uses the given SAX function block to handle the parsing callbacks.
13500 * If sax is NULL, fallback to the default DOM tree building routines.
13502 * [78] extParsedEnt ::= TextDecl? content
13504 * This corresponds to a "Well Balanced" chunk
13506 * Returns the resulting document tree
/*
 * xmlSAXParseEntity: parse the file named by filename as an external parsed
 * entity, using a context from xmlCreateFileParserCtxt and the parser entry
 * point xmlParseExtParsedEnt.
 *
 * Visible behaviour: the SAX block allocated with the context is released
 * with xmlFree and userData is reset to NULL before parsing; afterwards the
 * document held by the context is freed on one path and the context itself
 * is always freed.
 *
 * NOTE(review): this listing omits several source lines (declarations, the
 * handling of the sax argument, the assignment of the result, else keywords
 * and the return), so which path frees the document cannot be read off the
 * shown lines. Presumably it is the path where the parse was not well
 * formed; confirm against the complete file.
 */
13510 xmlSAXParseEntity(xmlSAXHandlerPtr sax
, const char *filename
) {
13512 xmlParserCtxtPtr ctxt
;
13514 ctxt
= xmlCreateFileParserCtxt(filename
);
13515 if (ctxt
== NULL
) {
13519 if (ctxt
->sax
!= NULL
)
13520 xmlFree(ctxt
->sax
);
13522 ctxt
->userData
= NULL
;
13525 xmlParseExtParsedEnt(ctxt
);
13527 if (ctxt
->wellFormed
)
13531 xmlFreeDoc(ctxt
->myDoc
);
13532 ctxt
->myDoc
= NULL
;
13536 xmlFreeParserCtxt(ctxt
);
13543 * @filename: the filename
13545 * parse an XML external entity out of context and build a tree.
13547 * [78] extParsedEnt ::= TextDecl? content
13549 * This corresponds to a "Well Balanced" chunk
13551 * Returns the resulting document tree
/*
 * xmlParseEntity: convenience wrapper that forwards filename to
 * xmlSAXParseEntity with a NULL SAX handler and returns its result unchanged.
 */
13555 xmlParseEntity(const char *filename
) {
13556 return(xmlSAXParseEntity(NULL
, filename
));
13558 #endif /* LIBXML_SAX1_ENABLED */
13561 * xmlCreateEntityParserCtxtInternal:
13562 * @URL: the entity URL
13563 * @ID: the entity PUBLIC ID
13564 * @base: a possible base for the target URI
13565 * @pctx: parser context used to set options on new context
13567 * Create a parser context for an external entity
13568 * Automatic support for ZLIB/Compress compressed document is provided
13569 * by default if found at compile-time.
13571 * Returns the new parser context or NULL
/*
 * xmlCreateEntityParserCtxtInternal: create a parser context whose input is
 * the external entity identified by URL and ID.
 *
 * sax, userData: passed to xmlNewSAXParserCtxt.
 * URL:  entity URL; a URL that compares equal to a single dash is rewritten
 *       so that it names a file in the current directory instead of stdin.
 * ID:   entity identifier handed to xmlLoadExternalEntity.
 * base: optional base combined with URL through xmlBuildURI.
 * pctx: optional parent context whose options, _private and input_id are
 *       copied into the new context.
 *
 * Two parallel load sequences are visible, one loading from URL and one
 * from the built uri. Each frees the context when xmlLoadExternalEntity
 * fails, otherwise pushes the input and fills ctxt->directory from
 * xmlParserGetDirectory when it is still unset.
 *
 * NOTE(review): the condition selecting between the two sequences, the
 * declaration and release of uri, and the return statements are on lines
 * this listing omits. Presumably the URL sequence runs when xmlBuildURI
 * returns NULL; confirm against the complete file.
 */
13573 static xmlParserCtxtPtr
13574 xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax
, void *userData
,
13575 const xmlChar
*URL
, const xmlChar
*ID
, const xmlChar
*base
,
13576 xmlParserCtxtPtr pctx
) {
13577 xmlParserCtxtPtr ctxt
;
13578 xmlParserInputPtr inputStream
;
13579 char *directory
= NULL
;
13582 ctxt
= xmlNewSAXParserCtxt(sax
, userData
);
13583 if (ctxt
== NULL
) {
/* Inherit the settings of the parent context, when there is one. */
13587 if (pctx
!= NULL
) {
13588 ctxt
->options
= pctx
->options
;
13589 ctxt
->_private
= pctx
->_private
;
13590 ctxt
->input_id
= pctx
->input_id
;
13593 /* Don't read from stdin. */
13594 if (xmlStrcmp(URL
, BAD_CAST
"-") == 0)
13595 URL
= BAD_CAST
"./-";
13597 uri
= xmlBuildURI(URL
, base
);
/* First sequence: load straight from URL. */
13600 inputStream
= xmlLoadExternalEntity((char *)URL
, (char *)ID
, ctxt
);
13601 if (inputStream
== NULL
) {
13602 xmlFreeParserCtxt(ctxt
);
13606 inputPush(ctxt
, inputStream
);
13608 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
13609 directory
= xmlParserGetDirectory((char *)URL
);
13610 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
13611 ctxt
->directory
= directory
;
/* Second sequence: load from the URI built out of URL and base. */
13613 inputStream
= xmlLoadExternalEntity((char *)uri
, (char *)ID
, ctxt
);
13614 if (inputStream
== NULL
) {
13616 xmlFreeParserCtxt(ctxt
);
13620 inputPush(ctxt
, inputStream
);
13622 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
13623 directory
= xmlParserGetDirectory((char *)uri
);
13624 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
13625 ctxt
->directory
= directory
;
13632 * xmlCreateEntityParserCtxt:
13633 * @URL: the entity URL
13634 * @ID: the entity PUBLIC ID
13635 * @base: a possible base for the target URI
13637 * Create a parser context for an external entity
13638 * Automatic support for ZLIB/Compress compressed document is provided
13639 * by default if found at compile-time.
13641 * Returns the new parser context or NULL
/*
 * xmlCreateEntityParserCtxt: public front end that forwards URL, ID and base
 * to xmlCreateEntityParserCtxtInternal with no SAX handler, no user data and
 * no parent context, and returns its result unchanged.
 */
13644 xmlCreateEntityParserCtxt(const xmlChar
*URL
, const xmlChar
*ID
,
13645 const xmlChar
*base
) {
13646 return xmlCreateEntityParserCtxtInternal(NULL
, NULL
, URL
, ID
, base
, NULL
);
13650 /************************************************************************
13652 * Front ends when parsing from a file *
13654 ************************************************************************/
13657 * xmlCreateURLParserCtxt:
13658 * @filename: the filename or URL
13659 * @options: a combination of xmlParserOption
13661 * Create a parser context for a file or URL content.
13662 * Automatic support for ZLIB/Compress compressed document is provided
13663 * by default if found at compile-time and for file accesses
13665 * Returns the new parser context or NULL
/*
 * xmlCreateURLParserCtxt: create a parser context whose input is the file
 * or URL named by filename.
 *
 * filename: handed to xmlLoadExternalEntity and xmlParserGetDirectory.
 * options:  parser options applied with xmlCtxtUseOptionsInternal.
 *
 * A failed context allocation is reported through xmlErrMemory. When the
 * input cannot be loaded the context is freed. Otherwise the input is
 * pushed and ctxt->directory is filled in when it is still unset.
 *
 * NOTE(review): the opening brace, the return statements and some closing
 * braces are on lines this listing omits; the failure paths presumably
 * return NULL and the success path presumably returns ctxt.
 */
13668 xmlCreateURLParserCtxt(const char *filename
, int options
)
13670 xmlParserCtxtPtr ctxt
;
13671 xmlParserInputPtr inputStream
;
13672 char *directory
= NULL
;
13674 ctxt
= xmlNewParserCtxt();
13675 if (ctxt
== NULL
) {
13676 xmlErrMemory(NULL
, "cannot allocate parser context");
/* Line number tracking is switched on for contexts created here. */
13681 xmlCtxtUseOptionsInternal(ctxt
, options
, NULL
);
13682 ctxt
->linenumbers
= 1;
13684 inputStream
= xmlLoadExternalEntity(filename
, NULL
, ctxt
);
13685 if (inputStream
== NULL
) {
13686 xmlFreeParserCtxt(ctxt
);
13690 inputPush(ctxt
, inputStream
);
/* Derive the directory from filename only when the context has none yet. */
13691 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
13692 directory
= xmlParserGetDirectory(filename
);
13693 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
13694 ctxt
->directory
= directory
;
13700 * xmlCreateFileParserCtxt:
13701 * @filename: the filename
13703 * Create a parser context for a file content.
13704 * Automatic support for ZLIB/Compress compressed document is provided
13705 * by default if found at compile-time.
13707 * Returns the new parser context or NULL
13710 xmlCreateFileParserCtxt(const char *filename
)
13712 return(xmlCreateURLParserCtxt(filename
, 0));
13715 #ifdef LIBXML_SAX1_ENABLED
13717 * xmlSAXParseFileWithData:
13718 * @sax: the SAX handler block
13719 * @filename: the filename
13720 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13722 * @data: the userdata
13724 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13726 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13727 * compressed document is provided by default if found at compile-time.
13728 * It use the given SAX function block to handle the parsing callback.
13729 * If sax is NULL, fallback to the default DOM tree building routines.
13731 * User data (void *) is stored within the parser context in the
13732 * context's _private member, so it is available nearly everywhere in libxml
13734 * Returns the resulting document tree
13738 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax
, const char *filename
,
13739 int recovery
, void *data
) {
13741 xmlParserCtxtPtr ctxt
;
13745 ctxt
= xmlCreateFileParserCtxt(filename
);
13746 if (ctxt
== NULL
) {
13750 if (ctxt
->sax
!= NULL
)
13751 xmlFree(ctxt
->sax
);
13754 xmlDetectSAX2(ctxt
);
13756 ctxt
->_private
= data
;
13759 if (ctxt
->directory
== NULL
)
13760 ctxt
->directory
= xmlParserGetDirectory(filename
);
13762 ctxt
->recovery
= recovery
;
13764 xmlParseDocument(ctxt
);
13766 if ((ctxt
->wellFormed
) || recovery
) {
13768 if ((ret
!= NULL
) && (ctxt
->input
->buf
!= NULL
)) {
13769 if (ctxt
->input
->buf
->compressed
> 0)
13770 ret
->compression
= 9;
13772 ret
->compression
= ctxt
->input
->buf
->compressed
;
13777 xmlFreeDoc(ctxt
->myDoc
);
13778 ctxt
->myDoc
= NULL
;
13782 xmlFreeParserCtxt(ctxt
);
13789 * @sax: the SAX handler block
13790 * @filename: the filename
13791 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13794 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13796 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13797 * compressed document is provided by default if found at compile-time.
13798 * It use the given SAX function block to handle the parsing callback.
13799 * If sax is NULL, fallback to the default DOM tree building routines.
13801 * Returns the resulting document tree
13805 xmlSAXParseFile(xmlSAXHandlerPtr sax
, const char *filename
,
13807 return(xmlSAXParseFileWithData(sax
,filename
,recovery
,NULL
));
13812 * @cur: a pointer to an array of xmlChar
13814 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
13816 * parse an XML in-memory document and build a tree.
13817 * In the case the document is not Well Formed, a attempt to build a
13818 * tree is tried anyway
13820 * Returns the resulting document tree or NULL in case of failure
13824 xmlRecoverDoc(const xmlChar
*cur
) {
13825 return(xmlSAXParseDoc(NULL
, cur
, 1));
13830 * @filename: the filename
13832 * DEPRECATED: Use xmlReadFile.
13834 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13835 * compressed document is provided by default if found at compile-time.
13837 * Returns the resulting document tree if the file was wellformed,
13842 xmlParseFile(const char *filename
) {
13843 return(xmlSAXParseFile(NULL
, filename
, 0));
13848 * @filename: the filename
13850 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
13852 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13853 * compressed document is provided by default if found at compile-time.
13854 * In the case the document is not Well Formed, it attempts to build
13857 * Returns the resulting document tree or NULL in case of failure
13861 xmlRecoverFile(const char *filename
) {
13862 return(xmlSAXParseFile(NULL
, filename
, 1));
13867 * xmlSetupParserForBuffer:
13868 * @ctxt: an XML parser context
13869 * @buffer: a xmlChar * buffer
13870 * @filename: a file name
13872 * DEPRECATED: Don't use.
13874 * Setup the parser context to parse a new buffer; Clears any prior
13875 * contents from the parser context. The buffer parameter must not be
13876 * NULL, but the filename parameter can be
13879 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt
, const xmlChar
* buffer
,
13880 const char* filename
)
13882 xmlParserInputPtr input
;
13884 if ((ctxt
== NULL
) || (buffer
== NULL
))
13887 input
= xmlNewInputStream(ctxt
);
13888 if (input
== NULL
) {
13889 xmlErrMemory(NULL
, "parsing new buffer: out of memory\n");
13890 xmlClearParserCtxt(ctxt
);
13894 xmlClearParserCtxt(ctxt
);
13895 if (filename
!= NULL
)
13896 input
->filename
= (char *) xmlCanonicPath((const xmlChar
*)filename
);
13897 input
->base
= buffer
;
13898 input
->cur
= buffer
;
13899 input
->end
= &buffer
[xmlStrlen(buffer
)];
13900 inputPush(ctxt
, input
);
13904 * xmlSAXUserParseFile:
13905 * @sax: a SAX handler
13906 * @user_data: The user data returned on SAX callbacks
13907 * @filename: a file name
13909 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13911 * parse an XML file and call the given SAX handler routines.
13912 * Automatic support for ZLIB/Compress compressed document is provided
13914 * Returns 0 in case of success or a error number otherwise
13917 xmlSAXUserParseFile(xmlSAXHandlerPtr sax
, void *user_data
,
13918 const char *filename
) {
13920 xmlParserCtxtPtr ctxt
;
13922 ctxt
= xmlCreateFileParserCtxt(filename
);
13923 if (ctxt
== NULL
) return -1;
13924 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
13925 xmlFree(ctxt
->sax
);
13927 xmlDetectSAX2(ctxt
);
13929 if (user_data
!= NULL
)
13930 ctxt
->userData
= user_data
;
13932 xmlParseDocument(ctxt
);
13934 if (ctxt
->wellFormed
)
13937 if (ctxt
->errNo
!= 0)
13944 if (ctxt
->myDoc
!= NULL
) {
13945 xmlFreeDoc(ctxt
->myDoc
);
13946 ctxt
->myDoc
= NULL
;
13948 xmlFreeParserCtxt(ctxt
);
13952 #endif /* LIBXML_SAX1_ENABLED */
13954 /************************************************************************
13956 * Front ends when parsing from memory *
13958 ************************************************************************/
13961 * xmlCreateMemoryParserCtxt:
13962 * @buffer: a pointer to a char array
13963 * @size: the size of the array
13965 * Create a parser context for an XML in-memory document.
13967 * Returns the new parser context or NULL
13970 xmlCreateMemoryParserCtxt(const char *buffer
, int size
) {
13971 xmlParserCtxtPtr ctxt
;
13972 xmlParserInputPtr input
;
13973 xmlParserInputBufferPtr buf
;
13975 if (buffer
== NULL
)
13980 ctxt
= xmlNewParserCtxt();
13984 buf
= xmlParserInputBufferCreateMem(buffer
, size
, XML_CHAR_ENCODING_NONE
);
13986 xmlFreeParserCtxt(ctxt
);
13990 input
= xmlNewInputStream(ctxt
);
13991 if (input
== NULL
) {
13992 xmlFreeParserInputBuffer(buf
);
13993 xmlFreeParserCtxt(ctxt
);
13997 input
->filename
= NULL
;
13999 xmlBufResetInput(input
->buf
->buffer
, input
);
14001 inputPush(ctxt
, input
);
14005 #ifdef LIBXML_SAX1_ENABLED
14007 * xmlSAXParseMemoryWithData:
14008 * @sax: the SAX handler block
14009 * @buffer: an pointer to a char array
14010 * @size: the size of the array
14011 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14013 * @data: the userdata
14015 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14017 * parse an XML in-memory block and use the given SAX function block
14018 * to handle the parsing callback. If sax is NULL, fallback to the default
14019 * DOM tree building routines.
14021 * User data (void *) is stored within the parser context in the
14022 * context's _private member, so it is available nearly everywhere in libxml
14024 * Returns the resulting document tree
14028 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax
, const char *buffer
,
14029 int size
, int recovery
, void *data
) {
14031 xmlParserCtxtPtr ctxt
;
14035 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
14036 if (ctxt
== NULL
) return(NULL
);
14038 if (ctxt
->sax
!= NULL
)
14039 xmlFree(ctxt
->sax
);
14042 xmlDetectSAX2(ctxt
);
14044 ctxt
->_private
=data
;
14047 ctxt
->recovery
= recovery
;
14049 xmlParseDocument(ctxt
);
14051 if ((ctxt
->wellFormed
) || recovery
) ret
= ctxt
->myDoc
;
14054 xmlFreeDoc(ctxt
->myDoc
);
14055 ctxt
->myDoc
= NULL
;
14059 xmlFreeParserCtxt(ctxt
);
14065 * xmlSAXParseMemory:
14066 * @sax: the SAX handler block
14067 * @buffer: an pointer to a char array
14068 * @size: the size of the array
14069 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14072 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14074 * parse an XML in-memory block and use the given SAX function block
14075 * to handle the parsing callback. If sax is NULL, fallback to the default
14076 * DOM tree building routines.
14078 * Returns the resulting document tree
14081 xmlSAXParseMemory(xmlSAXHandlerPtr sax
, const char *buffer
,
14082 int size
, int recovery
) {
14083 return xmlSAXParseMemoryWithData(sax
, buffer
, size
, recovery
, NULL
);
14088 * @buffer: an pointer to a char array
14089 * @size: the size of the array
14091 * DEPRECATED: Use xmlReadMemory.
14093 * parse an XML in-memory block and build a tree.
14095 * Returns the resulting document tree
14098 xmlDocPtr
xmlParseMemory(const char *buffer
, int size
) {
14099 return(xmlSAXParseMemory(NULL
, buffer
, size
, 0));
14103 * xmlRecoverMemory:
14104 * @buffer: an pointer to a char array
14105 * @size: the size of the array
14107 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14109 * parse an XML in-memory block and build a tree.
14110 * In the case the document is not Well Formed, an attempt to
14111 * build a tree is tried anyway
14113 * Returns the resulting document tree or NULL in case of error
14116 xmlDocPtr
xmlRecoverMemory(const char *buffer
, int size
) {
14117 return(xmlSAXParseMemory(NULL
, buffer
, size
, 1));
14121 * xmlSAXUserParseMemory:
14122 * @sax: a SAX handler
14123 * @user_data: The user data returned on SAX callbacks
14124 * @buffer: an in-memory XML document input
14125 * @size: the length of the XML document in bytes
14127 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14129 * parse an XML in-memory buffer and call the given SAX handler routines.
14131 * Returns 0 in case of success or a error number otherwise
14133 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax
, void *user_data
,
14134 const char *buffer
, int size
) {
14136 xmlParserCtxtPtr ctxt
;
14140 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
14141 if (ctxt
== NULL
) return -1;
14142 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
14143 xmlFree(ctxt
->sax
);
14145 xmlDetectSAX2(ctxt
);
14147 if (user_data
!= NULL
)
14148 ctxt
->userData
= user_data
;
14150 xmlParseDocument(ctxt
);
14152 if (ctxt
->wellFormed
)
14155 if (ctxt
->errNo
!= 0)
14162 if (ctxt
->myDoc
!= NULL
) {
14163 xmlFreeDoc(ctxt
->myDoc
);
14164 ctxt
->myDoc
= NULL
;
14166 xmlFreeParserCtxt(ctxt
);
14170 #endif /* LIBXML_SAX1_ENABLED */
14173 * xmlCreateDocParserCtxt:
14174 * @cur: a pointer to an array of xmlChar
14176 * Creates a parser context for an XML in-memory document.
14178 * Returns the new parser context or NULL
14181 xmlCreateDocParserCtxt(const xmlChar
*cur
) {
14186 len
= xmlStrlen(cur
);
14187 return(xmlCreateMemoryParserCtxt((const char *)cur
, len
));
14190 #ifdef LIBXML_SAX1_ENABLED
14193 * @sax: the SAX handler block
14194 * @cur: a pointer to an array of xmlChar
14195 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14198 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14200 * parse an XML in-memory document and build a tree.
14201 * It use the given SAX function block to handle the parsing callback.
14202 * If sax is NULL, fallback to the default DOM tree building routines.
14204 * Returns the resulting document tree
14208 xmlSAXParseDoc(xmlSAXHandlerPtr sax
, const xmlChar
*cur
, int recovery
) {
14210 xmlParserCtxtPtr ctxt
;
14211 xmlSAXHandlerPtr oldsax
= NULL
;
14213 if (cur
== NULL
) return(NULL
);
14216 ctxt
= xmlCreateDocParserCtxt(cur
);
14217 if (ctxt
== NULL
) return(NULL
);
14219 oldsax
= ctxt
->sax
;
14221 ctxt
->userData
= NULL
;
14223 xmlDetectSAX2(ctxt
);
14225 xmlParseDocument(ctxt
);
14226 if ((ctxt
->wellFormed
) || recovery
) ret
= ctxt
->myDoc
;
14229 xmlFreeDoc(ctxt
->myDoc
);
14230 ctxt
->myDoc
= NULL
;
14233 ctxt
->sax
= oldsax
;
14234 xmlFreeParserCtxt(ctxt
);
14241 * @cur: a pointer to an array of xmlChar
14243 * DEPRECATED: Use xmlReadDoc.
14245 * parse an XML in-memory document and build a tree.
14247 * Returns the resulting document tree
14251 xmlParseDoc(const xmlChar
*cur
) {
14252 return(xmlSAXParseDoc(NULL
, cur
, 0));
14254 #endif /* LIBXML_SAX1_ENABLED */
14256 #ifdef LIBXML_LEGACY_ENABLED
14257 /************************************************************************
14259 * Specific function to keep track of entities references *
14260 * and used by the XSLT debugger *
14262 ************************************************************************/
14264 static xmlEntityReferenceFunc xmlEntityRefFunc
= NULL
;
14267 * xmlAddEntityReference:
14268 * @ent : A valid entity
14269 * @firstNode : A valid first node for children of entity
14270 * @lastNode : A valid last node of children entity
14272 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14275 xmlAddEntityReference(xmlEntityPtr ent
, xmlNodePtr firstNode
,
14276 xmlNodePtr lastNode
)
14278 if (xmlEntityRefFunc
!= NULL
) {
14279 (*xmlEntityRefFunc
) (ent
, firstNode
, lastNode
);
14285 * xmlSetEntityReferenceFunc:
14286 * @func: A valid function
14288 * Set the function to call call back when a xml reference has been made
14291 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func
)
14293 xmlEntityRefFunc
= func
;
14295 #endif /* LIBXML_LEGACY_ENABLED */
14297 /************************************************************************
14301 ************************************************************************/
/* Non-zero once xmlInitParser() has completed. */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 */
void
xmlInitParser(void)
{
    /*
     * Note that the initialization code must not make memory allocations.
     */
    if (xmlParserInitialized)
        return;

#ifdef LIBXML_THREAD_ENABLED
    /* Re-test the flag once the global init mutex is held. */
    __xmlGlobalInitMutexLock();
    if (xmlParserInitialized == 0) {
#endif
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
        /* Only register the cleanup when the default deallocator is used. */
        if (xmlFree == free)
            atexit(xmlCleanupParser);
#endif

        /* The order of the following calls is kept as in the original. */
        xmlInitThreadsInternal();
        xmlInitGlobalsInternal();
        xmlInitMemoryInternal();
        __xmlInitializeDict();
        xmlInitEncodingInternal();
        xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
        xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
        xmlInitXPathInternal();
#endif
        xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14350 * xmlCleanupParser:
14352 * This function name is somewhat misleading. It does not clean up
14353 * parser state, it cleans up memory allocated by the library itself.
14354 * It is a cleanup function for the XML library. It tries to reclaim all
14355 * related global memory allocated for the library processing.
14356 * It doesn't deallocate any document related memory. One should
14357 * call xmlCleanupParser() only when the process has finished using
14358 * the library and all XML/HTML documents built with it.
14359 * See also xmlInitParser() which has the opposite function of preparing
14360 * the library for operations.
14362 * WARNING: if your application is multithreaded or has plugin support
14363 * calling this may crash the application if another thread or
14364 * a plugin is still using libxml2. It's sometimes very hard to
14365 * guess if libxml2 is in use in the application, some libraries
14366 * or plugins may use it without notice. In case of doubt abstain
14367 * from calling this function or do it just before calling exit()
14368 * to avoid leak reports from valgrind !
14372 xmlCleanupParser(void) {
14373 if (!xmlParserInitialized
)
14376 xmlCleanupCharEncodingHandlers();
14377 #ifdef LIBXML_CATALOG_ENABLED
14378 xmlCatalogCleanup();
14380 xmlCleanupDictInternal();
14381 xmlCleanupInputCallbacks();
14382 #ifdef LIBXML_OUTPUT_ENABLED
14383 xmlCleanupOutputCallbacks();
14385 #ifdef LIBXML_SCHEMAS_ENABLED
14386 xmlSchemaCleanupTypes();
14387 xmlRelaxNGCleanupTypes();
14389 xmlCleanupGlobalsInternal();
14390 xmlCleanupThreadsInternal();
14391 xmlCleanupMemoryInternal();
14392 xmlParserInitialized
= 0;
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * Destructor hook that releases the library's global state through
 * xmlCleanupParser().
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void)
{
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;
    xmlCleanupParser();
}
#endif
14409 /************************************************************************
14411 * New set (2.6.0) of simpler and more flexible APIs *
14413 ************************************************************************/
14419 * Free a string if it is not owned by the "dict" dictionary in the
14422 #define DICT_FREE(str) \
14423 if ((str) && ((!dict) || \
14424 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
14425 xmlFree((char *)(str));
14429 * @ctxt: an XML parser context
14431 * Reset a parser context
14434 xmlCtxtReset(xmlParserCtxtPtr ctxt
)
14436 xmlParserInputPtr input
;
14444 while ((input
= inputPop(ctxt
)) != NULL
) { /* Non consuming */
14445 xmlFreeInputStream(input
);
14448 ctxt
->input
= NULL
;
14451 if (ctxt
->spaceTab
!= NULL
) {
14452 ctxt
->spaceTab
[0] = -1;
14453 ctxt
->space
= &ctxt
->spaceTab
[0];
14455 ctxt
->space
= NULL
;
14467 DICT_FREE(ctxt
->version
);
14468 ctxt
->version
= NULL
;
14469 DICT_FREE(ctxt
->encoding
);
14470 ctxt
->encoding
= NULL
;
14471 DICT_FREE(ctxt
->directory
);
14472 ctxt
->directory
= NULL
;
14473 DICT_FREE(ctxt
->extSubURI
);
14474 ctxt
->extSubURI
= NULL
;
14475 DICT_FREE(ctxt
->extSubSystem
);
14476 ctxt
->extSubSystem
= NULL
;
14477 if (ctxt
->myDoc
!= NULL
)
14478 xmlFreeDoc(ctxt
->myDoc
);
14479 ctxt
->myDoc
= NULL
;
14481 ctxt
->standalone
= -1;
14482 ctxt
->hasExternalSubset
= 0;
14483 ctxt
->hasPErefs
= 0;
14485 ctxt
->external
= 0;
14486 ctxt
->instate
= XML_PARSER_START
;
14489 ctxt
->wellFormed
= 1;
14490 ctxt
->nsWellFormed
= 1;
14491 ctxt
->disableSAX
= 0;
14494 ctxt
->vctxt
.userData
= ctxt
;
14495 ctxt
->vctxt
.error
= xmlParserValidityError
;
14496 ctxt
->vctxt
.warning
= xmlParserValidityWarning
;
14498 ctxt
->record_info
= 0;
14499 ctxt
->checkIndex
= 0;
14500 ctxt
->endCheckState
= 0;
14501 ctxt
->inSubset
= 0;
14502 ctxt
->errNo
= XML_ERR_OK
;
14504 ctxt
->charset
= XML_CHAR_ENCODING_UTF8
;
14505 ctxt
->catalogs
= NULL
;
14506 ctxt
->sizeentities
= 0;
14507 ctxt
->sizeentcopy
= 0;
14508 xmlInitNodeInfoSeq(&ctxt
->node_seq
);
14510 if (ctxt
->attsDefault
!= NULL
) {
14511 xmlHashFree(ctxt
->attsDefault
, xmlHashDefaultDeallocator
);
14512 ctxt
->attsDefault
= NULL
;
14514 if (ctxt
->attsSpecial
!= NULL
) {
14515 xmlHashFree(ctxt
->attsSpecial
, NULL
);
14516 ctxt
->attsSpecial
= NULL
;
14519 #ifdef LIBXML_CATALOG_ENABLED
14520 if (ctxt
->catalogs
!= NULL
)
14521 xmlCatalogFreeLocal(ctxt
->catalogs
);
14523 ctxt
->nbErrors
= 0;
14524 ctxt
->nbWarnings
= 0;
14525 if (ctxt
->lastError
.code
!= XML_ERR_OK
)
14526 xmlResetError(&ctxt
->lastError
);
14530 * xmlCtxtResetPush:
14531 * @ctxt: an XML parser context
14532 * @chunk: a pointer to an array of chars
14533 * @size: number of chars in the array
14534 * @filename: an optional file name or URI
14535 * @encoding: the document encoding, or NULL
14537 * Reset a push parser context
14539 * Returns 0 in case of success and 1 in case of error
14542 xmlCtxtResetPush(xmlParserCtxtPtr ctxt
, const char *chunk
,
14543 int size
, const char *filename
, const char *encoding
)
14545 xmlParserInputPtr inputStream
;
14546 xmlParserInputBufferPtr buf
;
14547 xmlCharEncoding enc
= XML_CHAR_ENCODING_NONE
;
14552 if ((encoding
== NULL
) && (chunk
!= NULL
) && (size
>= 4))
14553 enc
= xmlDetectCharEncoding((const xmlChar
*) chunk
, size
);
14555 buf
= xmlAllocParserInputBuffer(enc
);
14559 if (ctxt
== NULL
) {
14560 xmlFreeParserInputBuffer(buf
);
14564 xmlCtxtReset(ctxt
);
14566 if (filename
== NULL
) {
14567 ctxt
->directory
= NULL
;
14569 ctxt
->directory
= xmlParserGetDirectory(filename
);
14572 inputStream
= xmlNewInputStream(ctxt
);
14573 if (inputStream
== NULL
) {
14574 xmlFreeParserInputBuffer(buf
);
14578 if (filename
== NULL
)
14579 inputStream
->filename
= NULL
;
14581 inputStream
->filename
= (char *)
14582 xmlCanonicPath((const xmlChar
*) filename
);
14583 inputStream
->buf
= buf
;
14584 xmlBufResetInput(buf
->buffer
, inputStream
);
14586 inputPush(ctxt
, inputStream
);
14588 if ((size
> 0) && (chunk
!= NULL
) && (ctxt
->input
!= NULL
) &&
14589 (ctxt
->input
->buf
!= NULL
)) {
14590 size_t base
= xmlBufGetInputBase(ctxt
->input
->buf
->buffer
, ctxt
->input
);
14591 size_t cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
14593 xmlParserInputBufferPush(ctxt
->input
->buf
, size
, chunk
);
14595 xmlBufSetInputBaseCur(ctxt
->input
->buf
->buffer
, ctxt
->input
, base
, cur
);
14597 xmlGenericError(xmlGenericErrorContext
, "PP: pushed %d\n", size
);
14601 if (encoding
!= NULL
) {
14602 xmlCharEncodingHandlerPtr hdlr
;
14604 if (ctxt
->encoding
!= NULL
)
14605 xmlFree((xmlChar
*) ctxt
->encoding
);
14606 ctxt
->encoding
= xmlStrdup((const xmlChar
*) encoding
);
14608 hdlr
= xmlFindCharEncodingHandler(encoding
);
14609 if (hdlr
!= NULL
) {
14610 xmlSwitchToEncoding(ctxt
, hdlr
);
14612 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNSUPPORTED_ENCODING
,
14613 "Unsupported encoding %s\n", BAD_CAST encoding
);
14615 } else if (enc
!= XML_CHAR_ENCODING_NONE
) {
14616 xmlSwitchEncoding(ctxt
, enc
);
14624 * xmlCtxtUseOptionsInternal:
14625 * @ctxt: an XML parser context
14626 * @options: a combination of xmlParserOption
14627 * @encoding: the user provided encoding to use
14629 * Applies the options to the parser context
14631 * Returns 0 in case of success, the set of unknown or unimplemented options
14632 * in case of error.
14635 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt
, int options
, const char *encoding
)
14639 if (encoding
!= NULL
) {
14640 if (ctxt
->encoding
!= NULL
)
14641 xmlFree((xmlChar
*) ctxt
->encoding
);
14642 ctxt
->encoding
= xmlStrdup((const xmlChar
*) encoding
);
14644 if (options
& XML_PARSE_RECOVER
) {
14645 ctxt
->recovery
= 1;
14646 options
-= XML_PARSE_RECOVER
;
14647 ctxt
->options
|= XML_PARSE_RECOVER
;
14649 ctxt
->recovery
= 0;
14650 if (options
& XML_PARSE_DTDLOAD
) {
14651 ctxt
->loadsubset
= XML_DETECT_IDS
;
14652 options
-= XML_PARSE_DTDLOAD
;
14653 ctxt
->options
|= XML_PARSE_DTDLOAD
;
14655 ctxt
->loadsubset
= 0;
14656 if (options
& XML_PARSE_DTDATTR
) {
14657 ctxt
->loadsubset
|= XML_COMPLETE_ATTRS
;
14658 options
-= XML_PARSE_DTDATTR
;
14659 ctxt
->options
|= XML_PARSE_DTDATTR
;
14661 if (options
& XML_PARSE_NOENT
) {
14662 ctxt
->replaceEntities
= 1;
14663 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14664 options
-= XML_PARSE_NOENT
;
14665 ctxt
->options
|= XML_PARSE_NOENT
;
14667 ctxt
->replaceEntities
= 0;
14668 if (options
& XML_PARSE_PEDANTIC
) {
14669 ctxt
->pedantic
= 1;
14670 options
-= XML_PARSE_PEDANTIC
;
14671 ctxt
->options
|= XML_PARSE_PEDANTIC
;
14673 ctxt
->pedantic
= 0;
14674 if (options
& XML_PARSE_NOBLANKS
) {
14675 ctxt
->keepBlanks
= 0;
14676 ctxt
->sax
->ignorableWhitespace
= xmlSAX2IgnorableWhitespace
;
14677 options
-= XML_PARSE_NOBLANKS
;
14678 ctxt
->options
|= XML_PARSE_NOBLANKS
;
14680 ctxt
->keepBlanks
= 1;
14681 if (options
& XML_PARSE_DTDVALID
) {
14682 ctxt
->validate
= 1;
14683 if (options
& XML_PARSE_NOWARNING
)
14684 ctxt
->vctxt
.warning
= NULL
;
14685 if (options
& XML_PARSE_NOERROR
)
14686 ctxt
->vctxt
.error
= NULL
;
14687 options
-= XML_PARSE_DTDVALID
;
14688 ctxt
->options
|= XML_PARSE_DTDVALID
;
14690 ctxt
->validate
= 0;
14691 if (options
& XML_PARSE_NOWARNING
) {
14692 ctxt
->sax
->warning
= NULL
;
14693 options
-= XML_PARSE_NOWARNING
;
14695 if (options
& XML_PARSE_NOERROR
) {
14696 ctxt
->sax
->error
= NULL
;
14697 ctxt
->sax
->fatalError
= NULL
;
14698 options
-= XML_PARSE_NOERROR
;
14700 #ifdef LIBXML_SAX1_ENABLED
14701 if (options
& XML_PARSE_SAX1
) {
14702 ctxt
->sax
->startElement
= xmlSAX2StartElement
;
14703 ctxt
->sax
->endElement
= xmlSAX2EndElement
;
14704 ctxt
->sax
->startElementNs
= NULL
;
14705 ctxt
->sax
->endElementNs
= NULL
;
14706 ctxt
->sax
->initialized
= 1;
14707 options
-= XML_PARSE_SAX1
;
14708 ctxt
->options
|= XML_PARSE_SAX1
;
14710 #endif /* LIBXML_SAX1_ENABLED */
14711 if (options
& XML_PARSE_NODICT
) {
14712 ctxt
->dictNames
= 0;
14713 options
-= XML_PARSE_NODICT
;
14714 ctxt
->options
|= XML_PARSE_NODICT
;
14716 ctxt
->dictNames
= 1;
14718 if (options
& XML_PARSE_NOCDATA
) {
14719 ctxt
->sax
->cdataBlock
= NULL
;
14720 options
-= XML_PARSE_NOCDATA
;
14721 ctxt
->options
|= XML_PARSE_NOCDATA
;
14723 if (options
& XML_PARSE_NSCLEAN
) {
14724 ctxt
->options
|= XML_PARSE_NSCLEAN
;
14725 options
-= XML_PARSE_NSCLEAN
;
14727 if (options
& XML_PARSE_NONET
) {
14728 ctxt
->options
|= XML_PARSE_NONET
;
14729 options
-= XML_PARSE_NONET
;
14731 if (options
& XML_PARSE_COMPACT
) {
14732 ctxt
->options
|= XML_PARSE_COMPACT
;
14733 options
-= XML_PARSE_COMPACT
;
14735 if (options
& XML_PARSE_OLD10
) {
14736 ctxt
->options
|= XML_PARSE_OLD10
;
14737 options
-= XML_PARSE_OLD10
;
14739 if (options
& XML_PARSE_NOBASEFIX
) {
14740 ctxt
->options
|= XML_PARSE_NOBASEFIX
;
14741 options
-= XML_PARSE_NOBASEFIX
;
14743 if (options
& XML_PARSE_HUGE
) {
14744 ctxt
->options
|= XML_PARSE_HUGE
;
14745 options
-= XML_PARSE_HUGE
;
14746 if (ctxt
->dict
!= NULL
)
14747 xmlDictSetLimit(ctxt
->dict
, 0);
14749 if (options
& XML_PARSE_OLDSAX
) {
14750 ctxt
->options
|= XML_PARSE_OLDSAX
;
14751 options
-= XML_PARSE_OLDSAX
;
14753 if (options
& XML_PARSE_IGNORE_ENC
) {
14754 ctxt
->options
|= XML_PARSE_IGNORE_ENC
;
14755 options
-= XML_PARSE_IGNORE_ENC
;
14757 if (options
& XML_PARSE_BIG_LINES
) {
14758 ctxt
->options
|= XML_PARSE_BIG_LINES
;
14759 options
-= XML_PARSE_BIG_LINES
;
14761 ctxt
->linenumbers
= 1;
14766 * xmlCtxtUseOptions:
14767 * @ctxt: an XML parser context
14768 * @options: a combination of xmlParserOption
14770 * Applies the options to the parser context
14772 * Returns 0 in case of success, the set of unknown or unimplemented options
14773 * in case of error.
14776 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt
, int options
)
14778 return(xmlCtxtUseOptionsInternal(ctxt
, options
, NULL
));
14783 * @ctxt: an XML parser context
14784 * @URL: the base URL to use for the document
14785 * @encoding: the document encoding, or NULL
14786 * @options: a combination of xmlParserOption
14787 * @reuse: keep the context for reuse
14789 * Common front-end for the xmlRead functions
14791 * Returns the resulting document tree or NULL
14794 xmlDoRead(xmlParserCtxtPtr ctxt
, const char *URL
, const char *encoding
,
14795 int options
, int reuse
)
14799 xmlCtxtUseOptionsInternal(ctxt
, options
, encoding
);
14800 if (encoding
!= NULL
) {
14801 xmlCharEncodingHandlerPtr hdlr
;
14804 * TODO: We should consider to set XML_PARSE_IGNORE_ENC if the
14805 * caller provided an encoding. Otherwise, we might switch to
14806 * the encoding from the XML declaration which is likely to
14807 * break things. Also see xmlSwitchInputEncoding.
14809 hdlr
= xmlFindCharEncodingHandler(encoding
);
14811 xmlSwitchToEncoding(ctxt
, hdlr
);
14813 if ((URL
!= NULL
) && (ctxt
->input
!= NULL
) &&
14814 (ctxt
->input
->filename
== NULL
))
14815 ctxt
->input
->filename
= (char *) xmlStrdup((const xmlChar
*) URL
);
14816 xmlParseDocument(ctxt
);
14817 if ((ctxt
->wellFormed
) || ctxt
->recovery
)
14821 if (ctxt
->myDoc
!= NULL
) {
14822 xmlFreeDoc(ctxt
->myDoc
);
14825 ctxt
->myDoc
= NULL
;
14827 xmlFreeParserCtxt(ctxt
);
14835 * @cur: a pointer to a zero terminated string
14836 * @URL: the base URL to use for the document
14837 * @encoding: the document encoding, or NULL
14838 * @options: a combination of xmlParserOption
14840 * parse an XML in-memory document and build a tree.
14842 * Returns the resulting document tree
14845 xmlReadDoc(const xmlChar
* cur
, const char *URL
, const char *encoding
, int options
)
14847 xmlParserCtxtPtr ctxt
;
14853 ctxt
= xmlCreateDocParserCtxt(cur
);
14856 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
14861 * @filename: a file or URL
14862 * @encoding: the document encoding, or NULL
14863 * @options: a combination of xmlParserOption
14865 * parse an XML file from the filesystem or the network.
14867 * Returns the resulting document tree
14870 xmlReadFile(const char *filename
, const char *encoding
, int options
)
14872 xmlParserCtxtPtr ctxt
;
14875 ctxt
= xmlCreateURLParserCtxt(filename
, options
);
14878 return (xmlDoRead(ctxt
, NULL
, encoding
, options
, 0));
14883 * @buffer: a pointer to a char array
14884 * @size: the size of the array
14885 * @URL: the base URL to use for the document
14886 * @encoding: the document encoding, or NULL
14887 * @options: a combination of xmlParserOption
14889 * parse an XML in-memory document and build a tree.
14891 * Returns the resulting document tree
14894 xmlReadMemory(const char *buffer
, int size
, const char *URL
, const char *encoding
, int options
)
14896 xmlParserCtxtPtr ctxt
;
14899 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
14902 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
14907 * @fd: an open file descriptor
14908 * @URL: the base URL to use for the document
14909 * @encoding: the document encoding, or NULL
14910 * @options: a combination of xmlParserOption
14912 * parse an XML from a file descriptor and build a tree.
14913 * NOTE that the file descriptor will not be closed when the
14914 * reader is closed or reset.
14916 * Returns the resulting document tree
14919 xmlReadFd(int fd
, const char *URL
, const char *encoding
, int options
)
14921 xmlParserCtxtPtr ctxt
;
14922 xmlParserInputBufferPtr input
;
14923 xmlParserInputPtr stream
;
14929 input
= xmlParserInputBufferCreateFd(fd
, XML_CHAR_ENCODING_NONE
);
14932 input
->closecallback
= NULL
;
14933 ctxt
= xmlNewParserCtxt();
14934 if (ctxt
== NULL
) {
14935 xmlFreeParserInputBuffer(input
);
14938 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
14939 if (stream
== NULL
) {
14940 xmlFreeParserInputBuffer(input
);
14941 xmlFreeParserCtxt(ctxt
);
14944 inputPush(ctxt
, stream
);
14945 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
14950 * @ioread: an I/O read function
14951 * @ioclose: an I/O close function
14952 * @ioctx: an I/O handler
14953 * @URL: the base URL to use for the document
14954 * @encoding: the document encoding, or NULL
14955 * @options: a combination of xmlParserOption
14957 * parse an XML document from I/O functions and source and build a tree.
14959 * Returns the resulting document tree
14962 xmlReadIO(xmlInputReadCallback ioread
, xmlInputCloseCallback ioclose
,
14963 void *ioctx
, const char *URL
, const char *encoding
, int options
)
14965 xmlParserCtxtPtr ctxt
;
14966 xmlParserInputBufferPtr input
;
14967 xmlParserInputPtr stream
;
14969 if (ioread
== NULL
)
14973 input
= xmlParserInputBufferCreateIO(ioread
, ioclose
, ioctx
,
14974 XML_CHAR_ENCODING_NONE
);
14975 if (input
== NULL
) {
14976 if (ioclose
!= NULL
)
14980 ctxt
= xmlNewParserCtxt();
14981 if (ctxt
== NULL
) {
14982 xmlFreeParserInputBuffer(input
);
14985 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
14986 if (stream
== NULL
) {
14987 xmlFreeParserInputBuffer(input
);
14988 xmlFreeParserCtxt(ctxt
);
14991 inputPush(ctxt
, stream
);
14992 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
14997 * @ctxt: an XML parser context
14998 * @cur: a pointer to a zero terminated string
14999 * @URL: the base URL to use for the document
15000 * @encoding: the document encoding, or NULL
15001 * @options: a combination of xmlParserOption
15003 * parse an XML in-memory document and build a tree.
15004 * This reuses the existing @ctxt parser context
15006 * Returns the resulting document tree
15009 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt
, const xmlChar
* cur
,
15010 const char *URL
, const char *encoding
, int options
)
15014 return (xmlCtxtReadMemory(ctxt
, (const char *) cur
, xmlStrlen(cur
), URL
,
15015 encoding
, options
));
15020 * @ctxt: an XML parser context
15021 * @filename: a file or URL
15022 * @encoding: the document encoding, or NULL
15023 * @options: a combination of xmlParserOption
15025 * parse an XML file from the filesystem or the network.
15026 * This reuses the existing @ctxt parser context
15028 * Returns the resulting document tree
15031 xmlCtxtReadFile(xmlParserCtxtPtr ctxt
, const char *filename
,
15032 const char *encoding
, int options
)
15034 xmlParserInputPtr stream
;
15036 if (filename
== NULL
)
15042 xmlCtxtReset(ctxt
);
15044 stream
= xmlLoadExternalEntity(filename
, NULL
, ctxt
);
15045 if (stream
== NULL
) {
15048 inputPush(ctxt
, stream
);
15049 return (xmlDoRead(ctxt
, NULL
, encoding
, options
, 1));
15053 * xmlCtxtReadMemory:
15054 * @ctxt: an XML parser context
15055 * @buffer: a pointer to a char array
15056 * @size: the size of the array
15057 * @URL: the base URL to use for the document
15058 * @encoding: the document encoding, or NULL
15059 * @options: a combination of xmlParserOption
15061 * parse an XML in-memory document and build a tree.
15062 * This reuses the existing @ctxt parser context
15064 * Returns the resulting document tree
15067 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt
, const char *buffer
, int size
,
15068 const char *URL
, const char *encoding
, int options
)
15070 xmlParserInputBufferPtr input
;
15071 xmlParserInputPtr stream
;
15075 if (buffer
== NULL
)
15079 xmlCtxtReset(ctxt
);
15081 input
= xmlParserInputBufferCreateMem(buffer
, size
, XML_CHAR_ENCODING_NONE
);
15082 if (input
== NULL
) {
15086 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
15087 if (stream
== NULL
) {
15088 xmlFreeParserInputBuffer(input
);
15092 inputPush(ctxt
, stream
);
15093 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));
15098 * @ctxt: an XML parser context
15099 * @fd: an open file descriptor
15100 * @URL: the base URL to use for the document
15101 * @encoding: the document encoding, or NULL
15102 * @options: a combination of xmlParserOption
15104 * parse an XML from a file descriptor and build a tree.
15105 * This reuses the existing @ctxt parser context
15106 * NOTE that the file descriptor will not be closed when the
15107 * reader is closed or reset.
15109 * Returns the resulting document tree
15112 xmlCtxtReadFd(xmlParserCtxtPtr ctxt
, int fd
,
15113 const char *URL
, const char *encoding
, int options
)
15115 xmlParserInputBufferPtr input
;
15116 xmlParserInputPtr stream
;
15124 xmlCtxtReset(ctxt
);
15127 input
= xmlParserInputBufferCreateFd(fd
, XML_CHAR_ENCODING_NONE
);
15130 input
->closecallback
= NULL
;
15131 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
15132 if (stream
== NULL
) {
15133 xmlFreeParserInputBuffer(input
);
15136 inputPush(ctxt
, stream
);
15137 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));
15142 * @ctxt: an XML parser context
15143 * @ioread: an I/O read function
15144 * @ioclose: an I/O close function
15145 * @ioctx: an I/O handler
15146 * @URL: the base URL to use for the document
15147 * @encoding: the document encoding, or NULL
15148 * @options: a combination of xmlParserOption
15150 * parse an XML document from I/O functions and source and build a tree.
15151 * This reuses the existing @ctxt parser context
15153 * Returns the resulting document tree
15156 xmlCtxtReadIO(xmlParserCtxtPtr ctxt
, xmlInputReadCallback ioread
,
15157 xmlInputCloseCallback ioclose
, void *ioctx
,
15159 const char *encoding
, int options
)
15161 xmlParserInputBufferPtr input
;
15162 xmlParserInputPtr stream
;
15164 if (ioread
== NULL
)
15170 xmlCtxtReset(ctxt
);
15172 input
= xmlParserInputBufferCreateIO(ioread
, ioclose
, ioctx
,
15173 XML_CHAR_ENCODING_NONE
);
15174 if (input
== NULL
) {
15175 if (ioclose
!= NULL
)
15179 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
15180 if (stream
== NULL
) {
15181 xmlFreeParserInputBuffer(input
);
15184 inputPush(ctxt
, stream
);
15185 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));