opengl32: Consistently use $func instead of $func_ref.
[wine.git] / libs / xml2 / parser.c
blobe660b0a7d4990899a7fbb0b933c09479f93a3a4d
1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
28 * See Copyright for the status of this software.
30 * daniel@veillard.com
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
38 #define IN_LIBXML
39 #include "libxml.h"
41 #if defined(_WIN32)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <ctype.h>
53 #include <stdlib.h>
54 #include <libxml/xmlmemory.h>
55 #include <libxml/threads.h>
56 #include <libxml/globals.h>
57 #include <libxml/tree.h>
58 #include <libxml/parser.h>
59 #include <libxml/parserInternals.h>
60 #include <libxml/valid.h>
61 #include <libxml/entities.h>
62 #include <libxml/xmlerror.h>
63 #include <libxml/encoding.h>
64 #include <libxml/xmlIO.h>
65 #include <libxml/uri.h>
66 #ifdef LIBXML_CATALOG_ENABLED
67 #include <libxml/catalog.h>
68 #endif
69 #ifdef LIBXML_SCHEMAS_ENABLED
70 #include <libxml/xmlschemastypes.h>
71 #include <libxml/relaxng.h>
72 #endif
74 #include "buf.h"
75 #include "enc.h"
/*
 * Record describing one start tag.
 * NOTE(review): field meanings below are inferred from the names only;
 * confirm against the code that fills and reads this structure.
 */
struct _xmlStartTag {
    const xmlChar *prefix;	/* presumably the namespace prefix of the tag */
    const xmlChar *URI;		/* presumably the namespace URI of the tag */
    int line;			/* presumably the input line of the tag */
    int nsNr;			/* presumably a namespace count for the tag */
};
84 static void
85 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
87 static xmlParserCtxtPtr
88 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
89 const xmlChar *base, xmlParserCtxtPtr pctx);
91 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
93 static int
94 xmlParseElementStart(xmlParserCtxtPtr ctxt);
96 static void
97 xmlParseElementEnd(xmlParserCtxtPtr ctxt);
99 /************************************************************************
101 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
103 ************************************************************************/
105 #define XML_PARSER_BIG_ENTITY 1000
106 #define XML_PARSER_LOT_ENTITY 5000
109 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
110 * replacement over the size in bytes of the input indicates that you have
111 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
112 * replacements per byte of input.
114 #define XML_PARSER_NON_LINEAR 10
117 * xmlParserEntityCheck
119 * Function to check non-linear entity expansion behaviour
120 * This is here to detect and stop exponential linear entity expansion
121 * This is not a limitation of the parser but a safety
122 * boundary feature. It can be disabled with the XML_PARSE_HUGE
123 * parser option.
125 static int
126 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
127 xmlEntityPtr ent, size_t replacement)
129 size_t consumed = 0;
130 int i;
132 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
133 return (0);
134 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
135 return (1);
138 * This may look absurd but is needed to detect
139 * entities problems
141 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
142 (ent->content != NULL) && (ent->checked == 0) &&
143 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
144 unsigned long oldnbent = ctxt->nbentities, diff;
145 xmlChar *rep;
147 ent->checked = 1;
149 ++ctxt->depth;
150 rep = xmlStringDecodeEntities(ctxt, ent->content,
151 XML_SUBSTITUTE_REF, 0, 0, 0);
152 --ctxt->depth;
153 if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
154 ent->content[0] = 0;
157 diff = ctxt->nbentities - oldnbent + 1;
158 if (diff > INT_MAX / 2)
159 diff = INT_MAX / 2;
160 ent->checked = diff * 2;
161 if (rep != NULL) {
162 if (xmlStrchr(rep, '<'))
163 ent->checked |= 1;
164 xmlFree(rep);
165 rep = NULL;
170 * Prevent entity exponential check, not just replacement while
171 * parsing the DTD
172 * The check is potentially costly so do that only once in a thousand
174 if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
175 (ctxt->nbentities % 1024 == 0)) {
176 for (i = 0;i < ctxt->inputNr;i++) {
177 consumed += ctxt->inputTab[i]->consumed +
178 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
180 if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
181 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
182 ctxt->instate = XML_PARSER_EOF;
183 return (1);
185 consumed = 0;
190 if (replacement != 0) {
191 if (replacement < XML_MAX_TEXT_LENGTH)
192 return(0);
195 * If the volume of entity copy reaches 10 times the
196 * amount of parsed data and over the large text threshold
197 * then that's very likely to be an abuse.
199 if (ctxt->input != NULL) {
200 consumed = ctxt->input->consumed +
201 (ctxt->input->cur - ctxt->input->base);
203 consumed += ctxt->sizeentities;
205 if (replacement < XML_PARSER_NON_LINEAR * consumed)
206 return(0);
207 } else if (size != 0) {
209 * Do the check based on the replacement size of the entity
211 if (size < XML_PARSER_BIG_ENTITY)
212 return(0);
215 * A limit on the amount of text data reasonably used
217 if (ctxt->input != NULL) {
218 consumed = ctxt->input->consumed +
219 (ctxt->input->cur - ctxt->input->base);
221 consumed += ctxt->sizeentities;
223 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
224 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
225 return (0);
226 } else if (ent != NULL) {
228 * use the number of parsed entities in the replacement
230 size = ent->checked / 2;
233 * The amount of data parsed counting entities size only once
235 if (ctxt->input != NULL) {
236 consumed = ctxt->input->consumed +
237 (ctxt->input->cur - ctxt->input->base);
239 consumed += ctxt->sizeentities;
242 * Check the density of entities for the amount of data
243 * knowing an entity reference will take at least 3 bytes
245 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
246 return (0);
247 } else {
249 * strange we got no data for checking
251 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
252 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
253 (ctxt->nbentities <= 10000))
254 return (0);
256 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
257 return (1);
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
272 #define SAX2 1
273 #define XML_PARSER_BIG_BUFFER_SIZE 300
274 #define XML_PARSER_BUFFER_SIZE 100
275 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
278 * XML_PARSER_CHUNK_SIZE
280 * When calling GROW that's the minimal amount of data
281 * the parser expected to have received. It is not a hard
282 * limit but an optimization when reading strings like Names
283 * It is not strictly needed as long as inputs available characters
284 * are followed by 0, which should be provided by the I/O level
286 #define XML_PARSER_CHUNK_SIZE 100
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */
static const char* const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
299 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
300 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
301 const xmlChar **str);
303 static xmlParserErrors
304 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
305 xmlSAXHandlerPtr sax,
306 void *user_data, int depth, const xmlChar *URL,
307 const xmlChar *ID, xmlNodePtr *list);
309 static int
310 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
311 const char *encoding);
312 #ifdef LIBXML_LEGACY_ENABLED
313 static void
314 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
315 xmlNodePtr lastNode);
316 #endif /* LIBXML_LEGACY_ENABLED */
318 static xmlParserErrors
319 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
320 const xmlChar *string, void *user_data, xmlNodePtr *lst);
322 static int
323 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
325 /************************************************************************
327 * Some factorized error routines *
329 ************************************************************************/
332 * xmlErrAttributeDup:
333 * @ctxt: an XML parser context
334 * @prefix: the attribute prefix
335 * @localname: the attribute localname
337 * Handle a redefinition of attribute error
339 static void
340 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
341 const xmlChar * localname)
343 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
344 (ctxt->instate == XML_PARSER_EOF))
345 return;
346 if (ctxt != NULL)
347 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
349 if (prefix == NULL)
350 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
351 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
352 (const char *) localname, NULL, NULL, 0, 0,
353 "Attribute %s redefined\n", localname);
354 else
355 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
356 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
357 (const char *) prefix, (const char *) localname,
358 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
359 localname);
360 if (ctxt != NULL) {
361 ctxt->wellFormed = 0;
362 if (ctxt->recovery == 0)
363 ctxt->disableSAX = 1;
368 * xmlFatalErr:
369 * @ctxt: an XML parser context
370 * @error: the error number
371 * @extra: extra information string
373 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
375 static void
376 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
378 const char *errmsg;
380 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
381 (ctxt->instate == XML_PARSER_EOF))
382 return;
383 switch (error) {
384 case XML_ERR_INVALID_HEX_CHARREF:
385 errmsg = "CharRef: invalid hexadecimal value";
386 break;
387 case XML_ERR_INVALID_DEC_CHARREF:
388 errmsg = "CharRef: invalid decimal value";
389 break;
390 case XML_ERR_INVALID_CHARREF:
391 errmsg = "CharRef: invalid value";
392 break;
393 case XML_ERR_INTERNAL_ERROR:
394 errmsg = "internal error";
395 break;
396 case XML_ERR_PEREF_AT_EOF:
397 errmsg = "PEReference at end of document";
398 break;
399 case XML_ERR_PEREF_IN_PROLOG:
400 errmsg = "PEReference in prolog";
401 break;
402 case XML_ERR_PEREF_IN_EPILOG:
403 errmsg = "PEReference in epilog";
404 break;
405 case XML_ERR_PEREF_NO_NAME:
406 errmsg = "PEReference: no name";
407 break;
408 case XML_ERR_PEREF_SEMICOL_MISSING:
409 errmsg = "PEReference: expecting ';'";
410 break;
411 case XML_ERR_ENTITY_LOOP:
412 errmsg = "Detected an entity reference loop";
413 break;
414 case XML_ERR_ENTITY_NOT_STARTED:
415 errmsg = "EntityValue: \" or ' expected";
416 break;
417 case XML_ERR_ENTITY_PE_INTERNAL:
418 errmsg = "PEReferences forbidden in internal subset";
419 break;
420 case XML_ERR_ENTITY_NOT_FINISHED:
421 errmsg = "EntityValue: \" or ' expected";
422 break;
423 case XML_ERR_ATTRIBUTE_NOT_STARTED:
424 errmsg = "AttValue: \" or ' expected";
425 break;
426 case XML_ERR_LT_IN_ATTRIBUTE:
427 errmsg = "Unescaped '<' not allowed in attributes values";
428 break;
429 case XML_ERR_LITERAL_NOT_STARTED:
430 errmsg = "SystemLiteral \" or ' expected";
431 break;
432 case XML_ERR_LITERAL_NOT_FINISHED:
433 errmsg = "Unfinished System or Public ID \" or ' expected";
434 break;
435 case XML_ERR_MISPLACED_CDATA_END:
436 errmsg = "Sequence ']]>' not allowed in content";
437 break;
438 case XML_ERR_URI_REQUIRED:
439 errmsg = "SYSTEM or PUBLIC, the URI is missing";
440 break;
441 case XML_ERR_PUBID_REQUIRED:
442 errmsg = "PUBLIC, the Public Identifier is missing";
443 break;
444 case XML_ERR_HYPHEN_IN_COMMENT:
445 errmsg = "Comment must not contain '--' (double-hyphen)";
446 break;
447 case XML_ERR_PI_NOT_STARTED:
448 errmsg = "xmlParsePI : no target name";
449 break;
450 case XML_ERR_RESERVED_XML_NAME:
451 errmsg = "Invalid PI name";
452 break;
453 case XML_ERR_NOTATION_NOT_STARTED:
454 errmsg = "NOTATION: Name expected here";
455 break;
456 case XML_ERR_NOTATION_NOT_FINISHED:
457 errmsg = "'>' required to close NOTATION declaration";
458 break;
459 case XML_ERR_VALUE_REQUIRED:
460 errmsg = "Entity value required";
461 break;
462 case XML_ERR_URI_FRAGMENT:
463 errmsg = "Fragment not allowed";
464 break;
465 case XML_ERR_ATTLIST_NOT_STARTED:
466 errmsg = "'(' required to start ATTLIST enumeration";
467 break;
468 case XML_ERR_NMTOKEN_REQUIRED:
469 errmsg = "NmToken expected in ATTLIST enumeration";
470 break;
471 case XML_ERR_ATTLIST_NOT_FINISHED:
472 errmsg = "')' required to finish ATTLIST enumeration";
473 break;
474 case XML_ERR_MIXED_NOT_STARTED:
475 errmsg = "MixedContentDecl : '|' or ')*' expected";
476 break;
477 case XML_ERR_PCDATA_REQUIRED:
478 errmsg = "MixedContentDecl : '#PCDATA' expected";
479 break;
480 case XML_ERR_ELEMCONTENT_NOT_STARTED:
481 errmsg = "ContentDecl : Name or '(' expected";
482 break;
483 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
484 errmsg = "ContentDecl : ',' '|' or ')' expected";
485 break;
486 case XML_ERR_PEREF_IN_INT_SUBSET:
487 errmsg =
488 "PEReference: forbidden within markup decl in internal subset";
489 break;
490 case XML_ERR_GT_REQUIRED:
491 errmsg = "expected '>'";
492 break;
493 case XML_ERR_CONDSEC_INVALID:
494 errmsg = "XML conditional section '[' expected";
495 break;
496 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
497 errmsg = "Content error in the external subset";
498 break;
499 case XML_ERR_CONDSEC_INVALID_KEYWORD:
500 errmsg =
501 "conditional section INCLUDE or IGNORE keyword expected";
502 break;
503 case XML_ERR_CONDSEC_NOT_FINISHED:
504 errmsg = "XML conditional section not closed";
505 break;
506 case XML_ERR_XMLDECL_NOT_STARTED:
507 errmsg = "Text declaration '<?xml' required";
508 break;
509 case XML_ERR_XMLDECL_NOT_FINISHED:
510 errmsg = "parsing XML declaration: '?>' expected";
511 break;
512 case XML_ERR_EXT_ENTITY_STANDALONE:
513 errmsg = "external parsed entities cannot be standalone";
514 break;
515 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
516 errmsg = "EntityRef: expecting ';'";
517 break;
518 case XML_ERR_DOCTYPE_NOT_FINISHED:
519 errmsg = "DOCTYPE improperly terminated";
520 break;
521 case XML_ERR_LTSLASH_REQUIRED:
522 errmsg = "EndTag: '</' not found";
523 break;
524 case XML_ERR_EQUAL_REQUIRED:
525 errmsg = "expected '='";
526 break;
527 case XML_ERR_STRING_NOT_CLOSED:
528 errmsg = "String not closed expecting \" or '";
529 break;
530 case XML_ERR_STRING_NOT_STARTED:
531 errmsg = "String not started expecting ' or \"";
532 break;
533 case XML_ERR_ENCODING_NAME:
534 errmsg = "Invalid XML encoding name";
535 break;
536 case XML_ERR_STANDALONE_VALUE:
537 errmsg = "standalone accepts only 'yes' or 'no'";
538 break;
539 case XML_ERR_DOCUMENT_EMPTY:
540 errmsg = "Document is empty";
541 break;
542 case XML_ERR_DOCUMENT_END:
543 errmsg = "Extra content at the end of the document";
544 break;
545 case XML_ERR_NOT_WELL_BALANCED:
546 errmsg = "chunk is not well balanced";
547 break;
548 case XML_ERR_EXTRA_CONTENT:
549 errmsg = "extra content at the end of well balanced chunk";
550 break;
551 case XML_ERR_VERSION_MISSING:
552 errmsg = "Malformed declaration expecting version";
553 break;
554 case XML_ERR_NAME_TOO_LONG:
555 errmsg = "Name too long use XML_PARSE_HUGE option";
556 break;
557 #if 0
558 case:
559 errmsg = "";
560 break;
561 #endif
562 default:
563 errmsg = "Unregistered error message";
565 if (ctxt != NULL)
566 ctxt->errNo = error;
567 if (info == NULL) {
568 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
569 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
570 errmsg);
571 } else {
572 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
573 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
574 errmsg, info);
576 if (ctxt != NULL) {
577 ctxt->wellFormed = 0;
578 if (ctxt->recovery == 0)
579 ctxt->disableSAX = 1;
584 * xmlFatalErrMsg:
585 * @ctxt: an XML parser context
586 * @error: the error number
587 * @msg: the error message
589 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
591 static void LIBXML_ATTR_FORMAT(3,0)
592 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
593 const char *msg)
595 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
596 (ctxt->instate == XML_PARSER_EOF))
597 return;
598 if (ctxt != NULL)
599 ctxt->errNo = error;
600 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
601 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
602 if (ctxt != NULL) {
603 ctxt->wellFormed = 0;
604 if (ctxt->recovery == 0)
605 ctxt->disableSAX = 1;
610 * xmlWarningMsg:
611 * @ctxt: an XML parser context
612 * @error: the error number
613 * @msg: the error message
614 * @str1: extra data
615 * @str2: extra data
617 * Handle a warning.
619 static void LIBXML_ATTR_FORMAT(3,0)
620 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
621 const char *msg, const xmlChar *str1, const xmlChar *str2)
623 xmlStructuredErrorFunc schannel = NULL;
625 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
626 (ctxt->instate == XML_PARSER_EOF))
627 return;
628 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
629 (ctxt->sax->initialized == XML_SAX2_MAGIC))
630 schannel = ctxt->sax->serror;
631 if (ctxt != NULL) {
632 __xmlRaiseError(schannel,
633 (ctxt->sax) ? ctxt->sax->warning : NULL,
634 ctxt->userData,
635 ctxt, NULL, XML_FROM_PARSER, error,
636 XML_ERR_WARNING, NULL, 0,
637 (const char *) str1, (const char *) str2, NULL, 0, 0,
638 msg, (const char *) str1, (const char *) str2);
639 } else {
640 __xmlRaiseError(schannel, NULL, NULL,
641 ctxt, NULL, XML_FROM_PARSER, error,
642 XML_ERR_WARNING, NULL, 0,
643 (const char *) str1, (const char *) str2, NULL, 0, 0,
644 msg, (const char *) str1, (const char *) str2);
649 * xmlValidityError:
650 * @ctxt: an XML parser context
651 * @error: the error number
652 * @msg: the error message
653 * @str1: extra data
655 * Handle a validity error.
657 static void LIBXML_ATTR_FORMAT(3,0)
658 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
659 const char *msg, const xmlChar *str1, const xmlChar *str2)
661 xmlStructuredErrorFunc schannel = NULL;
663 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
664 (ctxt->instate == XML_PARSER_EOF))
665 return;
666 if (ctxt != NULL) {
667 ctxt->errNo = error;
668 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
669 schannel = ctxt->sax->serror;
671 if (ctxt != NULL) {
672 __xmlRaiseError(schannel,
673 ctxt->vctxt.error, ctxt->vctxt.userData,
674 ctxt, NULL, XML_FROM_DTD, error,
675 XML_ERR_ERROR, NULL, 0, (const char *) str1,
676 (const char *) str2, NULL, 0, 0,
677 msg, (const char *) str1, (const char *) str2);
678 ctxt->valid = 0;
679 } else {
680 __xmlRaiseError(schannel, NULL, NULL,
681 ctxt, NULL, XML_FROM_DTD, error,
682 XML_ERR_ERROR, NULL, 0, (const char *) str1,
683 (const char *) str2, NULL, 0, 0,
684 msg, (const char *) str1, (const char *) str2);
689 * xmlFatalErrMsgInt:
690 * @ctxt: an XML parser context
691 * @error: the error number
692 * @msg: the error message
693 * @val: an integer value
695 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
697 static void LIBXML_ATTR_FORMAT(3,0)
698 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
699 const char *msg, int val)
701 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
702 (ctxt->instate == XML_PARSER_EOF))
703 return;
704 if (ctxt != NULL)
705 ctxt->errNo = error;
706 __xmlRaiseError(NULL, NULL, NULL,
707 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
708 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
709 if (ctxt != NULL) {
710 ctxt->wellFormed = 0;
711 if (ctxt->recovery == 0)
712 ctxt->disableSAX = 1;
717 * xmlFatalErrMsgStrIntStr:
718 * @ctxt: an XML parser context
719 * @error: the error number
720 * @msg: the error message
721 * @str1: an string info
722 * @val: an integer value
723 * @str2: an string info
725 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
727 static void LIBXML_ATTR_FORMAT(3,0)
728 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
729 const char *msg, const xmlChar *str1, int val,
730 const xmlChar *str2)
732 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
733 (ctxt->instate == XML_PARSER_EOF))
734 return;
735 if (ctxt != NULL)
736 ctxt->errNo = error;
737 __xmlRaiseError(NULL, NULL, NULL,
738 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
739 NULL, 0, (const char *) str1, (const char *) str2,
740 NULL, val, 0, msg, str1, val, str2);
741 if (ctxt != NULL) {
742 ctxt->wellFormed = 0;
743 if (ctxt->recovery == 0)
744 ctxt->disableSAX = 1;
749 * xmlFatalErrMsgStr:
750 * @ctxt: an XML parser context
751 * @error: the error number
752 * @msg: the error message
753 * @val: a string value
755 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
757 static void LIBXML_ATTR_FORMAT(3,0)
758 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
759 const char *msg, const xmlChar * val)
761 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
762 (ctxt->instate == XML_PARSER_EOF))
763 return;
764 if (ctxt != NULL)
765 ctxt->errNo = error;
766 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
767 XML_FROM_PARSER, error, XML_ERR_FATAL,
768 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
769 val);
770 if (ctxt != NULL) {
771 ctxt->wellFormed = 0;
772 if (ctxt->recovery == 0)
773 ctxt->disableSAX = 1;
778 * xmlErrMsgStr:
779 * @ctxt: an XML parser context
780 * @error: the error number
781 * @msg: the error message
782 * @val: a string value
784 * Handle a non fatal parser error
786 static void LIBXML_ATTR_FORMAT(3,0)
787 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
788 const char *msg, const xmlChar * val)
790 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
791 (ctxt->instate == XML_PARSER_EOF))
792 return;
793 if (ctxt != NULL)
794 ctxt->errNo = error;
795 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
796 XML_FROM_PARSER, error, XML_ERR_ERROR,
797 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
798 val);
802 * xmlNsErr:
803 * @ctxt: an XML parser context
804 * @error: the error number
805 * @msg: the message
806 * @info1: extra information string
807 * @info2: extra information string
809 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
811 static void LIBXML_ATTR_FORMAT(3,0)
812 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
813 const char *msg,
814 const xmlChar * info1, const xmlChar * info2,
815 const xmlChar * info3)
817 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
818 (ctxt->instate == XML_PARSER_EOF))
819 return;
820 if (ctxt != NULL)
821 ctxt->errNo = error;
822 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
823 XML_ERR_ERROR, NULL, 0, (const char *) info1,
824 (const char *) info2, (const char *) info3, 0, 0, msg,
825 info1, info2, info3);
826 if (ctxt != NULL)
827 ctxt->nsWellFormed = 0;
831 * xmlNsWarn
832 * @ctxt: an XML parser context
833 * @error: the error number
834 * @msg: the message
835 * @info1: extra information string
836 * @info2: extra information string
838 * Handle a namespace warning error
840 static void LIBXML_ATTR_FORMAT(3,0)
841 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
842 const char *msg,
843 const xmlChar * info1, const xmlChar * info2,
844 const xmlChar * info3)
846 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
847 (ctxt->instate == XML_PARSER_EOF))
848 return;
849 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
850 XML_ERR_WARNING, NULL, 0, (const char *) info1,
851 (const char *) info2, (const char *) info3, 0, 0, msg,
852 info1, info2, info3);
855 /************************************************************************
857 * Library wide options *
859 ************************************************************************/
862 * xmlHasFeature:
863 * @feature: the feature to be examined
865 * Examines if the library has been compiled with a given feature.
867 * Returns a non-zero value if the feature exist, otherwise zero.
868 * Returns zero (0) if the feature does not exist or an unknown
869 * unknown feature is requested, non-zero otherwise.
872 xmlHasFeature(xmlFeature feature)
874 switch (feature) {
875 case XML_WITH_THREAD:
876 #ifdef LIBXML_THREAD_ENABLED
877 return(1);
878 #else
879 return(0);
880 #endif
881 case XML_WITH_TREE:
882 #ifdef LIBXML_TREE_ENABLED
883 return(1);
884 #else
885 return(0);
886 #endif
887 case XML_WITH_OUTPUT:
888 #ifdef LIBXML_OUTPUT_ENABLED
889 return(1);
890 #else
891 return(0);
892 #endif
893 case XML_WITH_PUSH:
894 #ifdef LIBXML_PUSH_ENABLED
895 return(1);
896 #else
897 return(0);
898 #endif
899 case XML_WITH_READER:
900 #ifdef LIBXML_READER_ENABLED
901 return(1);
902 #else
903 return(0);
904 #endif
905 case XML_WITH_PATTERN:
906 #ifdef LIBXML_PATTERN_ENABLED
907 return(1);
908 #else
909 return(0);
910 #endif
911 case XML_WITH_WRITER:
912 #ifdef LIBXML_WRITER_ENABLED
913 return(1);
914 #else
915 return(0);
916 #endif
917 case XML_WITH_SAX1:
918 #ifdef LIBXML_SAX1_ENABLED
919 return(1);
920 #else
921 return(0);
922 #endif
923 case XML_WITH_FTP:
924 #ifdef LIBXML_FTP_ENABLED
925 return(1);
926 #else
927 return(0);
928 #endif
929 case XML_WITH_HTTP:
930 #ifdef LIBXML_HTTP_ENABLED
931 return(1);
932 #else
933 return(0);
934 #endif
935 case XML_WITH_VALID:
936 #ifdef LIBXML_VALID_ENABLED
937 return(1);
938 #else
939 return(0);
940 #endif
941 case XML_WITH_HTML:
942 #ifdef LIBXML_HTML_ENABLED
943 return(1);
944 #else
945 return(0);
946 #endif
947 case XML_WITH_LEGACY:
948 #ifdef LIBXML_LEGACY_ENABLED
949 return(1);
950 #else
951 return(0);
952 #endif
953 case XML_WITH_C14N:
954 #ifdef LIBXML_C14N_ENABLED
955 return(1);
956 #else
957 return(0);
958 #endif
959 case XML_WITH_CATALOG:
960 #ifdef LIBXML_CATALOG_ENABLED
961 return(1);
962 #else
963 return(0);
964 #endif
965 case XML_WITH_XPATH:
966 #ifdef LIBXML_XPATH_ENABLED
967 return(1);
968 #else
969 return(0);
970 #endif
971 case XML_WITH_XPTR:
972 #ifdef LIBXML_XPTR_ENABLED
973 return(1);
974 #else
975 return(0);
976 #endif
977 case XML_WITH_XINCLUDE:
978 #ifdef LIBXML_XINCLUDE_ENABLED
979 return(1);
980 #else
981 return(0);
982 #endif
983 case XML_WITH_ICONV:
984 #ifdef LIBXML_ICONV_ENABLED
985 return(1);
986 #else
987 return(0);
988 #endif
989 case XML_WITH_ISO8859X:
990 #ifdef LIBXML_ISO8859X_ENABLED
991 return(1);
992 #else
993 return(0);
994 #endif
995 case XML_WITH_UNICODE:
996 #ifdef LIBXML_UNICODE_ENABLED
997 return(1);
998 #else
999 return(0);
1000 #endif
1001 case XML_WITH_REGEXP:
1002 #ifdef LIBXML_REGEXP_ENABLED
1003 return(1);
1004 #else
1005 return(0);
1006 #endif
1007 case XML_WITH_AUTOMATA:
1008 #ifdef LIBXML_AUTOMATA_ENABLED
1009 return(1);
1010 #else
1011 return(0);
1012 #endif
1013 case XML_WITH_EXPR:
1014 #ifdef LIBXML_EXPR_ENABLED
1015 return(1);
1016 #else
1017 return(0);
1018 #endif
1019 case XML_WITH_SCHEMAS:
1020 #ifdef LIBXML_SCHEMAS_ENABLED
1021 return(1);
1022 #else
1023 return(0);
1024 #endif
1025 case XML_WITH_SCHEMATRON:
1026 #ifdef LIBXML_SCHEMATRON_ENABLED
1027 return(1);
1028 #else
1029 return(0);
1030 #endif
1031 case XML_WITH_MODULES:
1032 #ifdef LIBXML_MODULES_ENABLED
1033 return(1);
1034 #else
1035 return(0);
1036 #endif
1037 case XML_WITH_DEBUG:
1038 #ifdef LIBXML_DEBUG_ENABLED
1039 return(1);
1040 #else
1041 return(0);
1042 #endif
1043 case XML_WITH_DEBUG_MEM:
1044 #ifdef DEBUG_MEMORY_LOCATION
1045 return(1);
1046 #else
1047 return(0);
1048 #endif
1049 case XML_WITH_DEBUG_RUN:
1050 #ifdef LIBXML_DEBUG_RUNTIME
1051 return(1);
1052 #else
1053 return(0);
1054 #endif
1055 case XML_WITH_ZLIB:
1056 #ifdef LIBXML_ZLIB_ENABLED
1057 return(1);
1058 #else
1059 return(0);
1060 #endif
1061 case XML_WITH_LZMA:
1062 #ifdef LIBXML_LZMA_ENABLED
1063 return(1);
1064 #else
1065 return(0);
1066 #endif
1067 case XML_WITH_ICU:
1068 #ifdef LIBXML_ICU_ENABLED
1069 return(1);
1070 #else
1071 return(0);
1072 #endif
1073 default:
1074 break;
1076 return(0);
1079 /************************************************************************
1081 * SAX2 defaulted attributes handling *
1083 ************************************************************************/
1086 * xmlDetectSAX2:
1087 * @ctxt: an XML parser context
1089 * Do the SAX2 detection and specific initialization
1091 static void
1092 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1093 xmlSAXHandlerPtr sax;
1095 /* Avoid unused variable warning if features are disabled. */
1096 (void) sax;
1098 if (ctxt == NULL) return;
1099 sax = ctxt->sax;
1100 #ifdef LIBXML_SAX1_ENABLED
1101 if ((sax) && (sax->initialized == XML_SAX2_MAGIC) &&
1102 ((sax->startElementNs != NULL) ||
1103 (sax->endElementNs != NULL) ||
1104 ((sax->startElement == NULL) && (sax->endElement == NULL))))
1105 ctxt->sax2 = 1;
1106 #else
1107 ctxt->sax2 = 1;
1108 #endif /* LIBXML_SAX1_ENABLED */
1110 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1111 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1112 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1113 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1114 (ctxt->str_xml_ns == NULL)) {
1115 xmlErrMemory(ctxt, NULL);
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
/*
 * Set of defaulted attributes attached to one element: a counted header
 * followed by the array of attribute strings.
 */
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes on that element */
    int maxAttrs;	/* the size of the array */
#if __STDC_VERSION__ >= 199901L
    /* Using a C99 flexible array member avoids UBSan errors. */
    const xmlChar *values[]; /* array of localname/prefix/values/external */
#else
    /* Pre-C99 fallback: fixed-size trailing array declaration. */
    const xmlChar *values[5];
#endif
};
1133 * xmlAttrNormalizeSpace:
1134 * @src: the source string
1135 * @dst: the target string
1137 * Normalize the space in non CDATA attribute values:
1138 * If the attribute type is not CDATA, then the XML processor MUST further
1139 * process the normalized attribute value by discarding any leading and
1140 * trailing space (#x20) characters, and by replacing sequences of space
1141 * (#x20) characters by a single space (#x20) character.
1142 * Note that the size of dst need to be at least src, and if one doesn't need
1143 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1144 * passing src as dst is just fine.
1146 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1147 * is needed.
1149 static xmlChar *
1150 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1152 if ((src == NULL) || (dst == NULL))
1153 return(NULL);
1155 while (*src == 0x20) src++;
1156 while (*src != 0) {
1157 if (*src == 0x20) {
1158 while (*src == 0x20) src++;
1159 if (*src != 0)
1160 *dst++ = 0x20;
1161 } else {
1162 *dst++ = *src++;
1165 *dst = 0;
1166 if (dst == src)
1167 return(NULL);
1168 return(dst);
1172 * xmlAttrNormalizeSpace2:
1173 * @src: the source string
1175 * Normalize the space in non CDATA attribute values, a slightly more complex
1176 * front end to avoid allocation problems when running on attribute values
1177 * coming from the input.
1179 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1180 * is needed.
1182 static const xmlChar *
1183 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1185 int i;
1186 int remove_head = 0;
1187 int need_realloc = 0;
1188 const xmlChar *cur;
1190 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1191 return(NULL);
1192 i = *len;
1193 if (i <= 0)
1194 return(NULL);
1196 cur = src;
1197 while (*cur == 0x20) {
1198 cur++;
1199 remove_head++;
1201 while (*cur != 0) {
1202 if (*cur == 0x20) {
1203 cur++;
1204 if ((*cur == 0x20) || (*cur == 0)) {
1205 need_realloc = 1;
1206 break;
1208 } else
1209 cur++;
1211 if (need_realloc) {
1212 xmlChar *ret;
1214 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1215 if (ret == NULL) {
1216 xmlErrMemory(ctxt, NULL);
1217 return(NULL);
1219 xmlAttrNormalizeSpace(ret, ret);
1220 *len = (int) strlen((const char *)ret);
1221 return(ret);
1222 } else if (remove_head) {
1223 *len -= remove_head;
1224 memmove(src, src + remove_head, 1 + *len);
1225 return(src);
1227 return(NULL);
1231 * xmlAddDefAttrs:
1232 * @ctxt: an XML parser context
1233 * @fullname: the element fullname
1234 * @fullattr: the attribute fullname
1235 * @value: the attribute value
1237 * Add a defaulted attribute for an element
1239 static void
1240 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1241 const xmlChar *fullname,
1242 const xmlChar *fullattr,
1243 const xmlChar *value) {
1244 xmlDefAttrsPtr defaults;
1245 int len;
1246 const xmlChar *name;
1247 const xmlChar *prefix;
1250 * Allows to detect attribute redefinitions
1252 if (ctxt->attsSpecial != NULL) {
1253 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1254 return;
1257 if (ctxt->attsDefault == NULL) {
1258 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1259 if (ctxt->attsDefault == NULL)
1260 goto mem_error;
1264 * split the element name into prefix:localname , the string found
1265 * are within the DTD and then not associated to namespace names.
1267 name = xmlSplitQName3(fullname, &len);
1268 if (name == NULL) {
1269 name = xmlDictLookup(ctxt->dict, fullname, -1);
1270 prefix = NULL;
1271 } else {
1272 name = xmlDictLookup(ctxt->dict, name, -1);
1273 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1277 * make sure there is some storage
1279 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1280 if (defaults == NULL) {
1281 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1282 (4 * 5) * sizeof(const xmlChar *));
1283 if (defaults == NULL)
1284 goto mem_error;
1285 defaults->nbAttrs = 0;
1286 defaults->maxAttrs = 4;
1287 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1288 defaults, NULL) < 0) {
1289 xmlFree(defaults);
1290 goto mem_error;
1292 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1293 xmlDefAttrsPtr temp;
1295 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1296 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1297 if (temp == NULL)
1298 goto mem_error;
1299 defaults = temp;
1300 defaults->maxAttrs *= 2;
1301 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1302 defaults, NULL) < 0) {
1303 xmlFree(defaults);
1304 goto mem_error;
1309 * Split the element name into prefix:localname , the string found
1310 * are within the DTD and hen not associated to namespace names.
1312 name = xmlSplitQName3(fullattr, &len);
1313 if (name == NULL) {
1314 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1315 prefix = NULL;
1316 } else {
1317 name = xmlDictLookup(ctxt->dict, name, -1);
1318 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1321 defaults->values[5 * defaults->nbAttrs] = name;
1322 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1323 /* intern the string and precompute the end */
1324 len = xmlStrlen(value);
1325 value = xmlDictLookup(ctxt->dict, value, len);
1326 defaults->values[5 * defaults->nbAttrs + 2] = value;
1327 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1328 if (ctxt->external)
1329 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1330 else
1331 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1332 defaults->nbAttrs++;
1334 return;
1336 mem_error:
1337 xmlErrMemory(ctxt, NULL);
1338 return;
1342 * xmlAddSpecialAttr:
1343 * @ctxt: an XML parser context
1344 * @fullname: the element fullname
1345 * @fullattr: the attribute fullname
1346 * @type: the attribute type
1348 * Register this attribute type
1350 static void
1351 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1352 const xmlChar *fullname,
1353 const xmlChar *fullattr,
1354 int type)
1356 if (ctxt->attsSpecial == NULL) {
1357 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1358 if (ctxt->attsSpecial == NULL)
1359 goto mem_error;
1362 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1363 return;
1365 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1366 (void *) (ptrdiff_t) type);
1367 return;
1369 mem_error:
1370 xmlErrMemory(ctxt, NULL);
1371 return;
1375 * xmlCleanSpecialAttrCallback:
1377 * Removes CDATA attributes from the special attribute table
1379 static void
1380 xmlCleanSpecialAttrCallback(void *payload, void *data,
1381 const xmlChar *fullname, const xmlChar *fullattr,
1382 const xmlChar *unused ATTRIBUTE_UNUSED) {
1383 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1385 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1386 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1391 * xmlCleanSpecialAttr:
1392 * @ctxt: an XML parser context
1394 * Trim the list of attributes defined to remove all those of type
1395 * CDATA as they are not special. This call should be done when finishing
1396 * to parse the DTD and before starting to parse the document root.
1398 static void
1399 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1401 if (ctxt->attsSpecial == NULL)
1402 return;
1404 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1406 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1407 xmlHashFree(ctxt->attsSpecial, NULL);
1408 ctxt->attsSpecial = NULL;
1410 return;
1414 * xmlCheckLanguageID:
1415 * @lang: pointer to the string value
1417 * Checks that the value conforms to the LanguageID production:
1419 * NOTE: this is somewhat deprecated, those productions were removed from
1420 * the XML Second edition.
1422 * [33] LanguageID ::= Langcode ('-' Subcode)*
1423 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1424 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1425 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1426 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1427 * [38] Subcode ::= ([a-z] | [A-Z])+
1429 * The current REC reference the successors of RFC 1766, currently 5646
1431 * http://www.rfc-editor.org/rfc/rfc5646.txt
1432 * langtag = language
1433 * ["-" script]
1434 * ["-" region]
1435 * *("-" variant)
1436 * *("-" extension)
1437 * ["-" privateuse]
1438 * language = 2*3ALPHA ; shortest ISO 639 code
1439 * ["-" extlang] ; sometimes followed by
1440 * ; extended language subtags
1441 * / 4ALPHA ; or reserved for future use
1442 * / 5*8ALPHA ; or registered language subtag
1444 * extlang = 3ALPHA ; selected ISO 639 codes
1445 * *2("-" 3ALPHA) ; permanently reserved
1447 * script = 4ALPHA ; ISO 15924 code
1449 * region = 2ALPHA ; ISO 3166-1 code
1450 * / 3DIGIT ; UN M.49 code
1452 * variant = 5*8alphanum ; registered variants
1453 * / (DIGIT 3alphanum)
1455 * extension = singleton 1*("-" (2*8alphanum))
1457 * ; Single alphanumerics
1458 * ; "x" reserved for private use
1459 * singleton = DIGIT ; 0 - 9
1460 * / %x41-57 ; A - W
1461 * / %x59-5A ; Y - Z
1462 * / %x61-77 ; a - w
1463 * / %x79-7A ; y - z
1465 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1466 * The parser below doesn't try to cope with extension or privateuse
1467 * that could be added but that's not interoperable anyway
1469 * Returns 1 if correct 0 otherwise
1472 xmlCheckLanguageID(const xmlChar * lang)
1474 const xmlChar *cur = lang, *nxt;
1476 if (cur == NULL)
1477 return (0);
1478 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1479 ((cur[0] == 'I') && (cur[1] == '-')) ||
1480 ((cur[0] == 'x') && (cur[1] == '-')) ||
1481 ((cur[0] == 'X') && (cur[1] == '-'))) {
1483 * Still allow IANA code and user code which were coming
1484 * from the previous version of the XML-1.0 specification
1485 * it's deprecated but we should not fail
1487 cur += 2;
1488 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1489 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1490 cur++;
1491 return(cur[0] == 0);
1493 nxt = cur;
1494 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1495 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1496 nxt++;
1497 if (nxt - cur >= 4) {
1499 * Reserved
1501 if ((nxt - cur > 8) || (nxt[0] != 0))
1502 return(0);
1503 return(1);
1505 if (nxt - cur < 2)
1506 return(0);
1507 /* we got an ISO 639 code */
1508 if (nxt[0] == 0)
1509 return(1);
1510 if (nxt[0] != '-')
1511 return(0);
1513 nxt++;
1514 cur = nxt;
1515 /* now we can have extlang or script or region or variant */
1516 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1517 goto region_m49;
1519 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1520 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1521 nxt++;
1522 if (nxt - cur == 4)
1523 goto script;
1524 if (nxt - cur == 2)
1525 goto region;
1526 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1527 goto variant;
1528 if (nxt - cur != 3)
1529 return(0);
1530 /* we parsed an extlang */
1531 if (nxt[0] == 0)
1532 return(1);
1533 if (nxt[0] != '-')
1534 return(0);
1536 nxt++;
1537 cur = nxt;
1538 /* now we can have script or region or variant */
1539 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1540 goto region_m49;
1542 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1543 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1544 nxt++;
1545 if (nxt - cur == 2)
1546 goto region;
1547 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1548 goto variant;
1549 if (nxt - cur != 4)
1550 return(0);
1551 /* we parsed a script */
1552 script:
1553 if (nxt[0] == 0)
1554 return(1);
1555 if (nxt[0] != '-')
1556 return(0);
1558 nxt++;
1559 cur = nxt;
1560 /* now we can have region or variant */
1561 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1562 goto region_m49;
1564 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1565 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1566 nxt++;
1568 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1569 goto variant;
1570 if (nxt - cur != 2)
1571 return(0);
1572 /* we parsed a region */
1573 region:
1574 if (nxt[0] == 0)
1575 return(1);
1576 if (nxt[0] != '-')
1577 return(0);
1579 nxt++;
1580 cur = nxt;
1581 /* now we can just have a variant */
1582 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1583 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1584 nxt++;
1586 if ((nxt - cur < 5) || (nxt - cur > 8))
1587 return(0);
1589 /* we parsed a variant */
1590 variant:
1591 if (nxt[0] == 0)
1592 return(1);
1593 if (nxt[0] != '-')
1594 return(0);
1595 /* extensions and private use subtags not checked */
1596 return (1);
1598 region_m49:
1599 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1600 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1601 nxt += 3;
1602 goto region;
1604 return(0);
1607 /************************************************************************
1609 * Parser stacks related functions and macros *
1611 ************************************************************************/
1613 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1614 const xmlChar ** str);
1616 #ifdef SAX2
1618 * nsPush:
1619 * @ctxt: an XML parser context
1620 * @prefix: the namespace prefix or NULL
1621 * @URL: the namespace name
1623 * Pushes a new parser namespace on top of the ns stack
1625 * Returns -1 in case of error, -2 if the namespace should be discarded
1626 * and the index in the stack otherwise.
1628 static int
1629 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1631 if (ctxt->options & XML_PARSE_NSCLEAN) {
1632 int i;
1633 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1634 if (ctxt->nsTab[i] == prefix) {
1635 /* in scope */
1636 if (ctxt->nsTab[i + 1] == URL)
1637 return(-2);
1638 /* out of scope keep it */
1639 break;
1643 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1644 ctxt->nsMax = 10;
1645 ctxt->nsNr = 0;
1646 ctxt->nsTab = (const xmlChar **)
1647 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1648 if (ctxt->nsTab == NULL) {
1649 xmlErrMemory(ctxt, NULL);
1650 ctxt->nsMax = 0;
1651 return (-1);
1653 } else if (ctxt->nsNr >= ctxt->nsMax) {
1654 const xmlChar ** tmp;
1655 ctxt->nsMax *= 2;
1656 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1657 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1658 if (tmp == NULL) {
1659 xmlErrMemory(ctxt, NULL);
1660 ctxt->nsMax /= 2;
1661 return (-1);
1663 ctxt->nsTab = tmp;
1665 ctxt->nsTab[ctxt->nsNr++] = prefix;
1666 ctxt->nsTab[ctxt->nsNr++] = URL;
1667 return (ctxt->nsNr);
1670 * nsPop:
1671 * @ctxt: an XML parser context
1672 * @nr: the number to pop
1674 * Pops the top @nr parser prefix/namespace from the ns stack
1676 * Returns the number of namespaces removed
1678 static int
1679 nsPop(xmlParserCtxtPtr ctxt, int nr)
1681 int i;
1683 if (ctxt->nsTab == NULL) return(0);
1684 if (ctxt->nsNr < nr) {
1685 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1686 nr = ctxt->nsNr;
1688 if (ctxt->nsNr <= 0)
1689 return (0);
1691 for (i = 0;i < nr;i++) {
1692 ctxt->nsNr--;
1693 ctxt->nsTab[ctxt->nsNr] = NULL;
1695 return(nr);
1697 #endif
1699 static int
1700 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1701 const xmlChar **atts;
1702 int *attallocs;
1703 int maxatts;
1705 if (ctxt->atts == NULL) {
1706 maxatts = 55; /* allow for 10 attrs by default */
1707 atts = (const xmlChar **)
1708 xmlMalloc(maxatts * sizeof(xmlChar *));
1709 if (atts == NULL) goto mem_error;
1710 ctxt->atts = atts;
1711 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1712 if (attallocs == NULL) goto mem_error;
1713 ctxt->attallocs = attallocs;
1714 ctxt->maxatts = maxatts;
1715 } else if (nr + 5 > ctxt->maxatts) {
1716 maxatts = (nr + 5) * 2;
1717 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1718 maxatts * sizeof(const xmlChar *));
1719 if (atts == NULL) goto mem_error;
1720 ctxt->atts = atts;
1721 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1722 (maxatts / 5) * sizeof(int));
1723 if (attallocs == NULL) goto mem_error;
1724 ctxt->attallocs = attallocs;
1725 ctxt->maxatts = maxatts;
1727 return(ctxt->maxatts);
1728 mem_error:
1729 xmlErrMemory(ctxt, NULL);
1730 return(-1);
1734 * inputPush:
1735 * @ctxt: an XML parser context
1736 * @value: the parser input
1738 * Pushes a new parser input on top of the input stack
1740 * Returns -1 in case of error, the index in the stack otherwise
1743 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1745 if ((ctxt == NULL) || (value == NULL))
1746 return(-1);
1747 if (ctxt->inputNr >= ctxt->inputMax) {
1748 ctxt->inputMax *= 2;
1749 ctxt->inputTab =
1750 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1751 ctxt->inputMax *
1752 sizeof(ctxt->inputTab[0]));
1753 if (ctxt->inputTab == NULL) {
1754 xmlErrMemory(ctxt, NULL);
1755 ctxt->inputMax /= 2;
1756 return (-1);
1759 ctxt->inputTab[ctxt->inputNr] = value;
1760 ctxt->input = value;
1761 return (ctxt->inputNr++);
1764 * inputPop:
1765 * @ctxt: an XML parser context
1767 * Pops the top parser input from the input stack
1769 * Returns the input just removed
1771 xmlParserInputPtr
1772 inputPop(xmlParserCtxtPtr ctxt)
1774 xmlParserInputPtr ret;
1776 if (ctxt == NULL)
1777 return(NULL);
1778 if (ctxt->inputNr <= 0)
1779 return (NULL);
1780 ctxt->inputNr--;
1781 if (ctxt->inputNr > 0)
1782 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1783 else
1784 ctxt->input = NULL;
1785 ret = ctxt->inputTab[ctxt->inputNr];
1786 ctxt->inputTab[ctxt->inputNr] = NULL;
1787 return (ret);
1790 * nodePush:
1791 * @ctxt: an XML parser context
1792 * @value: the element node
1794 * Pushes a new element node on top of the node stack
1796 * Returns -1 in case of error, the index in the stack otherwise
1799 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1801 if (ctxt == NULL) return(0);
1802 if (ctxt->nodeNr >= ctxt->nodeMax) {
1803 xmlNodePtr *tmp;
1805 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1806 ctxt->nodeMax * 2 *
1807 sizeof(ctxt->nodeTab[0]));
1808 if (tmp == NULL) {
1809 xmlErrMemory(ctxt, NULL);
1810 return (-1);
1812 ctxt->nodeTab = tmp;
1813 ctxt->nodeMax *= 2;
1815 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1816 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1817 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1818 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1819 xmlParserMaxDepth);
1820 xmlHaltParser(ctxt);
1821 return(-1);
1823 ctxt->nodeTab[ctxt->nodeNr] = value;
1824 ctxt->node = value;
1825 return (ctxt->nodeNr++);
1829 * nodePop:
1830 * @ctxt: an XML parser context
1832 * Pops the top element node from the node stack
1834 * Returns the node just removed
1836 xmlNodePtr
1837 nodePop(xmlParserCtxtPtr ctxt)
1839 xmlNodePtr ret;
1841 if (ctxt == NULL) return(NULL);
1842 if (ctxt->nodeNr <= 0)
1843 return (NULL);
1844 ctxt->nodeNr--;
1845 if (ctxt->nodeNr > 0)
1846 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1847 else
1848 ctxt->node = NULL;
1849 ret = ctxt->nodeTab[ctxt->nodeNr];
1850 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1851 return (ret);
1855 * nameNsPush:
1856 * @ctxt: an XML parser context
1857 * @value: the element name
1858 * @prefix: the element prefix
1859 * @URI: the element namespace name
1860 * @line: the current line number for error messages
1861 * @nsNr: the number of namespaces pushed on the namespace table
1863 * Pushes a new element name/prefix/URL on top of the name stack
1865 * Returns -1 in case of error, the index in the stack otherwise
1867 static int
1868 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1869 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1871 xmlStartTag *tag;
1873 if (ctxt->nameNr >= ctxt->nameMax) {
1874 const xmlChar * *tmp;
1875 xmlStartTag *tmp2;
1876 ctxt->nameMax *= 2;
1877 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1878 ctxt->nameMax *
1879 sizeof(ctxt->nameTab[0]));
1880 if (tmp == NULL) {
1881 ctxt->nameMax /= 2;
1882 goto mem_error;
1884 ctxt->nameTab = tmp;
1885 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1886 ctxt->nameMax *
1887 sizeof(ctxt->pushTab[0]));
1888 if (tmp2 == NULL) {
1889 ctxt->nameMax /= 2;
1890 goto mem_error;
1892 ctxt->pushTab = tmp2;
1893 } else if (ctxt->pushTab == NULL) {
1894 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1895 sizeof(ctxt->pushTab[0]));
1896 if (ctxt->pushTab == NULL)
1897 goto mem_error;
1899 ctxt->nameTab[ctxt->nameNr] = value;
1900 ctxt->name = value;
1901 tag = &ctxt->pushTab[ctxt->nameNr];
1902 tag->prefix = prefix;
1903 tag->URI = URI;
1904 tag->line = line;
1905 tag->nsNr = nsNr;
1906 return (ctxt->nameNr++);
1907 mem_error:
1908 xmlErrMemory(ctxt, NULL);
1909 return (-1);
1911 #ifdef LIBXML_PUSH_ENABLED
1913 * nameNsPop:
1914 * @ctxt: an XML parser context
1916 * Pops the top element/prefix/URI name from the name stack
1918 * Returns the name just removed
1920 static const xmlChar *
1921 nameNsPop(xmlParserCtxtPtr ctxt)
1923 const xmlChar *ret;
1925 if (ctxt->nameNr <= 0)
1926 return (NULL);
1927 ctxt->nameNr--;
1928 if (ctxt->nameNr > 0)
1929 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1930 else
1931 ctxt->name = NULL;
1932 ret = ctxt->nameTab[ctxt->nameNr];
1933 ctxt->nameTab[ctxt->nameNr] = NULL;
1934 return (ret);
1936 #endif /* LIBXML_PUSH_ENABLED */
1939 * namePush:
1940 * @ctxt: an XML parser context
1941 * @value: the element name
1943 * Pushes a new element name on top of the name stack
1945 * Returns -1 in case of error, the index in the stack otherwise
1948 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1950 if (ctxt == NULL) return (-1);
1952 if (ctxt->nameNr >= ctxt->nameMax) {
1953 const xmlChar * *tmp;
1954 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1955 ctxt->nameMax * 2 *
1956 sizeof(ctxt->nameTab[0]));
1957 if (tmp == NULL) {
1958 goto mem_error;
1960 ctxt->nameTab = tmp;
1961 ctxt->nameMax *= 2;
1963 ctxt->nameTab[ctxt->nameNr] = value;
1964 ctxt->name = value;
1965 return (ctxt->nameNr++);
1966 mem_error:
1967 xmlErrMemory(ctxt, NULL);
1968 return (-1);
1971 * namePop:
1972 * @ctxt: an XML parser context
1974 * Pops the top element name from the name stack
1976 * Returns the name just removed
1978 const xmlChar *
1979 namePop(xmlParserCtxtPtr ctxt)
1981 const xmlChar *ret;
1983 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1984 return (NULL);
1985 ctxt->nameNr--;
1986 if (ctxt->nameNr > 0)
1987 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1988 else
1989 ctxt->name = NULL;
1990 ret = ctxt->nameTab[ctxt->nameNr];
1991 ctxt->nameTab[ctxt->nameNr] = NULL;
1992 return (ret);
1995 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1996 if (ctxt->spaceNr >= ctxt->spaceMax) {
1997 int *tmp;
1999 ctxt->spaceMax *= 2;
2000 tmp = (int *) xmlRealloc(ctxt->spaceTab,
2001 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
2002 if (tmp == NULL) {
2003 xmlErrMemory(ctxt, NULL);
2004 ctxt->spaceMax /=2;
2005 return(-1);
2007 ctxt->spaceTab = tmp;
2009 ctxt->spaceTab[ctxt->spaceNr] = val;
2010 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
2011 return(ctxt->spaceNr++);
2014 static int spacePop(xmlParserCtxtPtr ctxt) {
2015 int ret;
2016 if (ctxt->spaceNr <= 0) return(0);
2017 ctxt->spaceNr--;
2018 if (ctxt->spaceNr > 0)
2019 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
2020 else
2021 ctxt->space = &ctxt->spaceTab[0];
2022 ret = ctxt->spaceTab[ctxt->spaceNr];
2023 ctxt->spaceTab[ctxt->spaceNr] = -1;
2024 return(ret);
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. They all expect a variable named `ctxt` to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   SKIPL(n) Same as SKIP but also keeps line/column up to date when
 *           newlines are crossed.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 *   CMPn(s, c1 ... cn) true when the first n bytes at s equal c1 ... cn.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )

#define SKIP(val) do { \
    ctxt->input->cur += (val),ctxt->input->col+=(val); \
    if (*ctxt->input->cur == 0) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  } while (0)

#define SKIPL(val) do { \
    int skipl; \
    for(skipl=0; skipl<val; skipl++) { \
        if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
        } else ctxt->input->col++; \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == 0) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  } while (0)

/* Only shrinks in non-progressive mode, once enough input is consumed. */
#define SHRINK if ((ctxt->progressive == 0) && \
                   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
                   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
        xmlSHRINK (ctxt);
2109 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2110 xmlParserInputShrink(ctxt->input);
2111 if (*ctxt->input->cur == 0)
2112 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2115 #define GROW if ((ctxt->progressive == 0) && \
2116 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2117 xmlGROW (ctxt);
2119 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2120 ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2121 ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2123 if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2124 (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2125 ((ctxt->input->buf) &&
2126 (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
2127 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2128 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2129 xmlHaltParser(ctxt);
2130 return;
2132 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2133 if ((ctxt->input->cur > ctxt->input->end) ||
2134 (ctxt->input->cur < ctxt->input->base)) {
2135 xmlHaltParser(ctxt);
2136 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2137 return;
2139 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2140 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
/* Skip blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance by one (possibly multi-byte) character. */
#define NEXT xmlNextChar(ctxt)

/* Advance by exactly one xmlChar, refilling the input on a 0 byte. */
#define NEXT1 { \
        ctxt->input->col++; \
        ctxt->input->cur++; \
        if (*ctxt->input->cur == 0) \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    }

/* Advance by l xmlChars, keeping line and column up to date. */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->input->cur += l; \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/* Append character v (encoded on l xmlChars) to buffer b at index i. */
#define COPY_BUF(l,b,i,v) \
    if (l == 1) b[i++] = (xmlChar) v; \
    else i += xmlCopyCharMultiByte(&b[i],v)

/* Amount of input consumed so far, including the current buffer. */
#define CUR_CONSUMED \
    (ctxt->input->consumed + (ctxt->input->cur - ctxt->input->base))
2172 * xmlSkipBlankChars:
2173 * @ctxt: the XML parser context
2175 * skip all blanks character found at that point in the input streams.
2176 * It pops up finished entities in the process if allowable at that point.
2178 * Returns the number of space chars skipped
2182 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2183 int res = 0;
2186 * It's Okay to use CUR/NEXT here since all the blanks are on
2187 * the ASCII range.
2189 if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2190 (ctxt->instate == XML_PARSER_START)) {
2191 const xmlChar *cur;
2193 * if we are in the document content, go really fast
2195 cur = ctxt->input->cur;
2196 while (IS_BLANK_CH(*cur)) {
2197 if (*cur == '\n') {
2198 ctxt->input->line++; ctxt->input->col = 1;
2199 } else {
2200 ctxt->input->col++;
2202 cur++;
2203 if (res < INT_MAX)
2204 res++;
2205 if (*cur == 0) {
2206 ctxt->input->cur = cur;
2207 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2208 cur = ctxt->input->cur;
2211 ctxt->input->cur = cur;
2212 } else {
2213 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2215 while (1) {
2216 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2217 NEXT;
2218 } else if (CUR == '%') {
2220 * Need to handle support of entities branching here
2222 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2223 break;
2224 xmlParsePEReference(ctxt);
2225 } else if (CUR == 0) {
2226 if (ctxt->inputNr <= 1)
2227 break;
2228 xmlPopInput(ctxt);
2229 } else {
2230 break;
2234 * Also increase the counter when entering or exiting a PERef.
2235 * The spec says: "When a parameter-entity reference is recognized
2236 * in the DTD and included, its replacement text MUST be enlarged
2237 * by the attachment of one leading and one following space (#x20)
2238 * character."
2240 if (res < INT_MAX)
2241 res++;
2244 return(res);
2247 /************************************************************************
2249 * Commodity functions to handle entities *
2251 ************************************************************************/
2254 * xmlPopInput:
2255 * @ctxt: an XML parser context
2257 * xmlPopInput: the current input pointed by ctxt->input came to an end
2258 * pop it and return the next char.
2260 * Returns the current xmlChar in the parser context
2262 xmlChar
2263 xmlPopInput(xmlParserCtxtPtr ctxt) {
2264 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2265 if (xmlParserDebugEntities)
2266 xmlGenericError(xmlGenericErrorContext,
2267 "Popping input %d\n", ctxt->inputNr);
2268 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2269 (ctxt->instate != XML_PARSER_EOF))
2270 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2271 "Unfinished entity outside the DTD");
2272 xmlFreeInputStream(inputPop(ctxt));
2273 if (*ctxt->input->cur == 0)
2274 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2275 return(CUR);
2279 * xmlPushInput:
2280 * @ctxt: an XML parser context
2281 * @input: an XML parser input fragment (entity, XML fragment ...).
2283 * xmlPushInput: switch to a new input stream which is stacked on top
2284 * of the previous one(s).
2285 * Returns -1 in case of error or the index in the input stack
2288 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2289 int ret;
2290 if (input == NULL) return(-1);
2292 if (xmlParserDebugEntities) {
2293 if ((ctxt->input != NULL) && (ctxt->input->filename))
2294 xmlGenericError(xmlGenericErrorContext,
2295 "%s(%d): ", ctxt->input->filename,
2296 ctxt->input->line);
2297 xmlGenericError(xmlGenericErrorContext,
2298 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2300 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2301 (ctxt->inputNr > 1024)) {
2302 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2303 while (ctxt->inputNr > 1)
2304 xmlFreeInputStream(inputPop(ctxt));
2305 return(-1);
2307 ret = inputPush(ctxt, input);
2308 if (ctxt->instate == XML_PARSER_EOF)
2309 return(-1);
2310 GROW;
2311 return(ret);
2315 * xmlParseCharRef:
2316 * @ctxt: an XML parser context
2318 * parse Reference declarations
2320 * [66] CharRef ::= '&#' [0-9]+ ';' |
2321 * '&#x' [0-9a-fA-F]+ ';'
2323 * [ WFC: Legal Character ]
2324 * Characters referred to using character references must match the
2325 * production for Char.
2327 * Returns the value parsed (as an int), 0 in case of error
2330 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2331 int val = 0;
2332 int count = 0;
2335 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2337 if ((RAW == '&') && (NXT(1) == '#') &&
2338 (NXT(2) == 'x')) {
2339 SKIP(3);
2340 GROW;
2341 while (RAW != ';') { /* loop blocked by count */
2342 if (count++ > 20) {
2343 count = 0;
2344 GROW;
2345 if (ctxt->instate == XML_PARSER_EOF)
2346 return(0);
2348 if ((RAW >= '0') && (RAW <= '9'))
2349 val = val * 16 + (CUR - '0');
2350 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2351 val = val * 16 + (CUR - 'a') + 10;
2352 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2353 val = val * 16 + (CUR - 'A') + 10;
2354 else {
2355 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2356 val = 0;
2357 break;
2359 if (val > 0x110000)
2360 val = 0x110000;
2362 NEXT;
2363 count++;
2365 if (RAW == ';') {
2366 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2367 ctxt->input->col++;
2368 ctxt->input->cur++;
2370 } else if ((RAW == '&') && (NXT(1) == '#')) {
2371 SKIP(2);
2372 GROW;
2373 while (RAW != ';') { /* loop blocked by count */
2374 if (count++ > 20) {
2375 count = 0;
2376 GROW;
2377 if (ctxt->instate == XML_PARSER_EOF)
2378 return(0);
2380 if ((RAW >= '0') && (RAW <= '9'))
2381 val = val * 10 + (CUR - '0');
2382 else {
2383 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2384 val = 0;
2385 break;
2387 if (val > 0x110000)
2388 val = 0x110000;
2390 NEXT;
2391 count++;
2393 if (RAW == ';') {
2394 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2395 ctxt->input->col++;
2396 ctxt->input->cur++;
2398 } else {
2399 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2403 * [ WFC: Legal Character ]
2404 * Characters referred to using character references must match the
2405 * production for Char.
2407 if (val >= 0x110000) {
2408 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2409 "xmlParseCharRef: character reference out of bounds\n",
2410 val);
2411 } else if (IS_CHAR(val)) {
2412 return(val);
2413 } else {
2414 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2415 "xmlParseCharRef: invalid xmlChar value %d\n",
2416 val);
2418 return(0);
2422 * xmlParseStringCharRef:
2423 * @ctxt: an XML parser context
2424 * @str: a pointer to an index in the string
2426 * parse Reference declarations, variant parsing from a string rather
2427 * than an an input flow.
2429 * [66] CharRef ::= '&#' [0-9]+ ';' |
2430 * '&#x' [0-9a-fA-F]+ ';'
2432 * [ WFC: Legal Character ]
2433 * Characters referred to using character references must match the
2434 * production for Char.
2436 * Returns the value parsed (as an int), 0 in case of error, str will be
2437 * updated to the current value of the index
2439 static int
2440 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2441 const xmlChar *ptr;
2442 xmlChar cur;
2443 int val = 0;
2445 if ((str == NULL) || (*str == NULL)) return(0);
2446 ptr = *str;
2447 cur = *ptr;
2448 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2449 ptr += 3;
2450 cur = *ptr;
2451 while (cur != ';') { /* Non input consuming loop */
2452 if ((cur >= '0') && (cur <= '9'))
2453 val = val * 16 + (cur - '0');
2454 else if ((cur >= 'a') && (cur <= 'f'))
2455 val = val * 16 + (cur - 'a') + 10;
2456 else if ((cur >= 'A') && (cur <= 'F'))
2457 val = val * 16 + (cur - 'A') + 10;
2458 else {
2459 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2460 val = 0;
2461 break;
2463 if (val > 0x110000)
2464 val = 0x110000;
2466 ptr++;
2467 cur = *ptr;
2469 if (cur == ';')
2470 ptr++;
2471 } else if ((cur == '&') && (ptr[1] == '#')){
2472 ptr += 2;
2473 cur = *ptr;
2474 while (cur != ';') { /* Non input consuming loops */
2475 if ((cur >= '0') && (cur <= '9'))
2476 val = val * 10 + (cur - '0');
2477 else {
2478 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2479 val = 0;
2480 break;
2482 if (val > 0x110000)
2483 val = 0x110000;
2485 ptr++;
2486 cur = *ptr;
2488 if (cur == ';')
2489 ptr++;
2490 } else {
2491 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2492 return(0);
2494 *str = ptr;
2497 * [ WFC: Legal Character ]
2498 * Characters referred to using character references must match the
2499 * production for Char.
2501 if (val >= 0x110000) {
2502 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2503 "xmlParseStringCharRef: character reference out of bounds\n",
2504 val);
2505 } else if (IS_CHAR(val)) {
2506 return(val);
2507 } else {
2508 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2509 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2510 val);
2512 return(0);
2516 * xmlParserHandlePEReference:
2517 * @ctxt: the parser context
2519 * [69] PEReference ::= '%' Name ';'
2521 * [ WFC: No Recursion ]
2522 * A parsed entity must not contain a recursive
2523 * reference to itself, either directly or indirectly.
2525 * [ WFC: Entity Declared ]
2526 * In a document without any DTD, a document with only an internal DTD
2527 * subset which contains no parameter entity references, or a document
2528 * with "standalone='yes'", ... ... The declaration of a parameter
2529 * entity must precede any reference to it...
2531 * [ VC: Entity Declared ]
2532 * In a document with an external subset or external parameter entities
2533 * with "standalone='no'", ... ... The declaration of a parameter entity
2534 * must precede any reference to it...
2536 * [ WFC: In DTD ]
2537 * Parameter-entity references may only appear in the DTD.
2538 * NOTE: misleading but this is handled.
2540 * A PEReference may have been detected in the current input stream
2541 * the handling is done accordingly to
2542 * http://www.w3.org/TR/REC-xml#entproc
2543 * i.e.
2544 * - Included in literal in entity values
2545 * - Included as Parameter Entity reference within DTDs
2547 void
2548 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2549 switch(ctxt->instate) {
2550 case XML_PARSER_CDATA_SECTION:
2551 return;
2552 case XML_PARSER_COMMENT:
2553 return;
2554 case XML_PARSER_START_TAG:
2555 return;
2556 case XML_PARSER_END_TAG:
2557 return;
2558 case XML_PARSER_EOF:
2559 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2560 return;
2561 case XML_PARSER_PROLOG:
2562 case XML_PARSER_START:
2563 case XML_PARSER_MISC:
2564 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2565 return;
2566 case XML_PARSER_ENTITY_DECL:
2567 case XML_PARSER_CONTENT:
2568 case XML_PARSER_ATTRIBUTE_VALUE:
2569 case XML_PARSER_PI:
2570 case XML_PARSER_SYSTEM_LITERAL:
2571 case XML_PARSER_PUBLIC_LITERAL:
2572 /* we just ignore it there */
2573 return;
2574 case XML_PARSER_EPILOG:
2575 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2576 return;
2577 case XML_PARSER_ENTITY_VALUE:
2579 * NOTE: in the case of entity values, we don't do the
2580 * substitution here since we need the literal
2581 * entity value to be able to save the internal
2582 * subset of the document.
2583 * This will be handled by xmlStringDecodeEntities
2585 return;
2586 case XML_PARSER_DTD:
2588 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2589 * In the internal DTD subset, parameter-entity references
2590 * can occur only where markup declarations can occur, not
2591 * within markup declarations.
2592 * In that case this is handled in xmlParseMarkupDecl
2594 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2595 return;
2596 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2597 return;
2598 break;
2599 case XML_PARSER_IGNORE:
2600 return;
2603 xmlParsePEReference(ctxt);
/*
 * growBuffer:
 * Enlarge the heap buffer @buffer to twice its current size plus @n bytes.
 *
 * The macro relies on two names supplied by the expansion site:
 *   - a size_t variable named <buffer>_size holding the current capacity,
 *     which is updated on success;
 *   - a label mem_error, jumped to when the new size wraps around or when
 *     xmlRealloc() fails (in both cases @buffer is left unchanged).
 */
#define growBuffer(buffer, n) {                                         \
    size_t new_size = buffer##_size * 2 + n;                            \
    xmlChar *tmp = NULL;                                                \
    if (new_size >= buffer##_size)                                      \
        tmp = (xmlChar *) xmlRealloc(buffer, new_size);                 \
    if (tmp == NULL)                                                    \
        goto mem_error;                                                 \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2622 * xmlStringLenDecodeEntities:
2623 * @ctxt: the parser context
2624 * @str: the input string
2625 * @len: the string length
2626 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2627 * @end: an end marker xmlChar, 0 if none
2628 * @end2: an end marker xmlChar, 0 if none
2629 * @end3: an end marker xmlChar, 0 if none
2631 * Takes a entity string content and process to do the adequate substitutions.
2633 * [67] Reference ::= EntityRef | CharRef
2635 * [69] PEReference ::= '%' Name ';'
2637 * Returns A newly allocated string with the substitution done. The caller
2638 * must deallocate it !
2640 xmlChar *
2641 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2642 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2643 xmlChar *buffer = NULL;
2644 size_t buffer_size = 0;
2645 size_t nbchars = 0;
2647 xmlChar *current = NULL;
2648 xmlChar *rep = NULL;
2649 const xmlChar *last;
2650 xmlEntityPtr ent;
2651 int c,l;
2653 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2654 return(NULL);
2655 last = str + len;
2657 if (((ctxt->depth > 40) &&
2658 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2659 (ctxt->depth > 1024)) {
2660 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2661 return(NULL);
2665 * allocate a translation buffer.
2667 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2668 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2669 if (buffer == NULL) goto mem_error;
2672 * OK loop until we reach one of the ending char or a size limit.
2673 * we are operating on already parsed values.
2675 if (str < last)
2676 c = CUR_SCHAR(str, l);
2677 else
2678 c = 0;
2679 while ((c != 0) && (c != end) && /* non input consuming loop */
2680 (c != end2) && (c != end3) &&
2681 (ctxt->instate != XML_PARSER_EOF)) {
2683 if (c == 0) break;
2684 if ((c == '&') && (str[1] == '#')) {
2685 int val = xmlParseStringCharRef(ctxt, &str);
2686 if (val == 0)
2687 goto int_error;
2688 COPY_BUF(0,buffer,nbchars,val);
2689 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2690 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2692 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2693 if (xmlParserDebugEntities)
2694 xmlGenericError(xmlGenericErrorContext,
2695 "String decoding Entity Reference: %.30s\n",
2696 str);
2697 ent = xmlParseStringEntityRef(ctxt, &str);
2698 xmlParserEntityCheck(ctxt, 0, ent, 0);
2699 if (ent != NULL)
2700 ctxt->nbentities += ent->checked / 2;
2701 if ((ent != NULL) &&
2702 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2703 if (ent->content != NULL) {
2704 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2705 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2706 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2708 } else {
2709 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2710 "predefined entity has no content\n");
2711 goto int_error;
2713 } else if ((ent != NULL) && (ent->content != NULL)) {
2714 ctxt->depth++;
2715 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2716 0, 0, 0);
2717 ctxt->depth--;
2718 if (rep == NULL) {
2719 ent->content[0] = 0;
2720 goto int_error;
2723 current = rep;
2724 while (*current != 0) { /* non input consuming loop */
2725 buffer[nbchars++] = *current++;
2726 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2727 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2728 goto int_error;
2729 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2732 xmlFree(rep);
2733 rep = NULL;
2734 } else if (ent != NULL) {
2735 int i = xmlStrlen(ent->name);
2736 const xmlChar *cur = ent->name;
2738 buffer[nbchars++] = '&';
2739 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2740 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2742 for (;i > 0;i--)
2743 buffer[nbchars++] = *cur++;
2744 buffer[nbchars++] = ';';
2746 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2747 if (xmlParserDebugEntities)
2748 xmlGenericError(xmlGenericErrorContext,
2749 "String decoding PE Reference: %.30s\n", str);
2750 ent = xmlParseStringPEReference(ctxt, &str);
2751 xmlParserEntityCheck(ctxt, 0, ent, 0);
2752 if (ent != NULL)
2753 ctxt->nbentities += ent->checked / 2;
2754 if (ent != NULL) {
2755 if (ent->content == NULL) {
2757 * Note: external parsed entities will not be loaded,
2758 * it is not required for a non-validating parser to
2759 * complete external PEReferences coming from the
2760 * internal subset
2762 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2763 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2764 (ctxt->validate != 0)) {
2765 xmlLoadEntityContent(ctxt, ent);
2766 } else {
2767 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2768 "not validating will not read content for PE entity %s\n",
2769 ent->name, NULL);
2772 ctxt->depth++;
2773 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2774 0, 0, 0);
2775 ctxt->depth--;
2776 if (rep == NULL) {
2777 if (ent->content != NULL)
2778 ent->content[0] = 0;
2779 goto int_error;
2781 current = rep;
2782 while (*current != 0) { /* non input consuming loop */
2783 buffer[nbchars++] = *current++;
2784 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2785 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2786 goto int_error;
2787 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2790 xmlFree(rep);
2791 rep = NULL;
2793 } else {
2794 COPY_BUF(l,buffer,nbchars,c);
2795 str += l;
2796 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2797 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2800 if (str < last)
2801 c = CUR_SCHAR(str, l);
2802 else
2803 c = 0;
2805 buffer[nbchars] = 0;
2806 return(buffer);
2808 mem_error:
2809 xmlErrMemory(ctxt, NULL);
2810 int_error:
2811 if (rep != NULL)
2812 xmlFree(rep);
2813 if (buffer != NULL)
2814 xmlFree(buffer);
2815 return(NULL);
2819 * xmlStringDecodeEntities:
2820 * @ctxt: the parser context
2821 * @str: the input string
2822 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2823 * @end: an end marker xmlChar, 0 if none
2824 * @end2: an end marker xmlChar, 0 if none
2825 * @end3: an end marker xmlChar, 0 if none
2827 * Takes a entity string content and process to do the adequate substitutions.
2829 * [67] Reference ::= EntityRef | CharRef
2831 * [69] PEReference ::= '%' Name ';'
2833 * Returns A newly allocated string with the substitution done. The caller
2834 * must deallocate it !
2836 xmlChar *
2837 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2838 xmlChar end, xmlChar end2, xmlChar end3) {
2839 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2840 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2841 end, end2, end3));
2844 /************************************************************************
2846 * Commodity functions, cleanup needed ? *
2848 ************************************************************************/
2851 * areBlanks:
2852 * @ctxt: an XML parser context
2853 * @str: a xmlChar *
2854 * @len: the size of @str
2855 * @blank_chars: we know the chars are blanks
2857 * Is this a sequence of blank chars that one can ignore ?
2859 * Returns 1 if ignorable 0 otherwise.
2862 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2863 int blank_chars) {
2864 int i, ret;
2865 xmlNodePtr lastChild;
2868 * Don't spend time trying to differentiate them, the same callback is
2869 * used !
2871 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2872 return(0);
2875 * Check for xml:space value.
2877 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2878 (*(ctxt->space) == -2))
2879 return(0);
2882 * Check that the string is made of blanks
2884 if (blank_chars == 0) {
2885 for (i = 0;i < len;i++)
2886 if (!(IS_BLANK_CH(str[i]))) return(0);
2890 * Look if the element is mixed content in the DTD if available
2892 if (ctxt->node == NULL) return(0);
2893 if (ctxt->myDoc != NULL) {
2894 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2895 if (ret == 0) return(1);
2896 if (ret == 1) return(0);
2900 * Otherwise, heuristic :-\
2902 if ((RAW != '<') && (RAW != 0xD)) return(0);
2903 if ((ctxt->node->children == NULL) &&
2904 (RAW == '<') && (NXT(1) == '/')) return(0);
2906 lastChild = xmlGetLastChild(ctxt->node);
2907 if (lastChild == NULL) {
2908 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2909 (ctxt->node->content != NULL)) return(0);
2910 } else if (xmlNodeIsText(lastChild))
2911 return(0);
2912 else if ((ctxt->node->children != NULL) &&
2913 (xmlNodeIsText(ctxt->node->children)))
2914 return(0);
2915 return(1);
2918 /************************************************************************
2920 * Extra stuff for namespace support *
2921 * Relates to http://www.w3.org/TR/WD-xml-names *
2923 ************************************************************************/
2926 * xmlSplitQName:
2927 * @ctxt: an XML parser context
2928 * @name: an XML parser context
2929 * @prefix: a xmlChar **
2931 * parse an UTF8 encoded XML qualified name string
2933 * [NS 5] QName ::= (Prefix ':')? LocalPart
2935 * [NS 6] Prefix ::= NCName
2937 * [NS 7] LocalPart ::= NCName
2939 * Returns the local part, and prefix is updated
2940 * to get the Prefix if any.
2943 xmlChar *
2944 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2945 xmlChar buf[XML_MAX_NAMELEN + 5];
2946 xmlChar *buffer = NULL;
2947 int len = 0;
2948 int max = XML_MAX_NAMELEN;
2949 xmlChar *ret = NULL;
2950 const xmlChar *cur = name;
2951 int c;
2953 if (prefix == NULL) return(NULL);
2954 *prefix = NULL;
2956 if (cur == NULL) return(NULL);
2958 #ifndef XML_XML_NAMESPACE
2959 /* xml: prefix is not really a namespace */
2960 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2961 (cur[2] == 'l') && (cur[3] == ':'))
2962 return(xmlStrdup(name));
2963 #endif
2965 /* nasty but well=formed */
2966 if (cur[0] == ':')
2967 return(xmlStrdup(name));
2969 c = *cur++;
2970 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2971 buf[len++] = c;
2972 c = *cur++;
2974 if (len >= max) {
2976 * Okay someone managed to make a huge name, so he's ready to pay
2977 * for the processing speed.
2979 max = len * 2;
2981 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2982 if (buffer == NULL) {
2983 xmlErrMemory(ctxt, NULL);
2984 return(NULL);
2986 memcpy(buffer, buf, len);
2987 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2988 if (len + 10 > max) {
2989 xmlChar *tmp;
2991 max *= 2;
2992 tmp = (xmlChar *) xmlRealloc(buffer,
2993 max * sizeof(xmlChar));
2994 if (tmp == NULL) {
2995 xmlFree(buffer);
2996 xmlErrMemory(ctxt, NULL);
2997 return(NULL);
2999 buffer = tmp;
3001 buffer[len++] = c;
3002 c = *cur++;
3004 buffer[len] = 0;
3007 if ((c == ':') && (*cur == 0)) {
3008 if (buffer != NULL)
3009 xmlFree(buffer);
3010 *prefix = NULL;
3011 return(xmlStrdup(name));
3014 if (buffer == NULL)
3015 ret = xmlStrndup(buf, len);
3016 else {
3017 ret = buffer;
3018 buffer = NULL;
3019 max = XML_MAX_NAMELEN;
3023 if (c == ':') {
3024 c = *cur;
3025 *prefix = ret;
3026 if (c == 0) {
3027 return(xmlStrndup(BAD_CAST "", 0));
3029 len = 0;
3032 * Check that the first character is proper to start
3033 * a new name
3035 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3036 ((c >= 0x41) && (c <= 0x5A)) ||
3037 (c == '_') || (c == ':'))) {
3038 int l;
3039 int first = CUR_SCHAR(cur, l);
3041 if (!IS_LETTER(first) && (first != '_')) {
3042 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3043 "Name %s is not XML Namespace compliant\n",
3044 name);
3047 cur++;
3049 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3050 buf[len++] = c;
3051 c = *cur++;
3053 if (len >= max) {
3055 * Okay someone managed to make a huge name, so he's ready to pay
3056 * for the processing speed.
3058 max = len * 2;
3060 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3061 if (buffer == NULL) {
3062 xmlErrMemory(ctxt, NULL);
3063 return(NULL);
3065 memcpy(buffer, buf, len);
3066 while (c != 0) { /* tested bigname2.xml */
3067 if (len + 10 > max) {
3068 xmlChar *tmp;
3070 max *= 2;
3071 tmp = (xmlChar *) xmlRealloc(buffer,
3072 max * sizeof(xmlChar));
3073 if (tmp == NULL) {
3074 xmlErrMemory(ctxt, NULL);
3075 xmlFree(buffer);
3076 return(NULL);
3078 buffer = tmp;
3080 buffer[len++] = c;
3081 c = *cur++;
3083 buffer[len] = 0;
3086 if (buffer == NULL)
3087 ret = xmlStrndup(buf, len);
3088 else {
3089 ret = buffer;
3093 return(ret);
/************************************************************************
 *									*
 *			The parser itself				*
 *	Relates to http://www.w3.org/TR/REC-xml				*
 *									*
 ************************************************************************/

/************************************************************************
 *									*
 *	Routines to parse Name, NCName and NmToken			*
 *									*
 ************************************************************************/
#ifdef DEBUG
/* Call counters for the name parsing routines, only kept in DEBUG builds. */
static unsigned long nbParseName = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseStringName = 0;
#endif
3118 * The two following functions are related to the change of accepted
3119 * characters for Name and NmToken in the Revision 5 of XML-1.0
3120 * They correspond to the modified production [4] and the new production [4a]
3121 * changes in that revision. Also note that the macros used for the
3122 * productions Letter, Digit, CombiningChar and Extender are not needed
3123 * anymore.
3124 * We still keep compatibility to pre-revision5 parsing semantic if the
3125 * new XML_PARSE_OLD10 option is given to the parser.
3127 static int
3128 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3129 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3131 * Use the new checks of production [4] [4a] amd [5] of the
3132 * Update 5 of XML-1.0
3134 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3135 (((c >= 'a') && (c <= 'z')) ||
3136 ((c >= 'A') && (c <= 'Z')) ||
3137 (c == '_') || (c == ':') ||
3138 ((c >= 0xC0) && (c <= 0xD6)) ||
3139 ((c >= 0xD8) && (c <= 0xF6)) ||
3140 ((c >= 0xF8) && (c <= 0x2FF)) ||
3141 ((c >= 0x370) && (c <= 0x37D)) ||
3142 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3143 ((c >= 0x200C) && (c <= 0x200D)) ||
3144 ((c >= 0x2070) && (c <= 0x218F)) ||
3145 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3146 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3147 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3148 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3149 ((c >= 0x10000) && (c <= 0xEFFFF))))
3150 return(1);
3151 } else {
3152 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3153 return(1);
3155 return(0);
3158 static int
3159 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3160 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3162 * Use the new checks of production [4] [4a] amd [5] of the
3163 * Update 5 of XML-1.0
3165 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3166 (((c >= 'a') && (c <= 'z')) ||
3167 ((c >= 'A') && (c <= 'Z')) ||
3168 ((c >= '0') && (c <= '9')) || /* !start */
3169 (c == '_') || (c == ':') ||
3170 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3171 ((c >= 0xC0) && (c <= 0xD6)) ||
3172 ((c >= 0xD8) && (c <= 0xF6)) ||
3173 ((c >= 0xF8) && (c <= 0x2FF)) ||
3174 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3175 ((c >= 0x370) && (c <= 0x37D)) ||
3176 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3177 ((c >= 0x200C) && (c <= 0x200D)) ||
3178 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3179 ((c >= 0x2070) && (c <= 0x218F)) ||
3180 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3181 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3182 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3183 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3184 ((c >= 0x10000) && (c <= 0xEFFFF))))
3185 return(1);
3186 } else {
3187 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3188 (c == '.') || (c == '-') ||
3189 (c == '_') || (c == ':') ||
3190 (IS_COMBINING(c)) ||
3191 (IS_EXTENDER(c)))
3192 return(1);
3194 return(0);
3197 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3198 int *len, int *alloc, int normalize);
3200 static const xmlChar *
3201 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3202 int len = 0, l;
3203 int c;
3204 int count = 0;
3206 #ifdef DEBUG
3207 nbParseNameComplex++;
3208 #endif
3211 * Handler for more complex cases
3213 GROW;
3214 if (ctxt->instate == XML_PARSER_EOF)
3215 return(NULL);
3216 c = CUR_CHAR(l);
3217 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3219 * Use the new checks of production [4] [4a] amd [5] of the
3220 * Update 5 of XML-1.0
3222 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3223 (!(((c >= 'a') && (c <= 'z')) ||
3224 ((c >= 'A') && (c <= 'Z')) ||
3225 (c == '_') || (c == ':') ||
3226 ((c >= 0xC0) && (c <= 0xD6)) ||
3227 ((c >= 0xD8) && (c <= 0xF6)) ||
3228 ((c >= 0xF8) && (c <= 0x2FF)) ||
3229 ((c >= 0x370) && (c <= 0x37D)) ||
3230 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3231 ((c >= 0x200C) && (c <= 0x200D)) ||
3232 ((c >= 0x2070) && (c <= 0x218F)) ||
3233 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3234 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3235 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3236 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3237 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3238 return(NULL);
3240 len += l;
3241 NEXTL(l);
3242 c = CUR_CHAR(l);
3243 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3244 (((c >= 'a') && (c <= 'z')) ||
3245 ((c >= 'A') && (c <= 'Z')) ||
3246 ((c >= '0') && (c <= '9')) || /* !start */
3247 (c == '_') || (c == ':') ||
3248 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3249 ((c >= 0xC0) && (c <= 0xD6)) ||
3250 ((c >= 0xD8) && (c <= 0xF6)) ||
3251 ((c >= 0xF8) && (c <= 0x2FF)) ||
3252 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3253 ((c >= 0x370) && (c <= 0x37D)) ||
3254 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3255 ((c >= 0x200C) && (c <= 0x200D)) ||
3256 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3257 ((c >= 0x2070) && (c <= 0x218F)) ||
3258 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3259 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3260 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3261 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3262 ((c >= 0x10000) && (c <= 0xEFFFF))
3263 )) {
3264 if (count++ > XML_PARSER_CHUNK_SIZE) {
3265 count = 0;
3266 GROW;
3267 if (ctxt->instate == XML_PARSER_EOF)
3268 return(NULL);
3270 len += l;
3271 NEXTL(l);
3272 c = CUR_CHAR(l);
3274 } else {
3275 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3276 (!IS_LETTER(c) && (c != '_') &&
3277 (c != ':'))) {
3278 return(NULL);
3280 len += l;
3281 NEXTL(l);
3282 c = CUR_CHAR(l);
3284 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3285 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3286 (c == '.') || (c == '-') ||
3287 (c == '_') || (c == ':') ||
3288 (IS_COMBINING(c)) ||
3289 (IS_EXTENDER(c)))) {
3290 if (count++ > XML_PARSER_CHUNK_SIZE) {
3291 count = 0;
3292 GROW;
3293 if (ctxt->instate == XML_PARSER_EOF)
3294 return(NULL);
3296 len += l;
3297 NEXTL(l);
3298 c = CUR_CHAR(l);
3301 if ((len > XML_MAX_NAME_LENGTH) &&
3302 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3303 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3304 return(NULL);
3306 if (ctxt->input->cur - ctxt->input->base < len) {
3308 * There were a couple of bugs where PERefs lead to to a change
3309 * of the buffer. Check the buffer size to avoid passing an invalid
3310 * pointer to xmlDictLookup.
3312 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3313 "unexpected change of input buffer");
3314 return (NULL);
3316 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3317 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3318 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3322 * xmlParseName:
3323 * @ctxt: an XML parser context
3325 * parse an XML name.
3327 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3328 * CombiningChar | Extender
3330 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3332 * [6] Names ::= Name (#x20 Name)*
3334 * Returns the Name parsed or NULL
3337 const xmlChar *
3338 xmlParseName(xmlParserCtxtPtr ctxt) {
3339 const xmlChar *in;
3340 const xmlChar *ret;
3341 int count = 0;
3343 GROW;
3345 #ifdef DEBUG
3346 nbParseName++;
3347 #endif
3350 * Accelerator for simple ASCII names
3352 in = ctxt->input->cur;
3353 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3354 ((*in >= 0x41) && (*in <= 0x5A)) ||
3355 (*in == '_') || (*in == ':')) {
3356 in++;
3357 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3358 ((*in >= 0x41) && (*in <= 0x5A)) ||
3359 ((*in >= 0x30) && (*in <= 0x39)) ||
3360 (*in == '_') || (*in == '-') ||
3361 (*in == ':') || (*in == '.'))
3362 in++;
3363 if ((*in > 0) && (*in < 0x80)) {
3364 count = in - ctxt->input->cur;
3365 if ((count > XML_MAX_NAME_LENGTH) &&
3366 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3367 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3368 return(NULL);
3370 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3371 ctxt->input->cur = in;
3372 ctxt->input->col += count;
3373 if (ret == NULL)
3374 xmlErrMemory(ctxt, NULL);
3375 return(ret);
3378 /* accelerator for special cases */
3379 return(xmlParseNameComplex(ctxt));
3382 static const xmlChar *
3383 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3384 int len = 0, l;
3385 int c;
3386 int count = 0;
3387 size_t startPosition = 0;
3389 #ifdef DEBUG
3390 nbParseNCNameComplex++;
3391 #endif
3394 * Handler for more complex cases
3396 GROW;
3397 startPosition = CUR_PTR - BASE_PTR;
3398 c = CUR_CHAR(l);
3399 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3400 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3401 return(NULL);
3404 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3405 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3406 if (count++ > XML_PARSER_CHUNK_SIZE) {
3407 if ((len > XML_MAX_NAME_LENGTH) &&
3408 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3409 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3410 return(NULL);
3412 count = 0;
3413 GROW;
3414 if (ctxt->instate == XML_PARSER_EOF)
3415 return(NULL);
3417 len += l;
3418 NEXTL(l);
3419 c = CUR_CHAR(l);
3420 if (c == 0) {
3421 count = 0;
3423 * when shrinking to extend the buffer we really need to preserve
3424 * the part of the name we already parsed. Hence rolling back
3425 * by current length.
3427 ctxt->input->cur -= l;
3428 GROW;
3429 if (ctxt->instate == XML_PARSER_EOF)
3430 return(NULL);
3431 ctxt->input->cur += l;
3432 c = CUR_CHAR(l);
3435 if ((len > XML_MAX_NAME_LENGTH) &&
3436 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3437 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3438 return(NULL);
3440 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3444 * xmlParseNCName:
3445 * @ctxt: an XML parser context
3446 * @len: length of the string parsed
3448 * parse an XML name.
3450 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3451 * CombiningChar | Extender
3453 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3455 * Returns the Name parsed or NULL
3458 static const xmlChar *
3459 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3460 const xmlChar *in, *e;
3461 const xmlChar *ret;
3462 int count = 0;
3464 #ifdef DEBUG
3465 nbParseNCName++;
3466 #endif
3469 * Accelerator for simple ASCII names
3471 in = ctxt->input->cur;
3472 e = ctxt->input->end;
3473 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3474 ((*in >= 0x41) && (*in <= 0x5A)) ||
3475 (*in == '_')) && (in < e)) {
3476 in++;
3477 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3478 ((*in >= 0x41) && (*in <= 0x5A)) ||
3479 ((*in >= 0x30) && (*in <= 0x39)) ||
3480 (*in == '_') || (*in == '-') ||
3481 (*in == '.')) && (in < e))
3482 in++;
3483 if (in >= e)
3484 goto complex;
3485 if ((*in > 0) && (*in < 0x80)) {
3486 count = in - ctxt->input->cur;
3487 if ((count > XML_MAX_NAME_LENGTH) &&
3488 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3489 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3490 return(NULL);
3492 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3493 ctxt->input->cur = in;
3494 ctxt->input->col += count;
3495 if (ret == NULL) {
3496 xmlErrMemory(ctxt, NULL);
3498 return(ret);
3501 complex:
3502 return(xmlParseNCNameComplex(ctxt));
3506 * xmlParseNameAndCompare:
3507 * @ctxt: an XML parser context
3509 * parse an XML name and compares for match
3510 * (specialized for endtag parsing)
3512 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3513 * and the name for mismatch
3516 static const xmlChar *
3517 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3518 register const xmlChar *cmp = other;
3519 register const xmlChar *in;
3520 const xmlChar *ret;
3522 GROW;
3523 if (ctxt->instate == XML_PARSER_EOF)
3524 return(NULL);
3526 in = ctxt->input->cur;
3527 while (*in != 0 && *in == *cmp) {
3528 ++in;
3529 ++cmp;
3531 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3532 /* success */
3533 ctxt->input->col += in - ctxt->input->cur;
3534 ctxt->input->cur = in;
3535 return (const xmlChar*) 1;
3537 /* failure (or end of input buffer), check with full function */
3538 ret = xmlParseName (ctxt);
3539 /* strings coming from the dictionary direct compare possible */
3540 if (ret == other) {
3541 return (const xmlChar*) 1;
3543 return ret;
3547 * xmlParseStringName:
3548 * @ctxt: an XML parser context
3549 * @str: a pointer to the string pointer (IN/OUT)
3551 * parse an XML name.
3553 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3554 * CombiningChar | Extender
3556 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3558 * [6] Names ::= Name (#x20 Name)*
3560 * Returns the Name parsed or NULL. The @str pointer
3561 * is updated to the current location in the string.
3564 static xmlChar *
3565 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3566 xmlChar buf[XML_MAX_NAMELEN + 5];
3567 const xmlChar *cur = *str;
3568 int len = 0, l;
3569 int c;
3571 #ifdef DEBUG
3572 nbParseStringName++;
3573 #endif
3575 c = CUR_SCHAR(cur, l);
3576 if (!xmlIsNameStartChar(ctxt, c)) {
3577 return(NULL);
3580 COPY_BUF(l,buf,len,c);
3581 cur += l;
3582 c = CUR_SCHAR(cur, l);
3583 while (xmlIsNameChar(ctxt, c)) {
3584 COPY_BUF(l,buf,len,c);
3585 cur += l;
3586 c = CUR_SCHAR(cur, l);
3587 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3589 * Okay someone managed to make a huge name, so he's ready to pay
3590 * for the processing speed.
3592 xmlChar *buffer;
3593 int max = len * 2;
3595 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3596 if (buffer == NULL) {
3597 xmlErrMemory(ctxt, NULL);
3598 return(NULL);
3600 memcpy(buffer, buf, len);
3601 while (xmlIsNameChar(ctxt, c)) {
3602 if (len + 10 > max) {
3603 xmlChar *tmp;
3605 if ((len > XML_MAX_NAME_LENGTH) &&
3606 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3607 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3608 xmlFree(buffer);
3609 return(NULL);
3611 max *= 2;
3612 tmp = (xmlChar *) xmlRealloc(buffer,
3613 max * sizeof(xmlChar));
3614 if (tmp == NULL) {
3615 xmlErrMemory(ctxt, NULL);
3616 xmlFree(buffer);
3617 return(NULL);
3619 buffer = tmp;
3621 COPY_BUF(l,buffer,len,c);
3622 cur += l;
3623 c = CUR_SCHAR(cur, l);
3625 buffer[len] = 0;
3626 *str = cur;
3627 return(buffer);
3630 if ((len > XML_MAX_NAME_LENGTH) &&
3631 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3632 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3633 return(NULL);
3635 *str = cur;
3636 return(xmlStrndup(buf, len));
3640 * xmlParseNmtoken:
3641 * @ctxt: an XML parser context
3643 * parse an XML Nmtoken.
3645 * [7] Nmtoken ::= (NameChar)+
3647 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3649 * Returns the Nmtoken parsed or NULL
3652 xmlChar *
3653 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3654 xmlChar buf[XML_MAX_NAMELEN + 5];
3655 int len = 0, l;
3656 int c;
3657 int count = 0;
3659 #ifdef DEBUG
3660 nbParseNmToken++;
3661 #endif
3663 GROW;
3664 if (ctxt->instate == XML_PARSER_EOF)
3665 return(NULL);
3666 c = CUR_CHAR(l);
3668 while (xmlIsNameChar(ctxt, c)) {
3669 if (count++ > XML_PARSER_CHUNK_SIZE) {
3670 count = 0;
3671 GROW;
3673 COPY_BUF(l,buf,len,c);
3674 NEXTL(l);
3675 c = CUR_CHAR(l);
3676 if (c == 0) {
3677 count = 0;
3678 GROW;
3679 if (ctxt->instate == XML_PARSER_EOF)
3680 return(NULL);
3681 c = CUR_CHAR(l);
3683 if (len >= XML_MAX_NAMELEN) {
3685 * Okay someone managed to make a huge token, so he's ready to pay
3686 * for the processing speed.
3688 xmlChar *buffer;
3689 int max = len * 2;
3691 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3692 if (buffer == NULL) {
3693 xmlErrMemory(ctxt, NULL);
3694 return(NULL);
3696 memcpy(buffer, buf, len);
3697 while (xmlIsNameChar(ctxt, c)) {
3698 if (count++ > XML_PARSER_CHUNK_SIZE) {
3699 count = 0;
3700 GROW;
3701 if (ctxt->instate == XML_PARSER_EOF) {
3702 xmlFree(buffer);
3703 return(NULL);
3706 if (len + 10 > max) {
3707 xmlChar *tmp;
3709 if ((max > XML_MAX_NAME_LENGTH) &&
3710 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3711 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3712 xmlFree(buffer);
3713 return(NULL);
3715 max *= 2;
3716 tmp = (xmlChar *) xmlRealloc(buffer,
3717 max * sizeof(xmlChar));
3718 if (tmp == NULL) {
3719 xmlErrMemory(ctxt, NULL);
3720 xmlFree(buffer);
3721 return(NULL);
3723 buffer = tmp;
3725 COPY_BUF(l,buffer,len,c);
3726 NEXTL(l);
3727 c = CUR_CHAR(l);
3729 buffer[len] = 0;
3730 return(buffer);
3733 if (len == 0)
3734 return(NULL);
3735 if ((len > XML_MAX_NAME_LENGTH) &&
3736 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3737 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3738 return(NULL);
3740 return(xmlStrndup(buf, len));
3744 * xmlParseEntityValue:
3745 * @ctxt: an XML parser context
3746 * @orig: if non-NULL store a copy of the original entity value
3748 * parse a value for ENTITY declarations
3750 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3751 * "'" ([^%&'] | PEReference | Reference)* "'"
3753 * Returns the EntityValue parsed with reference substituted or NULL
3756 xmlChar *
3757 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3758 xmlChar *buf = NULL;
3759 int len = 0;
3760 int size = XML_PARSER_BUFFER_SIZE;
3761 int c, l;
3762 xmlChar stop;
3763 xmlChar *ret = NULL;
3764 const xmlChar *cur = NULL;
3765 xmlParserInputPtr input;
3767 if (RAW == '"') stop = '"';
3768 else if (RAW == '\'') stop = '\'';
3769 else {
3770 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3771 return(NULL);
3773 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3774 if (buf == NULL) {
3775 xmlErrMemory(ctxt, NULL);
3776 return(NULL);
3780 * The content of the entity definition is copied in a buffer.
3783 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3784 input = ctxt->input;
3785 GROW;
3786 if (ctxt->instate == XML_PARSER_EOF)
3787 goto error;
3788 NEXT;
3789 c = CUR_CHAR(l);
3791 * NOTE: 4.4.5 Included in Literal
3792 * When a parameter entity reference appears in a literal entity
3793 * value, ... a single or double quote character in the replacement
3794 * text is always treated as a normal data character and will not
3795 * terminate the literal.
3796 * In practice it means we stop the loop only when back at parsing
3797 * the initial entity and the quote is found
3799 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3800 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3801 if (len + 5 >= size) {
3802 xmlChar *tmp;
3804 size *= 2;
3805 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3806 if (tmp == NULL) {
3807 xmlErrMemory(ctxt, NULL);
3808 goto error;
3810 buf = tmp;
3812 COPY_BUF(l,buf,len,c);
3813 NEXTL(l);
3815 GROW;
3816 c = CUR_CHAR(l);
3817 if (c == 0) {
3818 GROW;
3819 c = CUR_CHAR(l);
3822 buf[len] = 0;
3823 if (ctxt->instate == XML_PARSER_EOF)
3824 goto error;
3825 if (c != stop) {
3826 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3827 goto error;
3829 NEXT;
3832 * Raise problem w.r.t. '&' and '%' being used in non-entities
3833 * reference constructs. Note Charref will be handled in
3834 * xmlStringDecodeEntities()
3836 cur = buf;
3837 while (*cur != 0) { /* non input consuming */
3838 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3839 xmlChar *name;
3840 xmlChar tmp = *cur;
3841 int nameOk = 0;
3843 cur++;
3844 name = xmlParseStringName(ctxt, &cur);
3845 if (name != NULL) {
3846 nameOk = 1;
3847 xmlFree(name);
3849 if ((nameOk == 0) || (*cur != ';')) {
3850 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3851 "EntityValue: '%c' forbidden except for entities references\n",
3852 tmp);
3853 goto error;
3855 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3856 (ctxt->inputNr == 1)) {
3857 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3858 goto error;
3860 if (*cur == 0)
3861 break;
3863 cur++;
3867 * Then PEReference entities are substituted.
3869 * NOTE: 4.4.7 Bypassed
3870 * When a general entity reference appears in the EntityValue in
3871 * an entity declaration, it is bypassed and left as is.
3872 * so XML_SUBSTITUTE_REF is not set here.
3874 ++ctxt->depth;
3875 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3876 0, 0, 0);
3877 --ctxt->depth;
3878 if (orig != NULL) {
3879 *orig = buf;
3880 buf = NULL;
3883 error:
3884 if (buf != NULL)
3885 xmlFree(buf);
3886 return(ret);
3890 * xmlParseAttValueComplex:
3891 * @ctxt: an XML parser context
3892 * @len: the resulting attribute len
3893 * @normalize: whether to apply the inner normalization
3895 * parse a value for an attribute, this is the fallback function
3896 * of xmlParseAttValue() when the attribute parsing requires handling
3897 * of non-ASCII characters, or normalization compaction.
3899 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3901 static xmlChar *
3902 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3903 xmlChar limit = 0;
3904 xmlChar *buf = NULL;
3905 xmlChar *rep = NULL;
3906 size_t len = 0;
3907 size_t buf_size = 0;
3908 int c, l, in_space = 0;
3909 xmlChar *current = NULL;
3910 xmlEntityPtr ent;
3912 if (NXT(0) == '"') {
3913 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3914 limit = '"';
3915 NEXT;
3916 } else if (NXT(0) == '\'') {
3917 limit = '\'';
3918 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3919 NEXT;
3920 } else {
3921 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3922 return(NULL);
3926 * allocate a translation buffer.
3928 buf_size = XML_PARSER_BUFFER_SIZE;
3929 buf = (xmlChar *) xmlMallocAtomic(buf_size);
3930 if (buf == NULL) goto mem_error;
3933 * OK loop until we reach one of the ending char or a size limit.
3935 c = CUR_CHAR(l);
3936 while (((NXT(0) != limit) && /* checked */
3937 (IS_CHAR(c)) && (c != '<')) &&
3938 (ctxt->instate != XML_PARSER_EOF)) {
3940 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3941 * special option is given
3943 if ((len > XML_MAX_TEXT_LENGTH) &&
3944 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3945 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3946 "AttValue length too long\n");
3947 goto mem_error;
3949 if (c == '&') {
3950 in_space = 0;
3951 if (NXT(1) == '#') {
3952 int val = xmlParseCharRef(ctxt);
3954 if (val == '&') {
3955 if (ctxt->replaceEntities) {
3956 if (len + 10 > buf_size) {
3957 growBuffer(buf, 10);
3959 buf[len++] = '&';
3960 } else {
3962 * The reparsing will be done in xmlStringGetNodeList()
3963 * called by the attribute() function in SAX.c
3965 if (len + 10 > buf_size) {
3966 growBuffer(buf, 10);
3968 buf[len++] = '&';
3969 buf[len++] = '#';
3970 buf[len++] = '3';
3971 buf[len++] = '8';
3972 buf[len++] = ';';
3974 } else if (val != 0) {
3975 if (len + 10 > buf_size) {
3976 growBuffer(buf, 10);
3978 len += xmlCopyChar(0, &buf[len], val);
3980 } else {
3981 ent = xmlParseEntityRef(ctxt);
3982 ctxt->nbentities++;
3983 if (ent != NULL)
3984 ctxt->nbentities += ent->owner;
3985 if ((ent != NULL) &&
3986 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3987 if (len + 10 > buf_size) {
3988 growBuffer(buf, 10);
3990 if ((ctxt->replaceEntities == 0) &&
3991 (ent->content[0] == '&')) {
3992 buf[len++] = '&';
3993 buf[len++] = '#';
3994 buf[len++] = '3';
3995 buf[len++] = '8';
3996 buf[len++] = ';';
3997 } else {
3998 buf[len++] = ent->content[0];
4000 } else if ((ent != NULL) &&
4001 (ctxt->replaceEntities != 0)) {
4002 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4003 ++ctxt->depth;
4004 rep = xmlStringDecodeEntities(ctxt, ent->content,
4005 XML_SUBSTITUTE_REF,
4006 0, 0, 0);
4007 --ctxt->depth;
4008 if (rep != NULL) {
4009 current = rep;
4010 while (*current != 0) { /* non input consuming */
4011 if ((*current == 0xD) || (*current == 0xA) ||
4012 (*current == 0x9)) {
4013 buf[len++] = 0x20;
4014 current++;
4015 } else
4016 buf[len++] = *current++;
4017 if (len + 10 > buf_size) {
4018 growBuffer(buf, 10);
4021 xmlFree(rep);
4022 rep = NULL;
4024 } else {
4025 if (len + 10 > buf_size) {
4026 growBuffer(buf, 10);
4028 if (ent->content != NULL)
4029 buf[len++] = ent->content[0];
4031 } else if (ent != NULL) {
4032 int i = xmlStrlen(ent->name);
4033 const xmlChar *cur = ent->name;
4036 * This may look absurd but is needed to detect
4037 * entities problems
4039 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4040 (ent->content != NULL) && (ent->checked == 0)) {
4041 unsigned long oldnbent = ctxt->nbentities, diff;
4043 ++ctxt->depth;
4044 rep = xmlStringDecodeEntities(ctxt, ent->content,
4045 XML_SUBSTITUTE_REF, 0, 0, 0);
4046 --ctxt->depth;
4048 diff = ctxt->nbentities - oldnbent + 1;
4049 if (diff > INT_MAX / 2)
4050 diff = INT_MAX / 2;
4051 ent->checked = diff * 2;
4052 if (rep != NULL) {
4053 if (xmlStrchr(rep, '<'))
4054 ent->checked |= 1;
4055 xmlFree(rep);
4056 rep = NULL;
4057 } else {
4058 ent->content[0] = 0;
4063 * Just output the reference
4065 buf[len++] = '&';
4066 while (len + i + 10 > buf_size) {
4067 growBuffer(buf, i + 10);
4069 for (;i > 0;i--)
4070 buf[len++] = *cur++;
4071 buf[len++] = ';';
4074 } else {
4075 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4076 if ((len != 0) || (!normalize)) {
4077 if ((!normalize) || (!in_space)) {
4078 COPY_BUF(l,buf,len,0x20);
4079 while (len + 10 > buf_size) {
4080 growBuffer(buf, 10);
4083 in_space = 1;
4085 } else {
4086 in_space = 0;
4087 COPY_BUF(l,buf,len,c);
4088 if (len + 10 > buf_size) {
4089 growBuffer(buf, 10);
4092 NEXTL(l);
4094 GROW;
4095 c = CUR_CHAR(l);
4097 if (ctxt->instate == XML_PARSER_EOF)
4098 goto error;
4100 if ((in_space) && (normalize)) {
4101 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4103 buf[len] = 0;
4104 if (RAW == '<') {
4105 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4106 } else if (RAW != limit) {
4107 if ((c != 0) && (!IS_CHAR(c))) {
4108 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4109 "invalid character in attribute value\n");
4110 } else {
4111 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4112 "AttValue: ' expected\n");
4114 } else
4115 NEXT;
4118 * There we potentially risk an overflow, don't allow attribute value of
4119 * length more than INT_MAX it is a very reasonable assumption !
4121 if (len >= INT_MAX) {
4122 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4123 "AttValue length too long\n");
4124 goto mem_error;
4127 if (attlen != NULL) *attlen = (int) len;
4128 return(buf);
4130 mem_error:
4131 xmlErrMemory(ctxt, NULL);
4132 error:
4133 if (buf != NULL)
4134 xmlFree(buf);
4135 if (rep != NULL)
4136 xmlFree(rep);
4137 return(NULL);
4141 * xmlParseAttValue:
4142 * @ctxt: an XML parser context
4144 * parse a value for an attribute
4145 * Note: the parser won't do substitution of entities here, this
4146 * will be handled later in xmlStringGetNodeList
4148 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4149 * "'" ([^<&'] | Reference)* "'"
4151 * 3.3.3 Attribute-Value Normalization:
4152 * Before the value of an attribute is passed to the application or
4153 * checked for validity, the XML processor must normalize it as follows:
4154 * - a character reference is processed by appending the referenced
4155 * character to the attribute value
4156 * - an entity reference is processed by recursively processing the
4157 * replacement text of the entity
4158 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4159 * appending #x20 to the normalized value, except that only a single
4160 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4161 * parsed entity or the literal entity value of an internal parsed entity
4162 * - other characters are processed by appending them to the normalized value
4163 * If the declared value is not CDATA, then the XML processor must further
4164 * process the normalized attribute value by discarding any leading and
4165 * trailing space (#x20) characters, and by replacing sequences of space
4166 * (#x20) characters by a single space (#x20) character.
4167 * All attributes for which no declaration has been read should be treated
4168 * by a non-validating parser as if declared CDATA.
4170 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4174 xmlChar *
4175 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4176 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4177 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4181 * xmlParseSystemLiteral:
4182 * @ctxt: an XML parser context
4184 * parse an XML Literal
4186 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4188 * Returns the SystemLiteral parsed or NULL
4191 xmlChar *
4192 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4193 xmlChar *buf = NULL;
4194 int len = 0;
4195 int size = XML_PARSER_BUFFER_SIZE;
4196 int cur, l;
4197 xmlChar stop;
4198 int state = ctxt->instate;
4199 int count = 0;
4201 SHRINK;
4202 if (RAW == '"') {
4203 NEXT;
4204 stop = '"';
4205 } else if (RAW == '\'') {
4206 NEXT;
4207 stop = '\'';
4208 } else {
4209 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4210 return(NULL);
4213 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4214 if (buf == NULL) {
4215 xmlErrMemory(ctxt, NULL);
4216 return(NULL);
4218 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4219 cur = CUR_CHAR(l);
4220 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4221 if (len + 5 >= size) {
4222 xmlChar *tmp;
4224 if ((size > XML_MAX_NAME_LENGTH) &&
4225 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4226 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4227 xmlFree(buf);
4228 ctxt->instate = (xmlParserInputState) state;
4229 return(NULL);
4231 size *= 2;
4232 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4233 if (tmp == NULL) {
4234 xmlFree(buf);
4235 xmlErrMemory(ctxt, NULL);
4236 ctxt->instate = (xmlParserInputState) state;
4237 return(NULL);
4239 buf = tmp;
4241 count++;
4242 if (count > 50) {
4243 SHRINK;
4244 GROW;
4245 count = 0;
4246 if (ctxt->instate == XML_PARSER_EOF) {
4247 xmlFree(buf);
4248 return(NULL);
4251 COPY_BUF(l,buf,len,cur);
4252 NEXTL(l);
4253 cur = CUR_CHAR(l);
4254 if (cur == 0) {
4255 GROW;
4256 SHRINK;
4257 cur = CUR_CHAR(l);
4260 buf[len] = 0;
4261 ctxt->instate = (xmlParserInputState) state;
4262 if (!IS_CHAR(cur)) {
4263 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4264 } else {
4265 NEXT;
4267 return(buf);
4271 * xmlParsePubidLiteral:
4272 * @ctxt: an XML parser context
4274 * parse an XML public literal
4276 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4278 * Returns the PubidLiteral parsed or NULL.
4281 xmlChar *
4282 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4283 xmlChar *buf = NULL;
4284 int len = 0;
4285 int size = XML_PARSER_BUFFER_SIZE;
4286 xmlChar cur;
4287 xmlChar stop;
4288 int count = 0;
4289 xmlParserInputState oldstate = ctxt->instate;
4291 SHRINK;
4292 if (RAW == '"') {
4293 NEXT;
4294 stop = '"';
4295 } else if (RAW == '\'') {
4296 NEXT;
4297 stop = '\'';
4298 } else {
4299 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4300 return(NULL);
4302 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4303 if (buf == NULL) {
4304 xmlErrMemory(ctxt, NULL);
4305 return(NULL);
4307 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4308 cur = CUR;
4309 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4310 if (len + 1 >= size) {
4311 xmlChar *tmp;
4313 if ((size > XML_MAX_NAME_LENGTH) &&
4314 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4315 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4316 xmlFree(buf);
4317 return(NULL);
4319 size *= 2;
4320 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4321 if (tmp == NULL) {
4322 xmlErrMemory(ctxt, NULL);
4323 xmlFree(buf);
4324 return(NULL);
4326 buf = tmp;
4328 buf[len++] = cur;
4329 count++;
4330 if (count > 50) {
4331 SHRINK;
4332 GROW;
4333 count = 0;
4334 if (ctxt->instate == XML_PARSER_EOF) {
4335 xmlFree(buf);
4336 return(NULL);
4339 NEXT;
4340 cur = CUR;
4341 if (cur == 0) {
4342 GROW;
4343 SHRINK;
4344 cur = CUR;
4347 buf[len] = 0;
4348 if (cur != stop) {
4349 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4350 } else {
4351 NEXT;
4353 ctxt->instate = oldstate;
4354 return(buf);
4357 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
/*
 * used for the test in the inner loop of the char data testing
 *
 * Indexed by byte value. An entry is non-zero (equal to its own index)
 * for TAB and for 0x20..0x7F except '&', '<' and ']'; every other entry,
 * including CR, LF and all bytes >= 0x80, is zero.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    /* 0x88 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x98 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4398 * xmlParseCharData:
4399 * @ctxt: an XML parser context
4400 * @cdata: int indicating whether we are within a CDATA section
4402 * parse a CharData section.
4403 * if we are within a CDATA section ']]>' marks an end of section.
4405 * The right angle bracket (>) may be represented using the string "&gt;",
4406 * and must, for compatibility, be escaped using "&gt;" or a character
4407 * reference when it appears in the string "]]>" in content, when that
4408 * string is not marking the end of a CDATA section.
4410 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4413 void
4414 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4415 const xmlChar *in;
4416 int nbchar = 0;
4417 int line = ctxt->input->line;
4418 int col = ctxt->input->col;
4419 int ccol;
4421 SHRINK;
4422 GROW;
4424 * Accelerated common case where input don't need to be
4425 * modified before passing it to the handler.
4427 if (!cdata) {
4428 in = ctxt->input->cur;
4429 do {
4430 get_more_space:
4431 while (*in == 0x20) { in++; ctxt->input->col++; }
4432 if (*in == 0xA) {
4433 do {
4434 ctxt->input->line++; ctxt->input->col = 1;
4435 in++;
4436 } while (*in == 0xA);
4437 goto get_more_space;
4439 if (*in == '<') {
4440 nbchar = in - ctxt->input->cur;
4441 if (nbchar > 0) {
4442 const xmlChar *tmp = ctxt->input->cur;
4443 ctxt->input->cur = in;
4445 if ((ctxt->sax != NULL) &&
4446 (ctxt->sax->ignorableWhitespace !=
4447 ctxt->sax->characters)) {
4448 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4449 if (ctxt->sax->ignorableWhitespace != NULL)
4450 ctxt->sax->ignorableWhitespace(ctxt->userData,
4451 tmp, nbchar);
4452 } else {
4453 if (ctxt->sax->characters != NULL)
4454 ctxt->sax->characters(ctxt->userData,
4455 tmp, nbchar);
4456 if (*ctxt->space == -1)
4457 *ctxt->space = -2;
4459 } else if ((ctxt->sax != NULL) &&
4460 (ctxt->sax->characters != NULL)) {
4461 ctxt->sax->characters(ctxt->userData,
4462 tmp, nbchar);
4465 return;
4468 get_more:
4469 ccol = ctxt->input->col;
4470 while (test_char_data[*in]) {
4471 in++;
4472 ccol++;
4474 ctxt->input->col = ccol;
4475 if (*in == 0xA) {
4476 do {
4477 ctxt->input->line++; ctxt->input->col = 1;
4478 in++;
4479 } while (*in == 0xA);
4480 goto get_more;
4482 if (*in == ']') {
4483 if ((in[1] == ']') && (in[2] == '>')) {
4484 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4485 ctxt->input->cur = in + 1;
4486 return;
4488 in++;
4489 ctxt->input->col++;
4490 goto get_more;
4492 nbchar = in - ctxt->input->cur;
4493 if (nbchar > 0) {
4494 if ((ctxt->sax != NULL) &&
4495 (ctxt->sax->ignorableWhitespace !=
4496 ctxt->sax->characters) &&
4497 (IS_BLANK_CH(*ctxt->input->cur))) {
4498 const xmlChar *tmp = ctxt->input->cur;
4499 ctxt->input->cur = in;
4501 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4502 if (ctxt->sax->ignorableWhitespace != NULL)
4503 ctxt->sax->ignorableWhitespace(ctxt->userData,
4504 tmp, nbchar);
4505 } else {
4506 if (ctxt->sax->characters != NULL)
4507 ctxt->sax->characters(ctxt->userData,
4508 tmp, nbchar);
4509 if (*ctxt->space == -1)
4510 *ctxt->space = -2;
4512 line = ctxt->input->line;
4513 col = ctxt->input->col;
4514 } else if (ctxt->sax != NULL) {
4515 if (ctxt->sax->characters != NULL)
4516 ctxt->sax->characters(ctxt->userData,
4517 ctxt->input->cur, nbchar);
4518 line = ctxt->input->line;
4519 col = ctxt->input->col;
4521 /* something really bad happened in the SAX callback */
4522 if (ctxt->instate != XML_PARSER_CONTENT)
4523 return;
4525 ctxt->input->cur = in;
4526 if (*in == 0xD) {
4527 in++;
4528 if (*in == 0xA) {
4529 ctxt->input->cur = in;
4530 in++;
4531 ctxt->input->line++; ctxt->input->col = 1;
4532 continue; /* while */
4534 in--;
4536 if (*in == '<') {
4537 return;
4539 if (*in == '&') {
4540 return;
4542 SHRINK;
4543 GROW;
4544 if (ctxt->instate == XML_PARSER_EOF)
4545 return;
4546 in = ctxt->input->cur;
4547 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4548 nbchar = 0;
4550 ctxt->input->line = line;
4551 ctxt->input->col = col;
4552 xmlParseCharDataComplex(ctxt, cdata);
4556 * xmlParseCharDataComplex:
4557 * @ctxt: an XML parser context
4558 * @cdata: int indicating whether we are within a CDATA section
4560 * parse a CharData section.this is the fallback function
4561 * of xmlParseCharData() when the parsing requires handling
4562 * of non-ASCII characters.
4564 static void
4565 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4566 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4567 int nbchar = 0;
4568 int cur, l;
4569 int count = 0;
4571 SHRINK;
4572 GROW;
4573 cur = CUR_CHAR(l);
4574 while ((cur != '<') && /* checked */
4575 (cur != '&') &&
4576 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4577 if ((cur == ']') && (NXT(1) == ']') &&
4578 (NXT(2) == '>')) {
4579 if (cdata) break;
4580 else {
4581 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4584 COPY_BUF(l,buf,nbchar,cur);
4585 /* move current position before possible calling of ctxt->sax->characters */
4586 NEXTL(l);
4587 cur = CUR_CHAR(l);
4588 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4589 buf[nbchar] = 0;
4592 * OK the segment is to be consumed as chars.
4594 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4595 if (areBlanks(ctxt, buf, nbchar, 0)) {
4596 if (ctxt->sax->ignorableWhitespace != NULL)
4597 ctxt->sax->ignorableWhitespace(ctxt->userData,
4598 buf, nbchar);
4599 } else {
4600 if (ctxt->sax->characters != NULL)
4601 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4602 if ((ctxt->sax->characters !=
4603 ctxt->sax->ignorableWhitespace) &&
4604 (*ctxt->space == -1))
4605 *ctxt->space = -2;
4608 nbchar = 0;
4609 /* something really bad happened in the SAX callback */
4610 if (ctxt->instate != XML_PARSER_CONTENT)
4611 return;
4613 count++;
4614 if (count > 50) {
4615 SHRINK;
4616 GROW;
4617 count = 0;
4618 if (ctxt->instate == XML_PARSER_EOF)
4619 return;
4622 if (nbchar != 0) {
4623 buf[nbchar] = 0;
4625 * OK the segment is to be consumed as chars.
4627 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4628 if (areBlanks(ctxt, buf, nbchar, 0)) {
4629 if (ctxt->sax->ignorableWhitespace != NULL)
4630 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4631 } else {
4632 if (ctxt->sax->characters != NULL)
4633 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4634 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4635 (*ctxt->space == -1))
4636 *ctxt->space = -2;
4640 if ((cur != 0) && (!IS_CHAR(cur))) {
4641 /* Generate the error and skip the offending character */
4642 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4643 "PCDATA invalid Char value %d\n",
4644 cur);
4645 NEXTL(l);
4650 * xmlParseExternalID:
4651 * @ctxt: an XML parser context
4652 * @publicID: a xmlChar** receiving PubidLiteral
4653 * @strict: indicate whether we should restrict parsing to only
4654 * production [75], see NOTE below
4656 * Parse an External ID or a Public ID
4658 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4659 * 'PUBLIC' S PubidLiteral S SystemLiteral
4661 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4662 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4664 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4666 * Returns the function returns SystemLiteral and in the second
4667 * case publicID receives PubidLiteral, is strict is off
4668 * it is possible to return NULL and have publicID set.
4671 xmlChar *
4672 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4673 xmlChar *URI = NULL;
4675 SHRINK;
4677 *publicID = NULL;
4678 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4679 SKIP(6);
4680 if (SKIP_BLANKS == 0) {
4681 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4682 "Space required after 'SYSTEM'\n");
4684 URI = xmlParseSystemLiteral(ctxt);
4685 if (URI == NULL) {
4686 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4688 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4689 SKIP(6);
4690 if (SKIP_BLANKS == 0) {
4691 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4692 "Space required after 'PUBLIC'\n");
4694 *publicID = xmlParsePubidLiteral(ctxt);
4695 if (*publicID == NULL) {
4696 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4698 if (strict) {
4700 * We don't handle [83] so "S SystemLiteral" is required.
4702 if (SKIP_BLANKS == 0) {
4703 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4704 "Space required after the Public Identifier\n");
4706 } else {
4708 * We handle [83] so we return immediately, if
4709 * "S SystemLiteral" is not detected. We skip blanks if no
4710 * system literal was found, but this is harmless since we must
4711 * be at the end of a NotationDecl.
4713 if (SKIP_BLANKS == 0) return(NULL);
4714 if ((CUR != '\'') && (CUR != '"')) return(NULL);
4716 URI = xmlParseSystemLiteral(ctxt);
4717 if (URI == NULL) {
4718 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4721 return(URI);
4725 * xmlParseCommentComplex:
4726 * @ctxt: an XML parser context
4727 * @buf: the already parsed part of the buffer
4728 * @len: number of bytes in the buffer
4729 * @size: allocated size of the buffer
4731 * Skip an XML (SGML) comment <!-- .... -->
4732 * The spec says that "For compatibility, the string "--" (double-hyphen)
4733 * must not occur within comments. "
4734 * This is the slow routine in case the accelerator for ascii didn't work
4736 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4738 static void
4739 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4740 size_t len, size_t size) {
4741 int q, ql;
4742 int r, rl;
4743 int cur, l;
4744 size_t count = 0;
4745 int inputid;
4747 inputid = ctxt->input->id;
4749 if (buf == NULL) {
4750 len = 0;
4751 size = XML_PARSER_BUFFER_SIZE;
4752 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4753 if (buf == NULL) {
4754 xmlErrMemory(ctxt, NULL);
4755 return;
4758 GROW; /* Assure there's enough input data */
4759 q = CUR_CHAR(ql);
4760 if (q == 0)
4761 goto not_terminated;
4762 if (!IS_CHAR(q)) {
4763 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4764 "xmlParseComment: invalid xmlChar value %d\n",
4766 xmlFree (buf);
4767 return;
4769 NEXTL(ql);
4770 r = CUR_CHAR(rl);
4771 if (r == 0)
4772 goto not_terminated;
4773 if (!IS_CHAR(r)) {
4774 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4775 "xmlParseComment: invalid xmlChar value %d\n",
4777 xmlFree (buf);
4778 return;
4780 NEXTL(rl);
4781 cur = CUR_CHAR(l);
4782 if (cur == 0)
4783 goto not_terminated;
4784 while (IS_CHAR(cur) && /* checked */
4785 ((cur != '>') ||
4786 (r != '-') || (q != '-'))) {
4787 if ((r == '-') && (q == '-')) {
4788 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4790 if ((len > XML_MAX_TEXT_LENGTH) &&
4791 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4792 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4793 "Comment too big found", NULL);
4794 xmlFree (buf);
4795 return;
4797 if (len + 5 >= size) {
4798 xmlChar *new_buf;
4799 size_t new_size;
4801 new_size = size * 2;
4802 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4803 if (new_buf == NULL) {
4804 xmlFree (buf);
4805 xmlErrMemory(ctxt, NULL);
4806 return;
4808 buf = new_buf;
4809 size = new_size;
4811 COPY_BUF(ql,buf,len,q);
4812 q = r;
4813 ql = rl;
4814 r = cur;
4815 rl = l;
4817 count++;
4818 if (count > 50) {
4819 SHRINK;
4820 GROW;
4821 count = 0;
4822 if (ctxt->instate == XML_PARSER_EOF) {
4823 xmlFree(buf);
4824 return;
4827 NEXTL(l);
4828 cur = CUR_CHAR(l);
4829 if (cur == 0) {
4830 SHRINK;
4831 GROW;
4832 cur = CUR_CHAR(l);
4835 buf[len] = 0;
4836 if (cur == 0) {
4837 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4838 "Comment not terminated \n<!--%.50s\n", buf);
4839 } else if (!IS_CHAR(cur)) {
4840 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4841 "xmlParseComment: invalid xmlChar value %d\n",
4842 cur);
4843 } else {
4844 if (inputid != ctxt->input->id) {
4845 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4846 "Comment doesn't start and stop in the same"
4847 " entity\n");
4849 NEXT;
4850 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4851 (!ctxt->disableSAX))
4852 ctxt->sax->comment(ctxt->userData, buf);
4854 xmlFree(buf);
4855 return;
4856 not_terminated:
4857 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4858 "Comment not terminated\n", NULL);
4859 xmlFree(buf);
4860 return;
4864 * xmlParseComment:
4865 * @ctxt: an XML parser context
4867 * Skip an XML (SGML) comment <!-- .... -->
4868 * The spec says that "For compatibility, the string "--" (double-hyphen)
4869 * must not occur within comments. "
4871 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4873 void
4874 xmlParseComment(xmlParserCtxtPtr ctxt) {
4875 xmlChar *buf = NULL;
4876 size_t size = XML_PARSER_BUFFER_SIZE;
4877 size_t len = 0;
4878 xmlParserInputState state;
4879 const xmlChar *in;
4880 size_t nbchar = 0;
4881 int ccol;
4882 int inputid;
4885 * Check that there is a comment right here.
4887 if ((RAW != '<') || (NXT(1) != '!') ||
4888 (NXT(2) != '-') || (NXT(3) != '-')) return;
4889 state = ctxt->instate;
4890 ctxt->instate = XML_PARSER_COMMENT;
4891 inputid = ctxt->input->id;
4892 SKIP(4);
4893 SHRINK;
4894 GROW;
4897 * Accelerated common case where input don't need to be
4898 * modified before passing it to the handler.
4900 in = ctxt->input->cur;
4901 do {
4902 if (*in == 0xA) {
4903 do {
4904 ctxt->input->line++; ctxt->input->col = 1;
4905 in++;
4906 } while (*in == 0xA);
4908 get_more:
4909 ccol = ctxt->input->col;
4910 while (((*in > '-') && (*in <= 0x7F)) ||
4911 ((*in >= 0x20) && (*in < '-')) ||
4912 (*in == 0x09)) {
4913 in++;
4914 ccol++;
4916 ctxt->input->col = ccol;
4917 if (*in == 0xA) {
4918 do {
4919 ctxt->input->line++; ctxt->input->col = 1;
4920 in++;
4921 } while (*in == 0xA);
4922 goto get_more;
4924 nbchar = in - ctxt->input->cur;
4926 * save current set of data
4928 if (nbchar > 0) {
4929 if ((ctxt->sax != NULL) &&
4930 (ctxt->sax->comment != NULL)) {
4931 if (buf == NULL) {
4932 if ((*in == '-') && (in[1] == '-'))
4933 size = nbchar + 1;
4934 else
4935 size = XML_PARSER_BUFFER_SIZE + nbchar;
4936 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4937 if (buf == NULL) {
4938 xmlErrMemory(ctxt, NULL);
4939 ctxt->instate = state;
4940 return;
4942 len = 0;
4943 } else if (len + nbchar + 1 >= size) {
4944 xmlChar *new_buf;
4945 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4946 new_buf = (xmlChar *) xmlRealloc(buf,
4947 size * sizeof(xmlChar));
4948 if (new_buf == NULL) {
4949 xmlFree (buf);
4950 xmlErrMemory(ctxt, NULL);
4951 ctxt->instate = state;
4952 return;
4954 buf = new_buf;
4956 memcpy(&buf[len], ctxt->input->cur, nbchar);
4957 len += nbchar;
4958 buf[len] = 0;
4961 if ((len > XML_MAX_TEXT_LENGTH) &&
4962 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4963 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4964 "Comment too big found", NULL);
4965 xmlFree (buf);
4966 return;
4968 ctxt->input->cur = in;
4969 if (*in == 0xA) {
4970 in++;
4971 ctxt->input->line++; ctxt->input->col = 1;
4973 if (*in == 0xD) {
4974 in++;
4975 if (*in == 0xA) {
4976 ctxt->input->cur = in;
4977 in++;
4978 ctxt->input->line++; ctxt->input->col = 1;
4979 goto get_more;
4981 in--;
4983 SHRINK;
4984 GROW;
4985 if (ctxt->instate == XML_PARSER_EOF) {
4986 xmlFree(buf);
4987 return;
4989 in = ctxt->input->cur;
4990 if (*in == '-') {
4991 if (in[1] == '-') {
4992 if (in[2] == '>') {
4993 if (ctxt->input->id != inputid) {
4994 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4995 "comment doesn't start and stop in the"
4996 " same entity\n");
4998 SKIP(3);
4999 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5000 (!ctxt->disableSAX)) {
5001 if (buf != NULL)
5002 ctxt->sax->comment(ctxt->userData, buf);
5003 else
5004 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5006 if (buf != NULL)
5007 xmlFree(buf);
5008 if (ctxt->instate != XML_PARSER_EOF)
5009 ctxt->instate = state;
5010 return;
5012 if (buf != NULL) {
5013 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5014 "Double hyphen within comment: "
5015 "<!--%.50s\n",
5016 buf);
5017 } else
5018 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5019 "Double hyphen within comment\n", NULL);
5020 if (ctxt->instate == XML_PARSER_EOF) {
5021 xmlFree(buf);
5022 return;
5024 in++;
5025 ctxt->input->col++;
5027 in++;
5028 ctxt->input->col++;
5029 goto get_more;
5031 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5032 xmlParseCommentComplex(ctxt, buf, len, size);
5033 ctxt->instate = state;
5034 return;
5039 * xmlParsePITarget:
5040 * @ctxt: an XML parser context
5042 * parse the name of a PI
5044 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5046 * Returns the PITarget name or NULL
5049 const xmlChar *
5050 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5051 const xmlChar *name;
5053 name = xmlParseName(ctxt);
5054 if ((name != NULL) &&
5055 ((name[0] == 'x') || (name[0] == 'X')) &&
5056 ((name[1] == 'm') || (name[1] == 'M')) &&
5057 ((name[2] == 'l') || (name[2] == 'L'))) {
5058 int i;
5059 if ((name[0] == 'x') && (name[1] == 'm') &&
5060 (name[2] == 'l') && (name[3] == 0)) {
5061 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5062 "XML declaration allowed only at the start of the document\n");
5063 return(name);
5064 } else if (name[3] == 0) {
5065 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5066 return(name);
5068 for (i = 0;;i++) {
5069 if (xmlW3CPIs[i] == NULL) break;
5070 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5071 return(name);
5073 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5074 "xmlParsePITarget: invalid name prefix 'xml'\n",
5075 NULL, NULL);
5077 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5078 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5079 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5081 return(name);
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The expected content is: optional blanks, the word "catalog",
 * optional blanks, '=', optional blanks, a single or double quoted
 * URL, and optional trailing blanks. Any other shape emits an
 * XML_WAR_CATALOG_PI warning, except a missing '=' which is ignored
 * silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *tmp, *base;
    xmlChar marker;

    tmp = catalog;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
        goto error;
    tmp += 7;
    while (IS_BLANK_CH(*tmp)) tmp++;
    /* NOTE(review): no warning here, unlike the other syntax errors. */
    if (*tmp != '=') {
        return;
    }
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    marker = *tmp;
    if ((marker != '\'') && (marker != '"'))
        goto error;
    tmp++;
    base = tmp;
    /* Find the matching closing quote. */
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
    if (*tmp == 0)
        goto error;
    URL = xmlStrndup(base, tmp - base);
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5147 * xmlParsePI:
5148 * @ctxt: an XML parser context
5150 * parse an XML Processing Instruction.
5152 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5154 * The processing is transferred to SAX once parsed.
5157 void
5158 xmlParsePI(xmlParserCtxtPtr ctxt) {
5159 xmlChar *buf = NULL;
5160 size_t len = 0;
5161 size_t size = XML_PARSER_BUFFER_SIZE;
5162 int cur, l;
5163 const xmlChar *target;
5164 xmlParserInputState state;
5165 int count = 0;
5167 if ((RAW == '<') && (NXT(1) == '?')) {
5168 int inputid = ctxt->input->id;
5169 state = ctxt->instate;
5170 ctxt->instate = XML_PARSER_PI;
5172 * this is a Processing Instruction.
5174 SKIP(2);
5175 SHRINK;
5178 * Parse the target name and check for special support like
5179 * namespace.
5181 target = xmlParsePITarget(ctxt);
5182 if (target != NULL) {
5183 if ((RAW == '?') && (NXT(1) == '>')) {
5184 if (inputid != ctxt->input->id) {
5185 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5186 "PI declaration doesn't start and stop in"
5187 " the same entity\n");
5189 SKIP(2);
5192 * SAX: PI detected.
5194 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5195 (ctxt->sax->processingInstruction != NULL))
5196 ctxt->sax->processingInstruction(ctxt->userData,
5197 target, NULL);
5198 if (ctxt->instate != XML_PARSER_EOF)
5199 ctxt->instate = state;
5200 return;
5202 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5203 if (buf == NULL) {
5204 xmlErrMemory(ctxt, NULL);
5205 ctxt->instate = state;
5206 return;
5208 if (SKIP_BLANKS == 0) {
5209 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5210 "ParsePI: PI %s space expected\n", target);
5212 cur = CUR_CHAR(l);
5213 while (IS_CHAR(cur) && /* checked */
5214 ((cur != '?') || (NXT(1) != '>'))) {
5215 if (len + 5 >= size) {
5216 xmlChar *tmp;
5217 size_t new_size = size * 2;
5218 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5219 if (tmp == NULL) {
5220 xmlErrMemory(ctxt, NULL);
5221 xmlFree(buf);
5222 ctxt->instate = state;
5223 return;
5225 buf = tmp;
5226 size = new_size;
5228 count++;
5229 if (count > 50) {
5230 SHRINK;
5231 GROW;
5232 if (ctxt->instate == XML_PARSER_EOF) {
5233 xmlFree(buf);
5234 return;
5236 count = 0;
5237 if ((len > XML_MAX_TEXT_LENGTH) &&
5238 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5239 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5240 "PI %s too big found", target);
5241 xmlFree(buf);
5242 ctxt->instate = state;
5243 return;
5246 COPY_BUF(l,buf,len,cur);
5247 NEXTL(l);
5248 cur = CUR_CHAR(l);
5249 if (cur == 0) {
5250 SHRINK;
5251 GROW;
5252 cur = CUR_CHAR(l);
5255 if ((len > XML_MAX_TEXT_LENGTH) &&
5256 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5257 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5258 "PI %s too big found", target);
5259 xmlFree(buf);
5260 ctxt->instate = state;
5261 return;
5263 buf[len] = 0;
5264 if (cur != '?') {
5265 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5266 "ParsePI: PI %s never end ...\n", target);
5267 } else {
5268 if (inputid != ctxt->input->id) {
5269 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5270 "PI declaration doesn't start and stop in"
5271 " the same entity\n");
5273 SKIP(2);
5275 #ifdef LIBXML_CATALOG_ENABLED
5276 if (((state == XML_PARSER_MISC) ||
5277 (state == XML_PARSER_START)) &&
5278 (xmlStrEqual(target, XML_CATALOG_PI))) {
5279 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5280 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5281 (allow == XML_CATA_ALLOW_ALL))
5282 xmlParseCatalogPI(ctxt, buf);
5284 #endif
5288 * SAX: PI detected.
5290 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5291 (ctxt->sax->processingInstruction != NULL))
5292 ctxt->sax->processingInstruction(ctxt->userData,
5293 target, buf);
5295 xmlFree(buf);
5296 } else {
5297 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5299 if (ctxt->instate != XML_PARSER_EOF)
5300 ctxt->instate = state;
5305 * xmlParseNotationDecl:
5306 * @ctxt: an XML parser context
5308 * parse a notation declaration
5310 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5312 * Hence there is actually 3 choices:
5313 * 'PUBLIC' S PubidLiteral
5314 * 'PUBLIC' S PubidLiteral S SystemLiteral
5315 * and 'SYSTEM' S SystemLiteral
5317 * See the NOTE on xmlParseExternalID().
5320 void
5321 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5322 const xmlChar *name;
5323 xmlChar *Pubid;
5324 xmlChar *Systemid;
5326 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5327 int inputid = ctxt->input->id;
5328 SHRINK;
5329 SKIP(10);
5330 if (SKIP_BLANKS == 0) {
5331 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5332 "Space required after '<!NOTATION'\n");
5333 return;
5336 name = xmlParseName(ctxt);
5337 if (name == NULL) {
5338 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5339 return;
5341 if (xmlStrchr(name, ':') != NULL) {
5342 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5343 "colons are forbidden from notation names '%s'\n",
5344 name, NULL, NULL);
5346 if (SKIP_BLANKS == 0) {
5347 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5348 "Space required after the NOTATION name'\n");
5349 return;
5353 * Parse the IDs.
5355 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5356 SKIP_BLANKS;
5358 if (RAW == '>') {
5359 if (inputid != ctxt->input->id) {
5360 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5361 "Notation declaration doesn't start and stop"
5362 " in the same entity\n");
5364 NEXT;
5365 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5366 (ctxt->sax->notationDecl != NULL))
5367 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5368 } else {
5369 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5371 if (Systemid != NULL) xmlFree(Systemid);
5372 if (Pubid != NULL) xmlFree(Pubid);
5377 * xmlParseEntityDecl:
5378 * @ctxt: an XML parser context
5380 * parse <!ENTITY declarations
5382 * [70] EntityDecl ::= GEDecl | PEDecl
5384 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5386 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5388 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5390 * [74] PEDef ::= EntityValue | ExternalID
5392 * [76] NDataDecl ::= S 'NDATA' S Name
5394 * [ VC: Notation Declared ]
5395 * The Name must match the declared name of a notation.
5398 void
5399 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5400 const xmlChar *name = NULL;
5401 xmlChar *value = NULL;
5402 xmlChar *URI = NULL, *literal = NULL;
5403 const xmlChar *ndata = NULL;
5404 int isParameter = 0;
5405 xmlChar *orig = NULL;
5407 /* GROW; done in the caller */
5408 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5409 int inputid = ctxt->input->id;
5410 SHRINK;
5411 SKIP(8);
5412 if (SKIP_BLANKS == 0) {
5413 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5414 "Space required after '<!ENTITY'\n");
5417 if (RAW == '%') {
5418 NEXT;
5419 if (SKIP_BLANKS == 0) {
5420 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5421 "Space required after '%%'\n");
5423 isParameter = 1;
5426 name = xmlParseName(ctxt);
5427 if (name == NULL) {
5428 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5429 "xmlParseEntityDecl: no name\n");
5430 return;
5432 if (xmlStrchr(name, ':') != NULL) {
5433 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5434 "colons are forbidden from entities names '%s'\n",
5435 name, NULL, NULL);
5437 if (SKIP_BLANKS == 0) {
5438 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5439 "Space required after the entity name\n");
5442 ctxt->instate = XML_PARSER_ENTITY_DECL;
5444 * handle the various case of definitions...
5446 if (isParameter) {
5447 if ((RAW == '"') || (RAW == '\'')) {
5448 value = xmlParseEntityValue(ctxt, &orig);
5449 if (value) {
5450 if ((ctxt->sax != NULL) &&
5451 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5452 ctxt->sax->entityDecl(ctxt->userData, name,
5453 XML_INTERNAL_PARAMETER_ENTITY,
5454 NULL, NULL, value);
5456 } else {
5457 URI = xmlParseExternalID(ctxt, &literal, 1);
5458 if ((URI == NULL) && (literal == NULL)) {
5459 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5461 if (URI) {
5462 xmlURIPtr uri;
5464 uri = xmlParseURI((const char *) URI);
5465 if (uri == NULL) {
5466 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5467 "Invalid URI: %s\n", URI);
5469 * This really ought to be a well formedness error
5470 * but the XML Core WG decided otherwise c.f. issue
5471 * E26 of the XML erratas.
5473 } else {
5474 if (uri->fragment != NULL) {
5476 * Okay this is foolish to block those but not
5477 * invalid URIs.
5479 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5480 } else {
5481 if ((ctxt->sax != NULL) &&
5482 (!ctxt->disableSAX) &&
5483 (ctxt->sax->entityDecl != NULL))
5484 ctxt->sax->entityDecl(ctxt->userData, name,
5485 XML_EXTERNAL_PARAMETER_ENTITY,
5486 literal, URI, NULL);
5488 xmlFreeURI(uri);
5492 } else {
5493 if ((RAW == '"') || (RAW == '\'')) {
5494 value = xmlParseEntityValue(ctxt, &orig);
5495 if ((ctxt->sax != NULL) &&
5496 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5497 ctxt->sax->entityDecl(ctxt->userData, name,
5498 XML_INTERNAL_GENERAL_ENTITY,
5499 NULL, NULL, value);
5501 * For expat compatibility in SAX mode.
5503 if ((ctxt->myDoc == NULL) ||
5504 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5505 if (ctxt->myDoc == NULL) {
5506 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5507 if (ctxt->myDoc == NULL) {
5508 xmlErrMemory(ctxt, "New Doc failed");
5509 return;
5511 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5513 if (ctxt->myDoc->intSubset == NULL)
5514 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5515 BAD_CAST "fake", NULL, NULL);
5517 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5518 NULL, NULL, value);
5520 } else {
5521 URI = xmlParseExternalID(ctxt, &literal, 1);
5522 if ((URI == NULL) && (literal == NULL)) {
5523 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5525 if (URI) {
5526 xmlURIPtr uri;
5528 uri = xmlParseURI((const char *)URI);
5529 if (uri == NULL) {
5530 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5531 "Invalid URI: %s\n", URI);
5533 * This really ought to be a well formedness error
5534 * but the XML Core WG decided otherwise c.f. issue
5535 * E26 of the XML erratas.
5537 } else {
5538 if (uri->fragment != NULL) {
5540 * Okay this is foolish to block those but not
5541 * invalid URIs.
5543 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5545 xmlFreeURI(uri);
5548 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5549 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5550 "Space required before 'NDATA'\n");
5552 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5553 SKIP(5);
5554 if (SKIP_BLANKS == 0) {
5555 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5556 "Space required after 'NDATA'\n");
5558 ndata = xmlParseName(ctxt);
5559 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5560 (ctxt->sax->unparsedEntityDecl != NULL))
5561 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5562 literal, URI, ndata);
5563 } else {
5564 if ((ctxt->sax != NULL) &&
5565 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5566 ctxt->sax->entityDecl(ctxt->userData, name,
5567 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5568 literal, URI, NULL);
5570 * For expat compatibility in SAX mode.
5571 * assuming the entity replacement was asked for
5573 if ((ctxt->replaceEntities != 0) &&
5574 ((ctxt->myDoc == NULL) ||
5575 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5576 if (ctxt->myDoc == NULL) {
5577 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5578 if (ctxt->myDoc == NULL) {
5579 xmlErrMemory(ctxt, "New Doc failed");
5580 return;
5582 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5585 if (ctxt->myDoc->intSubset == NULL)
5586 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5587 BAD_CAST "fake", NULL, NULL);
5588 xmlSAX2EntityDecl(ctxt, name,
5589 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5590 literal, URI, NULL);
5595 if (ctxt->instate == XML_PARSER_EOF)
5596 goto done;
5597 SKIP_BLANKS;
5598 if (RAW != '>') {
5599 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5600 "xmlParseEntityDecl: entity %s not terminated\n", name);
5601 xmlHaltParser(ctxt);
5602 } else {
5603 if (inputid != ctxt->input->id) {
5604 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5605 "Entity declaration doesn't start and stop in"
5606 " the same entity\n");
5608 NEXT;
5610 if (orig != NULL) {
5612 * Ugly mechanism to save the raw entity value.
5614 xmlEntityPtr cur = NULL;
5616 if (isParameter) {
5617 if ((ctxt->sax != NULL) &&
5618 (ctxt->sax->getParameterEntity != NULL))
5619 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5620 } else {
5621 if ((ctxt->sax != NULL) &&
5622 (ctxt->sax->getEntity != NULL))
5623 cur = ctxt->sax->getEntity(ctxt->userData, name);
5624 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5625 cur = xmlSAX2GetEntity(ctxt, name);
5628 if ((cur != NULL) && (cur->orig == NULL)) {
5629 cur->orig = orig;
5630 orig = NULL;
5634 done:
5635 if (value != NULL) xmlFree(value);
5636 if (URI != NULL) xmlFree(URI);
5637 if (literal != NULL) xmlFree(literal);
5638 if (orig != NULL) xmlFree(orig);
5643 * xmlParseDefaultDecl:
5644 * @ctxt: an XML parser context
5645 * @value: Receive a possible fixed default value for the attribute
5647 * Parse an attribute default declaration
5649 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5651 * [ VC: Required Attribute ]
5652 * if the default declaration is the keyword #REQUIRED, then the
5653 * attribute must be specified for all elements of the type in the
5654 * attribute-list declaration.
5656 * [ VC: Attribute Default Legal ]
5657 * The declared default value must meet the lexical constraints of
5658 * the declared attribute type c.f. xmlValidateAttributeDecl()
5660 * [ VC: Fixed Attribute Default ]
5661 * if an attribute has a default value declared with the #FIXED
5662 * keyword, instances of that attribute must match the default value.
5664 * [ WFC: No < in Attribute Values ]
5665 * handled in xmlParseAttValue()
5667 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5668 * or XML_ATTRIBUTE_FIXED.
5672 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5673 int val;
5674 xmlChar *ret;
5676 *value = NULL;
5677 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5678 SKIP(9);
5679 return(XML_ATTRIBUTE_REQUIRED);
5681 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5682 SKIP(8);
5683 return(XML_ATTRIBUTE_IMPLIED);
5685 val = XML_ATTRIBUTE_NONE;
5686 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5687 SKIP(6);
5688 val = XML_ATTRIBUTE_FIXED;
5689 if (SKIP_BLANKS == 0) {
5690 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5691 "Space required after '#FIXED'\n");
5694 ret = xmlParseAttValue(ctxt);
5695 ctxt->instate = XML_PARSER_DTD;
5696 if (ret == NULL) {
5697 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5698 "Attribute default value declaration error\n");
5699 } else
5700 *value = ret;
5701 return(val);
5705 * xmlParseNotationType:
5706 * @ctxt: an XML parser context
5708 * parse an Notation attribute type.
5710 * Note: the leading 'NOTATION' S part has already being parsed...
5712 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5714 * [ VC: Notation Attributes ]
5715 * Values of this type must match one of the notation names included
5716 * in the declaration; all notation names in the declaration must be declared.
5718 * Returns: the notation attribute tree built while parsing
5721 xmlEnumerationPtr
5722 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5723 const xmlChar *name;
5724 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5726 if (RAW != '(') {
5727 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5728 return(NULL);
5730 SHRINK;
5731 do {
5732 NEXT;
5733 SKIP_BLANKS;
5734 name = xmlParseName(ctxt);
5735 if (name == NULL) {
5736 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5737 "Name expected in NOTATION declaration\n");
5738 xmlFreeEnumeration(ret);
5739 return(NULL);
5741 tmp = ret;
5742 while (tmp != NULL) {
5743 if (xmlStrEqual(name, tmp->name)) {
5744 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5745 "standalone: attribute notation value token %s duplicated\n",
5746 name, NULL);
5747 if (!xmlDictOwns(ctxt->dict, name))
5748 xmlFree((xmlChar *) name);
5749 break;
5751 tmp = tmp->next;
5753 if (tmp == NULL) {
5754 cur = xmlCreateEnumeration(name);
5755 if (cur == NULL) {
5756 xmlFreeEnumeration(ret);
5757 return(NULL);
5759 if (last == NULL) ret = last = cur;
5760 else {
5761 last->next = cur;
5762 last = cur;
5765 SKIP_BLANKS;
5766 } while (RAW == '|');
5767 if (RAW != ')') {
5768 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5769 xmlFreeEnumeration(ret);
5770 return(NULL);
5772 NEXT;
5773 return(ret);
5777 * xmlParseEnumerationType:
5778 * @ctxt: an XML parser context
5780 * parse an Enumeration attribute type.
5782 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5784 * [ VC: Enumeration ]
5785 * Values of this type must match one of the Nmtoken tokens in
5786 * the declaration
5788 * Returns: the enumeration attribute tree built while parsing
5791 xmlEnumerationPtr
5792 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5793 xmlChar *name;
5794 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5796 if (RAW != '(') {
5797 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5798 return(NULL);
5800 SHRINK;
5801 do {
5802 NEXT;
5803 SKIP_BLANKS;
5804 name = xmlParseNmtoken(ctxt);
5805 if (name == NULL) {
5806 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5807 return(ret);
5809 tmp = ret;
5810 while (tmp != NULL) {
5811 if (xmlStrEqual(name, tmp->name)) {
5812 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5813 "standalone: attribute enumeration value token %s duplicated\n",
5814 name, NULL);
5815 if (!xmlDictOwns(ctxt->dict, name))
5816 xmlFree(name);
5817 break;
5819 tmp = tmp->next;
5821 if (tmp == NULL) {
5822 cur = xmlCreateEnumeration(name);
5823 if (!xmlDictOwns(ctxt->dict, name))
5824 xmlFree(name);
5825 if (cur == NULL) {
5826 xmlFreeEnumeration(ret);
5827 return(NULL);
5829 if (last == NULL) ret = last = cur;
5830 else {
5831 last->next = cur;
5832 last = cur;
5835 SKIP_BLANKS;
5836 } while (RAW == '|');
5837 if (RAW != ')') {
5838 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5839 return(ret);
5841 NEXT;
5842 return(ret);
5846 * xmlParseEnumeratedType:
5847 * @ctxt: an XML parser context
5848 * @tree: the enumeration tree built while parsing
5850 * parse an Enumerated attribute type.
5852 * [57] EnumeratedType ::= NotationType | Enumeration
5854 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5857 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5861 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5862 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5863 SKIP(8);
5864 if (SKIP_BLANKS == 0) {
5865 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5866 "Space required after 'NOTATION'\n");
5867 return(0);
5869 *tree = xmlParseNotationType(ctxt);
5870 if (*tree == NULL) return(0);
5871 return(XML_ATTRIBUTE_NOTATION);
5873 *tree = xmlParseEnumerationType(ctxt);
5874 if (*tree == NULL) return(0);
5875 return(XML_ATTRIBUTE_ENUMERATION);
5879 * xmlParseAttributeType:
5880 * @ctxt: an XML parser context
5881 * @tree: the enumeration tree built while parsing
5883 * parse the Attribute list def for an element
5885 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5887 * [55] StringType ::= 'CDATA'
5889 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5890 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5892 * Validity constraints for attribute values syntax are checked in
5893 * xmlValidateAttributeValue()
5895 * [ VC: ID ]
5896 * Values of type ID must match the Name production. A name must not
5897 * appear more than once in an XML document as a value of this type;
5898 * i.e., ID values must uniquely identify the elements which bear them.
5900 * [ VC: One ID per Element Type ]
5901 * No element type may have more than one ID attribute specified.
5903 * [ VC: ID Attribute Default ]
5904 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5906 * [ VC: IDREF ]
5907 * Values of type IDREF must match the Name production, and values
5908 * of type IDREFS must match Names; each IDREF Name must match the value
5909 * of an ID attribute on some element in the XML document; i.e. IDREF
5910 * values must match the value of some ID attribute.
5912 * [ VC: Entity Name ]
5913 * Values of type ENTITY must match the Name production, values
5914 * of type ENTITIES must match Names; each Entity Name must match the
5915 * name of an unparsed entity declared in the DTD.
5917 * [ VC: Name Token ]
5918 * Values of type NMTOKEN must match the Nmtoken production; values
5919 * of type NMTOKENS must match Nmtokens.
5921 * Returns the attribute type
5924 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5925 SHRINK;
5926 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5927 SKIP(5);
5928 return(XML_ATTRIBUTE_CDATA);
5929 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5930 SKIP(6);
5931 return(XML_ATTRIBUTE_IDREFS);
5932 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5933 SKIP(5);
5934 return(XML_ATTRIBUTE_IDREF);
5935 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5936 SKIP(2);
5937 return(XML_ATTRIBUTE_ID);
5938 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5939 SKIP(6);
5940 return(XML_ATTRIBUTE_ENTITY);
5941 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5942 SKIP(8);
5943 return(XML_ATTRIBUTE_ENTITIES);
5944 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5945 SKIP(8);
5946 return(XML_ATTRIBUTE_NMTOKENS);
5947 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5948 SKIP(7);
5949 return(XML_ATTRIBUTE_NMTOKEN);
5951 return(xmlParseEnumeratedType(ctxt, tree));
5955 * xmlParseAttributeListDecl:
5956 * @ctxt: an XML parser context
5958 * : parse the Attribute list def for an element
5960 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5962 * [53] AttDef ::= S Name S AttType S DefaultDecl
5965 void
5966 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5967 const xmlChar *elemName;
5968 const xmlChar *attrName;
5969 xmlEnumerationPtr tree;
5971 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5972 int inputid = ctxt->input->id;
5974 SKIP(9);
5975 if (SKIP_BLANKS == 0) {
5976 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5977 "Space required after '<!ATTLIST'\n");
5979 elemName = xmlParseName(ctxt);
5980 if (elemName == NULL) {
5981 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5982 "ATTLIST: no name for Element\n");
5983 return;
5985 SKIP_BLANKS;
5986 GROW;
5987 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5988 int type;
5989 int def;
5990 xmlChar *defaultValue = NULL;
5992 GROW;
5993 tree = NULL;
5994 attrName = xmlParseName(ctxt);
5995 if (attrName == NULL) {
5996 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5997 "ATTLIST: no name for Attribute\n");
5998 break;
6000 GROW;
6001 if (SKIP_BLANKS == 0) {
6002 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6003 "Space required after the attribute name\n");
6004 break;
6007 type = xmlParseAttributeType(ctxt, &tree);
6008 if (type <= 0) {
6009 break;
6012 GROW;
6013 if (SKIP_BLANKS == 0) {
6014 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6015 "Space required after the attribute type\n");
6016 if (tree != NULL)
6017 xmlFreeEnumeration(tree);
6018 break;
6021 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6022 if (def <= 0) {
6023 if (defaultValue != NULL)
6024 xmlFree(defaultValue);
6025 if (tree != NULL)
6026 xmlFreeEnumeration(tree);
6027 break;
6029 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6030 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6032 GROW;
6033 if (RAW != '>') {
6034 if (SKIP_BLANKS == 0) {
6035 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6036 "Space required after the attribute default value\n");
6037 if (defaultValue != NULL)
6038 xmlFree(defaultValue);
6039 if (tree != NULL)
6040 xmlFreeEnumeration(tree);
6041 break;
6044 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6045 (ctxt->sax->attributeDecl != NULL))
6046 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6047 type, def, defaultValue, tree);
6048 else if (tree != NULL)
6049 xmlFreeEnumeration(tree);
6051 if ((ctxt->sax2) && (defaultValue != NULL) &&
6052 (def != XML_ATTRIBUTE_IMPLIED) &&
6053 (def != XML_ATTRIBUTE_REQUIRED)) {
6054 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6056 if (ctxt->sax2) {
6057 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6059 if (defaultValue != NULL)
6060 xmlFree(defaultValue);
6061 GROW;
6063 if (RAW == '>') {
6064 if (inputid != ctxt->input->id) {
6065 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6066 "Attribute list declaration doesn't start and"
6067 " stop in the same entity\n");
6069 NEXT;
6075 * xmlParseElementMixedContentDecl:
6076 * @ctxt: an XML parser context
6077 * @inputchk: the input used for the current entity, needed for boundary checks
6079 * parse the declaration for a Mixed Element content
6080 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6082 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6083 * '(' S? '#PCDATA' S? ')'
6085 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6087 * [ VC: No Duplicate Types ]
6088 * The same name must not appear more than once in a single
6089 * mixed-content declaration.
6091 * returns: the list of the xmlElementContentPtr describing the element choices
6093 xmlElementContentPtr
6094 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6095 xmlElementContentPtr ret = NULL, cur = NULL, n;
6096 const xmlChar *elem = NULL;
6098 GROW;
6099 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6100 SKIP(7);
6101 SKIP_BLANKS;
6102 SHRINK;
6103 if (RAW == ')') {
6104 if (ctxt->input->id != inputchk) {
6105 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6106 "Element content declaration doesn't start and"
6107 " stop in the same entity\n");
6109 NEXT;
6110 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6111 if (ret == NULL)
6112 return(NULL);
6113 if (RAW == '*') {
6114 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6115 NEXT;
6117 return(ret);
6119 if ((RAW == '(') || (RAW == '|')) {
6120 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6121 if (ret == NULL) return(NULL);
6123 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6124 NEXT;
6125 if (elem == NULL) {
6126 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6127 if (ret == NULL) {
6128 xmlFreeDocElementContent(ctxt->myDoc, cur);
6129 return(NULL);
6131 ret->c1 = cur;
6132 if (cur != NULL)
6133 cur->parent = ret;
6134 cur = ret;
6135 } else {
6136 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6137 if (n == NULL) {
6138 xmlFreeDocElementContent(ctxt->myDoc, ret);
6139 return(NULL);
6141 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6142 if (n->c1 != NULL)
6143 n->c1->parent = n;
6144 cur->c2 = n;
6145 if (n != NULL)
6146 n->parent = cur;
6147 cur = n;
6149 SKIP_BLANKS;
6150 elem = xmlParseName(ctxt);
6151 if (elem == NULL) {
6152 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6153 "xmlParseElementMixedContentDecl : Name expected\n");
6154 xmlFreeDocElementContent(ctxt->myDoc, ret);
6155 return(NULL);
6157 SKIP_BLANKS;
6158 GROW;
6160 if ((RAW == ')') && (NXT(1) == '*')) {
6161 if (elem != NULL) {
6162 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6163 XML_ELEMENT_CONTENT_ELEMENT);
6164 if (cur->c2 != NULL)
6165 cur->c2->parent = cur;
6167 if (ret != NULL)
6168 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6169 if (ctxt->input->id != inputchk) {
6170 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6171 "Element content declaration doesn't start and"
6172 " stop in the same entity\n");
6174 SKIP(2);
6175 } else {
6176 xmlFreeDocElementContent(ctxt->myDoc, ret);
6177 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6178 return(NULL);
6181 } else {
6182 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6184 return(ret);
6188 * xmlParseElementChildrenContentDeclPriv:
6189 * @ctxt: an XML parser context
6190 * @inputchk: the input used for the current entity, needed for boundary checks
6191 * @depth: the level of recursion
6193 * parse the declaration for a Mixed Element content
6194 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6197 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6199 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6201 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6203 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6205 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6206 * TODO Parameter-entity replacement text must be properly nested
6207 * with parenthesized groups. That is to say, if either of the
6208 * opening or closing parentheses in a choice, seq, or Mixed
6209 * construct is contained in the replacement text for a parameter
6210 * entity, both must be contained in the same replacement text. For
6211 * interoperability, if a parameter-entity reference appears in a
6212 * choice, seq, or Mixed construct, its replacement text should not
6213 * be empty, and neither the first nor last non-blank character of
6214 * the replacement text should be a connector (| or ,).
6216 * Returns the tree of xmlElementContentPtr describing the element
6217 * hierarchy.
6219 static xmlElementContentPtr
6220 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6221 int depth) {
6222 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6223 const xmlChar *elem;
6224 xmlChar type = 0;
6226 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6227 (depth > 2048)) {
6228 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6229 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6230 depth);
6231 return(NULL);
6233 SKIP_BLANKS;
6234 GROW;
6235 if (RAW == '(') {
6236 int inputid = ctxt->input->id;
6238 /* Recurse on first child */
6239 NEXT;
6240 SKIP_BLANKS;
6241 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6242 depth + 1);
6243 if (cur == NULL)
6244 return(NULL);
6245 SKIP_BLANKS;
6246 GROW;
6247 } else {
6248 elem = xmlParseName(ctxt);
6249 if (elem == NULL) {
6250 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6251 return(NULL);
6253 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6254 if (cur == NULL) {
6255 xmlErrMemory(ctxt, NULL);
6256 return(NULL);
6258 GROW;
6259 if (RAW == '?') {
6260 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6261 NEXT;
6262 } else if (RAW == '*') {
6263 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6264 NEXT;
6265 } else if (RAW == '+') {
6266 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6267 NEXT;
6268 } else {
6269 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6271 GROW;
6273 SKIP_BLANKS;
6274 SHRINK;
6275 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6277 * Each loop we parse one separator and one element.
6279 if (RAW == ',') {
6280 if (type == 0) type = CUR;
6283 * Detect "Name | Name , Name" error
6285 else if (type != CUR) {
6286 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6287 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6288 type);
6289 if ((last != NULL) && (last != ret))
6290 xmlFreeDocElementContent(ctxt->myDoc, last);
6291 if (ret != NULL)
6292 xmlFreeDocElementContent(ctxt->myDoc, ret);
6293 return(NULL);
6295 NEXT;
6297 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6298 if (op == NULL) {
6299 if ((last != NULL) && (last != ret))
6300 xmlFreeDocElementContent(ctxt->myDoc, last);
6301 xmlFreeDocElementContent(ctxt->myDoc, ret);
6302 return(NULL);
6304 if (last == NULL) {
6305 op->c1 = ret;
6306 if (ret != NULL)
6307 ret->parent = op;
6308 ret = cur = op;
6309 } else {
6310 cur->c2 = op;
6311 if (op != NULL)
6312 op->parent = cur;
6313 op->c1 = last;
6314 if (last != NULL)
6315 last->parent = op;
6316 cur =op;
6317 last = NULL;
6319 } else if (RAW == '|') {
6320 if (type == 0) type = CUR;
6323 * Detect "Name , Name | Name" error
6325 else if (type != CUR) {
6326 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6327 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6328 type);
6329 if ((last != NULL) && (last != ret))
6330 xmlFreeDocElementContent(ctxt->myDoc, last);
6331 if (ret != NULL)
6332 xmlFreeDocElementContent(ctxt->myDoc, ret);
6333 return(NULL);
6335 NEXT;
6337 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6338 if (op == NULL) {
6339 if ((last != NULL) && (last != ret))
6340 xmlFreeDocElementContent(ctxt->myDoc, last);
6341 if (ret != NULL)
6342 xmlFreeDocElementContent(ctxt->myDoc, ret);
6343 return(NULL);
6345 if (last == NULL) {
6346 op->c1 = ret;
6347 if (ret != NULL)
6348 ret->parent = op;
6349 ret = cur = op;
6350 } else {
6351 cur->c2 = op;
6352 if (op != NULL)
6353 op->parent = cur;
6354 op->c1 = last;
6355 if (last != NULL)
6356 last->parent = op;
6357 cur =op;
6358 last = NULL;
6360 } else {
6361 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6362 if ((last != NULL) && (last != ret))
6363 xmlFreeDocElementContent(ctxt->myDoc, last);
6364 if (ret != NULL)
6365 xmlFreeDocElementContent(ctxt->myDoc, ret);
6366 return(NULL);
6368 GROW;
6369 SKIP_BLANKS;
6370 GROW;
6371 if (RAW == '(') {
6372 int inputid = ctxt->input->id;
6373 /* Recurse on second child */
6374 NEXT;
6375 SKIP_BLANKS;
6376 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6377 depth + 1);
6378 if (last == NULL) {
6379 if (ret != NULL)
6380 xmlFreeDocElementContent(ctxt->myDoc, ret);
6381 return(NULL);
6383 SKIP_BLANKS;
6384 } else {
6385 elem = xmlParseName(ctxt);
6386 if (elem == NULL) {
6387 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6388 if (ret != NULL)
6389 xmlFreeDocElementContent(ctxt->myDoc, ret);
6390 return(NULL);
6392 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6393 if (last == NULL) {
6394 if (ret != NULL)
6395 xmlFreeDocElementContent(ctxt->myDoc, ret);
6396 return(NULL);
6398 if (RAW == '?') {
6399 last->ocur = XML_ELEMENT_CONTENT_OPT;
6400 NEXT;
6401 } else if (RAW == '*') {
6402 last->ocur = XML_ELEMENT_CONTENT_MULT;
6403 NEXT;
6404 } else if (RAW == '+') {
6405 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6406 NEXT;
6407 } else {
6408 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6411 SKIP_BLANKS;
6412 GROW;
6414 if ((cur != NULL) && (last != NULL)) {
6415 cur->c2 = last;
6416 if (last != NULL)
6417 last->parent = cur;
6419 if (ctxt->input->id != inputchk) {
6420 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6421 "Element content declaration doesn't start and stop in"
6422 " the same entity\n");
6424 NEXT;
6425 if (RAW == '?') {
6426 if (ret != NULL) {
6427 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6428 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6429 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6430 else
6431 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6433 NEXT;
6434 } else if (RAW == '*') {
6435 if (ret != NULL) {
6436 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6437 cur = ret;
6439 * Some normalization:
6440 * (a | b* | c?)* == (a | b | c)*
6442 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6443 if ((cur->c1 != NULL) &&
6444 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6445 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6446 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6447 if ((cur->c2 != NULL) &&
6448 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6449 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6450 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6451 cur = cur->c2;
6454 NEXT;
6455 } else if (RAW == '+') {
6456 if (ret != NULL) {
6457 int found = 0;
6459 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6460 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6461 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6462 else
6463 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6465 * Some normalization:
6466 * (a | b*)+ == (a | b)*
6467 * (a | b?)+ == (a | b)*
6469 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6470 if ((cur->c1 != NULL) &&
6471 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6472 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6473 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6474 found = 1;
6476 if ((cur->c2 != NULL) &&
6477 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6478 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6479 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6480 found = 1;
6482 cur = cur->c2;
6484 if (found)
6485 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6487 NEXT;
6489 return(ret);
6493 * xmlParseElementChildrenContentDecl:
6494 * @ctxt: an XML parser context
6495 * @inputchk: the input used for the current entity, needed for boundary checks
6497 * parse the declaration for a Mixed Element content
6498 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6500 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6502 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6504 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6506 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6508 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6509 * TODO Parameter-entity replacement text must be properly nested
6510 * with parenthesized groups. That is to say, if either of the
6511 * opening or closing parentheses in a choice, seq, or Mixed
6512 * construct is contained in the replacement text for a parameter
6513 * entity, both must be contained in the same replacement text. For
6514 * interoperability, if a parameter-entity reference appears in a
6515 * choice, seq, or Mixed construct, its replacement text should not
6516 * be empty, and neither the first nor last non-blank character of
6517 * the replacement text should be a connector (| or ,).
6519 * Returns the tree of xmlElementContentPtr describing the element
6520 * hierarchy.
6522 xmlElementContentPtr
6523 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6524 /* stub left for API/ABI compat */
6525 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6529 * xmlParseElementContentDecl:
6530 * @ctxt: an XML parser context
6531 * @name: the name of the element being defined.
6532 * @result: the Element Content pointer will be stored here if any
6534 * parse the declaration for an Element content either Mixed or Children,
6535 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6537 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6539 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6543 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6544 xmlElementContentPtr *result) {
6546 xmlElementContentPtr tree = NULL;
6547 int inputid = ctxt->input->id;
6548 int res;
6550 *result = NULL;
6552 if (RAW != '(') {
6553 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6554 "xmlParseElementContentDecl : %s '(' expected\n", name);
6555 return(-1);
6557 NEXT;
6558 GROW;
6559 if (ctxt->instate == XML_PARSER_EOF)
6560 return(-1);
6561 SKIP_BLANKS;
6562 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6563 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6564 res = XML_ELEMENT_TYPE_MIXED;
6565 } else {
6566 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6567 res = XML_ELEMENT_TYPE_ELEMENT;
6569 SKIP_BLANKS;
6570 *result = tree;
6571 return(res);
6575 * xmlParseElementDecl:
6576 * @ctxt: an XML parser context
6578 * parse an Element declaration.
6580 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6582 * [ VC: Unique Element Type Declaration ]
6583 * No element type may be declared more than once
6585 * Returns the type of the element, or -1 in case of error
6588 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6589 const xmlChar *name;
6590 int ret = -1;
6591 xmlElementContentPtr content = NULL;
6593 /* GROW; done in the caller */
6594 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6595 int inputid = ctxt->input->id;
6597 SKIP(9);
6598 if (SKIP_BLANKS == 0) {
6599 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6600 "Space required after 'ELEMENT'\n");
6601 return(-1);
6603 name = xmlParseName(ctxt);
6604 if (name == NULL) {
6605 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6606 "xmlParseElementDecl: no name for Element\n");
6607 return(-1);
6609 if (SKIP_BLANKS == 0) {
6610 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6611 "Space required after the element name\n");
6613 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6614 SKIP(5);
6616 * Element must always be empty.
6618 ret = XML_ELEMENT_TYPE_EMPTY;
6619 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6620 (NXT(2) == 'Y')) {
6621 SKIP(3);
6623 * Element is a generic container.
6625 ret = XML_ELEMENT_TYPE_ANY;
6626 } else if (RAW == '(') {
6627 ret = xmlParseElementContentDecl(ctxt, name, &content);
6628 } else {
6630 * [ WFC: PEs in Internal Subset ] error handling.
6632 if ((RAW == '%') && (ctxt->external == 0) &&
6633 (ctxt->inputNr == 1)) {
6634 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6635 "PEReference: forbidden within markup decl in internal subset\n");
6636 } else {
6637 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6638 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6640 return(-1);
6643 SKIP_BLANKS;
6645 if (RAW != '>') {
6646 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6647 if (content != NULL) {
6648 xmlFreeDocElementContent(ctxt->myDoc, content);
6650 } else {
6651 if (inputid != ctxt->input->id) {
6652 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6653 "Element declaration doesn't start and stop in"
6654 " the same entity\n");
6657 NEXT;
6658 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6659 (ctxt->sax->elementDecl != NULL)) {
6660 if (content != NULL)
6661 content->parent = NULL;
6662 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6663 content);
6664 if ((content != NULL) && (content->parent == NULL)) {
6666 * this is a trick: if xmlAddElementDecl is called,
6667 * instead of copying the full tree it is plugged directly
6668 * if called from the parser. Avoid duplicating the
6669 * interfaces or change the API/ABI
6671 xmlFreeDocElementContent(ctxt->myDoc, content);
6673 } else if (content != NULL) {
6674 xmlFreeDocElementContent(ctxt->myDoc, content);
6678 return(ret);
6682 * xmlParseConditionalSections
6683 * @ctxt: an XML parser context
6685 * [61] conditionalSect ::= includeSect | ignoreSect
6686 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6687 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6688 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6689 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6692 static void
6693 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6694 int *inputIds = NULL;
6695 size_t inputIdsSize = 0;
6696 size_t depth = 0;
6698 while (ctxt->instate != XML_PARSER_EOF) {
6699 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6700 int id = ctxt->input->id;
6702 SKIP(3);
6703 SKIP_BLANKS;
6705 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6706 SKIP(7);
6707 SKIP_BLANKS;
6708 if (RAW != '[') {
6709 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6710 xmlHaltParser(ctxt);
6711 goto error;
6713 if (ctxt->input->id != id) {
6714 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6715 "All markup of the conditional section is"
6716 " not in the same entity\n");
6718 NEXT;
6720 if (inputIdsSize <= depth) {
6721 int *tmp;
6723 inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6724 tmp = (int *) xmlRealloc(inputIds,
6725 inputIdsSize * sizeof(int));
6726 if (tmp == NULL) {
6727 xmlErrMemory(ctxt, NULL);
6728 goto error;
6730 inputIds = tmp;
6732 inputIds[depth] = id;
6733 depth++;
6734 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6735 int state;
6736 xmlParserInputState instate;
6737 size_t ignoreDepth = 0;
6739 SKIP(6);
6740 SKIP_BLANKS;
6741 if (RAW != '[') {
6742 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6743 xmlHaltParser(ctxt);
6744 goto error;
6746 if (ctxt->input->id != id) {
6747 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6748 "All markup of the conditional section is"
6749 " not in the same entity\n");
6751 NEXT;
6754 * Parse up to the end of the conditional section but disable
6755 * SAX event generating DTD building in the meantime
6757 state = ctxt->disableSAX;
6758 instate = ctxt->instate;
6759 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6760 ctxt->instate = XML_PARSER_IGNORE;
6762 while (RAW != 0) {
6763 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6764 SKIP(3);
6765 ignoreDepth++;
6766 /* Check for integer overflow */
6767 if (ignoreDepth == 0) {
6768 xmlErrMemory(ctxt, NULL);
6769 goto error;
6771 } else if ((RAW == ']') && (NXT(1) == ']') &&
6772 (NXT(2) == '>')) {
6773 if (ignoreDepth == 0)
6774 break;
6775 SKIP(3);
6776 ignoreDepth--;
6777 } else {
6778 NEXT;
6782 ctxt->disableSAX = state;
6783 ctxt->instate = instate;
6785 if (RAW == 0) {
6786 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6787 goto error;
6789 if (ctxt->input->id != id) {
6790 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6791 "All markup of the conditional section is"
6792 " not in the same entity\n");
6794 SKIP(3);
6795 } else {
6796 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6797 xmlHaltParser(ctxt);
6798 goto error;
6800 } else if ((depth > 0) &&
6801 (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6802 depth--;
6803 if (ctxt->input->id != inputIds[depth]) {
6804 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6805 "All markup of the conditional section is not"
6806 " in the same entity\n");
6808 SKIP(3);
6809 } else {
6810 int id = ctxt->input->id;
6811 unsigned long cons = CUR_CONSUMED;
6813 xmlParseMarkupDecl(ctxt);
6815 if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
6816 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6817 xmlHaltParser(ctxt);
6818 goto error;
6822 if (depth == 0)
6823 break;
6825 SKIP_BLANKS;
6826 GROW;
6829 error:
6830 xmlFree(inputIds);
6834 * xmlParseMarkupDecl:
6835 * @ctxt: an XML parser context
6837 * parse Markup declarations
6839 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6840 * NotationDecl | PI | Comment
6842 * [ VC: Proper Declaration/PE Nesting ]
6843 * Parameter-entity replacement text must be properly nested with
6844 * markup declarations. That is to say, if either the first character
6845 * or the last character of a markup declaration (markupdecl above) is
6846 * contained in the replacement text for a parameter-entity reference,
6847 * both must be contained in the same replacement text.
6849 * [ WFC: PEs in Internal Subset ]
6850 * In the internal DTD subset, parameter-entity references can occur
6851 * only where markup declarations can occur, not within markup declarations.
6852 * (This does not apply to references that occur in external parameter
6853 * entities or to the external subset.)
6855 void
6856 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6857 GROW;
6858 if (CUR == '<') {
6859 if (NXT(1) == '!') {
6860 switch (NXT(2)) {
6861 case 'E':
6862 if (NXT(3) == 'L')
6863 xmlParseElementDecl(ctxt);
6864 else if (NXT(3) == 'N')
6865 xmlParseEntityDecl(ctxt);
6866 break;
6867 case 'A':
6868 xmlParseAttributeListDecl(ctxt);
6869 break;
6870 case 'N':
6871 xmlParseNotationDecl(ctxt);
6872 break;
6873 case '-':
6874 xmlParseComment(ctxt);
6875 break;
6876 default:
6877 /* there is an error but it will be detected later */
6878 break;
6880 } else if (NXT(1) == '?') {
6881 xmlParsePI(ctxt);
6886 * detect requirement to exit there and act accordingly
6887 * and avoid having instate overridden later on
6889 if (ctxt->instate == XML_PARSER_EOF)
6890 return;
6892 ctxt->instate = XML_PARSER_DTD;
6896 * xmlParseTextDecl:
6897 * @ctxt: an XML parser context
6899 * parse an XML declaration header for external entities
6901 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6904 void
6905 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6906 xmlChar *version;
6907 const xmlChar *encoding;
6908 int oldstate;
6911 * We know that '<?xml' is here.
6913 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6914 SKIP(5);
6915 } else {
6916 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6917 return;
6920 /* Avoid expansion of parameter entities when skipping blanks. */
6921 oldstate = ctxt->instate;
6922 ctxt->instate = XML_PARSER_START;
6924 if (SKIP_BLANKS == 0) {
6925 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6926 "Space needed after '<?xml'\n");
6930 * We may have the VersionInfo here.
6932 version = xmlParseVersionInfo(ctxt);
6933 if (version == NULL)
6934 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6935 else {
6936 if (SKIP_BLANKS == 0) {
6937 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6938 "Space needed here\n");
6941 ctxt->input->version = version;
6944 * We must have the encoding declaration
6946 encoding = xmlParseEncodingDecl(ctxt);
6947 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6949 * The XML REC instructs us to stop parsing right here
6951 ctxt->instate = oldstate;
6952 return;
6954 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6955 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6956 "Missing encoding in text declaration\n");
6959 SKIP_BLANKS;
6960 if ((RAW == '?') && (NXT(1) == '>')) {
6961 SKIP(2);
6962 } else if (RAW == '>') {
6963 /* Deprecated old WD ... */
6964 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6965 NEXT;
6966 } else {
6967 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6968 MOVETO_ENDTAG(CUR_PTR);
6969 NEXT;
6972 ctxt->instate = oldstate;
6976 * xmlParseExternalSubset:
6977 * @ctxt: an XML parser context
6978 * @ExternalID: the external identifier
6979 * @SystemID: the system identifier (or URL)
6981 * parse Markup declarations from an external subset
6983 * [30] extSubset ::= textDecl? extSubsetDecl
6985 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6987 void
6988 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6989 const xmlChar *SystemID) {
6990 xmlDetectSAX2(ctxt);
6991 GROW;
6993 if ((ctxt->encoding == NULL) &&
6994 (ctxt->input->end - ctxt->input->cur >= 4)) {
6995 xmlChar start[4];
6996 xmlCharEncoding enc;
6998 start[0] = RAW;
6999 start[1] = NXT(1);
7000 start[2] = NXT(2);
7001 start[3] = NXT(3);
7002 enc = xmlDetectCharEncoding(start, 4);
7003 if (enc != XML_CHAR_ENCODING_NONE)
7004 xmlSwitchEncoding(ctxt, enc);
7007 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7008 xmlParseTextDecl(ctxt);
7009 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7011 * The XML REC instructs us to stop parsing right here
7013 xmlHaltParser(ctxt);
7014 return;
7017 if (ctxt->myDoc == NULL) {
7018 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7019 if (ctxt->myDoc == NULL) {
7020 xmlErrMemory(ctxt, "New Doc failed");
7021 return;
7023 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7025 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7026 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7028 ctxt->instate = XML_PARSER_DTD;
7029 ctxt->external = 1;
7030 SKIP_BLANKS;
7031 while (((RAW == '<') && (NXT(1) == '?')) ||
7032 ((RAW == '<') && (NXT(1) == '!')) ||
7033 (RAW == '%')) {
7034 int id = ctxt->input->id;
7035 unsigned long cons = CUR_CONSUMED;
7037 GROW;
7038 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7039 xmlParseConditionalSections(ctxt);
7040 } else
7041 xmlParseMarkupDecl(ctxt);
7042 SKIP_BLANKS;
7044 if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
7045 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7046 break;
7050 if (RAW != 0) {
7051 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7057 * xmlParseReference:
7058 * @ctxt: an XML parser context
7060 * parse and handle entity references in content, depending on the SAX
7061 * interface, this may end-up in a call to character() if this is a
7062 * CharRef, a predefined entity, if there is no reference() callback.
7063 * or if the parser was asked to switch to that mode.
7065 * [67] Reference ::= EntityRef | CharRef
7067 void
7068 xmlParseReference(xmlParserCtxtPtr ctxt) {
7069 xmlEntityPtr ent;
7070 xmlChar *val;
7071 int was_checked;
7072 xmlNodePtr list = NULL;
7073 xmlParserErrors ret = XML_ERR_OK;
7076 if (RAW != '&')
7077 return;
7080 * Simple case of a CharRef
7082 if (NXT(1) == '#') {
7083 int i = 0;
7084 xmlChar out[16];
7085 int hex = NXT(2);
7086 int value = xmlParseCharRef(ctxt);
7088 if (value == 0)
7089 return;
7090 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7092 * So we are using non-UTF-8 buffers
7093 * Check that the char fit on 8bits, if not
7094 * generate a CharRef.
7096 if (value <= 0xFF) {
7097 out[0] = value;
7098 out[1] = 0;
7099 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7100 (!ctxt->disableSAX))
7101 ctxt->sax->characters(ctxt->userData, out, 1);
7102 } else {
7103 if ((hex == 'x') || (hex == 'X'))
7104 snprintf((char *)out, sizeof(out), "#x%X", value);
7105 else
7106 snprintf((char *)out, sizeof(out), "#%d", value);
7107 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7108 (!ctxt->disableSAX))
7109 ctxt->sax->reference(ctxt->userData, out);
7111 } else {
7113 * Just encode the value in UTF-8
7115 COPY_BUF(0 ,out, i, value);
7116 out[i] = 0;
7117 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7118 (!ctxt->disableSAX))
7119 ctxt->sax->characters(ctxt->userData, out, i);
7121 return;
7125 * We are seeing an entity reference
7127 ent = xmlParseEntityRef(ctxt);
7128 if (ent == NULL) return;
7129 if (!ctxt->wellFormed)
7130 return;
7131 was_checked = ent->checked;
7133 /* special case of predefined entities */
7134 if ((ent->name == NULL) ||
7135 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7136 val = ent->content;
7137 if (val == NULL) return;
7139 * inline the entity.
7141 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7142 (!ctxt->disableSAX))
7143 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7144 return;
7148 * The first reference to the entity trigger a parsing phase
7149 * where the ent->children is filled with the result from
7150 * the parsing.
7151 * Note: external parsed entities will not be loaded, it is not
7152 * required for a non-validating parser, unless the parsing option
7153 * of validating, or substituting entities were given. Doing so is
7154 * far more secure as the parser will only process data coming from
7155 * the document entity by default.
7157 if (((ent->checked == 0) ||
7158 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7159 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7160 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7161 unsigned long oldnbent = ctxt->nbentities, diff;
7164 * This is a bit hackish but this seems the best
7165 * way to make sure both SAX and DOM entity support
7166 * behaves okay.
7168 void *user_data;
7169 if (ctxt->userData == ctxt)
7170 user_data = NULL;
7171 else
7172 user_data = ctxt->userData;
7175 * Check that this entity is well formed
7176 * 4.3.2: An internal general parsed entity is well-formed
7177 * if its replacement text matches the production labeled
7178 * content.
7180 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7181 ctxt->depth++;
7182 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7183 user_data, &list);
7184 ctxt->depth--;
7186 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7187 ctxt->depth++;
7188 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7189 user_data, ctxt->depth, ent->URI,
7190 ent->ExternalID, &list);
7191 ctxt->depth--;
7192 } else {
7193 ret = XML_ERR_ENTITY_PE_INTERNAL;
7194 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7195 "invalid entity type found\n", NULL);
7199 * Store the number of entities needing parsing for this entity
7200 * content and do checkings
7202 diff = ctxt->nbentities - oldnbent + 1;
7203 if (diff > INT_MAX / 2)
7204 diff = INT_MAX / 2;
7205 ent->checked = diff * 2;
7206 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7207 ent->checked |= 1;
7208 if (ret == XML_ERR_ENTITY_LOOP) {
7209 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7210 xmlHaltParser(ctxt);
7211 xmlFreeNodeList(list);
7212 return;
7214 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7215 xmlFreeNodeList(list);
7216 return;
7219 if ((ret == XML_ERR_OK) && (list != NULL)) {
7220 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7221 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7222 (ent->children == NULL)) {
7223 ent->children = list;
7225 * Prune it directly in the generated document
7226 * except for single text nodes.
7228 if ((ctxt->replaceEntities == 0) ||
7229 (ctxt->parseMode == XML_PARSE_READER) ||
7230 ((list->type == XML_TEXT_NODE) &&
7231 (list->next == NULL))) {
7232 ent->owner = 1;
7233 while (list != NULL) {
7234 list->parent = (xmlNodePtr) ent;
7235 if (list->doc != ent->doc)
7236 xmlSetTreeDoc(list, ent->doc);
7237 if (list->next == NULL)
7238 ent->last = list;
7239 list = list->next;
7241 list = NULL;
7242 } else {
7243 ent->owner = 0;
7244 while (list != NULL) {
7245 list->parent = (xmlNodePtr) ctxt->node;
7246 list->doc = ctxt->myDoc;
7247 if (list->next == NULL)
7248 ent->last = list;
7249 list = list->next;
7251 list = ent->children;
7252 #ifdef LIBXML_LEGACY_ENABLED
7253 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7254 xmlAddEntityReference(ent, list, NULL);
7255 #endif /* LIBXML_LEGACY_ENABLED */
7257 } else {
7258 xmlFreeNodeList(list);
7259 list = NULL;
7261 } else if ((ret != XML_ERR_OK) &&
7262 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7263 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7264 "Entity '%s' failed to parse\n", ent->name);
7265 if (ent->content != NULL)
7266 ent->content[0] = 0;
7267 xmlParserEntityCheck(ctxt, 0, ent, 0);
7268 } else if (list != NULL) {
7269 xmlFreeNodeList(list);
7270 list = NULL;
7272 if (ent->checked == 0)
7273 ent->checked = 2;
7275 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7276 was_checked = 0;
7277 } else if (ent->checked != 1) {
7278 ctxt->nbentities += ent->checked / 2;
7282 * Now that the entity content has been gathered
7283 * provide it to the application, this can take different forms based
7284 * on the parsing modes.
7286 if (ent->children == NULL) {
7288 * Probably running in SAX mode and the callbacks don't
7289 * build the entity content. So unless we already went
7290 * though parsing for first checking go though the entity
7291 * content to generate callbacks associated to the entity
7293 if (was_checked != 0) {
7294 void *user_data;
7296 * This is a bit hackish but this seems the best
7297 * way to make sure both SAX and DOM entity support
7298 * behaves okay.
7300 if (ctxt->userData == ctxt)
7301 user_data = NULL;
7302 else
7303 user_data = ctxt->userData;
7305 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7306 ctxt->depth++;
7307 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7308 ent->content, user_data, NULL);
7309 ctxt->depth--;
7310 } else if (ent->etype ==
7311 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7312 ctxt->depth++;
7313 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7314 ctxt->sax, user_data, ctxt->depth,
7315 ent->URI, ent->ExternalID, NULL);
7316 ctxt->depth--;
7317 } else {
7318 ret = XML_ERR_ENTITY_PE_INTERNAL;
7319 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7320 "invalid entity type found\n", NULL);
7322 if (ret == XML_ERR_ENTITY_LOOP) {
7323 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7324 return;
7327 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7328 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7330 * Entity reference callback comes second, it's somewhat
7331 * superfluous but a compatibility to historical behaviour
7333 ctxt->sax->reference(ctxt->userData, ent->name);
7335 return;
7339 * If we didn't get any children for the entity being built
7341 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7342 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7344 * Create a node.
7346 ctxt->sax->reference(ctxt->userData, ent->name);
7347 return;
7350 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7352 * There is a problem on the handling of _private for entities
7353 * (bug 155816): Should we copy the content of the field from
7354 * the entity (possibly overwriting some value set by the user
7355 * when a copy is created), should we leave it alone, or should
7356 * we try to take care of different situations? The problem
7357 * is exacerbated by the usage of this field by the xmlReader.
7358 * To fix this bug, we look at _private on the created node
7359 * and, if it's NULL, we copy in whatever was in the entity.
7360 * If it's not NULL we leave it alone. This is somewhat of a
7361 * hack - maybe we should have further tests to determine
7362 * what to do.
7364 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7366 * Seems we are generating the DOM content, do
7367 * a simple tree copy for all references except the first
7368 * In the first occurrence list contains the replacement.
7370 if (((list == NULL) && (ent->owner == 0)) ||
7371 (ctxt->parseMode == XML_PARSE_READER)) {
7372 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7375 * We are copying here, make sure there is no abuse
7377 ctxt->sizeentcopy += ent->length + 5;
7378 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7379 return;
7382 * when operating on a reader, the entities definitions
7383 * are always owning the entities subtree.
7384 if (ctxt->parseMode == XML_PARSE_READER)
7385 ent->owner = 1;
7388 cur = ent->children;
7389 while (cur != NULL) {
7390 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7391 if (nw != NULL) {
7392 if (nw->_private == NULL)
7393 nw->_private = cur->_private;
7394 if (firstChild == NULL){
7395 firstChild = nw;
7397 nw = xmlAddChild(ctxt->node, nw);
7399 if (cur == ent->last) {
7401 * needed to detect some strange empty
7402 * node cases in the reader tests
7404 if ((ctxt->parseMode == XML_PARSE_READER) &&
7405 (nw != NULL) &&
7406 (nw->type == XML_ELEMENT_NODE) &&
7407 (nw->children == NULL))
7408 nw->extra = 1;
7410 break;
7412 cur = cur->next;
7414 #ifdef LIBXML_LEGACY_ENABLED
7415 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7416 xmlAddEntityReference(ent, firstChild, nw);
7417 #endif /* LIBXML_LEGACY_ENABLED */
7418 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7419 xmlNodePtr nw = NULL, cur, next, last,
7420 firstChild = NULL;
7423 * We are copying here, make sure there is no abuse
7425 ctxt->sizeentcopy += ent->length + 5;
7426 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7427 return;
7430 * Copy the entity child list and make it the new
7431 * entity child list. The goal is to make sure any
7432 * ID or REF referenced will be the one from the
7433 * document content and not the entity copy.
7435 cur = ent->children;
7436 ent->children = NULL;
7437 last = ent->last;
7438 ent->last = NULL;
7439 while (cur != NULL) {
7440 next = cur->next;
7441 cur->next = NULL;
7442 cur->parent = NULL;
7443 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7444 if (nw != NULL) {
7445 if (nw->_private == NULL)
7446 nw->_private = cur->_private;
7447 if (firstChild == NULL){
7448 firstChild = cur;
7450 xmlAddChild((xmlNodePtr) ent, nw);
7451 xmlAddChild(ctxt->node, cur);
7453 if (cur == last)
7454 break;
7455 cur = next;
7457 if (ent->owner == 0)
7458 ent->owner = 1;
7459 #ifdef LIBXML_LEGACY_ENABLED
7460 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7461 xmlAddEntityReference(ent, firstChild, nw);
7462 #endif /* LIBXML_LEGACY_ENABLED */
7463 } else {
7464 const xmlChar *nbktext;
7467 * the name change is to avoid coalescing of the
7468 * node with a possible previous text one which
7469 * would make ent->children a dangling pointer
7471 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7472 -1);
7473 if (ent->children->type == XML_TEXT_NODE)
7474 ent->children->name = nbktext;
7475 if ((ent->last != ent->children) &&
7476 (ent->last->type == XML_TEXT_NODE))
7477 ent->last->name = nbktext;
7478 xmlAddChildList(ctxt->node, ent->children);
7482 * This is to avoid a nasty side effect, see
7483 * characters() in SAX.c
7485 ctxt->nodemem = 0;
7486 ctxt->nodelen = 0;
7487 return;
7493 * xmlParseEntityRef:
7494 * @ctxt: an XML parser context
7496 * parse ENTITY references declarations
7498 * [68] EntityRef ::= '&' Name ';'
7500 * [ WFC: Entity Declared ]
7501 * In a document without any DTD, a document with only an internal DTD
7502 * subset which contains no parameter entity references, or a document
7503 * with "standalone='yes'", the Name given in the entity reference
7504 * must match that in an entity declaration, except that well-formed
7505 * documents need not declare any of the following entities: amp, lt,
7506 * gt, apos, quot. The declaration of a parameter entity must precede
7507 * any reference to it. Similarly, the declaration of a general entity
7508 * must precede any reference to it which appears in a default value in an
7509 * attribute-list declaration. Note that if entities are declared in the
7510 * external subset or in external parameter entities, a non-validating
7511 * processor is not obligated to read and process their declarations;
7512 * for such documents, the rule that an entity must be declared is a
7513 * well-formedness constraint only if standalone='yes'.
7515 * [ WFC: Parsed Entity ]
7516 * An entity reference must not contain the name of an unparsed entity
7518 * Returns the xmlEntityPtr if found, or NULL otherwise.
7520 xmlEntityPtr
7521 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7522 const xmlChar *name;
7523 xmlEntityPtr ent = NULL;
7525 GROW;
7526 if (ctxt->instate == XML_PARSER_EOF)
7527 return(NULL);
7529 if (RAW != '&')
7530 return(NULL);
7531 NEXT;
7532 name = xmlParseName(ctxt);
7533 if (name == NULL) {
7534 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7535 "xmlParseEntityRef: no name\n");
7536 return(NULL);
7538 if (RAW != ';') {
7539 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7540 return(NULL);
7542 NEXT;
7545 * Predefined entities override any extra definition
7547 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7548 ent = xmlGetPredefinedEntity(name);
7549 if (ent != NULL)
7550 return(ent);
7554 * Increase the number of entity references parsed
7556 ctxt->nbentities++;
7559 * Ask first SAX for entity resolution, otherwise try the
7560 * entities which may have stored in the parser context.
7562 if (ctxt->sax != NULL) {
7563 if (ctxt->sax->getEntity != NULL)
7564 ent = ctxt->sax->getEntity(ctxt->userData, name);
7565 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7566 (ctxt->options & XML_PARSE_OLDSAX))
7567 ent = xmlGetPredefinedEntity(name);
7568 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7569 (ctxt->userData==ctxt)) {
7570 ent = xmlSAX2GetEntity(ctxt, name);
7573 if (ctxt->instate == XML_PARSER_EOF)
7574 return(NULL);
7576 * [ WFC: Entity Declared ]
7577 * In a document without any DTD, a document with only an
7578 * internal DTD subset which contains no parameter entity
7579 * references, or a document with "standalone='yes'", the
7580 * Name given in the entity reference must match that in an
7581 * entity declaration, except that well-formed documents
7582 * need not declare any of the following entities: amp, lt,
7583 * gt, apos, quot.
7584 * The declaration of a parameter entity must precede any
7585 * reference to it.
7586 * Similarly, the declaration of a general entity must
7587 * precede any reference to it which appears in a default
7588 * value in an attribute-list declaration. Note that if
7589 * entities are declared in the external subset or in
7590 * external parameter entities, a non-validating processor
7591 * is not obligated to read and process their declarations;
7592 * for such documents, the rule that an entity must be
7593 * declared is a well-formedness constraint only if
7594 * standalone='yes'.
7596 if (ent == NULL) {
7597 if ((ctxt->standalone == 1) ||
7598 ((ctxt->hasExternalSubset == 0) &&
7599 (ctxt->hasPErefs == 0))) {
7600 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7601 "Entity '%s' not defined\n", name);
7602 } else {
7603 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7604 "Entity '%s' not defined\n", name);
7605 if ((ctxt->inSubset == 0) &&
7606 (ctxt->sax != NULL) &&
7607 (ctxt->sax->reference != NULL)) {
7608 ctxt->sax->reference(ctxt->userData, name);
7611 xmlParserEntityCheck(ctxt, 0, ent, 0);
7612 ctxt->valid = 0;
7616 * [ WFC: Parsed Entity ]
7617 * An entity reference must not contain the name of an
7618 * unparsed entity
7620 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7621 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7622 "Entity reference to unparsed entity %s\n", name);
7626 * [ WFC: No External Entity References ]
7627 * Attribute values cannot contain direct or indirect
7628 * entity references to external entities.
7630 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7631 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7632 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7633 "Attribute references external entity '%s'\n", name);
7636 * [ WFC: No < in Attribute Values ]
7637 * The replacement text of any entity referred to directly or
7638 * indirectly in an attribute value (other than "&lt;") must
7639 * not contain a <.
7641 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7642 (ent != NULL) &&
7643 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7644 if (((ent->checked & 1) || (ent->checked == 0)) &&
7645 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7646 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7647 "'<' in entity '%s' is not allowed in attributes values\n", name);
7652 * Internal check, no parameter entities here ...
7654 else {
7655 switch (ent->etype) {
7656 case XML_INTERNAL_PARAMETER_ENTITY:
7657 case XML_EXTERNAL_PARAMETER_ENTITY:
7658 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7659 "Attempt to reference the parameter entity '%s'\n",
7660 name);
7661 break;
7662 default:
7663 break;
7668 * [ WFC: No Recursion ]
7669 * A parsed entity must not contain a recursive reference
7670 * to itself, either directly or indirectly.
7671 * Done somewhere else
7673 return(ent);
7677 * xmlParseStringEntityRef:
7678 * @ctxt: an XML parser context
7679 * @str: a pointer to an index in the string
7681 * parse ENTITY references declarations, but this version parses it from
7682 * a string value.
7684 * [68] EntityRef ::= '&' Name ';'
7686 * [ WFC: Entity Declared ]
7687 * In a document without any DTD, a document with only an internal DTD
7688 * subset which contains no parameter entity references, or a document
7689 * with "standalone='yes'", the Name given in the entity reference
7690 * must match that in an entity declaration, except that well-formed
7691 * documents need not declare any of the following entities: amp, lt,
7692 * gt, apos, quot. The declaration of a parameter entity must precede
7693 * any reference to it. Similarly, the declaration of a general entity
7694 * must precede any reference to it which appears in a default value in an
7695 * attribute-list declaration. Note that if entities are declared in the
7696 * external subset or in external parameter entities, a non-validating
7697 * processor is not obligated to read and process their declarations;
7698 * for such documents, the rule that an entity must be declared is a
7699 * well-formedness constraint only if standalone='yes'.
7701 * [ WFC: Parsed Entity ]
7702 * An entity reference must not contain the name of an unparsed entity
7704 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7705 * is updated to the current location in the string.
7707 static xmlEntityPtr
7708 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7709 xmlChar *name;
7710 const xmlChar *ptr;
7711 xmlChar cur;
7712 xmlEntityPtr ent = NULL;
7714 if ((str == NULL) || (*str == NULL))
7715 return(NULL);
7716 ptr = *str;
7717 cur = *ptr;
7718 if (cur != '&')
7719 return(NULL);
7721 ptr++;
7722 name = xmlParseStringName(ctxt, &ptr);
7723 if (name == NULL) {
7724 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7725 "xmlParseStringEntityRef: no name\n");
7726 *str = ptr;
7727 return(NULL);
7729 if (*ptr != ';') {
7730 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7731 xmlFree(name);
7732 *str = ptr;
7733 return(NULL);
7735 ptr++;
7739 * Predefined entities override any extra definition
7741 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7742 ent = xmlGetPredefinedEntity(name);
7743 if (ent != NULL) {
7744 xmlFree(name);
7745 *str = ptr;
7746 return(ent);
7751 * Increase the number of entity references parsed
7753 ctxt->nbentities++;
7756 * Ask first SAX for entity resolution, otherwise try the
7757 * entities which may have stored in the parser context.
7759 if (ctxt->sax != NULL) {
7760 if (ctxt->sax->getEntity != NULL)
7761 ent = ctxt->sax->getEntity(ctxt->userData, name);
7762 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7763 ent = xmlGetPredefinedEntity(name);
7764 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7765 ent = xmlSAX2GetEntity(ctxt, name);
7768 if (ctxt->instate == XML_PARSER_EOF) {
7769 xmlFree(name);
7770 return(NULL);
7774 * [ WFC: Entity Declared ]
7775 * In a document without any DTD, a document with only an
7776 * internal DTD subset which contains no parameter entity
7777 * references, or a document with "standalone='yes'", the
7778 * Name given in the entity reference must match that in an
7779 * entity declaration, except that well-formed documents
7780 * need not declare any of the following entities: amp, lt,
7781 * gt, apos, quot.
7782 * The declaration of a parameter entity must precede any
7783 * reference to it.
7784 * Similarly, the declaration of a general entity must
7785 * precede any reference to it which appears in a default
7786 * value in an attribute-list declaration. Note that if
7787 * entities are declared in the external subset or in
7788 * external parameter entities, a non-validating processor
7789 * is not obligated to read and process their declarations;
7790 * for such documents, the rule that an entity must be
7791 * declared is a well-formedness constraint only if
7792 * standalone='yes'.
7794 if (ent == NULL) {
7795 if ((ctxt->standalone == 1) ||
7796 ((ctxt->hasExternalSubset == 0) &&
7797 (ctxt->hasPErefs == 0))) {
7798 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7799 "Entity '%s' not defined\n", name);
7800 } else {
7801 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7802 "Entity '%s' not defined\n",
7803 name);
7805 xmlParserEntityCheck(ctxt, 0, ent, 0);
7806 /* TODO ? check regressions ctxt->valid = 0; */
7810 * [ WFC: Parsed Entity ]
7811 * An entity reference must not contain the name of an
7812 * unparsed entity
7814 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7815 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7816 "Entity reference to unparsed entity %s\n", name);
7820 * [ WFC: No External Entity References ]
7821 * Attribute values cannot contain direct or indirect
7822 * entity references to external entities.
7824 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7825 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7826 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7827 "Attribute references external entity '%s'\n", name);
7830 * [ WFC: No < in Attribute Values ]
7831 * The replacement text of any entity referred to directly or
7832 * indirectly in an attribute value (other than "&lt;") must
7833 * not contain a <.
7835 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7836 (ent != NULL) && (ent->content != NULL) &&
7837 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7838 (xmlStrchr(ent->content, '<'))) {
7839 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7840 "'<' in entity '%s' is not allowed in attributes values\n",
7841 name);
7845 * Internal check, no parameter entities here ...
7847 else {
7848 switch (ent->etype) {
7849 case XML_INTERNAL_PARAMETER_ENTITY:
7850 case XML_EXTERNAL_PARAMETER_ENTITY:
7851 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7852 "Attempt to reference the parameter entity '%s'\n",
7853 name);
7854 break;
7855 default:
7856 break;
7861 * [ WFC: No Recursion ]
7862 * A parsed entity must not contain a recursive reference
7863 * to itself, either directly or indirectly.
7864 * Done somewhere else
7867 xmlFree(name);
7868 *str = ptr;
7869 return(ent);
7873 * xmlParsePEReference:
7874 * @ctxt: an XML parser context
7876 * parse PEReference declarations
7877 * The entity content is handled directly by pushing it's content as
7878 * a new input stream.
7880 * [69] PEReference ::= '%' Name ';'
7882 * [ WFC: No Recursion ]
7883 * A parsed entity must not contain a recursive
7884 * reference to itself, either directly or indirectly.
7886 * [ WFC: Entity Declared ]
7887 * In a document without any DTD, a document with only an internal DTD
7888 * subset which contains no parameter entity references, or a document
7889 * with "standalone='yes'", ... ... The declaration of a parameter
7890 * entity must precede any reference to it...
7892 * [ VC: Entity Declared ]
7893 * In a document with an external subset or external parameter entities
7894 * with "standalone='no'", ... ... The declaration of a parameter entity
7895 * must precede any reference to it...
7897 * [ WFC: In DTD ]
7898 * Parameter-entity references may only appear in the DTD.
7899 * NOTE: misleading but this is handled.
7901 void
7902 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7904 const xmlChar *name;
7905 xmlEntityPtr entity = NULL;
7906 xmlParserInputPtr input;
7908 if (RAW != '%')
7909 return;
7910 NEXT;
7911 name = xmlParseName(ctxt);
7912 if (name == NULL) {
7913 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7914 return;
7916 if (xmlParserDebugEntities)
7917 xmlGenericError(xmlGenericErrorContext,
7918 "PEReference: %s\n", name);
7919 if (RAW != ';') {
7920 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7921 return;
7924 NEXT;
7927 * Increase the number of entity references parsed
7929 ctxt->nbentities++;
7932 * Request the entity from SAX
7934 if ((ctxt->sax != NULL) &&
7935 (ctxt->sax->getParameterEntity != NULL))
7936 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7937 if (ctxt->instate == XML_PARSER_EOF)
7938 return;
7939 if (entity == NULL) {
7941 * [ WFC: Entity Declared ]
7942 * In a document without any DTD, a document with only an
7943 * internal DTD subset which contains no parameter entity
7944 * references, or a document with "standalone='yes'", ...
7945 * ... The declaration of a parameter entity must precede
7946 * any reference to it...
7948 if ((ctxt->standalone == 1) ||
7949 ((ctxt->hasExternalSubset == 0) &&
7950 (ctxt->hasPErefs == 0))) {
7951 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7952 "PEReference: %%%s; not found\n",
7953 name);
7954 } else {
7956 * [ VC: Entity Declared ]
7957 * In a document with an external subset or external
7958 * parameter entities with "standalone='no'", ...
7959 * ... The declaration of a parameter entity must
7960 * precede any reference to it...
7962 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7963 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7964 "PEReference: %%%s; not found\n",
7965 name, NULL);
7966 } else
7967 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7968 "PEReference: %%%s; not found\n",
7969 name, NULL);
7970 ctxt->valid = 0;
7972 xmlParserEntityCheck(ctxt, 0, NULL, 0);
7973 } else {
7975 * Internal checking in case the entity quest barfed
7977 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7978 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7979 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7980 "Internal: %%%s; is not a parameter entity\n",
7981 name, NULL);
7982 } else {
7983 xmlChar start[4];
7984 xmlCharEncoding enc;
7986 if (xmlParserEntityCheck(ctxt, 0, entity, 0))
7987 return;
7989 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7990 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7991 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7992 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7993 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7994 (ctxt->replaceEntities == 0) &&
7995 (ctxt->validate == 0))
7996 return;
7998 input = xmlNewEntityInputStream(ctxt, entity);
7999 if (xmlPushInput(ctxt, input) < 0) {
8000 xmlFreeInputStream(input);
8001 return;
8004 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8006 * Get the 4 first bytes and decode the charset
8007 * if enc != XML_CHAR_ENCODING_NONE
8008 * plug some encoding conversion routines.
8009 * Note that, since we may have some non-UTF8
8010 * encoding (like UTF16, bug 135229), the 'length'
8011 * is not known, but we can calculate based upon
8012 * the amount of data in the buffer.
8014 GROW
8015 if (ctxt->instate == XML_PARSER_EOF)
8016 return;
8017 if ((ctxt->input->end - ctxt->input->cur)>=4) {
8018 start[0] = RAW;
8019 start[1] = NXT(1);
8020 start[2] = NXT(2);
8021 start[3] = NXT(3);
8022 enc = xmlDetectCharEncoding(start, 4);
8023 if (enc != XML_CHAR_ENCODING_NONE) {
8024 xmlSwitchEncoding(ctxt, enc);
8028 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8029 (IS_BLANK_CH(NXT(5)))) {
8030 xmlParseTextDecl(ctxt);
8035 ctxt->hasPErefs = 1;
8039 * xmlLoadEntityContent:
8040 * @ctxt: an XML parser context
8041 * @entity: an unloaded system entity
8043 * Load the original content of the given system entity from the
8044 * ExternalID/SystemID given. This is to be used for Included in Literal
8045 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8047 * Returns 0 in case of success and -1 in case of failure
8049 static int
8050 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8051 xmlParserInputPtr input;
8052 xmlBufferPtr buf;
8053 int l, c;
8054 int count = 0;
8056 if ((ctxt == NULL) || (entity == NULL) ||
8057 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8058 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8059 (entity->content != NULL)) {
8060 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8061 "xmlLoadEntityContent parameter error");
8062 return(-1);
8065 if (xmlParserDebugEntities)
8066 xmlGenericError(xmlGenericErrorContext,
8067 "Reading %s entity content input\n", entity->name);
8069 buf = xmlBufferCreate();
8070 if (buf == NULL) {
8071 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8072 "xmlLoadEntityContent parameter error");
8073 return(-1);
8075 xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8077 input = xmlNewEntityInputStream(ctxt, entity);
8078 if (input == NULL) {
8079 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8080 "xmlLoadEntityContent input error");
8081 xmlBufferFree(buf);
8082 return(-1);
8086 * Push the entity as the current input, read char by char
8087 * saving to the buffer until the end of the entity or an error
8089 if (xmlPushInput(ctxt, input) < 0) {
8090 xmlBufferFree(buf);
8091 xmlFreeInputStream(input);
8092 return(-1);
8095 GROW;
8096 c = CUR_CHAR(l);
8097 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8098 (IS_CHAR(c))) {
8099 xmlBufferAdd(buf, ctxt->input->cur, l);
8100 if (count++ > XML_PARSER_CHUNK_SIZE) {
8101 count = 0;
8102 GROW;
8103 if (ctxt->instate == XML_PARSER_EOF) {
8104 xmlBufferFree(buf);
8105 return(-1);
8108 NEXTL(l);
8109 c = CUR_CHAR(l);
8110 if (c == 0) {
8111 count = 0;
8112 GROW;
8113 if (ctxt->instate == XML_PARSER_EOF) {
8114 xmlBufferFree(buf);
8115 return(-1);
8117 c = CUR_CHAR(l);
8121 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8122 xmlPopInput(ctxt);
8123 } else if (!IS_CHAR(c)) {
8124 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8125 "xmlLoadEntityContent: invalid char value %d\n",
8127 xmlBufferFree(buf);
8128 return(-1);
8130 entity->content = buf->content;
8131 buf->content = NULL;
8132 xmlBufferFree(buf);
8134 return(0);
8138 * xmlParseStringPEReference:
8139 * @ctxt: an XML parser context
8140 * @str: a pointer to an index in the string
8142 * parse PEReference declarations
8144 * [69] PEReference ::= '%' Name ';'
8146 * [ WFC: No Recursion ]
8147 * A parsed entity must not contain a recursive
8148 * reference to itself, either directly or indirectly.
8150 * [ WFC: Entity Declared ]
8151 * In a document without any DTD, a document with only an internal DTD
8152 * subset which contains no parameter entity references, or a document
8153 * with "standalone='yes'", ... ... The declaration of a parameter
8154 * entity must precede any reference to it...
8156 * [ VC: Entity Declared ]
8157 * In a document with an external subset or external parameter entities
8158 * with "standalone='no'", ... ... The declaration of a parameter entity
8159 * must precede any reference to it...
8161 * [ WFC: In DTD ]
8162 * Parameter-entity references may only appear in the DTD.
8163 * NOTE: misleading but this is handled.
8165 * Returns the string of the entity content.
8166 * str is updated to the current value of the index
8168 static xmlEntityPtr
8169 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8170 const xmlChar *ptr;
8171 xmlChar cur;
8172 xmlChar *name;
8173 xmlEntityPtr entity = NULL;
8175 if ((str == NULL) || (*str == NULL)) return(NULL);
8176 ptr = *str;
8177 cur = *ptr;
8178 if (cur != '%')
8179 return(NULL);
8180 ptr++;
8181 name = xmlParseStringName(ctxt, &ptr);
8182 if (name == NULL) {
8183 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8184 "xmlParseStringPEReference: no name\n");
8185 *str = ptr;
8186 return(NULL);
8188 cur = *ptr;
8189 if (cur != ';') {
8190 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8191 xmlFree(name);
8192 *str = ptr;
8193 return(NULL);
8195 ptr++;
8198 * Increase the number of entity references parsed
8200 ctxt->nbentities++;
8203 * Request the entity from SAX
8205 if ((ctxt->sax != NULL) &&
8206 (ctxt->sax->getParameterEntity != NULL))
8207 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8208 if (ctxt->instate == XML_PARSER_EOF) {
8209 xmlFree(name);
8210 *str = ptr;
8211 return(NULL);
8213 if (entity == NULL) {
8215 * [ WFC: Entity Declared ]
8216 * In a document without any DTD, a document with only an
8217 * internal DTD subset which contains no parameter entity
8218 * references, or a document with "standalone='yes'", ...
8219 * ... The declaration of a parameter entity must precede
8220 * any reference to it...
8222 if ((ctxt->standalone == 1) ||
8223 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8224 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8225 "PEReference: %%%s; not found\n", name);
8226 } else {
8228 * [ VC: Entity Declared ]
8229 * In a document with an external subset or external
8230 * parameter entities with "standalone='no'", ...
8231 * ... The declaration of a parameter entity must
8232 * precede any reference to it...
8234 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8235 "PEReference: %%%s; not found\n",
8236 name, NULL);
8237 ctxt->valid = 0;
8239 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8240 } else {
8242 * Internal checking in case the entity quest barfed
8244 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8245 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8246 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8247 "%%%s; is not a parameter entity\n",
8248 name, NULL);
8251 ctxt->hasPErefs = 1;
8252 xmlFree(name);
8253 *str = ptr;
8254 return(entity);
8258 * xmlParseDocTypeDecl:
8259 * @ctxt: an XML parser context
8261 * parse a DOCTYPE declaration
8263 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8264 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8266 * [ VC: Root Element Type ]
8267 * The Name in the document type declaration must match the element
8268 * type of the root element.
8271 void
8272 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8273 const xmlChar *name = NULL;
8274 xmlChar *ExternalID = NULL;
8275 xmlChar *URI = NULL;
8278 * We know that '<!DOCTYPE' has been detected.
8280 SKIP(9);
8282 SKIP_BLANKS;
8285 * Parse the DOCTYPE name.
8287 name = xmlParseName(ctxt);
8288 if (name == NULL) {
8289 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8290 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8292 ctxt->intSubName = name;
8294 SKIP_BLANKS;
8297 * Check for SystemID and ExternalID
8299 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8301 if ((URI != NULL) || (ExternalID != NULL)) {
8302 ctxt->hasExternalSubset = 1;
8304 ctxt->extSubURI = URI;
8305 ctxt->extSubSystem = ExternalID;
8307 SKIP_BLANKS;
8310 * Create and update the internal subset.
8312 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8313 (!ctxt->disableSAX))
8314 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8315 if (ctxt->instate == XML_PARSER_EOF)
8316 return;
8319 * Is there any internal subset declarations ?
8320 * they are handled separately in xmlParseInternalSubset()
8322 if (RAW == '[')
8323 return;
8326 * We should be at the end of the DOCTYPE declaration.
8328 if (RAW != '>') {
8329 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8331 NEXT;
8335 * xmlParseInternalSubset:
8336 * @ctxt: an XML parser context
8338 * parse the internal subset declaration
8340 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8343 static void
8344 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8346 * Is there any DTD definition ?
8348 if (RAW == '[') {
8349 int baseInputNr = ctxt->inputNr;
8350 ctxt->instate = XML_PARSER_DTD;
8351 NEXT;
8353 * Parse the succession of Markup declarations and
8354 * PEReferences.
8355 * Subsequence (markupdecl | PEReference | S)*
8357 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8358 (ctxt->instate != XML_PARSER_EOF)) {
8359 int id = ctxt->input->id;
8360 unsigned long cons = CUR_CONSUMED;
8362 SKIP_BLANKS;
8363 xmlParseMarkupDecl(ctxt);
8364 xmlParsePEReference(ctxt);
8367 * Conditional sections are allowed from external entities included
8368 * by PE References in the internal subset.
8370 if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8371 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8372 xmlParseConditionalSections(ctxt);
8375 if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
8376 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8377 "xmlParseInternalSubset: error detected in Markup declaration\n");
8378 if (ctxt->inputNr > baseInputNr)
8379 xmlPopInput(ctxt);
8380 else
8381 break;
8384 if (RAW == ']') {
8385 NEXT;
8386 SKIP_BLANKS;
8391 * We should be at the end of the DOCTYPE declaration.
8393 if (RAW != '>') {
8394 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8395 return;
8397 NEXT;
8400 #ifdef LIBXML_SAX1_ENABLED
8402 * xmlParseAttribute:
8403 * @ctxt: an XML parser context
8404 * @value: a xmlChar ** used to store the value of the attribute
8406 * parse an attribute
8408 * [41] Attribute ::= Name Eq AttValue
8410 * [ WFC: No External Entity References ]
8411 * Attribute values cannot contain direct or indirect entity references
8412 * to external entities.
8414 * [ WFC: No < in Attribute Values ]
8415 * The replacement text of any entity referred to directly or indirectly in
8416 * an attribute value (other than "&lt;") must not contain a <.
8418 * [ VC: Attribute Value Type ]
8419 * The attribute must have been declared; the value must be of the type
8420 * declared for it.
8422 * [25] Eq ::= S? '=' S?
8424 * With namespace:
8426 * [NS 11] Attribute ::= QName Eq AttValue
8428 * Also the case QName == xmlns:??? is handled independently as a namespace
8429 * definition.
8431 * Returns the attribute name, and the value in *value.
8434 const xmlChar *
8435 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8436 const xmlChar *name;
8437 xmlChar *val;
8439 *value = NULL;
8440 GROW;
8441 name = xmlParseName(ctxt);
8442 if (name == NULL) {
8443 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8444 "error parsing attribute name\n");
8445 return(NULL);
8449 * read the value
8451 SKIP_BLANKS;
8452 if (RAW == '=') {
8453 NEXT;
8454 SKIP_BLANKS;
8455 val = xmlParseAttValue(ctxt);
8456 ctxt->instate = XML_PARSER_CONTENT;
8457 } else {
8458 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8459 "Specification mandates value for attribute %s\n", name);
8460 return(NULL);
8464 * Check that xml:lang conforms to the specification
8465 * No more registered as an error, just generate a warning now
8466 * since this was deprecated in XML second edition
8468 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8469 if (!xmlCheckLanguageID(val)) {
8470 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8471 "Malformed value for xml:lang : %s\n",
8472 val, NULL);
8477 * Check that xml:space conforms to the specification
8479 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8480 if (xmlStrEqual(val, BAD_CAST "default"))
8481 *(ctxt->space) = 0;
8482 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8483 *(ctxt->space) = 1;
8484 else {
8485 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8486 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8487 val, NULL);
8491 *value = val;
8492 return(name);
8496 * xmlParseStartTag:
8497 * @ctxt: an XML parser context
8499 * parse a start of tag either for rule element or
8500 * EmptyElement. In both case we don't parse the tag closing chars.
8502 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8504 * [ WFC: Unique Att Spec ]
8505 * No attribute name may appear more than once in the same start-tag or
8506 * empty-element tag.
8508 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8510 * [ WFC: Unique Att Spec ]
8511 * No attribute name may appear more than once in the same start-tag or
8512 * empty-element tag.
8514 * With namespace:
8516 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8518 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8520 * Returns the element name parsed
8523 const xmlChar *
8524 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8525 const xmlChar *name;
8526 const xmlChar *attname;
8527 xmlChar *attvalue;
8528 const xmlChar **atts = ctxt->atts;
8529 int nbatts = 0;
8530 int maxatts = ctxt->maxatts;
8531 int i;
8533 if (RAW != '<') return(NULL);
8534 NEXT1;
8536 name = xmlParseName(ctxt);
8537 if (name == NULL) {
8538 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8539 "xmlParseStartTag: invalid element name\n");
8540 return(NULL);
8544 * Now parse the attributes, it ends up with the ending
8546 * (S Attribute)* S?
8548 SKIP_BLANKS;
8549 GROW;
8551 while (((RAW != '>') &&
8552 ((RAW != '/') || (NXT(1) != '>')) &&
8553 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8554 int id = ctxt->input->id;
8555 unsigned long cons = CUR_CONSUMED;
8557 attname = xmlParseAttribute(ctxt, &attvalue);
8558 if ((attname != NULL) && (attvalue != NULL)) {
8560 * [ WFC: Unique Att Spec ]
8561 * No attribute name may appear more than once in the same
8562 * start-tag or empty-element tag.
8564 for (i = 0; i < nbatts;i += 2) {
8565 if (xmlStrEqual(atts[i], attname)) {
8566 xmlErrAttributeDup(ctxt, NULL, attname);
8567 xmlFree(attvalue);
8568 goto failed;
8572 * Add the pair to atts
8574 if (atts == NULL) {
8575 maxatts = 22; /* allow for 10 attrs by default */
8576 atts = (const xmlChar **)
8577 xmlMalloc(maxatts * sizeof(xmlChar *));
8578 if (atts == NULL) {
8579 xmlErrMemory(ctxt, NULL);
8580 if (attvalue != NULL)
8581 xmlFree(attvalue);
8582 goto failed;
8584 ctxt->atts = atts;
8585 ctxt->maxatts = maxatts;
8586 } else if (nbatts + 4 > maxatts) {
8587 const xmlChar **n;
8589 maxatts *= 2;
8590 n = (const xmlChar **) xmlRealloc((void *) atts,
8591 maxatts * sizeof(const xmlChar *));
8592 if (n == NULL) {
8593 xmlErrMemory(ctxt, NULL);
8594 if (attvalue != NULL)
8595 xmlFree(attvalue);
8596 goto failed;
8598 atts = n;
8599 ctxt->atts = atts;
8600 ctxt->maxatts = maxatts;
8602 atts[nbatts++] = attname;
8603 atts[nbatts++] = attvalue;
8604 atts[nbatts] = NULL;
8605 atts[nbatts + 1] = NULL;
8606 } else {
8607 if (attvalue != NULL)
8608 xmlFree(attvalue);
8611 failed:
8613 GROW
8614 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8615 break;
8616 if (SKIP_BLANKS == 0) {
8617 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8618 "attributes construct error\n");
8620 if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
8621 (attname == NULL) && (attvalue == NULL)) {
8622 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8623 "xmlParseStartTag: problem parsing attributes\n");
8624 break;
8626 SHRINK;
8627 GROW;
8631 * SAX: Start of Element !
8633 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8634 (!ctxt->disableSAX)) {
8635 if (nbatts > 0)
8636 ctxt->sax->startElement(ctxt->userData, name, atts);
8637 else
8638 ctxt->sax->startElement(ctxt->userData, name, NULL);
8641 if (atts != NULL) {
8642 /* Free only the content strings */
8643 for (i = 1;i < nbatts;i+=2)
8644 if (atts[i] != NULL)
8645 xmlFree((xmlChar *) atts[i]);
8647 return(name);
8651 * xmlParseEndTag1:
8652 * @ctxt: an XML parser context
8653 * @line: line of the start tag
8654 * @nsNr: number of namespaces on the start tag
8656 * parse an end of tag
8658 * [42] ETag ::= '</' Name S? '>'
8660 * With namespace
8662 * [NS 9] ETag ::= '</' QName S? '>'
8665 static void
8666 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8667 const xmlChar *name;
8669 GROW;
8670 if ((RAW != '<') || (NXT(1) != '/')) {
8671 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8672 "xmlParseEndTag: '</' not found\n");
8673 return;
8675 SKIP(2);
8677 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8680 * We should definitely be at the ending "S? '>'" part
8682 GROW;
8683 SKIP_BLANKS;
8684 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8685 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8686 } else
8687 NEXT1;
8690 * [ WFC: Element Type Match ]
8691 * The Name in an element's end-tag must match the element type in the
8692 * start-tag.
8695 if (name != (xmlChar*)1) {
8696 if (name == NULL) name = BAD_CAST "unparsable";
8697 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8698 "Opening and ending tag mismatch: %s line %d and %s\n",
8699 ctxt->name, line, name);
8703 * SAX: End of Tag
8705 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8706 (!ctxt->disableSAX))
8707 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8709 namePop(ctxt);
8710 spacePop(ctxt);
8711 return;
8715 * xmlParseEndTag:
8716 * @ctxt: an XML parser context
8718 * parse an end of tag
8720 * [42] ETag ::= '</' Name S? '>'
8722 * With namespace
8724 * [NS 9] ETag ::= '</' QName S? '>'
8727 void
8728 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8729 xmlParseEndTag1(ctxt, 0);
8731 #endif /* LIBXML_SAX1_ENABLED */
8733 /************************************************************************
8735 * SAX 2 specific operations *
8737 ************************************************************************/
8740 * xmlGetNamespace:
8741 * @ctxt: an XML parser context
8742 * @prefix: the prefix to lookup
8744 * Lookup the namespace name for the @prefix (which ca be NULL)
8745 * The prefix must come from the @ctxt->dict dictionary
8747 * Returns the namespace name or NULL if not bound
8749 static const xmlChar *
8750 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8751 int i;
8753 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8754 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8755 if (ctxt->nsTab[i] == prefix) {
8756 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8757 return(NULL);
8758 return(ctxt->nsTab[i + 1]);
8760 return(NULL);
8764 * xmlParseQName:
8765 * @ctxt: an XML parser context
8766 * @prefix: pointer to store the prefix part
8768 * parse an XML Namespace QName
8770 * [6] QName ::= (Prefix ':')? LocalPart
8771 * [7] Prefix ::= NCName
8772 * [8] LocalPart ::= NCName
8774 * Returns the Name parsed or NULL
8777 static const xmlChar *
8778 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8779 const xmlChar *l, *p;
8781 GROW;
8783 l = xmlParseNCName(ctxt);
8784 if (l == NULL) {
8785 if (CUR == ':') {
8786 l = xmlParseName(ctxt);
8787 if (l != NULL) {
8788 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8789 "Failed to parse QName '%s'\n", l, NULL, NULL);
8790 *prefix = NULL;
8791 return(l);
8794 return(NULL);
8796 if (CUR == ':') {
8797 NEXT;
8798 p = l;
8799 l = xmlParseNCName(ctxt);
8800 if (l == NULL) {
8801 xmlChar *tmp;
8803 if (ctxt->instate == XML_PARSER_EOF)
8804 return(NULL);
8805 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8806 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8807 l = xmlParseNmtoken(ctxt);
8808 if (l == NULL) {
8809 if (ctxt->instate == XML_PARSER_EOF)
8810 return(NULL);
8811 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8812 } else {
8813 tmp = xmlBuildQName(l, p, NULL, 0);
8814 xmlFree((char *)l);
8816 p = xmlDictLookup(ctxt->dict, tmp, -1);
8817 if (tmp != NULL) xmlFree(tmp);
8818 *prefix = NULL;
8819 return(p);
8821 if (CUR == ':') {
8822 xmlChar *tmp;
8824 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8825 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8826 NEXT;
8827 tmp = (xmlChar *) xmlParseName(ctxt);
8828 if (tmp != NULL) {
8829 tmp = xmlBuildQName(tmp, l, NULL, 0);
8830 l = xmlDictLookup(ctxt->dict, tmp, -1);
8831 if (tmp != NULL) xmlFree(tmp);
8832 *prefix = p;
8833 return(l);
8835 if (ctxt->instate == XML_PARSER_EOF)
8836 return(NULL);
8837 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8838 l = xmlDictLookup(ctxt->dict, tmp, -1);
8839 if (tmp != NULL) xmlFree(tmp);
8840 *prefix = p;
8841 return(l);
8843 *prefix = p;
8844 } else
8845 *prefix = NULL;
8846 return(l);
8850 * xmlParseQNameAndCompare:
8851 * @ctxt: an XML parser context
8852 * @name: the localname
8853 * @prefix: the prefix, if any.
8855 * parse an XML name and compares for match
8856 * (specialized for endtag parsing)
8858 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8859 * and the name for mismatch
8862 static const xmlChar *
8863 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8864 xmlChar const *prefix) {
8865 const xmlChar *cmp;
8866 const xmlChar *in;
8867 const xmlChar *ret;
8868 const xmlChar *prefix2;
8870 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8872 GROW;
8873 in = ctxt->input->cur;
8875 cmp = prefix;
8876 while (*in != 0 && *in == *cmp) {
8877 ++in;
8878 ++cmp;
8880 if ((*cmp == 0) && (*in == ':')) {
8881 in++;
8882 cmp = name;
8883 while (*in != 0 && *in == *cmp) {
8884 ++in;
8885 ++cmp;
8887 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8888 /* success */
8889 ctxt->input->col += in - ctxt->input->cur;
8890 ctxt->input->cur = in;
8891 return((const xmlChar*) 1);
8895 * all strings coms from the dictionary, equality can be done directly
8897 ret = xmlParseQName (ctxt, &prefix2);
8898 if ((ret == name) && (prefix == prefix2))
8899 return((const xmlChar*) 1);
8900 return ret;
8904 * xmlParseAttValueInternal:
8905 * @ctxt: an XML parser context
8906 * @len: attribute len result
8907 * @alloc: whether the attribute was reallocated as a new string
8908 * @normalize: if 1 then further non-CDATA normalization must be done
8910 * parse a value for an attribute.
8911 * NOTE: if no normalization is needed, the routine will return pointers
8912 * directly from the data buffer.
8914 * 3.3.3 Attribute-Value Normalization:
8915 * Before the value of an attribute is passed to the application or
8916 * checked for validity, the XML processor must normalize it as follows:
8917 * - a character reference is processed by appending the referenced
8918 * character to the attribute value
8919 * - an entity reference is processed by recursively processing the
8920 * replacement text of the entity
8921 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8922 * appending #x20 to the normalized value, except that only a single
8923 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8924 * parsed entity or the literal entity value of an internal parsed entity
8925 * - other characters are processed by appending them to the normalized value
8926 * If the declared value is not CDATA, then the XML processor must further
8927 * process the normalized attribute value by discarding any leading and
8928 * trailing space (#x20) characters, and by replacing sequences of space
8929 * (#x20) characters by a single space (#x20) character.
8930 * All attributes for which no declaration has been read should be treated
8931 * by a non-validating parser as if declared CDATA.
8933 * Returns the AttValue parsed or NULL. The value has to be freed by the
8934 * caller if it was copied, this can be detected by val[*len] == 0.
8937 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8938 const xmlChar *oldbase = ctxt->input->base;\
8939 GROW;\
8940 if (ctxt->instate == XML_PARSER_EOF)\
8941 return(NULL);\
8942 if (oldbase != ctxt->input->base) {\
8943 ptrdiff_t delta = ctxt->input->base - oldbase;\
8944 start = start + delta;\
8945 in = in + delta;\
8947 end = ctxt->input->end;
8949 static xmlChar *
8950 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8951 int normalize)
8953 xmlChar limit = 0;
8954 const xmlChar *in = NULL, *start, *end, *last;
8955 xmlChar *ret = NULL;
8956 int line, col;
8958 GROW;
8959 in = (xmlChar *) CUR_PTR;
8960 line = ctxt->input->line;
8961 col = ctxt->input->col;
8962 if (*in != '"' && *in != '\'') {
8963 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8964 return (NULL);
8966 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8969 * try to handle in this routine the most common case where no
8970 * allocation of a new string is required and where content is
8971 * pure ASCII.
8973 limit = *in++;
8974 col++;
8975 end = ctxt->input->end;
8976 start = in;
8977 if (in >= end) {
8978 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8980 if (normalize) {
8982 * Skip any leading spaces
8984 while ((in < end) && (*in != limit) &&
8985 ((*in == 0x20) || (*in == 0x9) ||
8986 (*in == 0xA) || (*in == 0xD))) {
8987 if (*in == 0xA) {
8988 line++; col = 1;
8989 } else {
8990 col++;
8992 in++;
8993 start = in;
8994 if (in >= end) {
8995 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8996 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8997 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8998 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8999 "AttValue length too long\n");
9000 return(NULL);
9004 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9005 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9006 col++;
9007 if ((*in++ == 0x20) && (*in == 0x20)) break;
9008 if (in >= end) {
9009 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9010 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9011 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9012 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9013 "AttValue length too long\n");
9014 return(NULL);
9018 last = in;
9020 * skip the trailing blanks
9022 while ((last[-1] == 0x20) && (last > start)) last--;
9023 while ((in < end) && (*in != limit) &&
9024 ((*in == 0x20) || (*in == 0x9) ||
9025 (*in == 0xA) || (*in == 0xD))) {
9026 if (*in == 0xA) {
9027 line++, col = 1;
9028 } else {
9029 col++;
9031 in++;
9032 if (in >= end) {
9033 const xmlChar *oldbase = ctxt->input->base;
9034 GROW;
9035 if (ctxt->instate == XML_PARSER_EOF)
9036 return(NULL);
9037 if (oldbase != ctxt->input->base) {
9038 ptrdiff_t delta = ctxt->input->base - oldbase;
9039 start = start + delta;
9040 in = in + delta;
9041 last = last + delta;
9043 end = ctxt->input->end;
9044 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9045 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9046 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9047 "AttValue length too long\n");
9048 return(NULL);
9052 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9053 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9054 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9055 "AttValue length too long\n");
9056 return(NULL);
9058 if (*in != limit) goto need_complex;
9059 } else {
9060 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9061 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9062 in++;
9063 col++;
9064 if (in >= end) {
9065 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9066 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9067 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9068 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9069 "AttValue length too long\n");
9070 return(NULL);
9074 last = in;
9075 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9076 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9077 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9078 "AttValue length too long\n");
9079 return(NULL);
9081 if (*in != limit) goto need_complex;
9083 in++;
9084 col++;
9085 if (len != NULL) {
9086 *len = last - start;
9087 ret = (xmlChar *) start;
9088 } else {
9089 if (alloc) *alloc = 1;
9090 ret = xmlStrndup(start, last - start);
9092 CUR_PTR = in;
9093 ctxt->input->line = line;
9094 ctxt->input->col = col;
9095 if (alloc) *alloc = 0;
9096 return ret;
9097 need_complex:
9098 if (alloc) *alloc = 1;
9099 return xmlParseAttValueComplex(ctxt, len, normalize);
9103 * xmlParseAttribute2:
9104 * @ctxt: an XML parser context
9105 * @pref: the element prefix
9106 * @elem: the element name
9107 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9108 * @value: a xmlChar ** used to store the value of the attribute
9109 * @len: an int * to save the length of the attribute
9110 * @alloc: an int * to indicate if the attribute was allocated
9112 * parse an attribute in the new SAX2 framework.
9114 * Returns the attribute name, and the value in *value, .
9117 static const xmlChar *
9118 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9119 const xmlChar * pref, const xmlChar * elem,
9120 const xmlChar ** prefix, xmlChar ** value,
9121 int *len, int *alloc)
9123 const xmlChar *name;
9124 xmlChar *val, *internal_val = NULL;
9125 int normalize = 0;
9127 *value = NULL;
9128 GROW;
9129 name = xmlParseQName(ctxt, prefix);
9130 if (name == NULL) {
9131 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9132 "error parsing attribute name\n");
9133 return (NULL);
9137 * get the type if needed
9139 if (ctxt->attsSpecial != NULL) {
9140 int type;
9142 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9143 pref, elem, *prefix, name);
9144 if (type != 0)
9145 normalize = 1;
9149 * read the value
9151 SKIP_BLANKS;
9152 if (RAW == '=') {
9153 NEXT;
9154 SKIP_BLANKS;
9155 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9156 if (normalize) {
9158 * Sometimes a second normalisation pass for spaces is needed
9159 * but that only happens if charrefs or entities references
9160 * have been used in the attribute value, i.e. the attribute
9161 * value have been extracted in an allocated string already.
9163 if (*alloc) {
9164 const xmlChar *val2;
9166 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9167 if ((val2 != NULL) && (val2 != val)) {
9168 xmlFree(val);
9169 val = (xmlChar *) val2;
9173 ctxt->instate = XML_PARSER_CONTENT;
9174 } else {
9175 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9176 "Specification mandates value for attribute %s\n",
9177 name);
9178 return (NULL);
9181 if (*prefix == ctxt->str_xml) {
9183 * Check that xml:lang conforms to the specification
9184 * No more registered as an error, just generate a warning now
9185 * since this was deprecated in XML second edition
9187 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9188 internal_val = xmlStrndup(val, *len);
9189 if (!xmlCheckLanguageID(internal_val)) {
9190 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9191 "Malformed value for xml:lang : %s\n",
9192 internal_val, NULL);
9197 * Check that xml:space conforms to the specification
9199 if (xmlStrEqual(name, BAD_CAST "space")) {
9200 internal_val = xmlStrndup(val, *len);
9201 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9202 *(ctxt->space) = 0;
9203 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9204 *(ctxt->space) = 1;
9205 else {
9206 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9207 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9208 internal_val, NULL);
9211 if (internal_val) {
9212 xmlFree(internal_val);
9216 *value = val;
9217 return (name);
9220 * xmlParseStartTag2:
9221 * @ctxt: an XML parser context
9223 * parse a start of tag either for rule element or
9224 * EmptyElement. In both case we don't parse the tag closing chars.
9225 * This routine is called when running SAX2 parsing
9227 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9229 * [ WFC: Unique Att Spec ]
9230 * No attribute name may appear more than once in the same start-tag or
9231 * empty-element tag.
9233 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9235 * [ WFC: Unique Att Spec ]
9236 * No attribute name may appear more than once in the same start-tag or
9237 * empty-element tag.
9239 * With namespace:
9241 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9243 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9245 * Returns the element name parsed
9248 static const xmlChar *
9249 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9250 const xmlChar **URI, int *tlen) {
9251 const xmlChar *localname;
9252 const xmlChar *prefix;
9253 const xmlChar *attname;
9254 const xmlChar *aprefix;
9255 const xmlChar *nsname;
9256 xmlChar *attvalue;
9257 const xmlChar **atts = ctxt->atts;
9258 int maxatts = ctxt->maxatts;
9259 int nratts, nbatts, nbdef, inputid;
9260 int i, j, nbNs, attval;
9261 unsigned long cur;
9262 int nsNr = ctxt->nsNr;
9264 if (RAW != '<') return(NULL);
9265 NEXT1;
9268 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9269 * point since the attribute values may be stored as pointers to
9270 * the buffer and calling SHRINK would destroy them !
9271 * The Shrinking is only possible once the full set of attribute
9272 * callbacks have been done.
9274 SHRINK;
9275 cur = ctxt->input->cur - ctxt->input->base;
9276 inputid = ctxt->input->id;
9277 nbatts = 0;
9278 nratts = 0;
9279 nbdef = 0;
9280 nbNs = 0;
9281 attval = 0;
9282 /* Forget any namespaces added during an earlier parse of this element. */
9283 ctxt->nsNr = nsNr;
9285 localname = xmlParseQName(ctxt, &prefix);
9286 if (localname == NULL) {
9287 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9288 "StartTag: invalid element name\n");
9289 return(NULL);
9291 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9294 * Now parse the attributes, it ends up with the ending
9296 * (S Attribute)* S?
9298 SKIP_BLANKS;
9299 GROW;
9301 while (((RAW != '>') &&
9302 ((RAW != '/') || (NXT(1) != '>')) &&
9303 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9304 int id = ctxt->input->id;
9305 unsigned long cons = CUR_CONSUMED;
9306 int len = -1, alloc = 0;
9308 attname = xmlParseAttribute2(ctxt, prefix, localname,
9309 &aprefix, &attvalue, &len, &alloc);
9310 if ((attname == NULL) || (attvalue == NULL))
9311 goto next_attr;
9312 if (len < 0) len = xmlStrlen(attvalue);
9314 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9315 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9316 xmlURIPtr uri;
9318 if (URL == NULL) {
9319 xmlErrMemory(ctxt, "dictionary allocation failure");
9320 if ((attvalue != NULL) && (alloc != 0))
9321 xmlFree(attvalue);
9322 localname = NULL;
9323 goto done;
9325 if (*URL != 0) {
9326 uri = xmlParseURI((const char *) URL);
9327 if (uri == NULL) {
9328 xmlNsErr(ctxt, XML_WAR_NS_URI,
9329 "xmlns: '%s' is not a valid URI\n",
9330 URL, NULL, NULL);
9331 } else {
9332 if (uri->scheme == NULL) {
9333 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9334 "xmlns: URI %s is not absolute\n",
9335 URL, NULL, NULL);
9337 xmlFreeURI(uri);
9339 if (URL == ctxt->str_xml_ns) {
9340 if (attname != ctxt->str_xml) {
9341 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9342 "xml namespace URI cannot be the default namespace\n",
9343 NULL, NULL, NULL);
9345 goto next_attr;
9347 if ((len == 29) &&
9348 (xmlStrEqual(URL,
9349 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9350 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9351 "reuse of the xmlns namespace name is forbidden\n",
9352 NULL, NULL, NULL);
9353 goto next_attr;
9357 * check that it's not a defined namespace
9359 for (j = 1;j <= nbNs;j++)
9360 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9361 break;
9362 if (j <= nbNs)
9363 xmlErrAttributeDup(ctxt, NULL, attname);
9364 else
9365 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9367 } else if (aprefix == ctxt->str_xmlns) {
9368 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9369 xmlURIPtr uri;
9371 if (attname == ctxt->str_xml) {
9372 if (URL != ctxt->str_xml_ns) {
9373 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9374 "xml namespace prefix mapped to wrong URI\n",
9375 NULL, NULL, NULL);
9378 * Do not keep a namespace definition node
9380 goto next_attr;
9382 if (URL == ctxt->str_xml_ns) {
9383 if (attname != ctxt->str_xml) {
9384 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9385 "xml namespace URI mapped to wrong prefix\n",
9386 NULL, NULL, NULL);
9388 goto next_attr;
9390 if (attname == ctxt->str_xmlns) {
9391 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9392 "redefinition of the xmlns prefix is forbidden\n",
9393 NULL, NULL, NULL);
9394 goto next_attr;
9396 if ((len == 29) &&
9397 (xmlStrEqual(URL,
9398 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9399 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9400 "reuse of the xmlns namespace name is forbidden\n",
9401 NULL, NULL, NULL);
9402 goto next_attr;
9404 if ((URL == NULL) || (URL[0] == 0)) {
9405 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9406 "xmlns:%s: Empty XML namespace is not allowed\n",
9407 attname, NULL, NULL);
9408 goto next_attr;
9409 } else {
9410 uri = xmlParseURI((const char *) URL);
9411 if (uri == NULL) {
9412 xmlNsErr(ctxt, XML_WAR_NS_URI,
9413 "xmlns:%s: '%s' is not a valid URI\n",
9414 attname, URL, NULL);
9415 } else {
9416 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9417 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9418 "xmlns:%s: URI %s is not absolute\n",
9419 attname, URL, NULL);
9421 xmlFreeURI(uri);
9426 * check that it's not a defined namespace
9428 for (j = 1;j <= nbNs;j++)
9429 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9430 break;
9431 if (j <= nbNs)
9432 xmlErrAttributeDup(ctxt, aprefix, attname);
9433 else
9434 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9436 } else {
9438 * Add the pair to atts
9440 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9441 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9442 goto next_attr;
9444 maxatts = ctxt->maxatts;
9445 atts = ctxt->atts;
9447 ctxt->attallocs[nratts++] = alloc;
9448 atts[nbatts++] = attname;
9449 atts[nbatts++] = aprefix;
9451 * The namespace URI field is used temporarily to point at the
9452 * base of the current input buffer for non-alloced attributes.
9453 * When the input buffer is reallocated, all the pointers become
9454 * invalid, but they can be reconstructed later.
9456 if (alloc)
9457 atts[nbatts++] = NULL;
9458 else
9459 atts[nbatts++] = ctxt->input->base;
9460 atts[nbatts++] = attvalue;
9461 attvalue += len;
9462 atts[nbatts++] = attvalue;
9464 * tag if some deallocation is needed
9466 if (alloc != 0) attval = 1;
9467 attvalue = NULL; /* moved into atts */
9470 next_attr:
9471 if ((attvalue != NULL) && (alloc != 0)) {
9472 xmlFree(attvalue);
9473 attvalue = NULL;
9476 GROW
9477 if (ctxt->instate == XML_PARSER_EOF)
9478 break;
9479 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9480 break;
9481 if (SKIP_BLANKS == 0) {
9482 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9483 "attributes construct error\n");
9484 break;
9486 if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
9487 (attname == NULL) && (attvalue == NULL)) {
9488 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9489 "xmlParseStartTag: problem parsing attributes\n");
9490 break;
9492 GROW;
9495 if (ctxt->input->id != inputid) {
9496 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9497 "Unexpected change of input\n");
9498 localname = NULL;
9499 goto done;
9502 /* Reconstruct attribute value pointers. */
9503 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9504 if (atts[i+2] != NULL) {
9506 * Arithmetic on dangling pointers is technically undefined
9507 * behavior, but well...
9509 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9510 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9511 atts[i+3] += offset; /* value */
9512 atts[i+4] += offset; /* valuend */
9517 * The attributes defaulting
9519 if (ctxt->attsDefault != NULL) {
9520 xmlDefAttrsPtr defaults;
9522 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9523 if (defaults != NULL) {
9524 for (i = 0;i < defaults->nbAttrs;i++) {
9525 attname = defaults->values[5 * i];
9526 aprefix = defaults->values[5 * i + 1];
9529 * special work for namespaces defaulted defs
9531 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9533 * check that it's not a defined namespace
9535 for (j = 1;j <= nbNs;j++)
9536 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9537 break;
9538 if (j <= nbNs) continue;
9540 nsname = xmlGetNamespace(ctxt, NULL);
9541 if (nsname != defaults->values[5 * i + 2]) {
9542 if (nsPush(ctxt, NULL,
9543 defaults->values[5 * i + 2]) > 0)
9544 nbNs++;
9546 } else if (aprefix == ctxt->str_xmlns) {
9548 * check that it's not a defined namespace
9550 for (j = 1;j <= nbNs;j++)
9551 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9552 break;
9553 if (j <= nbNs) continue;
9555 nsname = xmlGetNamespace(ctxt, attname);
9556 if (nsname != defaults->values[2]) {
9557 if (nsPush(ctxt, attname,
9558 defaults->values[5 * i + 2]) > 0)
9559 nbNs++;
9561 } else {
9563 * check that it's not a defined attribute
9565 for (j = 0;j < nbatts;j+=5) {
9566 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9567 break;
9569 if (j < nbatts) continue;
9571 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9572 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9573 localname = NULL;
9574 goto done;
9576 maxatts = ctxt->maxatts;
9577 atts = ctxt->atts;
9579 atts[nbatts++] = attname;
9580 atts[nbatts++] = aprefix;
9581 if (aprefix == NULL)
9582 atts[nbatts++] = NULL;
9583 else
9584 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9585 atts[nbatts++] = defaults->values[5 * i + 2];
9586 atts[nbatts++] = defaults->values[5 * i + 3];
9587 if ((ctxt->standalone == 1) &&
9588 (defaults->values[5 * i + 4] != NULL)) {
9589 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9590 "standalone: attribute %s on %s defaulted from external subset\n",
9591 attname, localname);
9593 nbdef++;
9600 * The attributes checkings
9602 for (i = 0; i < nbatts;i += 5) {
9604 * The default namespace does not apply to attribute names.
9606 if (atts[i + 1] != NULL) {
9607 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9608 if (nsname == NULL) {
9609 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9610 "Namespace prefix %s for %s on %s is not defined\n",
9611 atts[i + 1], atts[i], localname);
9613 atts[i + 2] = nsname;
9614 } else
9615 nsname = NULL;
9617 * [ WFC: Unique Att Spec ]
9618 * No attribute name may appear more than once in the same
9619 * start-tag or empty-element tag.
9620 * As extended by the Namespace in XML REC.
9622 for (j = 0; j < i;j += 5) {
9623 if (atts[i] == atts[j]) {
9624 if (atts[i+1] == atts[j+1]) {
9625 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9626 break;
9628 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9629 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9630 "Namespaced Attribute %s in '%s' redefined\n",
9631 atts[i], nsname, NULL);
9632 break;
9638 nsname = xmlGetNamespace(ctxt, prefix);
9639 if ((prefix != NULL) && (nsname == NULL)) {
9640 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9641 "Namespace prefix %s on %s is not defined\n",
9642 prefix, localname, NULL);
9644 *pref = prefix;
9645 *URI = nsname;
9648 * SAX: Start of Element !
9650 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9651 (!ctxt->disableSAX)) {
9652 if (nbNs > 0)
9653 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9654 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9655 nbatts / 5, nbdef, atts);
9656 else
9657 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9658 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9661 done:
9663 * Free up attribute allocated strings if needed
9665 if (attval != 0) {
9666 for (i = 3,j = 0; j < nratts;i += 5,j++)
9667 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9668 xmlFree((xmlChar *) atts[i]);
9671 return(localname);
9675 * xmlParseEndTag2:
9676 * @ctxt: an XML parser context
9677 * @line: line of the start tag
9678 * @nsNr: number of namespaces on the start tag
9680 * parse an end of tag
9682 * [42] ETag ::= '</' Name S? '>'
9684 * With namespace
9686 * [NS 9] ETag ::= '</' QName S? '>'
9689 static void
9690 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9691 const xmlChar *name;
9693 GROW;
9694 if ((RAW != '<') || (NXT(1) != '/')) {
9695 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9696 return;
9698 SKIP(2);
9700 if (tag->prefix == NULL)
9701 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9702 else
9703 name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9706 * We should definitely be at the ending "S? '>'" part
9708 GROW;
9709 if (ctxt->instate == XML_PARSER_EOF)
9710 return;
9711 SKIP_BLANKS;
9712 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9713 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9714 } else
9715 NEXT1;
9718 * [ WFC: Element Type Match ]
9719 * The Name in an element's end-tag must match the element type in the
9720 * start-tag.
9723 if (name != (xmlChar*)1) {
9724 if (name == NULL) name = BAD_CAST "unparsable";
9725 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9726 "Opening and ending tag mismatch: %s line %d and %s\n",
9727 ctxt->name, tag->line, name);
9731 * SAX: End of Tag
9733 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9734 (!ctxt->disableSAX))
9735 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9736 tag->URI);
9738 spacePop(ctxt);
9739 if (tag->nsNr != 0)
9740 nsPop(ctxt, tag->nsNr);
9744 * xmlParseCDSect:
9745 * @ctxt: an XML parser context
9747 * Parse escaped pure raw content.
9749 * [18] CDSect ::= CDStart CData CDEnd
9751 * [19] CDStart ::= '<![CDATA['
9753 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9755 * [21] CDEnd ::= ']]>'
9757 void
9758 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9759 xmlChar *buf = NULL;
9760 int len = 0;
9761 int size = XML_PARSER_BUFFER_SIZE;
9762 int r, rl;
9763 int s, sl;
9764 int cur, l;
9765 int count = 0;
9767 /* Check 2.6.0 was NXT(0) not RAW */
9768 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9769 SKIP(9);
9770 } else
9771 return;
9773 ctxt->instate = XML_PARSER_CDATA_SECTION;
9774 r = CUR_CHAR(rl);
9775 if (!IS_CHAR(r)) {
9776 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9777 ctxt->instate = XML_PARSER_CONTENT;
9778 return;
9780 NEXTL(rl);
9781 s = CUR_CHAR(sl);
9782 if (!IS_CHAR(s)) {
9783 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9784 ctxt->instate = XML_PARSER_CONTENT;
9785 return;
9787 NEXTL(sl);
9788 cur = CUR_CHAR(l);
9789 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9790 if (buf == NULL) {
9791 xmlErrMemory(ctxt, NULL);
9792 return;
9794 while (IS_CHAR(cur) &&
9795 ((r != ']') || (s != ']') || (cur != '>'))) {
9796 if (len + 5 >= size) {
9797 xmlChar *tmp;
9799 if ((size > XML_MAX_TEXT_LENGTH) &&
9800 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9801 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9802 "CData section too big found", NULL);
9803 xmlFree (buf);
9804 return;
9806 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9807 if (tmp == NULL) {
9808 xmlFree(buf);
9809 xmlErrMemory(ctxt, NULL);
9810 return;
9812 buf = tmp;
9813 size *= 2;
9815 COPY_BUF(rl,buf,len,r);
9816 r = s;
9817 rl = sl;
9818 s = cur;
9819 sl = l;
9820 count++;
9821 if (count > 50) {
9822 SHRINK;
9823 GROW;
9824 if (ctxt->instate == XML_PARSER_EOF) {
9825 xmlFree(buf);
9826 return;
9828 count = 0;
9830 NEXTL(l);
9831 cur = CUR_CHAR(l);
9833 buf[len] = 0;
9834 ctxt->instate = XML_PARSER_CONTENT;
9835 if (cur != '>') {
9836 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9837 "CData section not finished\n%.50s\n", buf);
9838 xmlFree(buf);
9839 return;
9841 NEXTL(l);
9844 * OK the buffer is to be consumed as cdata.
9846 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9847 if (ctxt->sax->cdataBlock != NULL)
9848 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9849 else if (ctxt->sax->characters != NULL)
9850 ctxt->sax->characters(ctxt->userData, buf, len);
9852 xmlFree(buf);
9856 * xmlParseContentInternal:
9857 * @ctxt: an XML parser context
9859 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9860 * unexpected EOF to the caller.
9863 static void
9864 xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9865 int nameNr = ctxt->nameNr;
9867 GROW;
9868 while ((RAW != 0) &&
9869 (ctxt->instate != XML_PARSER_EOF)) {
9870 int id = ctxt->input->id;
9871 unsigned long cons = CUR_CONSUMED;
9872 const xmlChar *cur = ctxt->input->cur;
9875 * First case : a Processing Instruction.
9877 if ((*cur == '<') && (cur[1] == '?')) {
9878 xmlParsePI(ctxt);
9882 * Second case : a CDSection
9884 /* 2.6.0 test was *cur not RAW */
9885 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9886 xmlParseCDSect(ctxt);
9890 * Third case : a comment
9892 else if ((*cur == '<') && (NXT(1) == '!') &&
9893 (NXT(2) == '-') && (NXT(3) == '-')) {
9894 xmlParseComment(ctxt);
9895 ctxt->instate = XML_PARSER_CONTENT;
9899 * Fourth case : a sub-element.
9901 else if (*cur == '<') {
9902 if (NXT(1) == '/') {
9903 if (ctxt->nameNr <= nameNr)
9904 break;
9905 xmlParseElementEnd(ctxt);
9906 } else {
9907 xmlParseElementStart(ctxt);
9912 * Fifth case : a reference. If if has not been resolved,
9913 * parsing returns it's Name, create the node
9916 else if (*cur == '&') {
9917 xmlParseReference(ctxt);
9921 * Last case, text. Note that References are handled directly.
9923 else {
9924 xmlParseCharData(ctxt, 0);
9927 GROW;
9928 SHRINK;
9930 if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
9931 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9932 "detected an error in element content\n");
9933 xmlHaltParser(ctxt);
9934 break;
9940 * xmlParseContent:
9941 * @ctxt: an XML parser context
9943 * Parse a content sequence. Stops at EOF or '</'.
9945 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9948 void
9949 xmlParseContent(xmlParserCtxtPtr ctxt) {
9950 int nameNr = ctxt->nameNr;
9952 xmlParseContentInternal(ctxt);
9954 if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9955 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9956 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9957 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9958 "Premature end of data in tag %s line %d\n",
9959 name, line, NULL);
9964 * xmlParseElement:
9965 * @ctxt: an XML parser context
9967 * parse an XML element
9969 * [39] element ::= EmptyElemTag | STag content ETag
9971 * [ WFC: Element Type Match ]
9972 * The Name in an element's end-tag must match the element type in the
9973 * start-tag.
9977 void
9978 xmlParseElement(xmlParserCtxtPtr ctxt) {
9979 if (xmlParseElementStart(ctxt) != 0)
9980 return;
9982 xmlParseContentInternal(ctxt);
9983 if (ctxt->instate == XML_PARSER_EOF)
9984 return;
9986 if (CUR == 0) {
9987 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9988 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9989 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9990 "Premature end of data in tag %s line %d\n",
9991 name, line, NULL);
9992 return;
9995 xmlParseElementEnd(ctxt);
9999 * xmlParseElementStart:
10000 * @ctxt: an XML parser context
10002 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10003 * opening tag was parsed, 1 if an empty element was parsed.
10005 static int
10006 xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10007 const xmlChar *name;
10008 const xmlChar *prefix = NULL;
10009 const xmlChar *URI = NULL;
10010 xmlParserNodeInfo node_info;
10011 int line, tlen = 0;
10012 xmlNodePtr ret;
10013 int nsNr = ctxt->nsNr;
10015 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10016 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10017 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10018 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10019 xmlParserMaxDepth);
10020 xmlHaltParser(ctxt);
10021 return(-1);
10024 /* Capture start position */
10025 if (ctxt->record_info) {
10026 node_info.begin_pos = ctxt->input->consumed +
10027 (CUR_PTR - ctxt->input->base);
10028 node_info.begin_line = ctxt->input->line;
10031 if (ctxt->spaceNr == 0)
10032 spacePush(ctxt, -1);
10033 else if (*ctxt->space == -2)
10034 spacePush(ctxt, -1);
10035 else
10036 spacePush(ctxt, *ctxt->space);
10038 line = ctxt->input->line;
10039 #ifdef LIBXML_SAX1_ENABLED
10040 if (ctxt->sax2)
10041 #endif /* LIBXML_SAX1_ENABLED */
10042 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10043 #ifdef LIBXML_SAX1_ENABLED
10044 else
10045 name = xmlParseStartTag(ctxt);
10046 #endif /* LIBXML_SAX1_ENABLED */
10047 if (ctxt->instate == XML_PARSER_EOF)
10048 return(-1);
10049 if (name == NULL) {
10050 spacePop(ctxt);
10051 return(-1);
10053 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10054 ret = ctxt->node;
10056 #ifdef LIBXML_VALID_ENABLED
10058 * [ VC: Root Element Type ]
10059 * The Name in the document type declaration must match the element
10060 * type of the root element.
10062 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10063 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10064 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10065 #endif /* LIBXML_VALID_ENABLED */
10068 * Check for an Empty Element.
10070 if ((RAW == '/') && (NXT(1) == '>')) {
10071 SKIP(2);
10072 if (ctxt->sax2) {
10073 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10074 (!ctxt->disableSAX))
10075 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10076 #ifdef LIBXML_SAX1_ENABLED
10077 } else {
10078 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10079 (!ctxt->disableSAX))
10080 ctxt->sax->endElement(ctxt->userData, name);
10081 #endif /* LIBXML_SAX1_ENABLED */
10083 namePop(ctxt);
10084 spacePop(ctxt);
10085 if (nsNr != ctxt->nsNr)
10086 nsPop(ctxt, ctxt->nsNr - nsNr);
10087 if ( ret != NULL && ctxt->record_info ) {
10088 node_info.end_pos = ctxt->input->consumed +
10089 (CUR_PTR - ctxt->input->base);
10090 node_info.end_line = ctxt->input->line;
10091 node_info.node = ret;
10092 xmlParserAddNodeInfo(ctxt, &node_info);
10094 return(1);
10096 if (RAW == '>') {
10097 NEXT1;
10098 } else {
10099 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10100 "Couldn't find end of Start Tag %s line %d\n",
10101 name, line, NULL);
10104 * end of parsing of this node.
10106 nodePop(ctxt);
10107 namePop(ctxt);
10108 spacePop(ctxt);
10109 if (nsNr != ctxt->nsNr)
10110 nsPop(ctxt, ctxt->nsNr - nsNr);
10113 * Capture end position and add node
10115 if ( ret != NULL && ctxt->record_info ) {
10116 node_info.end_pos = ctxt->input->consumed +
10117 (CUR_PTR - ctxt->input->base);
10118 node_info.end_line = ctxt->input->line;
10119 node_info.node = ret;
10120 xmlParserAddNodeInfo(ctxt, &node_info);
10122 return(-1);
10125 return(0);
10129 * xmlParseElementEnd:
10130 * @ctxt: an XML parser context
10132 * Parse the end of an XML element.
10134 static void
10135 xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10136 xmlParserNodeInfo node_info;
10137 xmlNodePtr ret = ctxt->node;
10139 if (ctxt->nameNr <= 0)
10140 return;
10143 * parse the end of tag: '</' should be here.
10145 if (ctxt->sax2) {
10146 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10147 namePop(ctxt);
10149 #ifdef LIBXML_SAX1_ENABLED
10150 else
10151 xmlParseEndTag1(ctxt, 0);
10152 #endif /* LIBXML_SAX1_ENABLED */
10155 * Capture end position and add node
10157 if ( ret != NULL && ctxt->record_info ) {
10158 node_info.end_pos = ctxt->input->consumed +
10159 (CUR_PTR - ctxt->input->base);
10160 node_info.end_line = ctxt->input->line;
10161 node_info.node = ret;
10162 xmlParserAddNodeInfo(ctxt, &node_info);
10167 * xmlParseVersionNum:
10168 * @ctxt: an XML parser context
10170 * parse the XML version value.
10172 * [26] VersionNum ::= '1.' [0-9]+
10174 * In practice allow [0-9].[0-9]+ at that level
10176 * Returns the string giving the XML version number, or NULL
10178 xmlChar *
10179 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10180 xmlChar *buf = NULL;
10181 int len = 0;
10182 int size = 10;
10183 xmlChar cur;
10185 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10186 if (buf == NULL) {
10187 xmlErrMemory(ctxt, NULL);
10188 return(NULL);
10190 cur = CUR;
10191 if (!((cur >= '0') && (cur <= '9'))) {
10192 xmlFree(buf);
10193 return(NULL);
10195 buf[len++] = cur;
10196 NEXT;
10197 cur=CUR;
10198 if (cur != '.') {
10199 xmlFree(buf);
10200 return(NULL);
10202 buf[len++] = cur;
10203 NEXT;
10204 cur=CUR;
10205 while ((cur >= '0') && (cur <= '9')) {
10206 if (len + 1 >= size) {
10207 xmlChar *tmp;
10209 size *= 2;
10210 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10211 if (tmp == NULL) {
10212 xmlFree(buf);
10213 xmlErrMemory(ctxt, NULL);
10214 return(NULL);
10216 buf = tmp;
10218 buf[len++] = cur;
10219 NEXT;
10220 cur=CUR;
10222 buf[len] = 0;
10223 return(buf);
10227 * xmlParseVersionInfo:
10228 * @ctxt: an XML parser context
10230 * parse the XML version.
10232 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10234 * [25] Eq ::= S? '=' S?
10236 * Returns the version string, e.g. "1.0"
10239 xmlChar *
10240 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10241 xmlChar *version = NULL;
10243 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10244 SKIP(7);
10245 SKIP_BLANKS;
10246 if (RAW != '=') {
10247 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10248 return(NULL);
10250 NEXT;
10251 SKIP_BLANKS;
10252 if (RAW == '"') {
10253 NEXT;
10254 version = xmlParseVersionNum(ctxt);
10255 if (RAW != '"') {
10256 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10257 } else
10258 NEXT;
10259 } else if (RAW == '\''){
10260 NEXT;
10261 version = xmlParseVersionNum(ctxt);
10262 if (RAW != '\'') {
10263 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10264 } else
10265 NEXT;
10266 } else {
10267 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10270 return(version);
10274 * xmlParseEncName:
10275 * @ctxt: an XML parser context
10277 * parse the XML encoding name
10279 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10281 * Returns the encoding name value or NULL
10283 xmlChar *
10284 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10285 xmlChar *buf = NULL;
10286 int len = 0;
10287 int size = 10;
10288 xmlChar cur;
10290 cur = CUR;
10291 if (((cur >= 'a') && (cur <= 'z')) ||
10292 ((cur >= 'A') && (cur <= 'Z'))) {
10293 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10294 if (buf == NULL) {
10295 xmlErrMemory(ctxt, NULL);
10296 return(NULL);
10299 buf[len++] = cur;
10300 NEXT;
10301 cur = CUR;
10302 while (((cur >= 'a') && (cur <= 'z')) ||
10303 ((cur >= 'A') && (cur <= 'Z')) ||
10304 ((cur >= '0') && (cur <= '9')) ||
10305 (cur == '.') || (cur == '_') ||
10306 (cur == '-')) {
10307 if (len + 1 >= size) {
10308 xmlChar *tmp;
10310 size *= 2;
10311 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10312 if (tmp == NULL) {
10313 xmlErrMemory(ctxt, NULL);
10314 xmlFree(buf);
10315 return(NULL);
10317 buf = tmp;
10319 buf[len++] = cur;
10320 NEXT;
10321 cur = CUR;
10322 if (cur == 0) {
10323 SHRINK;
10324 GROW;
10325 cur = CUR;
10328 buf[len] = 0;
10329 } else {
10330 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10332 return(buf);
10336 * xmlParseEncodingDecl:
10337 * @ctxt: an XML parser context
10339 * parse the XML encoding declaration
10341 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10343 * this setups the conversion filters.
10345 * Returns the encoding value or NULL
10348 const xmlChar *
10349 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10350 xmlChar *encoding = NULL;
10352 SKIP_BLANKS;
10353 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10354 SKIP(8);
10355 SKIP_BLANKS;
10356 if (RAW != '=') {
10357 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10358 return(NULL);
10360 NEXT;
10361 SKIP_BLANKS;
10362 if (RAW == '"') {
10363 NEXT;
10364 encoding = xmlParseEncName(ctxt);
10365 if (RAW != '"') {
10366 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10367 xmlFree((xmlChar *) encoding);
10368 return(NULL);
10369 } else
10370 NEXT;
10371 } else if (RAW == '\''){
10372 NEXT;
10373 encoding = xmlParseEncName(ctxt);
10374 if (RAW != '\'') {
10375 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10376 xmlFree((xmlChar *) encoding);
10377 return(NULL);
10378 } else
10379 NEXT;
10380 } else {
10381 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10385 * Non standard parsing, allowing the user to ignore encoding
10387 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10388 xmlFree((xmlChar *) encoding);
10389 return(NULL);
10393 * UTF-16 encoding switch has already taken place at this stage,
10394 * more over the little-endian/big-endian selection is already done
10396 if ((encoding != NULL) &&
10397 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10398 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10400 * If no encoding was passed to the parser, that we are
10401 * using UTF-16 and no decoder is present i.e. the
10402 * document is apparently UTF-8 compatible, then raise an
10403 * encoding mismatch fatal error
10405 if ((ctxt->encoding == NULL) &&
10406 (ctxt->input->buf != NULL) &&
10407 (ctxt->input->buf->encoder == NULL)) {
10408 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10409 "Document labelled UTF-16 but has UTF-8 content\n");
10411 if (ctxt->encoding != NULL)
10412 xmlFree((xmlChar *) ctxt->encoding);
10413 ctxt->encoding = encoding;
10416 * UTF-8 encoding is handled natively
10418 else if ((encoding != NULL) &&
10419 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10420 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10421 if (ctxt->encoding != NULL)
10422 xmlFree((xmlChar *) ctxt->encoding);
10423 ctxt->encoding = encoding;
10425 else if (encoding != NULL) {
10426 xmlCharEncodingHandlerPtr handler;
10428 if (ctxt->input->encoding != NULL)
10429 xmlFree((xmlChar *) ctxt->input->encoding);
10430 ctxt->input->encoding = encoding;
10432 handler = xmlFindCharEncodingHandler((const char *) encoding);
10433 if (handler != NULL) {
10434 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10435 /* failed to convert */
10436 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10437 return(NULL);
10439 } else {
10440 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10441 "Unsupported encoding %s\n", encoding);
10442 return(NULL);
10446 return(encoding);
10450 * xmlParseSDDecl:
10451 * @ctxt: an XML parser context
10453 * parse the XML standalone declaration
10455 * [32] SDDecl ::= S 'standalone' Eq
10456 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10458 * [ VC: Standalone Document Declaration ]
10459 * TODO The standalone document declaration must have the value "no"
10460 * if any external markup declarations contain declarations of:
10461 * - attributes with default values, if elements to which these
10462 * attributes apply appear in the document without specifications
10463 * of values for these attributes, or
10464 * - entities (other than amp, lt, gt, apos, quot), if references
10465 * to those entities appear in the document, or
10466 * - attributes with values subject to normalization, where the
10467 * attribute appears in the document with a value which will change
10468 * as a result of normalization, or
10469 * - element types with element content, if white space occurs directly
10470 * within any instance of those types.
10472 * Returns:
10473 * 1 if standalone="yes"
10474 * 0 if standalone="no"
10475 * -2 if standalone attribute is missing or invalid
10476 * (A standalone value of -2 means that the XML declaration was found,
10477 * but no value was specified for the standalone attribute).
10481 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10482 int standalone = -2;
10484 SKIP_BLANKS;
10485 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10486 SKIP(10);
10487 SKIP_BLANKS;
10488 if (RAW != '=') {
10489 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10490 return(standalone);
10492 NEXT;
10493 SKIP_BLANKS;
10494 if (RAW == '\''){
10495 NEXT;
10496 if ((RAW == 'n') && (NXT(1) == 'o')) {
10497 standalone = 0;
10498 SKIP(2);
10499 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10500 (NXT(2) == 's')) {
10501 standalone = 1;
10502 SKIP(3);
10503 } else {
10504 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10506 if (RAW != '\'') {
10507 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10508 } else
10509 NEXT;
10510 } else if (RAW == '"'){
10511 NEXT;
10512 if ((RAW == 'n') && (NXT(1) == 'o')) {
10513 standalone = 0;
10514 SKIP(2);
10515 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10516 (NXT(2) == 's')) {
10517 standalone = 1;
10518 SKIP(3);
10519 } else {
10520 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10522 if (RAW != '"') {
10523 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10524 } else
10525 NEXT;
10526 } else {
10527 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10530 return(standalone);
10534 * xmlParseXMLDecl:
10535 * @ctxt: an XML parser context
10537 * parse an XML declaration header
10539 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10542 void
10543 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10544 xmlChar *version;
10547 * This value for standalone indicates that the document has an
10548 * XML declaration but it does not have a standalone attribute.
10549 * It will be overwritten later if a standalone attribute is found.
10551 ctxt->input->standalone = -2;
10554 * We know that '<?xml' is here.
10556 SKIP(5);
10558 if (!IS_BLANK_CH(RAW)) {
10559 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10560 "Blank needed after '<?xml'\n");
10562 SKIP_BLANKS;
10565 * We must have the VersionInfo here.
10567 version = xmlParseVersionInfo(ctxt);
10568 if (version == NULL) {
10569 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10570 } else {
10571 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10573 * Changed here for XML-1.0 5th edition
10575 if (ctxt->options & XML_PARSE_OLD10) {
10576 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10577 "Unsupported version '%s'\n",
10578 version);
10579 } else {
10580 if ((version[0] == '1') && ((version[1] == '.'))) {
10581 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10582 "Unsupported version '%s'\n",
10583 version, NULL);
10584 } else {
10585 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10586 "Unsupported version '%s'\n",
10587 version);
10591 if (ctxt->version != NULL)
10592 xmlFree((void *) ctxt->version);
10593 ctxt->version = version;
10597 * We may have the encoding declaration
10599 if (!IS_BLANK_CH(RAW)) {
10600 if ((RAW == '?') && (NXT(1) == '>')) {
10601 SKIP(2);
10602 return;
10604 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10606 xmlParseEncodingDecl(ctxt);
10607 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10608 (ctxt->instate == XML_PARSER_EOF)) {
10610 * The XML REC instructs us to stop parsing right here
10612 return;
10616 * We may have the standalone status.
10618 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10619 if ((RAW == '?') && (NXT(1) == '>')) {
10620 SKIP(2);
10621 return;
10623 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10627 * We can grow the input buffer freely at that point
10629 GROW;
10631 SKIP_BLANKS;
10632 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10634 SKIP_BLANKS;
10635 if ((RAW == '?') && (NXT(1) == '>')) {
10636 SKIP(2);
10637 } else if (RAW == '>') {
10638 /* Deprecated old WD ... */
10639 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10640 NEXT;
10641 } else {
10642 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10643 MOVETO_ENDTAG(CUR_PTR);
10644 NEXT;
10649 * xmlParseMisc:
10650 * @ctxt: an XML parser context
10652 * parse an XML Misc* optional field.
10654 * [27] Misc ::= Comment | PI | S
10657 void
10658 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10659 while (ctxt->instate != XML_PARSER_EOF) {
10660 SKIP_BLANKS;
10661 GROW;
10662 if ((RAW == '<') && (NXT(1) == '?')) {
10663 xmlParsePI(ctxt);
10664 } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10665 xmlParseComment(ctxt);
10666 } else {
10667 break;
10673 * xmlParseDocument:
10674 * @ctxt: an XML parser context
10676 * parse an XML document (and build a tree if using the standard SAX
10677 * interface).
10679 * [1] document ::= prolog element Misc*
10681 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10683 * Returns 0, -1 in case of error. the parser context is augmented
10684 * as a result of the parsing.
10688 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10689 xmlChar start[4];
10690 xmlCharEncoding enc;
10692 xmlInitParser();
10694 if ((ctxt == NULL) || (ctxt->input == NULL))
10695 return(-1);
10697 GROW;
10700 * SAX: detecting the level.
10702 xmlDetectSAX2(ctxt);
10705 * SAX: beginning of the document processing.
10707 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10708 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10709 if (ctxt->instate == XML_PARSER_EOF)
10710 return(-1);
10712 if ((ctxt->encoding == NULL) &&
10713 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10715 * Get the 4 first bytes and decode the charset
10716 * if enc != XML_CHAR_ENCODING_NONE
10717 * plug some encoding conversion routines.
10719 start[0] = RAW;
10720 start[1] = NXT(1);
10721 start[2] = NXT(2);
10722 start[3] = NXT(3);
10723 enc = xmlDetectCharEncoding(&start[0], 4);
10724 if (enc != XML_CHAR_ENCODING_NONE) {
10725 xmlSwitchEncoding(ctxt, enc);
10730 if (CUR == 0) {
10731 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10732 return(-1);
10736 * Check for the XMLDecl in the Prolog.
10737 * do not GROW here to avoid the detected encoder to decode more
10738 * than just the first line, unless the amount of data is really
10739 * too small to hold "<?xml version="1.0" encoding="foo"
10741 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10742 GROW;
10744 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10747 * Note that we will switch encoding on the fly.
10749 xmlParseXMLDecl(ctxt);
10750 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10751 (ctxt->instate == XML_PARSER_EOF)) {
10753 * The XML REC instructs us to stop parsing right here
10755 return(-1);
10757 ctxt->standalone = ctxt->input->standalone;
10758 SKIP_BLANKS;
10759 } else {
10760 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10762 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10763 ctxt->sax->startDocument(ctxt->userData);
10764 if (ctxt->instate == XML_PARSER_EOF)
10765 return(-1);
10766 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10767 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10768 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10772 * The Misc part of the Prolog
10774 xmlParseMisc(ctxt);
10777 * Then possibly doc type declaration(s) and more Misc
10778 * (doctypedecl Misc*)?
10780 GROW;
10781 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10783 ctxt->inSubset = 1;
10784 xmlParseDocTypeDecl(ctxt);
10785 if (RAW == '[') {
10786 ctxt->instate = XML_PARSER_DTD;
10787 xmlParseInternalSubset(ctxt);
10788 if (ctxt->instate == XML_PARSER_EOF)
10789 return(-1);
10793 * Create and update the external subset.
10795 ctxt->inSubset = 2;
10796 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10797 (!ctxt->disableSAX))
10798 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10799 ctxt->extSubSystem, ctxt->extSubURI);
10800 if (ctxt->instate == XML_PARSER_EOF)
10801 return(-1);
10802 ctxt->inSubset = 0;
10804 xmlCleanSpecialAttr(ctxt);
10806 ctxt->instate = XML_PARSER_PROLOG;
10807 xmlParseMisc(ctxt);
10811 * Time to start parsing the tree itself
10813 GROW;
10814 if (RAW != '<') {
10815 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10816 "Start tag expected, '<' not found\n");
10817 } else {
10818 ctxt->instate = XML_PARSER_CONTENT;
10819 xmlParseElement(ctxt);
10820 ctxt->instate = XML_PARSER_EPILOG;
10824 * The Misc part at the end
10826 xmlParseMisc(ctxt);
10828 if (RAW != 0) {
10829 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10831 ctxt->instate = XML_PARSER_EOF;
10835 * SAX: end of the document processing.
10837 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10838 ctxt->sax->endDocument(ctxt->userData);
10841 * Remove locally kept entity definitions if the tree was not built
10843 if ((ctxt->myDoc != NULL) &&
10844 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10845 xmlFreeDoc(ctxt->myDoc);
10846 ctxt->myDoc = NULL;
10849 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10850 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10851 if (ctxt->valid)
10852 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10853 if (ctxt->nsWellFormed)
10854 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10855 if (ctxt->options & XML_PARSE_OLD10)
10856 ctxt->myDoc->properties |= XML_DOC_OLD10;
10858 if (! ctxt->wellFormed) {
10859 ctxt->valid = 0;
10860 return(-1);
10862 return(0);
10866 * xmlParseExtParsedEnt:
10867 * @ctxt: an XML parser context
10869 * parse a general parsed entity
10870 * An external general parsed entity is well-formed if it matches the
10871 * production labeled extParsedEnt.
10873 * [78] extParsedEnt ::= TextDecl? content
10875 * Returns 0, -1 in case of error. the parser context is augmented
10876 * as a result of the parsing.
10880 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10881 xmlChar start[4];
10882 xmlCharEncoding enc;
10884 if ((ctxt == NULL) || (ctxt->input == NULL))
10885 return(-1);
10887 xmlDetectSAX2(ctxt);
10889 GROW;
10892 * SAX: beginning of the document processing.
10894 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10895 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10898 * Get the 4 first bytes and decode the charset
10899 * if enc != XML_CHAR_ENCODING_NONE
10900 * plug some encoding conversion routines.
10902 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10903 start[0] = RAW;
10904 start[1] = NXT(1);
10905 start[2] = NXT(2);
10906 start[3] = NXT(3);
10907 enc = xmlDetectCharEncoding(start, 4);
10908 if (enc != XML_CHAR_ENCODING_NONE) {
10909 xmlSwitchEncoding(ctxt, enc);
10914 if (CUR == 0) {
10915 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10919 * Check for the XMLDecl in the Prolog.
10921 GROW;
10922 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10925 * Note that we will switch encoding on the fly.
10927 xmlParseXMLDecl(ctxt);
10928 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10930 * The XML REC instructs us to stop parsing right here
10932 return(-1);
10934 SKIP_BLANKS;
10935 } else {
10936 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10938 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10939 ctxt->sax->startDocument(ctxt->userData);
10940 if (ctxt->instate == XML_PARSER_EOF)
10941 return(-1);
10944 * Doing validity checking on chunk doesn't make sense
10946 ctxt->instate = XML_PARSER_CONTENT;
10947 ctxt->validate = 0;
10948 ctxt->loadsubset = 0;
10949 ctxt->depth = 0;
10951 xmlParseContent(ctxt);
10952 if (ctxt->instate == XML_PARSER_EOF)
10953 return(-1);
10955 if ((RAW == '<') && (NXT(1) == '/')) {
10956 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10957 } else if (RAW != 0) {
10958 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10962 * SAX: end of the document processing.
10964 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10965 ctxt->sax->endDocument(ctxt->userData);
10967 if (! ctxt->wellFormed) return(-1);
10968 return(0);
10971 #ifdef LIBXML_PUSH_ENABLED
10972 /************************************************************************
10974 * Progressive parsing interfaces *
10976 ************************************************************************/
10979 * xmlParseLookupSequence:
10980 * @ctxt: an XML parser context
10981 * @first: the first char to lookup
10982 * @next: the next char to lookup or zero
10983 * @third: the next char to lookup or zero
10985 * Try to find if a sequence (first, next, third) or just (first next) or
10986 * (first) is available in the input stream.
10987 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10988 * to avoid rescanning sequences of bytes, it DOES change the state of the
10989 * parser, do not use liberally.
10991 * Returns the index to the current parsing point if the full sequence
10992 * is available, -1 otherwise.
10994 static int
10995 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10996 xmlChar next, xmlChar third) {
10997 int base, len;
10998 xmlParserInputPtr in;
10999 const xmlChar *buf;
11001 in = ctxt->input;
11002 if (in == NULL) return(-1);
11003 base = in->cur - in->base;
11004 if (base < 0) return(-1);
11005 if (ctxt->checkIndex > base)
11006 base = ctxt->checkIndex;
11007 if (in->buf == NULL) {
11008 buf = in->base;
11009 len = in->length;
11010 } else {
11011 buf = xmlBufContent(in->buf->buffer);
11012 len = xmlBufUse(in->buf->buffer);
11014 /* take into account the sequence length */
11015 if (third) len -= 2;
11016 else if (next) len --;
11017 for (;base < len;base++) {
11018 if (buf[base] == first) {
11019 if (third != 0) {
11020 if ((buf[base + 1] != next) ||
11021 (buf[base + 2] != third)) continue;
11022 } else if (next != 0) {
11023 if (buf[base + 1] != next) continue;
11025 ctxt->checkIndex = 0;
11026 #ifdef DEBUG_PUSH
11027 if (next == 0)
11028 xmlGenericError(xmlGenericErrorContext,
11029 "PP: lookup '%c' found at %d\n",
11030 first, base);
11031 else if (third == 0)
11032 xmlGenericError(xmlGenericErrorContext,
11033 "PP: lookup '%c%c' found at %d\n",
11034 first, next, base);
11035 else
11036 xmlGenericError(xmlGenericErrorContext,
11037 "PP: lookup '%c%c%c' found at %d\n",
11038 first, next, third, base);
11039 #endif
11040 return(base - (in->cur - in->base));
11043 ctxt->checkIndex = base;
11044 #ifdef DEBUG_PUSH
11045 if (next == 0)
11046 xmlGenericError(xmlGenericErrorContext,
11047 "PP: lookup '%c' failed\n", first);
11048 else if (third == 0)
11049 xmlGenericError(xmlGenericErrorContext,
11050 "PP: lookup '%c%c' failed\n", first, next);
11051 else
11052 xmlGenericError(xmlGenericErrorContext,
11053 "PP: lookup '%c%c%c' failed\n", first, next, third);
11054 #endif
11055 return(-1);
11059 * xmlParseGetLasts:
11060 * @ctxt: an XML parser context
11061 * @lastlt: pointer to store the last '<' from the input
11062 * @lastgt: pointer to store the last '>' from the input
11064 * Lookup the last < and > in the current chunk
11066 static void
11067 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11068 const xmlChar **lastgt) {
11069 const xmlChar *tmp;
11071 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11072 xmlGenericError(xmlGenericErrorContext,
11073 "Internal error: xmlParseGetLasts\n");
11074 return;
11076 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11077 tmp = ctxt->input->end;
11078 tmp--;
11079 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11080 if (tmp < ctxt->input->base) {
11081 *lastlt = NULL;
11082 *lastgt = NULL;
11083 } else {
11084 *lastlt = tmp;
11085 tmp++;
11086 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11087 if (*tmp == '\'') {
11088 tmp++;
11089 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11090 if (tmp < ctxt->input->end) tmp++;
11091 } else if (*tmp == '"') {
11092 tmp++;
11093 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11094 if (tmp < ctxt->input->end) tmp++;
11095 } else
11096 tmp++;
11098 if (tmp < ctxt->input->end)
11099 *lastgt = tmp;
11100 else {
11101 tmp = *lastlt;
11102 tmp--;
11103 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11104 if (tmp >= ctxt->input->base)
11105 *lastgt = tmp;
11106 else
11107 *lastgt = NULL;
11110 } else {
11111 *lastlt = NULL;
11112 *lastgt = NULL;
11116 * xmlCheckCdataPush:
11117 * @cur: pointer to the block of characters
11118 * @len: length of the block in bytes
11119 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11121 * Check that the block of characters is okay as SCdata content [20]
11123 * Returns the number of bytes to pass if okay, a negative index where an
11124 * UTF-8 error occurred otherwise
11126 static int
11127 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11128 int ix;
11129 unsigned char c;
11130 int codepoint;
11132 if ((utf == NULL) || (len <= 0))
11133 return(0);
11135 for (ix = 0; ix < len;) { /* string is 0-terminated */
11136 c = utf[ix];
11137 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11138 if (c >= 0x20)
11139 ix++;
11140 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11141 ix++;
11142 else
11143 return(-ix);
11144 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11145 if (ix + 2 > len) return(complete ? -ix : ix);
11146 if ((utf[ix+1] & 0xc0 ) != 0x80)
11147 return(-ix);
11148 codepoint = (utf[ix] & 0x1f) << 6;
11149 codepoint |= utf[ix+1] & 0x3f;
11150 if (!xmlIsCharQ(codepoint))
11151 return(-ix);
11152 ix += 2;
11153 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11154 if (ix + 3 > len) return(complete ? -ix : ix);
11155 if (((utf[ix+1] & 0xc0) != 0x80) ||
11156 ((utf[ix+2] & 0xc0) != 0x80))
11157 return(-ix);
11158 codepoint = (utf[ix] & 0xf) << 12;
11159 codepoint |= (utf[ix+1] & 0x3f) << 6;
11160 codepoint |= utf[ix+2] & 0x3f;
11161 if (!xmlIsCharQ(codepoint))
11162 return(-ix);
11163 ix += 3;
11164 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11165 if (ix + 4 > len) return(complete ? -ix : ix);
11166 if (((utf[ix+1] & 0xc0) != 0x80) ||
11167 ((utf[ix+2] & 0xc0) != 0x80) ||
11168 ((utf[ix+3] & 0xc0) != 0x80))
11169 return(-ix);
11170 codepoint = (utf[ix] & 0x7) << 18;
11171 codepoint |= (utf[ix+1] & 0x3f) << 12;
11172 codepoint |= (utf[ix+2] & 0x3f) << 6;
11173 codepoint |= utf[ix+3] & 0x3f;
11174 if (!xmlIsCharQ(codepoint))
11175 return(-ix);
11176 ix += 4;
11177 } else /* unknown encoding */
11178 return(-ix);
11180 return(ix);
11184 * xmlParseTryOrFinish:
11185 * @ctxt: an XML parser context
11186 * @terminate: last chunk indicator
11188 * Try to progress on parsing
11190 * Returns zero if no parsing was possible
11192 static int
11193 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11194 int ret = 0;
11195 int avail, tlen;
11196 xmlChar cur, next;
11197 const xmlChar *lastlt, *lastgt;
11199 if (ctxt->input == NULL)
11200 return(0);
11202 #ifdef DEBUG_PUSH
11203 switch (ctxt->instate) {
11204 case XML_PARSER_EOF:
11205 xmlGenericError(xmlGenericErrorContext,
11206 "PP: try EOF\n"); break;
11207 case XML_PARSER_START:
11208 xmlGenericError(xmlGenericErrorContext,
11209 "PP: try START\n"); break;
11210 case XML_PARSER_MISC:
11211 xmlGenericError(xmlGenericErrorContext,
11212 "PP: try MISC\n");break;
11213 case XML_PARSER_COMMENT:
11214 xmlGenericError(xmlGenericErrorContext,
11215 "PP: try COMMENT\n");break;
11216 case XML_PARSER_PROLOG:
11217 xmlGenericError(xmlGenericErrorContext,
11218 "PP: try PROLOG\n");break;
11219 case XML_PARSER_START_TAG:
11220 xmlGenericError(xmlGenericErrorContext,
11221 "PP: try START_TAG\n");break;
11222 case XML_PARSER_CONTENT:
11223 xmlGenericError(xmlGenericErrorContext,
11224 "PP: try CONTENT\n");break;
11225 case XML_PARSER_CDATA_SECTION:
11226 xmlGenericError(xmlGenericErrorContext,
11227 "PP: try CDATA_SECTION\n");break;
11228 case XML_PARSER_END_TAG:
11229 xmlGenericError(xmlGenericErrorContext,
11230 "PP: try END_TAG\n");break;
11231 case XML_PARSER_ENTITY_DECL:
11232 xmlGenericError(xmlGenericErrorContext,
11233 "PP: try ENTITY_DECL\n");break;
11234 case XML_PARSER_ENTITY_VALUE:
11235 xmlGenericError(xmlGenericErrorContext,
11236 "PP: try ENTITY_VALUE\n");break;
11237 case XML_PARSER_ATTRIBUTE_VALUE:
11238 xmlGenericError(xmlGenericErrorContext,
11239 "PP: try ATTRIBUTE_VALUE\n");break;
11240 case XML_PARSER_DTD:
11241 xmlGenericError(xmlGenericErrorContext,
11242 "PP: try DTD\n");break;
11243 case XML_PARSER_EPILOG:
11244 xmlGenericError(xmlGenericErrorContext,
11245 "PP: try EPILOG\n");break;
11246 case XML_PARSER_PI:
11247 xmlGenericError(xmlGenericErrorContext,
11248 "PP: try PI\n");break;
11249 case XML_PARSER_IGNORE:
11250 xmlGenericError(xmlGenericErrorContext,
11251 "PP: try IGNORE\n");break;
11253 #endif
11255 if ((ctxt->input != NULL) &&
11256 (ctxt->input->cur - ctxt->input->base > 4096)) {
11257 xmlSHRINK(ctxt);
11258 ctxt->checkIndex = 0;
11260 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11262 while (ctxt->instate != XML_PARSER_EOF) {
11263 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11264 return(0);
11266 if (ctxt->input == NULL) break;
11267 if (ctxt->input->buf == NULL)
11268 avail = ctxt->input->length -
11269 (ctxt->input->cur - ctxt->input->base);
11270 else {
11272 * If we are operating on converted input, try to flush
11273 * remaining chars to avoid them stalling in the non-converted
11274 * buffer. But do not do this in document start where
11275 * encoding="..." may not have been read and we work on a
11276 * guessed encoding.
11278 if ((ctxt->instate != XML_PARSER_START) &&
11279 (ctxt->input->buf->raw != NULL) &&
11280 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11281 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11282 ctxt->input);
11283 size_t current = ctxt->input->cur - ctxt->input->base;
11285 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11286 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11287 base, current);
11289 avail = xmlBufUse(ctxt->input->buf->buffer) -
11290 (ctxt->input->cur - ctxt->input->base);
11292 if (avail < 1)
11293 goto done;
11294 switch (ctxt->instate) {
11295 case XML_PARSER_EOF:
11297 * Document parsing is done !
11299 goto done;
11300 case XML_PARSER_START:
11301 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11302 xmlChar start[4];
11303 xmlCharEncoding enc;
11306 * Very first chars read from the document flow.
11308 if (avail < 4)
11309 goto done;
11312 * Get the 4 first bytes and decode the charset
11313 * if enc != XML_CHAR_ENCODING_NONE
11314 * plug some encoding conversion routines,
11315 * else xmlSwitchEncoding will set to (default)
11316 * UTF8.
11318 start[0] = RAW;
11319 start[1] = NXT(1);
11320 start[2] = NXT(2);
11321 start[3] = NXT(3);
11322 enc = xmlDetectCharEncoding(start, 4);
11323 xmlSwitchEncoding(ctxt, enc);
11324 break;
11327 if (avail < 2)
11328 goto done;
11329 cur = ctxt->input->cur[0];
11330 next = ctxt->input->cur[1];
11331 if (cur == 0) {
11332 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11333 ctxt->sax->setDocumentLocator(ctxt->userData,
11334 &xmlDefaultSAXLocator);
11335 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11336 xmlHaltParser(ctxt);
11337 #ifdef DEBUG_PUSH
11338 xmlGenericError(xmlGenericErrorContext,
11339 "PP: entering EOF\n");
11340 #endif
11341 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11342 ctxt->sax->endDocument(ctxt->userData);
11343 goto done;
11345 if ((cur == '<') && (next == '?')) {
11346 /* PI or XML decl */
11347 if (avail < 5) return(ret);
11348 if ((!terminate) &&
11349 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11350 return(ret);
11351 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11352 ctxt->sax->setDocumentLocator(ctxt->userData,
11353 &xmlDefaultSAXLocator);
11354 if ((ctxt->input->cur[2] == 'x') &&
11355 (ctxt->input->cur[3] == 'm') &&
11356 (ctxt->input->cur[4] == 'l') &&
11357 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11358 ret += 5;
11359 #ifdef DEBUG_PUSH
11360 xmlGenericError(xmlGenericErrorContext,
11361 "PP: Parsing XML Decl\n");
11362 #endif
11363 xmlParseXMLDecl(ctxt);
11364 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11366 * The XML REC instructs us to stop parsing right
11367 * here
11369 xmlHaltParser(ctxt);
11370 return(0);
11372 ctxt->standalone = ctxt->input->standalone;
11373 if ((ctxt->encoding == NULL) &&
11374 (ctxt->input->encoding != NULL))
11375 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11376 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11377 (!ctxt->disableSAX))
11378 ctxt->sax->startDocument(ctxt->userData);
11379 ctxt->instate = XML_PARSER_MISC;
11380 #ifdef DEBUG_PUSH
11381 xmlGenericError(xmlGenericErrorContext,
11382 "PP: entering MISC\n");
11383 #endif
11384 } else {
11385 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11386 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11387 (!ctxt->disableSAX))
11388 ctxt->sax->startDocument(ctxt->userData);
11389 ctxt->instate = XML_PARSER_MISC;
11390 #ifdef DEBUG_PUSH
11391 xmlGenericError(xmlGenericErrorContext,
11392 "PP: entering MISC\n");
11393 #endif
11395 } else {
11396 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11397 ctxt->sax->setDocumentLocator(ctxt->userData,
11398 &xmlDefaultSAXLocator);
11399 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11400 if (ctxt->version == NULL) {
11401 xmlErrMemory(ctxt, NULL);
11402 break;
11404 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11405 (!ctxt->disableSAX))
11406 ctxt->sax->startDocument(ctxt->userData);
11407 ctxt->instate = XML_PARSER_MISC;
11408 #ifdef DEBUG_PUSH
11409 xmlGenericError(xmlGenericErrorContext,
11410 "PP: entering MISC\n");
11411 #endif
11413 break;
11414 case XML_PARSER_START_TAG: {
11415 const xmlChar *name;
11416 const xmlChar *prefix = NULL;
11417 const xmlChar *URI = NULL;
11418 int line = ctxt->input->line;
11419 int nsNr = ctxt->nsNr;
11421 if ((avail < 2) && (ctxt->inputNr == 1))
11422 goto done;
11423 cur = ctxt->input->cur[0];
11424 if (cur != '<') {
11425 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11426 xmlHaltParser(ctxt);
11427 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11428 ctxt->sax->endDocument(ctxt->userData);
11429 goto done;
11431 if (!terminate) {
11432 if (ctxt->progressive) {
11433 /* > can be found unescaped in attribute values */
11434 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11435 goto done;
11436 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11437 goto done;
11440 if (ctxt->spaceNr == 0)
11441 spacePush(ctxt, -1);
11442 else if (*ctxt->space == -2)
11443 spacePush(ctxt, -1);
11444 else
11445 spacePush(ctxt, *ctxt->space);
11446 #ifdef LIBXML_SAX1_ENABLED
11447 if (ctxt->sax2)
11448 #endif /* LIBXML_SAX1_ENABLED */
11449 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11450 #ifdef LIBXML_SAX1_ENABLED
11451 else
11452 name = xmlParseStartTag(ctxt);
11453 #endif /* LIBXML_SAX1_ENABLED */
11454 if (ctxt->instate == XML_PARSER_EOF)
11455 goto done;
11456 if (name == NULL) {
11457 spacePop(ctxt);
11458 xmlHaltParser(ctxt);
11459 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11460 ctxt->sax->endDocument(ctxt->userData);
11461 goto done;
11463 #ifdef LIBXML_VALID_ENABLED
11465 * [ VC: Root Element Type ]
11466 * The Name in the document type declaration must match
11467 * the element type of the root element.
11469 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11470 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11471 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11472 #endif /* LIBXML_VALID_ENABLED */
11475 * Check for an Empty Element.
11477 if ((RAW == '/') && (NXT(1) == '>')) {
11478 SKIP(2);
11480 if (ctxt->sax2) {
11481 if ((ctxt->sax != NULL) &&
11482 (ctxt->sax->endElementNs != NULL) &&
11483 (!ctxt->disableSAX))
11484 ctxt->sax->endElementNs(ctxt->userData, name,
11485 prefix, URI);
11486 if (ctxt->nsNr - nsNr > 0)
11487 nsPop(ctxt, ctxt->nsNr - nsNr);
11488 #ifdef LIBXML_SAX1_ENABLED
11489 } else {
11490 if ((ctxt->sax != NULL) &&
11491 (ctxt->sax->endElement != NULL) &&
11492 (!ctxt->disableSAX))
11493 ctxt->sax->endElement(ctxt->userData, name);
11494 #endif /* LIBXML_SAX1_ENABLED */
11496 if (ctxt->instate == XML_PARSER_EOF)
11497 goto done;
11498 spacePop(ctxt);
11499 if (ctxt->nameNr == 0) {
11500 ctxt->instate = XML_PARSER_EPILOG;
11501 } else {
11502 ctxt->instate = XML_PARSER_CONTENT;
11504 ctxt->progressive = 1;
11505 break;
11507 if (RAW == '>') {
11508 NEXT;
11509 } else {
11510 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11511 "Couldn't find end of Start Tag %s\n",
11512 name);
11513 nodePop(ctxt);
11514 spacePop(ctxt);
11516 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11518 ctxt->instate = XML_PARSER_CONTENT;
11519 ctxt->progressive = 1;
11520 break;
11522 case XML_PARSER_CONTENT: {
11523 int id;
11524 unsigned long cons;
11525 if ((avail < 2) && (ctxt->inputNr == 1))
11526 goto done;
11527 cur = ctxt->input->cur[0];
11528 next = ctxt->input->cur[1];
11530 id = ctxt->input->id;
11531 cons = CUR_CONSUMED;
11532 if ((cur == '<') && (next == '/')) {
11533 ctxt->instate = XML_PARSER_END_TAG;
11534 break;
11535 } else if ((cur == '<') && (next == '?')) {
11536 if ((!terminate) &&
11537 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11538 ctxt->progressive = XML_PARSER_PI;
11539 goto done;
11541 xmlParsePI(ctxt);
11542 ctxt->instate = XML_PARSER_CONTENT;
11543 ctxt->progressive = 1;
11544 } else if ((cur == '<') && (next != '!')) {
11545 ctxt->instate = XML_PARSER_START_TAG;
11546 break;
11547 } else if ((cur == '<') && (next == '!') &&
11548 (ctxt->input->cur[2] == '-') &&
11549 (ctxt->input->cur[3] == '-')) {
11550 int term;
11552 if (avail < 4)
11553 goto done;
11554 ctxt->input->cur += 4;
11555 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11556 ctxt->input->cur -= 4;
11557 if ((!terminate) && (term < 0)) {
11558 ctxt->progressive = XML_PARSER_COMMENT;
11559 goto done;
11561 xmlParseComment(ctxt);
11562 ctxt->instate = XML_PARSER_CONTENT;
11563 ctxt->progressive = 1;
11564 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11565 (ctxt->input->cur[2] == '[') &&
11566 (ctxt->input->cur[3] == 'C') &&
11567 (ctxt->input->cur[4] == 'D') &&
11568 (ctxt->input->cur[5] == 'A') &&
11569 (ctxt->input->cur[6] == 'T') &&
11570 (ctxt->input->cur[7] == 'A') &&
11571 (ctxt->input->cur[8] == '[')) {
11572 SKIP(9);
11573 ctxt->instate = XML_PARSER_CDATA_SECTION;
11574 break;
11575 } else if ((cur == '<') && (next == '!') &&
11576 (avail < 9)) {
11577 goto done;
11578 } else if (cur == '&') {
11579 if ((!terminate) &&
11580 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11581 goto done;
11582 xmlParseReference(ctxt);
11583 } else {
11584 /* TODO Avoid the extra copy, handle directly !!! */
11586 * Goal of the following test is:
11587 * - minimize calls to the SAX 'character' callback
11588 * when they are mergeable
11589 * - handle an problem for isBlank when we only parse
11590 * a sequence of blank chars and the next one is
11591 * not available to check against '<' presence.
11592 * - tries to homogenize the differences in SAX
11593 * callbacks between the push and pull versions
11594 * of the parser.
11596 if ((ctxt->inputNr == 1) &&
11597 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11598 if (!terminate) {
11599 if (ctxt->progressive) {
11600 if ((lastlt == NULL) ||
11601 (ctxt->input->cur > lastlt))
11602 goto done;
11603 } else if (xmlParseLookupSequence(ctxt,
11604 '<', 0, 0) < 0) {
11605 goto done;
11609 ctxt->checkIndex = 0;
11610 xmlParseCharData(ctxt, 0);
11612 if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
11613 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11614 "detected an error in element content\n");
11615 xmlHaltParser(ctxt);
11616 break;
11618 break;
11620 case XML_PARSER_END_TAG:
11621 if (avail < 2)
11622 goto done;
11623 if (!terminate) {
11624 if (ctxt->progressive) {
11625 /* > can be found unescaped in attribute values */
11626 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11627 goto done;
11628 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11629 goto done;
11632 if (ctxt->sax2) {
11633 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11634 nameNsPop(ctxt);
11636 #ifdef LIBXML_SAX1_ENABLED
11637 else
11638 xmlParseEndTag1(ctxt, 0);
11639 #endif /* LIBXML_SAX1_ENABLED */
11640 if (ctxt->instate == XML_PARSER_EOF) {
11641 /* Nothing */
11642 } else if (ctxt->nameNr == 0) {
11643 ctxt->instate = XML_PARSER_EPILOG;
11644 } else {
11645 ctxt->instate = XML_PARSER_CONTENT;
11647 break;
11648 case XML_PARSER_CDATA_SECTION: {
11650 * The Push mode need to have the SAX callback for
11651 * cdataBlock merge back contiguous callbacks.
11653 int base;
11655 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11656 if (base < 0) {
11657 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11658 int tmp;
11660 tmp = xmlCheckCdataPush(ctxt->input->cur,
11661 XML_PARSER_BIG_BUFFER_SIZE, 0);
11662 if (tmp < 0) {
11663 tmp = -tmp;
11664 ctxt->input->cur += tmp;
11665 goto encoding_error;
11667 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11668 if (ctxt->sax->cdataBlock != NULL)
11669 ctxt->sax->cdataBlock(ctxt->userData,
11670 ctxt->input->cur, tmp);
11671 else if (ctxt->sax->characters != NULL)
11672 ctxt->sax->characters(ctxt->userData,
11673 ctxt->input->cur, tmp);
11675 if (ctxt->instate == XML_PARSER_EOF)
11676 goto done;
11677 SKIPL(tmp);
11678 ctxt->checkIndex = 0;
11680 goto done;
11681 } else {
11682 int tmp;
11684 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11685 if ((tmp < 0) || (tmp != base)) {
11686 tmp = -tmp;
11687 ctxt->input->cur += tmp;
11688 goto encoding_error;
11690 if ((ctxt->sax != NULL) && (base == 0) &&
11691 (ctxt->sax->cdataBlock != NULL) &&
11692 (!ctxt->disableSAX)) {
11694 * Special case to provide identical behaviour
11695 * between pull and push parsers on enpty CDATA
11696 * sections
11698 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11699 (!strncmp((const char *)&ctxt->input->cur[-9],
11700 "<![CDATA[", 9)))
11701 ctxt->sax->cdataBlock(ctxt->userData,
11702 BAD_CAST "", 0);
11703 } else if ((ctxt->sax != NULL) && (base > 0) &&
11704 (!ctxt->disableSAX)) {
11705 if (ctxt->sax->cdataBlock != NULL)
11706 ctxt->sax->cdataBlock(ctxt->userData,
11707 ctxt->input->cur, base);
11708 else if (ctxt->sax->characters != NULL)
11709 ctxt->sax->characters(ctxt->userData,
11710 ctxt->input->cur, base);
11712 if (ctxt->instate == XML_PARSER_EOF)
11713 goto done;
11714 SKIPL(base + 3);
11715 ctxt->checkIndex = 0;
11716 ctxt->instate = XML_PARSER_CONTENT;
11717 #ifdef DEBUG_PUSH
11718 xmlGenericError(xmlGenericErrorContext,
11719 "PP: entering CONTENT\n");
11720 #endif
11722 break;
11724 case XML_PARSER_MISC:
11725 SKIP_BLANKS;
11726 if (ctxt->input->buf == NULL)
11727 avail = ctxt->input->length -
11728 (ctxt->input->cur - ctxt->input->base);
11729 else
11730 avail = xmlBufUse(ctxt->input->buf->buffer) -
11731 (ctxt->input->cur - ctxt->input->base);
11732 if (avail < 2)
11733 goto done;
11734 cur = ctxt->input->cur[0];
11735 next = ctxt->input->cur[1];
11736 if ((cur == '<') && (next == '?')) {
11737 if ((!terminate) &&
11738 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11739 ctxt->progressive = XML_PARSER_PI;
11740 goto done;
11742 #ifdef DEBUG_PUSH
11743 xmlGenericError(xmlGenericErrorContext,
11744 "PP: Parsing PI\n");
11745 #endif
11746 xmlParsePI(ctxt);
11747 if (ctxt->instate == XML_PARSER_EOF)
11748 goto done;
11749 ctxt->instate = XML_PARSER_MISC;
11750 ctxt->progressive = 1;
11751 ctxt->checkIndex = 0;
11752 } else if ((cur == '<') && (next == '!') &&
11753 (ctxt->input->cur[2] == '-') &&
11754 (ctxt->input->cur[3] == '-')) {
11755 if ((!terminate) &&
11756 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11757 ctxt->progressive = XML_PARSER_COMMENT;
11758 goto done;
11760 #ifdef DEBUG_PUSH
11761 xmlGenericError(xmlGenericErrorContext,
11762 "PP: Parsing Comment\n");
11763 #endif
11764 xmlParseComment(ctxt);
11765 if (ctxt->instate == XML_PARSER_EOF)
11766 goto done;
11767 ctxt->instate = XML_PARSER_MISC;
11768 ctxt->progressive = 1;
11769 ctxt->checkIndex = 0;
11770 } else if ((cur == '<') && (next == '!') &&
11771 (ctxt->input->cur[2] == 'D') &&
11772 (ctxt->input->cur[3] == 'O') &&
11773 (ctxt->input->cur[4] == 'C') &&
11774 (ctxt->input->cur[5] == 'T') &&
11775 (ctxt->input->cur[6] == 'Y') &&
11776 (ctxt->input->cur[7] == 'P') &&
11777 (ctxt->input->cur[8] == 'E')) {
11778 if ((!terminate) &&
11779 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11780 ctxt->progressive = XML_PARSER_DTD;
11781 goto done;
11783 #ifdef DEBUG_PUSH
11784 xmlGenericError(xmlGenericErrorContext,
11785 "PP: Parsing internal subset\n");
11786 #endif
11787 ctxt->inSubset = 1;
11788 ctxt->progressive = 0;
11789 ctxt->checkIndex = 0;
11790 xmlParseDocTypeDecl(ctxt);
11791 if (ctxt->instate == XML_PARSER_EOF)
11792 goto done;
11793 if (RAW == '[') {
11794 ctxt->instate = XML_PARSER_DTD;
11795 #ifdef DEBUG_PUSH
11796 xmlGenericError(xmlGenericErrorContext,
11797 "PP: entering DTD\n");
11798 #endif
11799 } else {
11801 * Create and update the external subset.
11803 ctxt->inSubset = 2;
11804 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11805 (ctxt->sax->externalSubset != NULL))
11806 ctxt->sax->externalSubset(ctxt->userData,
11807 ctxt->intSubName, ctxt->extSubSystem,
11808 ctxt->extSubURI);
11809 ctxt->inSubset = 0;
11810 xmlCleanSpecialAttr(ctxt);
11811 ctxt->instate = XML_PARSER_PROLOG;
11812 #ifdef DEBUG_PUSH
11813 xmlGenericError(xmlGenericErrorContext,
11814 "PP: entering PROLOG\n");
11815 #endif
11817 } else if ((cur == '<') && (next == '!') &&
11818 (avail < 9)) {
11819 goto done;
11820 } else {
11821 ctxt->instate = XML_PARSER_START_TAG;
11822 ctxt->progressive = XML_PARSER_START_TAG;
11823 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11824 #ifdef DEBUG_PUSH
11825 xmlGenericError(xmlGenericErrorContext,
11826 "PP: entering START_TAG\n");
11827 #endif
11829 break;
11830 case XML_PARSER_PROLOG:
11831 SKIP_BLANKS;
11832 if (ctxt->input->buf == NULL)
11833 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11834 else
11835 avail = xmlBufUse(ctxt->input->buf->buffer) -
11836 (ctxt->input->cur - ctxt->input->base);
11837 if (avail < 2)
11838 goto done;
11839 cur = ctxt->input->cur[0];
11840 next = ctxt->input->cur[1];
11841 if ((cur == '<') && (next == '?')) {
11842 if ((!terminate) &&
11843 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11844 ctxt->progressive = XML_PARSER_PI;
11845 goto done;
11847 #ifdef DEBUG_PUSH
11848 xmlGenericError(xmlGenericErrorContext,
11849 "PP: Parsing PI\n");
11850 #endif
11851 xmlParsePI(ctxt);
11852 if (ctxt->instate == XML_PARSER_EOF)
11853 goto done;
11854 ctxt->instate = XML_PARSER_PROLOG;
11855 ctxt->progressive = 1;
11856 } else if ((cur == '<') && (next == '!') &&
11857 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11858 if ((!terminate) &&
11859 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11860 ctxt->progressive = XML_PARSER_COMMENT;
11861 goto done;
11863 #ifdef DEBUG_PUSH
11864 xmlGenericError(xmlGenericErrorContext,
11865 "PP: Parsing Comment\n");
11866 #endif
11867 xmlParseComment(ctxt);
11868 if (ctxt->instate == XML_PARSER_EOF)
11869 goto done;
11870 ctxt->instate = XML_PARSER_PROLOG;
11871 ctxt->progressive = 1;
11872 } else if ((cur == '<') && (next == '!') &&
11873 (avail < 4)) {
11874 goto done;
11875 } else {
11876 ctxt->instate = XML_PARSER_START_TAG;
11877 if (ctxt->progressive == 0)
11878 ctxt->progressive = XML_PARSER_START_TAG;
11879 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11880 #ifdef DEBUG_PUSH
11881 xmlGenericError(xmlGenericErrorContext,
11882 "PP: entering START_TAG\n");
11883 #endif
11885 break;
11886 case XML_PARSER_EPILOG:
11887 SKIP_BLANKS;
11888 if (ctxt->input->buf == NULL)
11889 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11890 else
11891 avail = xmlBufUse(ctxt->input->buf->buffer) -
11892 (ctxt->input->cur - ctxt->input->base);
11893 if (avail < 2)
11894 goto done;
11895 cur = ctxt->input->cur[0];
11896 next = ctxt->input->cur[1];
11897 if ((cur == '<') && (next == '?')) {
11898 if ((!terminate) &&
11899 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11900 ctxt->progressive = XML_PARSER_PI;
11901 goto done;
11903 #ifdef DEBUG_PUSH
11904 xmlGenericError(xmlGenericErrorContext,
11905 "PP: Parsing PI\n");
11906 #endif
11907 xmlParsePI(ctxt);
11908 if (ctxt->instate == XML_PARSER_EOF)
11909 goto done;
11910 ctxt->instate = XML_PARSER_EPILOG;
11911 ctxt->progressive = 1;
11912 } else if ((cur == '<') && (next == '!') &&
11913 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11914 if ((!terminate) &&
11915 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11916 ctxt->progressive = XML_PARSER_COMMENT;
11917 goto done;
11919 #ifdef DEBUG_PUSH
11920 xmlGenericError(xmlGenericErrorContext,
11921 "PP: Parsing Comment\n");
11922 #endif
11923 xmlParseComment(ctxt);
11924 if (ctxt->instate == XML_PARSER_EOF)
11925 goto done;
11926 ctxt->instate = XML_PARSER_EPILOG;
11927 ctxt->progressive = 1;
11928 } else if ((cur == '<') && (next == '!') &&
11929 (avail < 4)) {
11930 goto done;
11931 } else {
11932 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11933 xmlHaltParser(ctxt);
11934 #ifdef DEBUG_PUSH
11935 xmlGenericError(xmlGenericErrorContext,
11936 "PP: entering EOF\n");
11937 #endif
11938 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11939 ctxt->sax->endDocument(ctxt->userData);
11940 goto done;
11942 break;
11943 case XML_PARSER_DTD: {
11945 * Sorry but progressive parsing of the internal subset
11946 * is not expected to be supported. We first check that
11947 * the full content of the internal subset is available and
11948 * the parsing is launched only at that point.
11949 * Internal subset ends up with "']' S? '>'" in an unescaped
11950 * section and not in a ']]>' sequence which are conditional
11951 * sections (whoever argued to keep that crap in XML deserve
11952 * a place in hell !).
11954 int base, i;
11955 xmlChar *buf;
11956 xmlChar quote = 0;
11957 size_t use;
11959 base = ctxt->input->cur - ctxt->input->base;
11960 if (base < 0) return(0);
11961 if (ctxt->checkIndex > base)
11962 base = ctxt->checkIndex;
11963 buf = xmlBufContent(ctxt->input->buf->buffer);
11964 use = xmlBufUse(ctxt->input->buf->buffer);
11965 for (;(unsigned int) base < use; base++) {
11966 if (quote != 0) {
11967 if (buf[base] == quote)
11968 quote = 0;
11969 continue;
11971 if ((quote == 0) && (buf[base] == '<')) {
11972 int found = 0;
11973 /* special handling of comments */
11974 if (((unsigned int) base + 4 < use) &&
11975 (buf[base + 1] == '!') &&
11976 (buf[base + 2] == '-') &&
11977 (buf[base + 3] == '-')) {
11978 for (;(unsigned int) base + 3 < use; base++) {
11979 if ((buf[base] == '-') &&
11980 (buf[base + 1] == '-') &&
11981 (buf[base + 2] == '>')) {
11982 found = 1;
11983 base += 2;
11984 break;
11987 if (!found) {
11988 #if 0
11989 fprintf(stderr, "unfinished comment\n");
11990 #endif
11991 break; /* for */
11993 continue;
11996 if (buf[base] == '"') {
11997 quote = '"';
11998 continue;
12000 if (buf[base] == '\'') {
12001 quote = '\'';
12002 continue;
12004 if (buf[base] == ']') {
12005 #if 0
12006 fprintf(stderr, "%c%c%c%c: ", buf[base],
12007 buf[base + 1], buf[base + 2], buf[base + 3]);
12008 #endif
12009 if ((unsigned int) base +1 >= use)
12010 break;
12011 if (buf[base + 1] == ']') {
12012 /* conditional crap, skip both ']' ! */
12013 base++;
12014 continue;
12016 for (i = 1; (unsigned int) base + i < use; i++) {
12017 if (buf[base + i] == '>') {
12018 #if 0
12019 fprintf(stderr, "found\n");
12020 #endif
12021 goto found_end_int_subset;
12023 if (!IS_BLANK_CH(buf[base + i])) {
12024 #if 0
12025 fprintf(stderr, "not found\n");
12026 #endif
12027 goto not_end_of_int_subset;
12030 #if 0
12031 fprintf(stderr, "end of stream\n");
12032 #endif
12033 break;
12036 not_end_of_int_subset:
12037 continue; /* for */
12040 * We didn't found the end of the Internal subset
12042 if (quote == 0)
12043 ctxt->checkIndex = base;
12044 else
12045 ctxt->checkIndex = 0;
12046 #ifdef DEBUG_PUSH
12047 if (next == 0)
12048 xmlGenericError(xmlGenericErrorContext,
12049 "PP: lookup of int subset end filed\n");
12050 #endif
12051 goto done;
12053 found_end_int_subset:
12054 ctxt->checkIndex = 0;
12055 xmlParseInternalSubset(ctxt);
12056 if (ctxt->instate == XML_PARSER_EOF)
12057 goto done;
12058 ctxt->inSubset = 2;
12059 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12060 (ctxt->sax->externalSubset != NULL))
12061 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12062 ctxt->extSubSystem, ctxt->extSubURI);
12063 ctxt->inSubset = 0;
12064 xmlCleanSpecialAttr(ctxt);
12065 if (ctxt->instate == XML_PARSER_EOF)
12066 goto done;
12067 ctxt->instate = XML_PARSER_PROLOG;
12068 ctxt->checkIndex = 0;
12069 #ifdef DEBUG_PUSH
12070 xmlGenericError(xmlGenericErrorContext,
12071 "PP: entering PROLOG\n");
12072 #endif
12073 break;
12075 case XML_PARSER_COMMENT:
12076 xmlGenericError(xmlGenericErrorContext,
12077 "PP: internal error, state == COMMENT\n");
12078 ctxt->instate = XML_PARSER_CONTENT;
12079 #ifdef DEBUG_PUSH
12080 xmlGenericError(xmlGenericErrorContext,
12081 "PP: entering CONTENT\n");
12082 #endif
12083 break;
12084 case XML_PARSER_IGNORE:
12085 xmlGenericError(xmlGenericErrorContext,
12086 "PP: internal error, state == IGNORE");
12087 ctxt->instate = XML_PARSER_DTD;
12088 #ifdef DEBUG_PUSH
12089 xmlGenericError(xmlGenericErrorContext,
12090 "PP: entering DTD\n");
12091 #endif
12092 break;
12093 case XML_PARSER_PI:
12094 xmlGenericError(xmlGenericErrorContext,
12095 "PP: internal error, state == PI\n");
12096 ctxt->instate = XML_PARSER_CONTENT;
12097 #ifdef DEBUG_PUSH
12098 xmlGenericError(xmlGenericErrorContext,
12099 "PP: entering CONTENT\n");
12100 #endif
12101 break;
12102 case XML_PARSER_ENTITY_DECL:
12103 xmlGenericError(xmlGenericErrorContext,
12104 "PP: internal error, state == ENTITY_DECL\n");
12105 ctxt->instate = XML_PARSER_DTD;
12106 #ifdef DEBUG_PUSH
12107 xmlGenericError(xmlGenericErrorContext,
12108 "PP: entering DTD\n");
12109 #endif
12110 break;
12111 case XML_PARSER_ENTITY_VALUE:
12112 xmlGenericError(xmlGenericErrorContext,
12113 "PP: internal error, state == ENTITY_VALUE\n");
12114 ctxt->instate = XML_PARSER_CONTENT;
12115 #ifdef DEBUG_PUSH
12116 xmlGenericError(xmlGenericErrorContext,
12117 "PP: entering DTD\n");
12118 #endif
12119 break;
12120 case XML_PARSER_ATTRIBUTE_VALUE:
12121 xmlGenericError(xmlGenericErrorContext,
12122 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12123 ctxt->instate = XML_PARSER_START_TAG;
12124 #ifdef DEBUG_PUSH
12125 xmlGenericError(xmlGenericErrorContext,
12126 "PP: entering START_TAG\n");
12127 #endif
12128 break;
12129 case XML_PARSER_SYSTEM_LITERAL:
12130 xmlGenericError(xmlGenericErrorContext,
12131 "PP: internal error, state == SYSTEM_LITERAL\n");
12132 ctxt->instate = XML_PARSER_START_TAG;
12133 #ifdef DEBUG_PUSH
12134 xmlGenericError(xmlGenericErrorContext,
12135 "PP: entering START_TAG\n");
12136 #endif
12137 break;
12138 case XML_PARSER_PUBLIC_LITERAL:
12139 xmlGenericError(xmlGenericErrorContext,
12140 "PP: internal error, state == PUBLIC_LITERAL\n");
12141 ctxt->instate = XML_PARSER_START_TAG;
12142 #ifdef DEBUG_PUSH
12143 xmlGenericError(xmlGenericErrorContext,
12144 "PP: entering START_TAG\n");
12145 #endif
12146 break;
12149 done:
12150 #ifdef DEBUG_PUSH
12151 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12152 #endif
12153 return(ret);
12154 encoding_error:
12156 char buffer[150];
12158 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12159 ctxt->input->cur[0], ctxt->input->cur[1],
12160 ctxt->input->cur[2], ctxt->input->cur[3]);
12161 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12162 "Input is not proper UTF-8, indicate encoding !\n%s",
12163 BAD_CAST buffer, NULL);
12165 return(0);
12169 * xmlParseCheckTransition:
12170 * @ctxt: an XML parser context
12171 * @chunk: a char array
12172 * @size: the size in byte of the chunk
12174 * Check depending on the current parser state if the chunk given must be
12175 * processed immediately or one need more data to advance on parsing.
12177 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12179 static int
12180 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12181 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12182 return(-1);
12183 if (ctxt->instate == XML_PARSER_START_TAG) {
12184 if (memchr(chunk, '>', size) != NULL)
12185 return(1);
12186 return(0);
12188 if (ctxt->progressive == XML_PARSER_COMMENT) {
12189 if (memchr(chunk, '>', size) != NULL)
12190 return(1);
12191 return(0);
12193 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12194 if (memchr(chunk, '>', size) != NULL)
12195 return(1);
12196 return(0);
12198 if (ctxt->progressive == XML_PARSER_PI) {
12199 if (memchr(chunk, '>', size) != NULL)
12200 return(1);
12201 return(0);
12203 if (ctxt->instate == XML_PARSER_END_TAG) {
12204 if (memchr(chunk, '>', size) != NULL)
12205 return(1);
12206 return(0);
12208 if ((ctxt->progressive == XML_PARSER_DTD) ||
12209 (ctxt->instate == XML_PARSER_DTD)) {
12210 if (memchr(chunk, '>', size) != NULL)
12211 return(1);
12212 return(0);
12214 return(1);
12218 * xmlParseChunk:
12219 * @ctxt: an XML parser context
12220 * @chunk: an char array
12221 * @size: the size in byte of the chunk
12222 * @terminate: last chunk indicator
12224 * Parse a Chunk of memory
12226 * Returns zero if no error, the xmlParserErrors otherwise.
12229 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12230 int terminate) {
12231 int end_in_lf = 0;
12232 int remain = 0;
12233 size_t old_avail = 0;
12234 size_t avail = 0;
12236 if (ctxt == NULL)
12237 return(XML_ERR_INTERNAL_ERROR);
12238 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12239 return(ctxt->errNo);
12240 if (ctxt->instate == XML_PARSER_EOF)
12241 return(-1);
12242 if (ctxt->instate == XML_PARSER_START)
12243 xmlDetectSAX2(ctxt);
12244 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12245 (chunk[size - 1] == '\r')) {
12246 end_in_lf = 1;
12247 size--;
12250 xmldecl_done:
12252 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12253 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12254 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12255 size_t cur = ctxt->input->cur - ctxt->input->base;
12256 int res;
12258 old_avail = xmlBufUse(ctxt->input->buf->buffer);
12260 * Specific handling if we autodetected an encoding, we should not
12261 * push more than the first line ... which depend on the encoding
12262 * And only push the rest once the final encoding was detected
12264 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12265 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12266 unsigned int len = 45;
12268 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12269 BAD_CAST "UTF-16")) ||
12270 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12271 BAD_CAST "UTF16")))
12272 len = 90;
12273 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12274 BAD_CAST "UCS-4")) ||
12275 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12276 BAD_CAST "UCS4")))
12277 len = 180;
12279 if (ctxt->input->buf->rawconsumed < len)
12280 len -= ctxt->input->buf->rawconsumed;
12283 * Change size for reading the initial declaration only
12284 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12285 * will blindly copy extra bytes from memory.
12287 if ((unsigned int) size > len) {
12288 remain = size - len;
12289 size = len;
12290 } else {
12291 remain = 0;
12294 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12295 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12296 if (res < 0) {
12297 ctxt->errNo = XML_PARSER_EOF;
12298 xmlHaltParser(ctxt);
12299 return (XML_PARSER_EOF);
12301 #ifdef DEBUG_PUSH
12302 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12303 #endif
12305 } else if (ctxt->instate != XML_PARSER_EOF) {
12306 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12307 xmlParserInputBufferPtr in = ctxt->input->buf;
12308 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12309 (in->raw != NULL)) {
12310 int nbchars;
12311 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12312 size_t current = ctxt->input->cur - ctxt->input->base;
12314 nbchars = xmlCharEncInput(in, terminate);
12315 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12316 if (nbchars < 0) {
12317 /* TODO 2.6.0 */
12318 xmlGenericError(xmlGenericErrorContext,
12319 "xmlParseChunk: encoder error\n");
12320 xmlHaltParser(ctxt);
12321 return(XML_ERR_INVALID_ENCODING);
12326 if (remain != 0) {
12327 xmlParseTryOrFinish(ctxt, 0);
12328 } else {
12329 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12330 avail = xmlBufUse(ctxt->input->buf->buffer);
12332 * Depending on the current state it may not be such
12333 * a good idea to try parsing if there is nothing in the chunk
12334 * which would be worth doing a parser state transition and we
12335 * need to wait for more data
12337 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12338 (old_avail == 0) || (avail == 0) ||
12339 (xmlParseCheckTransition(ctxt,
12340 (const char *)&ctxt->input->base[old_avail],
12341 avail - old_avail)))
12342 xmlParseTryOrFinish(ctxt, terminate);
12344 if (ctxt->instate == XML_PARSER_EOF)
12345 return(ctxt->errNo);
12347 if ((ctxt->input != NULL) &&
12348 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12349 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12350 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12351 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12352 xmlHaltParser(ctxt);
12354 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12355 return(ctxt->errNo);
12357 if (remain != 0) {
12358 chunk += size;
12359 size = remain;
12360 remain = 0;
12361 goto xmldecl_done;
12363 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12364 (ctxt->input->buf != NULL)) {
12365 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12366 ctxt->input);
12367 size_t current = ctxt->input->cur - ctxt->input->base;
12369 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12371 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12372 base, current);
12374 if (terminate) {
12376 * Check for termination
12378 int cur_avail = 0;
12380 if (ctxt->input != NULL) {
12381 if (ctxt->input->buf == NULL)
12382 cur_avail = ctxt->input->length -
12383 (ctxt->input->cur - ctxt->input->base);
12384 else
12385 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12386 (ctxt->input->cur - ctxt->input->base);
12389 if ((ctxt->instate != XML_PARSER_EOF) &&
12390 (ctxt->instate != XML_PARSER_EPILOG)) {
12391 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12393 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12394 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12396 if (ctxt->instate != XML_PARSER_EOF) {
12397 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12398 ctxt->sax->endDocument(ctxt->userData);
12400 ctxt->instate = XML_PARSER_EOF;
12402 if (ctxt->wellFormed == 0)
12403 return((xmlParserErrors) ctxt->errNo);
12404 else
12405 return(0);
12408 /************************************************************************
12410 * I/O front end functions to the parser *
12412 ************************************************************************/
12415 * xmlCreatePushParserCtxt:
12416 * @sax: a SAX handler
12417 * @user_data: The user data returned on SAX callbacks
12418 * @chunk: a pointer to an array of chars
12419 * @size: number of chars in the array
12420 * @filename: an optional file name or URI
12422 * Create a parser context for using the XML parser in push mode.
12423 * If @buffer and @size are non-NULL, the data is used to detect
12424 * the encoding. The remaining characters will be parsed so they
12425 * don't need to be fed in again through xmlParseChunk.
12426 * To allow content encoding detection, @size should be >= 4
12427 * The value of @filename is used for fetching external entities
12428 * and error/warning reports.
12430 * Returns the new parser context or NULL
12433 xmlParserCtxtPtr
12434 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12435 const char *chunk, int size, const char *filename) {
12436 xmlParserCtxtPtr ctxt;
12437 xmlParserInputPtr inputStream;
12438 xmlParserInputBufferPtr buf;
12439 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12442 * plug some encoding conversion routines
12444 if ((chunk != NULL) && (size >= 4))
12445 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12447 buf = xmlAllocParserInputBuffer(enc);
12448 if (buf == NULL) return(NULL);
12450 ctxt = xmlNewParserCtxt();
12451 if (ctxt == NULL) {
12452 xmlErrMemory(NULL, "creating parser: out of memory\n");
12453 xmlFreeParserInputBuffer(buf);
12454 return(NULL);
12456 ctxt->dictNames = 1;
12457 if (sax != NULL) {
12458 #ifdef LIBXML_SAX1_ENABLED
12459 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12460 #endif /* LIBXML_SAX1_ENABLED */
12461 xmlFree(ctxt->sax);
12462 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12463 if (ctxt->sax == NULL) {
12464 xmlErrMemory(ctxt, NULL);
12465 xmlFreeParserInputBuffer(buf);
12466 xmlFreeParserCtxt(ctxt);
12467 return(NULL);
12469 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12470 if (sax->initialized == XML_SAX2_MAGIC)
12471 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12472 else
12473 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12474 if (user_data != NULL)
12475 ctxt->userData = user_data;
12477 if (filename == NULL) {
12478 ctxt->directory = NULL;
12479 } else {
12480 ctxt->directory = xmlParserGetDirectory(filename);
12483 inputStream = xmlNewInputStream(ctxt);
12484 if (inputStream == NULL) {
12485 xmlFreeParserCtxt(ctxt);
12486 xmlFreeParserInputBuffer(buf);
12487 return(NULL);
12490 if (filename == NULL)
12491 inputStream->filename = NULL;
12492 else {
12493 inputStream->filename = (char *)
12494 xmlCanonicPath((const xmlChar *) filename);
12495 if (inputStream->filename == NULL) {
12496 xmlFreeParserCtxt(ctxt);
12497 xmlFreeParserInputBuffer(buf);
12498 return(NULL);
12501 inputStream->buf = buf;
12502 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12503 inputPush(ctxt, inputStream);
12506 * If the caller didn't provide an initial 'chunk' for determining
12507 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12508 * that it can be automatically determined later
12510 if ((size == 0) || (chunk == NULL)) {
12511 ctxt->charset = XML_CHAR_ENCODING_NONE;
12512 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12513 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12514 size_t cur = ctxt->input->cur - ctxt->input->base;
12516 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12518 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12519 #ifdef DEBUG_PUSH
12520 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12521 #endif
12524 if (enc != XML_CHAR_ENCODING_NONE) {
12525 xmlSwitchEncoding(ctxt, enc);
12528 return(ctxt);
12530 #endif /* LIBXML_PUSH_ENABLED */
12533 * xmlHaltParser:
12534 * @ctxt: an XML parser context
12536 * Blocks further parser processing don't override error
12537 * for internal use
12539 static void
12540 xmlHaltParser(xmlParserCtxtPtr ctxt) {
12541 if (ctxt == NULL)
12542 return;
12543 ctxt->instate = XML_PARSER_EOF;
12544 ctxt->disableSAX = 1;
12545 while (ctxt->inputNr > 1)
12546 xmlFreeInputStream(inputPop(ctxt));
12547 if (ctxt->input != NULL) {
12549 * in case there was a specific allocation deallocate before
12550 * overriding base
12552 if (ctxt->input->free != NULL) {
12553 ctxt->input->free((xmlChar *) ctxt->input->base);
12554 ctxt->input->free = NULL;
12556 if (ctxt->input->buf != NULL) {
12557 xmlFreeParserInputBuffer(ctxt->input->buf);
12558 ctxt->input->buf = NULL;
12560 ctxt->input->cur = BAD_CAST"";
12561 ctxt->input->length = 0;
12562 ctxt->input->base = ctxt->input->cur;
12563 ctxt->input->end = ctxt->input->cur;
12568 * xmlStopParser:
12569 * @ctxt: an XML parser context
12571 * Blocks further parser processing
12573 void
12574 xmlStopParser(xmlParserCtxtPtr ctxt) {
12575 if (ctxt == NULL)
12576 return;
12577 xmlHaltParser(ctxt);
12578 ctxt->errNo = XML_ERR_USER_STOP;
12582 * xmlCreateIOParserCtxt:
12583 * @sax: a SAX handler
12584 * @user_data: The user data returned on SAX callbacks
12585 * @ioread: an I/O read function
12586 * @ioclose: an I/O close function
12587 * @ioctx: an I/O handler
12588 * @enc: the charset encoding if known
12590 * Create a parser context for using the XML parser with an existing
12591 * I/O stream
12593 * Returns the new parser context or NULL
12595 xmlParserCtxtPtr
12596 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12597 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12598 void *ioctx, xmlCharEncoding enc) {
12599 xmlParserCtxtPtr ctxt;
12600 xmlParserInputPtr inputStream;
12601 xmlParserInputBufferPtr buf;
12603 if (ioread == NULL) return(NULL);
12605 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12606 if (buf == NULL) {
12607 if (ioclose != NULL)
12608 ioclose(ioctx);
12609 return (NULL);
12612 ctxt = xmlNewParserCtxt();
12613 if (ctxt == NULL) {
12614 xmlFreeParserInputBuffer(buf);
12615 return(NULL);
12617 if (sax != NULL) {
12618 #ifdef LIBXML_SAX1_ENABLED
12619 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12620 #endif /* LIBXML_SAX1_ENABLED */
12621 xmlFree(ctxt->sax);
12622 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12623 if (ctxt->sax == NULL) {
12624 xmlFreeParserInputBuffer(buf);
12625 xmlErrMemory(ctxt, NULL);
12626 xmlFreeParserCtxt(ctxt);
12627 return(NULL);
12629 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12630 if (sax->initialized == XML_SAX2_MAGIC)
12631 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12632 else
12633 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12634 if (user_data != NULL)
12635 ctxt->userData = user_data;
12638 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12639 if (inputStream == NULL) {
12640 xmlFreeParserCtxt(ctxt);
12641 return(NULL);
12643 inputPush(ctxt, inputStream);
12645 return(ctxt);
12648 #ifdef LIBXML_VALID_ENABLED
12649 /************************************************************************
12651 * Front ends when parsing a DTD *
12653 ************************************************************************/
12656 * xmlIOParseDTD:
12657 * @sax: the SAX handler block or NULL
12658 * @input: an Input Buffer
12659 * @enc: the charset encoding if known
12661 * Load and parse a DTD
12663 * Returns the resulting xmlDtdPtr or NULL in case of error.
12664 * @input will be freed by the function in any case.
12667 xmlDtdPtr
12668 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12669 xmlCharEncoding enc) {
12670 xmlDtdPtr ret = NULL;
12671 xmlParserCtxtPtr ctxt;
12672 xmlParserInputPtr pinput = NULL;
12673 xmlChar start[4];
12675 if (input == NULL)
12676 return(NULL);
12678 ctxt = xmlNewParserCtxt();
12679 if (ctxt == NULL) {
12680 xmlFreeParserInputBuffer(input);
12681 return(NULL);
12684 /* We are loading a DTD */
12685 ctxt->options |= XML_PARSE_DTDLOAD;
12688 * Set-up the SAX context
12690 if (sax != NULL) {
12691 if (ctxt->sax != NULL)
12692 xmlFree(ctxt->sax);
12693 ctxt->sax = sax;
12694 ctxt->userData = ctxt;
12696 xmlDetectSAX2(ctxt);
12699 * generate a parser input from the I/O handler
12702 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12703 if (pinput == NULL) {
12704 if (sax != NULL) ctxt->sax = NULL;
12705 xmlFreeParserInputBuffer(input);
12706 xmlFreeParserCtxt(ctxt);
12707 return(NULL);
12711 * plug some encoding conversion routines here.
12713 if (xmlPushInput(ctxt, pinput) < 0) {
12714 if (sax != NULL) ctxt->sax = NULL;
12715 xmlFreeParserCtxt(ctxt);
12716 return(NULL);
12718 if (enc != XML_CHAR_ENCODING_NONE) {
12719 xmlSwitchEncoding(ctxt, enc);
12722 pinput->filename = NULL;
12723 pinput->line = 1;
12724 pinput->col = 1;
12725 pinput->base = ctxt->input->cur;
12726 pinput->cur = ctxt->input->cur;
12727 pinput->free = NULL;
12730 * let's parse that entity knowing it's an external subset.
12732 ctxt->inSubset = 2;
12733 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12734 if (ctxt->myDoc == NULL) {
12735 xmlErrMemory(ctxt, "New Doc failed");
12736 return(NULL);
12738 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12739 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12740 BAD_CAST "none", BAD_CAST "none");
12742 if ((enc == XML_CHAR_ENCODING_NONE) &&
12743 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12745 * Get the 4 first bytes and decode the charset
12746 * if enc != XML_CHAR_ENCODING_NONE
12747 * plug some encoding conversion routines.
12749 start[0] = RAW;
12750 start[1] = NXT(1);
12751 start[2] = NXT(2);
12752 start[3] = NXT(3);
12753 enc = xmlDetectCharEncoding(start, 4);
12754 if (enc != XML_CHAR_ENCODING_NONE) {
12755 xmlSwitchEncoding(ctxt, enc);
12759 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12761 if (ctxt->myDoc != NULL) {
12762 if (ctxt->wellFormed) {
12763 ret = ctxt->myDoc->extSubset;
12764 ctxt->myDoc->extSubset = NULL;
12765 if (ret != NULL) {
12766 xmlNodePtr tmp;
12768 ret->doc = NULL;
12769 tmp = ret->children;
12770 while (tmp != NULL) {
12771 tmp->doc = NULL;
12772 tmp = tmp->next;
12775 } else {
12776 ret = NULL;
12778 xmlFreeDoc(ctxt->myDoc);
12779 ctxt->myDoc = NULL;
12781 if (sax != NULL) ctxt->sax = NULL;
12782 xmlFreeParserCtxt(ctxt);
12784 return(ret);
12788 * xmlSAXParseDTD:
12789 * @sax: the SAX handler block
12790 * @ExternalID: a NAME* containing the External ID of the DTD
12791 * @SystemID: a NAME* containing the URL to the DTD
12793 * Load and parse an external subset.
12795 * Returns the resulting xmlDtdPtr or NULL in case of error.
12798 xmlDtdPtr
12799 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12800 const xmlChar *SystemID) {
12801 xmlDtdPtr ret = NULL;
12802 xmlParserCtxtPtr ctxt;
12803 xmlParserInputPtr input = NULL;
12804 xmlCharEncoding enc;
12805 xmlChar* systemIdCanonic;
12807 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12809 ctxt = xmlNewParserCtxt();
12810 if (ctxt == NULL) {
12811 return(NULL);
12814 /* We are loading a DTD */
12815 ctxt->options |= XML_PARSE_DTDLOAD;
12818 * Set-up the SAX context
12820 if (sax != NULL) {
12821 if (ctxt->sax != NULL)
12822 xmlFree(ctxt->sax);
12823 ctxt->sax = sax;
12824 ctxt->userData = ctxt;
12828 * Canonicalise the system ID
12830 systemIdCanonic = xmlCanonicPath(SystemID);
12831 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12832 xmlFreeParserCtxt(ctxt);
12833 return(NULL);
12837 * Ask the Entity resolver to load the damn thing
12840 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12841 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12842 systemIdCanonic);
12843 if (input == NULL) {
12844 if (sax != NULL) ctxt->sax = NULL;
12845 xmlFreeParserCtxt(ctxt);
12846 if (systemIdCanonic != NULL)
12847 xmlFree(systemIdCanonic);
12848 return(NULL);
12852 * plug some encoding conversion routines here.
12854 if (xmlPushInput(ctxt, input) < 0) {
12855 if (sax != NULL) ctxt->sax = NULL;
12856 xmlFreeParserCtxt(ctxt);
12857 if (systemIdCanonic != NULL)
12858 xmlFree(systemIdCanonic);
12859 return(NULL);
12861 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12862 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12863 xmlSwitchEncoding(ctxt, enc);
12866 if (input->filename == NULL)
12867 input->filename = (char *) systemIdCanonic;
12868 else
12869 xmlFree(systemIdCanonic);
12870 input->line = 1;
12871 input->col = 1;
12872 input->base = ctxt->input->cur;
12873 input->cur = ctxt->input->cur;
12874 input->free = NULL;
12877 * let's parse that entity knowing it's an external subset.
12879 ctxt->inSubset = 2;
12880 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12881 if (ctxt->myDoc == NULL) {
12882 xmlErrMemory(ctxt, "New Doc failed");
12883 if (sax != NULL) ctxt->sax = NULL;
12884 xmlFreeParserCtxt(ctxt);
12885 return(NULL);
12887 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12888 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12889 ExternalID, SystemID);
12890 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12892 if (ctxt->myDoc != NULL) {
12893 if (ctxt->wellFormed) {
12894 ret = ctxt->myDoc->extSubset;
12895 ctxt->myDoc->extSubset = NULL;
12896 if (ret != NULL) {
12897 xmlNodePtr tmp;
12899 ret->doc = NULL;
12900 tmp = ret->children;
12901 while (tmp != NULL) {
12902 tmp->doc = NULL;
12903 tmp = tmp->next;
12906 } else {
12907 ret = NULL;
12909 xmlFreeDoc(ctxt->myDoc);
12910 ctxt->myDoc = NULL;
12912 if (sax != NULL) ctxt->sax = NULL;
12913 xmlFreeParserCtxt(ctxt);
12915 return(ret);
12920 * xmlParseDTD:
12921 * @ExternalID: a NAME* containing the External ID of the DTD
12922 * @SystemID: a NAME* containing the URL to the DTD
12924 * Load and parse an external subset.
12926 * Returns the resulting xmlDtdPtr or NULL in case of error.
12929 xmlDtdPtr
12930 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12931 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12933 #endif /* LIBXML_VALID_ENABLED */
12935 /************************************************************************
12937 * Front ends when parsing an Entity *
12939 ************************************************************************/
12942 * xmlParseCtxtExternalEntity:
12943 * @ctx: the existing parsing context
12944 * @URL: the URL for the entity to load
12945 * @ID: the System ID for the entity to load
12946 * @lst: the return value for the set of parsed nodes
12948 * Parse an external general entity within an existing parsing context
12949 * An external general parsed entity is well-formed if it matches the
12950 * production labeled extParsedEnt.
12952 * [78] extParsedEnt ::= TextDecl? content
12954 * Returns 0 if the entity is well formed, -1 in case of args problem and
12955 * the parser error code otherwise
12959 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12960 const xmlChar *ID, xmlNodePtr *lst) {
12961 void *userData;
12963 if (ctx == NULL) return(-1);
12965 * If the user provided their own SAX callbacks, then reuse the
12966 * userData callback field, otherwise the expected setup in a
12967 * DOM builder is to have userData == ctxt
12969 if (ctx->userData == ctx)
12970 userData = NULL;
12971 else
12972 userData = ctx->userData;
12973 return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12974 userData, ctx->depth + 1,
12975 URL, ID, lst);
12979 * xmlParseExternalEntityPrivate:
12980 * @doc: the document the chunk pertains to
12981 * @oldctxt: the previous parser context if available
12982 * @sax: the SAX handler block (possibly NULL)
12983 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12984 * @depth: Used for loop detection, use 0
12985 * @URL: the URL for the entity to load
12986 * @ID: the System ID for the entity to load
12987 * @list: the return value for the set of parsed nodes
12989 * Private version of xmlParseExternalEntity()
12991 * Returns 0 if the entity is well formed, -1 in case of args problem and
12992 * the parser error code otherwise
12995 static xmlParserErrors
12996 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12997 xmlSAXHandlerPtr sax,
12998 void *user_data, int depth, const xmlChar *URL,
12999 const xmlChar *ID, xmlNodePtr *list) {
13000 xmlParserCtxtPtr ctxt;
13001 xmlDocPtr newDoc;
13002 xmlNodePtr newRoot;
13003 xmlSAXHandlerPtr oldsax = NULL;
13004 xmlParserErrors ret = XML_ERR_OK;
13005 xmlChar start[4];
13006 xmlCharEncoding enc;
13008 if (((depth > 40) &&
13009 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13010 (depth > 1024)) {
13011 return(XML_ERR_ENTITY_LOOP);
13014 if (list != NULL)
13015 *list = NULL;
13016 if ((URL == NULL) && (ID == NULL))
13017 return(XML_ERR_INTERNAL_ERROR);
13018 if (doc == NULL)
13019 return(XML_ERR_INTERNAL_ERROR);
13022 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13023 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13024 ctxt->userData = ctxt;
13025 if (sax != NULL) {
13026 oldsax = ctxt->sax;
13027 ctxt->sax = sax;
13028 if (user_data != NULL)
13029 ctxt->userData = user_data;
13031 xmlDetectSAX2(ctxt);
13032 newDoc = xmlNewDoc(BAD_CAST "1.0");
13033 if (newDoc == NULL) {
13034 xmlFreeParserCtxt(ctxt);
13035 return(XML_ERR_INTERNAL_ERROR);
13037 newDoc->properties = XML_DOC_INTERNAL;
13038 if (doc) {
13039 newDoc->intSubset = doc->intSubset;
13040 newDoc->extSubset = doc->extSubset;
13041 if (doc->dict) {
13042 newDoc->dict = doc->dict;
13043 xmlDictReference(newDoc->dict);
13045 if (doc->URL != NULL) {
13046 newDoc->URL = xmlStrdup(doc->URL);
13049 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13050 if (newRoot == NULL) {
13051 if (sax != NULL)
13052 ctxt->sax = oldsax;
13053 xmlFreeParserCtxt(ctxt);
13054 newDoc->intSubset = NULL;
13055 newDoc->extSubset = NULL;
13056 xmlFreeDoc(newDoc);
13057 return(XML_ERR_INTERNAL_ERROR);
13059 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13060 nodePush(ctxt, newDoc->children);
13061 if (doc == NULL) {
13062 ctxt->myDoc = newDoc;
13063 } else {
13064 ctxt->myDoc = doc;
13065 newRoot->doc = doc;
13069 * Get the 4 first bytes and decode the charset
13070 * if enc != XML_CHAR_ENCODING_NONE
13071 * plug some encoding conversion routines.
13073 GROW;
13074 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13075 start[0] = RAW;
13076 start[1] = NXT(1);
13077 start[2] = NXT(2);
13078 start[3] = NXT(3);
13079 enc = xmlDetectCharEncoding(start, 4);
13080 if (enc != XML_CHAR_ENCODING_NONE) {
13081 xmlSwitchEncoding(ctxt, enc);
13086 * Parse a possible text declaration first
13088 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13089 xmlParseTextDecl(ctxt);
13091 * An XML-1.0 document can't reference an entity not XML-1.0
13093 if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13094 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13095 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13096 "Version mismatch between document and entity\n");
13100 ctxt->instate = XML_PARSER_CONTENT;
13101 ctxt->depth = depth;
13102 if (oldctxt != NULL) {
13103 ctxt->_private = oldctxt->_private;
13104 ctxt->loadsubset = oldctxt->loadsubset;
13105 ctxt->validate = oldctxt->validate;
13106 ctxt->valid = oldctxt->valid;
13107 ctxt->replaceEntities = oldctxt->replaceEntities;
13108 if (oldctxt->validate) {
13109 ctxt->vctxt.error = oldctxt->vctxt.error;
13110 ctxt->vctxt.warning = oldctxt->vctxt.warning;
13111 ctxt->vctxt.userData = oldctxt->vctxt.userData;
13113 ctxt->external = oldctxt->external;
13114 if (ctxt->dict) xmlDictFree(ctxt->dict);
13115 ctxt->dict = oldctxt->dict;
13116 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13117 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13118 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13119 ctxt->dictNames = oldctxt->dictNames;
13120 ctxt->attsDefault = oldctxt->attsDefault;
13121 ctxt->attsSpecial = oldctxt->attsSpecial;
13122 ctxt->linenumbers = oldctxt->linenumbers;
13123 ctxt->record_info = oldctxt->record_info;
13124 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13125 ctxt->node_seq.length = oldctxt->node_seq.length;
13126 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13127 } else {
13129 * Doing validity checking on chunk without context
13130 * doesn't make sense
13132 ctxt->_private = NULL;
13133 ctxt->validate = 0;
13134 ctxt->external = 2;
13135 ctxt->loadsubset = 0;
13138 xmlParseContent(ctxt);
13140 if ((RAW == '<') && (NXT(1) == '/')) {
13141 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13142 } else if (RAW != 0) {
13143 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13145 if (ctxt->node != newDoc->children) {
13146 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13149 if (!ctxt->wellFormed) {
13150 if (ctxt->errNo == 0)
13151 ret = XML_ERR_INTERNAL_ERROR;
13152 else
13153 ret = (xmlParserErrors)ctxt->errNo;
13154 } else {
13155 if (list != NULL) {
13156 xmlNodePtr cur;
13159 * Return the newly created nodeset after unlinking it from
13160 * they pseudo parent.
13162 cur = newDoc->children->children;
13163 *list = cur;
13164 while (cur != NULL) {
13165 cur->parent = NULL;
13166 cur = cur->next;
13168 newDoc->children->children = NULL;
13170 ret = XML_ERR_OK;
13174 * Record in the parent context the number of entities replacement
13175 * done when parsing that reference.
13177 if (oldctxt != NULL)
13178 oldctxt->nbentities += ctxt->nbentities;
13181 * Also record the size of the entity parsed
13183 if (ctxt->input != NULL && oldctxt != NULL) {
13184 oldctxt->sizeentities += ctxt->input->consumed;
13185 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13188 * And record the last error if any
13190 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13191 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13193 if (sax != NULL)
13194 ctxt->sax = oldsax;
13195 if (oldctxt != NULL) {
13196 ctxt->dict = NULL;
13197 ctxt->attsDefault = NULL;
13198 ctxt->attsSpecial = NULL;
13199 oldctxt->validate = ctxt->validate;
13200 oldctxt->valid = ctxt->valid;
13201 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13202 oldctxt->node_seq.length = ctxt->node_seq.length;
13203 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13205 ctxt->node_seq.maximum = 0;
13206 ctxt->node_seq.length = 0;
13207 ctxt->node_seq.buffer = NULL;
13208 xmlFreeParserCtxt(ctxt);
13209 newDoc->intSubset = NULL;
13210 newDoc->extSubset = NULL;
13211 xmlFreeDoc(newDoc);
13213 return(ret);
13216 #ifdef LIBXML_SAX1_ENABLED
13218 * xmlParseExternalEntity:
13219 * @doc: the document the chunk pertains to
13220 * @sax: the SAX handler block (possibly NULL)
13221 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13222 * @depth: Used for loop detection, use 0
13223 * @URL: the URL for the entity to load
13224 * @ID: the System ID for the entity to load
13225 * @lst: the return value for the set of parsed nodes
13227 * Parse an external general entity
13228 * An external general parsed entity is well-formed if it matches the
13229 * production labeled extParsedEnt.
13231 * [78] extParsedEnt ::= TextDecl? content
13233 * Returns 0 if the entity is well formed, -1 in case of args problem and
13234 * the parser error code otherwise
13238 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13239 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13240 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13241 ID, lst));
13245 * xmlParseBalancedChunkMemory:
13246 * @doc: the document the chunk pertains to (must not be NULL)
13247 * @sax: the SAX handler block (possibly NULL)
13248 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13249 * @depth: Used for loop detection, use 0
13250 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13251 * @lst: the return value for the set of parsed nodes
13253 * Parse a well-balanced chunk of an XML document
13254 * called by the parser
13255 * The allowed sequence for the Well Balanced Chunk is the one defined by
13256 * the content production in the XML grammar:
13258 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13260 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13261 * the parser error code otherwise
13265 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13266 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13267 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13268 depth, string, lst, 0 );
13270 #endif /* LIBXML_SAX1_ENABLED */
13273 * xmlParseBalancedChunkMemoryInternal:
13274 * @oldctxt: the existing parsing context
13275 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13276 * @user_data: the user data field for the parser context
13277 * @lst: the return value for the set of parsed nodes
13280 * Parse a well-balanced chunk of an XML document
13281 * called by the parser
13282 * The allowed sequence for the Well Balanced Chunk is the one defined by
13283 * the content production in the XML grammar:
13285 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13287 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13288 * error code otherwise
13290 * In case recover is set to 1, the nodelist will not be empty even if
13291 * the parsed chunk is not well balanced.
13293 static xmlParserErrors
13294 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13295 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13296 xmlParserCtxtPtr ctxt;
13297 xmlDocPtr newDoc = NULL;
13298 xmlNodePtr newRoot;
13299 xmlSAXHandlerPtr oldsax = NULL;
13300 xmlNodePtr content = NULL;
13301 xmlNodePtr last = NULL;
13302 int size;
13303 xmlParserErrors ret = XML_ERR_OK;
13304 #ifdef SAX2
13305 int i;
13306 #endif
13308 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13309 (oldctxt->depth > 1024)) {
13310 return(XML_ERR_ENTITY_LOOP);
13314 if (lst != NULL)
13315 *lst = NULL;
13316 if (string == NULL)
13317 return(XML_ERR_INTERNAL_ERROR);
13319 size = xmlStrlen(string);
13321 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13322 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13323 if (user_data != NULL)
13324 ctxt->userData = user_data;
13325 else
13326 ctxt->userData = ctxt;
13327 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13328 ctxt->dict = oldctxt->dict;
13329 ctxt->input_id = oldctxt->input_id + 1;
13330 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13331 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13332 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13334 #ifdef SAX2
13335 /* propagate namespaces down the entity */
13336 for (i = 0;i < oldctxt->nsNr;i += 2) {
13337 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13339 #endif
13341 oldsax = ctxt->sax;
13342 ctxt->sax = oldctxt->sax;
13343 xmlDetectSAX2(ctxt);
13344 ctxt->replaceEntities = oldctxt->replaceEntities;
13345 ctxt->options = oldctxt->options;
13347 ctxt->_private = oldctxt->_private;
13348 if (oldctxt->myDoc == NULL) {
13349 newDoc = xmlNewDoc(BAD_CAST "1.0");
13350 if (newDoc == NULL) {
13351 ctxt->sax = oldsax;
13352 ctxt->dict = NULL;
13353 xmlFreeParserCtxt(ctxt);
13354 return(XML_ERR_INTERNAL_ERROR);
13356 newDoc->properties = XML_DOC_INTERNAL;
13357 newDoc->dict = ctxt->dict;
13358 xmlDictReference(newDoc->dict);
13359 ctxt->myDoc = newDoc;
13360 } else {
13361 ctxt->myDoc = oldctxt->myDoc;
13362 content = ctxt->myDoc->children;
13363 last = ctxt->myDoc->last;
13365 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13366 if (newRoot == NULL) {
13367 ctxt->sax = oldsax;
13368 ctxt->dict = NULL;
13369 xmlFreeParserCtxt(ctxt);
13370 if (newDoc != NULL) {
13371 xmlFreeDoc(newDoc);
13373 return(XML_ERR_INTERNAL_ERROR);
13375 ctxt->myDoc->children = NULL;
13376 ctxt->myDoc->last = NULL;
13377 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13378 nodePush(ctxt, ctxt->myDoc->children);
13379 ctxt->instate = XML_PARSER_CONTENT;
13380 ctxt->depth = oldctxt->depth + 1;
13382 ctxt->validate = 0;
13383 ctxt->loadsubset = oldctxt->loadsubset;
13384 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13386 * ID/IDREF registration will be done in xmlValidateElement below
13388 ctxt->loadsubset |= XML_SKIP_IDS;
13390 ctxt->dictNames = oldctxt->dictNames;
13391 ctxt->attsDefault = oldctxt->attsDefault;
13392 ctxt->attsSpecial = oldctxt->attsSpecial;
13394 xmlParseContent(ctxt);
13395 if ((RAW == '<') && (NXT(1) == '/')) {
13396 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13397 } else if (RAW != 0) {
13398 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13400 if (ctxt->node != ctxt->myDoc->children) {
13401 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13404 if (!ctxt->wellFormed) {
13405 if (ctxt->errNo == 0)
13406 ret = XML_ERR_INTERNAL_ERROR;
13407 else
13408 ret = (xmlParserErrors)ctxt->errNo;
13409 } else {
13410 ret = XML_ERR_OK;
13413 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13414 xmlNodePtr cur;
13417 * Return the newly created nodeset after unlinking it from
13418 * they pseudo parent.
13420 cur = ctxt->myDoc->children->children;
13421 *lst = cur;
13422 while (cur != NULL) {
13423 #ifdef LIBXML_VALID_ENABLED
13424 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13425 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13426 (cur->type == XML_ELEMENT_NODE)) {
13427 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13428 oldctxt->myDoc, cur);
13430 #endif /* LIBXML_VALID_ENABLED */
13431 cur->parent = NULL;
13432 cur = cur->next;
13434 ctxt->myDoc->children->children = NULL;
13436 if (ctxt->myDoc != NULL) {
13437 xmlFreeNode(ctxt->myDoc->children);
13438 ctxt->myDoc->children = content;
13439 ctxt->myDoc->last = last;
13443 * Record in the parent context the number of entities replacement
13444 * done when parsing that reference.
13446 if (oldctxt != NULL)
13447 oldctxt->nbentities += ctxt->nbentities;
13450 * Also record the last error if any
13452 if (ctxt->lastError.code != XML_ERR_OK)
13453 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13455 ctxt->sax = oldsax;
13456 ctxt->dict = NULL;
13457 ctxt->attsDefault = NULL;
13458 ctxt->attsSpecial = NULL;
13459 xmlFreeParserCtxt(ctxt);
13460 if (newDoc != NULL) {
13461 xmlFreeDoc(newDoc);
13464 return(ret);
13468 * xmlParseInNodeContext:
13469 * @node: the context node
13470 * @data: the input string
13471 * @datalen: the input string length in bytes
13472 * @options: a combination of xmlParserOption
13473 * @lst: the return value for the set of parsed nodes
13475 * Parse a well-balanced chunk of an XML document
13476 * within the context (DTD, namespaces, etc ...) of the given node.
13478 * The allowed sequence for the data is a Well Balanced Chunk defined by
13479 * the content production in the XML grammar:
13481 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13483 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13484 * error code otherwise
13486 xmlParserErrors
13487 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13488 int options, xmlNodePtr *lst) {
13489 #ifdef SAX2
13490 xmlParserCtxtPtr ctxt;
13491 xmlDocPtr doc = NULL;
13492 xmlNodePtr fake, cur;
13493 int nsnr = 0;
13495 xmlParserErrors ret = XML_ERR_OK;
13498 * check all input parameters, grab the document
13500 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13501 return(XML_ERR_INTERNAL_ERROR);
13502 switch (node->type) {
13503 case XML_ELEMENT_NODE:
13504 case XML_ATTRIBUTE_NODE:
13505 case XML_TEXT_NODE:
13506 case XML_CDATA_SECTION_NODE:
13507 case XML_ENTITY_REF_NODE:
13508 case XML_PI_NODE:
13509 case XML_COMMENT_NODE:
13510 case XML_DOCUMENT_NODE:
13511 case XML_HTML_DOCUMENT_NODE:
13512 break;
13513 default:
13514 return(XML_ERR_INTERNAL_ERROR);
13517 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13518 (node->type != XML_DOCUMENT_NODE) &&
13519 (node->type != XML_HTML_DOCUMENT_NODE))
13520 node = node->parent;
13521 if (node == NULL)
13522 return(XML_ERR_INTERNAL_ERROR);
13523 if (node->type == XML_ELEMENT_NODE)
13524 doc = node->doc;
13525 else
13526 doc = (xmlDocPtr) node;
13527 if (doc == NULL)
13528 return(XML_ERR_INTERNAL_ERROR);
13531 * allocate a context and set-up everything not related to the
13532 * node position in the tree
13534 if (doc->type == XML_DOCUMENT_NODE)
13535 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13536 #ifdef LIBXML_HTML_ENABLED
13537 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13538 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13540 * When parsing in context, it makes no sense to add implied
13541 * elements like html/body/etc...
13543 options |= HTML_PARSE_NOIMPLIED;
13545 #endif
13546 else
13547 return(XML_ERR_INTERNAL_ERROR);
13549 if (ctxt == NULL)
13550 return(XML_ERR_NO_MEMORY);
13553 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13554 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13555 * we must wait until the last moment to free the original one.
13557 if (doc->dict != NULL) {
13558 if (ctxt->dict != NULL)
13559 xmlDictFree(ctxt->dict);
13560 ctxt->dict = doc->dict;
13561 } else
13562 options |= XML_PARSE_NODICT;
13564 if (doc->encoding != NULL) {
13565 xmlCharEncodingHandlerPtr hdlr;
13567 if (ctxt->encoding != NULL)
13568 xmlFree((xmlChar *) ctxt->encoding);
13569 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13571 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13572 if (hdlr != NULL) {
13573 xmlSwitchToEncoding(ctxt, hdlr);
13574 } else {
13575 return(XML_ERR_UNSUPPORTED_ENCODING);
13579 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13580 xmlDetectSAX2(ctxt);
13581 ctxt->myDoc = doc;
13582 /* parsing in context, i.e. as within existing content */
13583 ctxt->input_id = 2;
13584 ctxt->instate = XML_PARSER_CONTENT;
13586 fake = xmlNewDocComment(node->doc, NULL);
13587 if (fake == NULL) {
13588 xmlFreeParserCtxt(ctxt);
13589 return(XML_ERR_NO_MEMORY);
13591 xmlAddChild(node, fake);
13593 if (node->type == XML_ELEMENT_NODE) {
13594 nodePush(ctxt, node);
13596 * initialize the SAX2 namespaces stack
13598 cur = node;
13599 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13600 xmlNsPtr ns = cur->nsDef;
13601 const xmlChar *iprefix, *ihref;
13603 while (ns != NULL) {
13604 if (ctxt->dict) {
13605 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13606 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13607 } else {
13608 iprefix = ns->prefix;
13609 ihref = ns->href;
13612 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13613 nsPush(ctxt, iprefix, ihref);
13614 nsnr++;
13616 ns = ns->next;
13618 cur = cur->parent;
13622 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13624 * ID/IDREF registration will be done in xmlValidateElement below
13626 ctxt->loadsubset |= XML_SKIP_IDS;
13629 #ifdef LIBXML_HTML_ENABLED
13630 if (doc->type == XML_HTML_DOCUMENT_NODE)
13631 __htmlParseContent(ctxt);
13632 else
13633 #endif
13634 xmlParseContent(ctxt);
13636 nsPop(ctxt, nsnr);
13637 if ((RAW == '<') && (NXT(1) == '/')) {
13638 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13639 } else if (RAW != 0) {
13640 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13642 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13643 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13644 ctxt->wellFormed = 0;
13647 if (!ctxt->wellFormed) {
13648 if (ctxt->errNo == 0)
13649 ret = XML_ERR_INTERNAL_ERROR;
13650 else
13651 ret = (xmlParserErrors)ctxt->errNo;
13652 } else {
13653 ret = XML_ERR_OK;
13657 * Return the newly created nodeset after unlinking it from
13658 * the pseudo sibling.
13661 cur = fake->next;
13662 fake->next = NULL;
13663 node->last = fake;
13665 if (cur != NULL) {
13666 cur->prev = NULL;
13669 *lst = cur;
13671 while (cur != NULL) {
13672 cur->parent = NULL;
13673 cur = cur->next;
13676 xmlUnlinkNode(fake);
13677 xmlFreeNode(fake);
13680 if (ret != XML_ERR_OK) {
13681 xmlFreeNodeList(*lst);
13682 *lst = NULL;
13685 if (doc->dict != NULL)
13686 ctxt->dict = NULL;
13687 xmlFreeParserCtxt(ctxt);
13689 return(ret);
13690 #else /* !SAX2 */
13691 return(XML_ERR_INTERNAL_ERROR);
13692 #endif
13695 #ifdef LIBXML_SAX1_ENABLED
13697 * xmlParseBalancedChunkMemoryRecover:
13698 * @doc: the document the chunk pertains to (must not be NULL)
13699 * @sax: the SAX handler block (possibly NULL)
13700 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13701 * @depth: Used for loop detection, use 0
13702 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13703 * @lst: the return value for the set of parsed nodes
13704 * @recover: return nodes even if the data is broken (use 0)
13707 * Parse a well-balanced chunk of an XML document
13708 * called by the parser
13709 * The allowed sequence for the Well Balanced Chunk is the one defined by
13710 * the content production in the XML grammar:
13712 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13714 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13715 * the parser error code otherwise
13717 * In case recover is set to 1, the nodelist will not be empty even if
13718 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13719 * some extent.
13722 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13723 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13724 int recover) {
13725 xmlParserCtxtPtr ctxt;
13726 xmlDocPtr newDoc;
13727 xmlSAXHandlerPtr oldsax = NULL;
13728 xmlNodePtr content, newRoot;
13729 int size;
13730 int ret = 0;
13732 if (depth > 40) {
13733 return(XML_ERR_ENTITY_LOOP);
13737 if (lst != NULL)
13738 *lst = NULL;
13739 if (string == NULL)
13740 return(-1);
13742 size = xmlStrlen(string);
13744 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13745 if (ctxt == NULL) return(-1);
13746 ctxt->userData = ctxt;
13747 if (sax != NULL) {
13748 oldsax = ctxt->sax;
13749 ctxt->sax = sax;
13750 if (user_data != NULL)
13751 ctxt->userData = user_data;
13753 newDoc = xmlNewDoc(BAD_CAST "1.0");
13754 if (newDoc == NULL) {
13755 xmlFreeParserCtxt(ctxt);
13756 return(-1);
13758 newDoc->properties = XML_DOC_INTERNAL;
13759 if ((doc != NULL) && (doc->dict != NULL)) {
13760 xmlDictFree(ctxt->dict);
13761 ctxt->dict = doc->dict;
13762 xmlDictReference(ctxt->dict);
13763 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13764 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13765 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13766 ctxt->dictNames = 1;
13767 } else {
13768 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13770 /* doc == NULL is only supported for historic reasons */
13771 if (doc != NULL) {
13772 newDoc->intSubset = doc->intSubset;
13773 newDoc->extSubset = doc->extSubset;
13775 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13776 if (newRoot == NULL) {
13777 if (sax != NULL)
13778 ctxt->sax = oldsax;
13779 xmlFreeParserCtxt(ctxt);
13780 newDoc->intSubset = NULL;
13781 newDoc->extSubset = NULL;
13782 xmlFreeDoc(newDoc);
13783 return(-1);
13785 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13786 nodePush(ctxt, newRoot);
13787 /* doc == NULL is only supported for historic reasons */
13788 if (doc == NULL) {
13789 ctxt->myDoc = newDoc;
13790 } else {
13791 ctxt->myDoc = newDoc;
13792 newDoc->children->doc = doc;
13793 /* Ensure that doc has XML spec namespace */
13794 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13795 newDoc->oldNs = doc->oldNs;
13797 ctxt->instate = XML_PARSER_CONTENT;
13798 ctxt->input_id = 2;
13799 ctxt->depth = depth;
13802 * Doing validity checking on chunk doesn't make sense
13804 ctxt->validate = 0;
13805 ctxt->loadsubset = 0;
13806 xmlDetectSAX2(ctxt);
13808 if ( doc != NULL ){
13809 content = doc->children;
13810 doc->children = NULL;
13811 xmlParseContent(ctxt);
13812 doc->children = content;
13814 else {
13815 xmlParseContent(ctxt);
13817 if ((RAW == '<') && (NXT(1) == '/')) {
13818 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13819 } else if (RAW != 0) {
13820 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13822 if (ctxt->node != newDoc->children) {
13823 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13826 if (!ctxt->wellFormed) {
13827 if (ctxt->errNo == 0)
13828 ret = 1;
13829 else
13830 ret = ctxt->errNo;
13831 } else {
13832 ret = 0;
13835 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13836 xmlNodePtr cur;
13839 * Return the newly created nodeset after unlinking it from
13840 * they pseudo parent.
13842 cur = newDoc->children->children;
13843 *lst = cur;
13844 while (cur != NULL) {
13845 xmlSetTreeDoc(cur, doc);
13846 cur->parent = NULL;
13847 cur = cur->next;
13849 newDoc->children->children = NULL;
13852 if (sax != NULL)
13853 ctxt->sax = oldsax;
13854 xmlFreeParserCtxt(ctxt);
13855 newDoc->intSubset = NULL;
13856 newDoc->extSubset = NULL;
13857 /* This leaks the namespace list if doc == NULL */
13858 newDoc->oldNs = NULL;
13859 xmlFreeDoc(newDoc);
13861 return(ret);
13865 * xmlSAXParseEntity:
13866 * @sax: the SAX handler block
13867 * @filename: the filename
13869 * parse an XML external entity out of context and build a tree.
13870 * It use the given SAX function block to handle the parsing callback.
13871 * If sax is NULL, fallback to the default DOM tree building routines.
13873 * [78] extParsedEnt ::= TextDecl? content
13875 * This correspond to a "Well Balanced" chunk
13877 * Returns the resulting document tree
13880 xmlDocPtr
13881 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13882 xmlDocPtr ret;
13883 xmlParserCtxtPtr ctxt;
13885 ctxt = xmlCreateFileParserCtxt(filename);
13886 if (ctxt == NULL) {
13887 return(NULL);
13889 if (sax != NULL) {
13890 if (ctxt->sax != NULL)
13891 xmlFree(ctxt->sax);
13892 ctxt->sax = sax;
13893 ctxt->userData = NULL;
13896 xmlParseExtParsedEnt(ctxt);
13898 if (ctxt->wellFormed)
13899 ret = ctxt->myDoc;
13900 else {
13901 ret = NULL;
13902 xmlFreeDoc(ctxt->myDoc);
13903 ctxt->myDoc = NULL;
13905 if (sax != NULL)
13906 ctxt->sax = NULL;
13907 xmlFreeParserCtxt(ctxt);
13909 return(ret);
13913 * xmlParseEntity:
13914 * @filename: the filename
13916 * parse an XML external entity out of context and build a tree.
13918 * [78] extParsedEnt ::= TextDecl? content
13920 * This correspond to a "Well Balanced" chunk
13922 * Returns the resulting document tree
13925 xmlDocPtr
13926 xmlParseEntity(const char *filename) {
13927 return(xmlSAXParseEntity(NULL, filename));
13929 #endif /* LIBXML_SAX1_ENABLED */
13932 * xmlCreateEntityParserCtxtInternal:
13933 * @URL: the entity URL
13934 * @ID: the entity PUBLIC ID
13935 * @base: a possible base for the target URI
13936 * @pctx: parser context used to set options on new context
13938 * Create a parser context for an external entity
13939 * Automatic support for ZLIB/Compress compressed document is provided
13940 * by default if found at compile-time.
13942 * Returns the new parser context or NULL
13944 static xmlParserCtxtPtr
13945 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13946 const xmlChar *base, xmlParserCtxtPtr pctx) {
13947 xmlParserCtxtPtr ctxt;
13948 xmlParserInputPtr inputStream;
13949 char *directory = NULL;
13950 xmlChar *uri;
13952 ctxt = xmlNewParserCtxt();
13953 if (ctxt == NULL) {
13954 return(NULL);
13957 if (pctx != NULL) {
13958 ctxt->options = pctx->options;
13959 ctxt->_private = pctx->_private;
13961 * this is a subparser of pctx, so the input_id should be
13962 * incremented to distinguish from main entity
13964 ctxt->input_id = pctx->input_id + 1;
13967 /* Don't read from stdin. */
13968 if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13969 URL = BAD_CAST "./-";
13971 uri = xmlBuildURI(URL, base);
13973 if (uri == NULL) {
13974 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13975 if (inputStream == NULL) {
13976 xmlFreeParserCtxt(ctxt);
13977 return(NULL);
13980 inputPush(ctxt, inputStream);
13982 if ((ctxt->directory == NULL) && (directory == NULL))
13983 directory = xmlParserGetDirectory((char *)URL);
13984 if ((ctxt->directory == NULL) && (directory != NULL))
13985 ctxt->directory = directory;
13986 } else {
13987 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13988 if (inputStream == NULL) {
13989 xmlFree(uri);
13990 xmlFreeParserCtxt(ctxt);
13991 return(NULL);
13994 inputPush(ctxt, inputStream);
13996 if ((ctxt->directory == NULL) && (directory == NULL))
13997 directory = xmlParserGetDirectory((char *)uri);
13998 if ((ctxt->directory == NULL) && (directory != NULL))
13999 ctxt->directory = directory;
14000 xmlFree(uri);
14002 return(ctxt);
14006 * xmlCreateEntityParserCtxt:
14007 * @URL: the entity URL
14008 * @ID: the entity PUBLIC ID
14009 * @base: a possible base for the target URI
14011 * Create a parser context for an external entity
14012 * Automatic support for ZLIB/Compress compressed document is provided
14013 * by default if found at compile-time.
14015 * Returns the new parser context or NULL
14017 xmlParserCtxtPtr
14018 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14019 const xmlChar *base) {
14020 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14024 /************************************************************************
14026 * Front ends when parsing from a file *
14028 ************************************************************************/
14031 * xmlCreateURLParserCtxt:
14032 * @filename: the filename or URL
14033 * @options: a combination of xmlParserOption
14035 * Create a parser context for a file or URL content.
14036 * Automatic support for ZLIB/Compress compressed document is provided
14037 * by default if found at compile-time and for file accesses
14039 * Returns the new parser context or NULL
14041 xmlParserCtxtPtr
14042 xmlCreateURLParserCtxt(const char *filename, int options)
14044 xmlParserCtxtPtr ctxt;
14045 xmlParserInputPtr inputStream;
14046 char *directory = NULL;
14048 ctxt = xmlNewParserCtxt();
14049 if (ctxt == NULL) {
14050 xmlErrMemory(NULL, "cannot allocate parser context");
14051 return(NULL);
14054 if (options)
14055 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14056 ctxt->linenumbers = 1;
14058 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14059 if (inputStream == NULL) {
14060 xmlFreeParserCtxt(ctxt);
14061 return(NULL);
14064 inputPush(ctxt, inputStream);
14065 if ((ctxt->directory == NULL) && (directory == NULL))
14066 directory = xmlParserGetDirectory(filename);
14067 if ((ctxt->directory == NULL) && (directory != NULL))
14068 ctxt->directory = directory;
14070 return(ctxt);
14074 * xmlCreateFileParserCtxt:
14075 * @filename: the filename
14077 * Create a parser context for a file content.
14078 * Automatic support for ZLIB/Compress compressed document is provided
14079 * by default if found at compile-time.
14081 * Returns the new parser context or NULL
14083 xmlParserCtxtPtr
14084 xmlCreateFileParserCtxt(const char *filename)
14086 return(xmlCreateURLParserCtxt(filename, 0));
14089 #ifdef LIBXML_SAX1_ENABLED
14091 * xmlSAXParseFileWithData:
14092 * @sax: the SAX handler block
14093 * @filename: the filename
14094 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14095 * documents
14096 * @data: the userdata
14098 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14099 * compressed document is provided by default if found at compile-time.
14100 * It use the given SAX function block to handle the parsing callback.
14101 * If sax is NULL, fallback to the default DOM tree building routines.
14103 * User data (void *) is stored within the parser context in the
14104 * context's _private member, so it is available nearly everywhere in libxml
14106 * Returns the resulting document tree
14109 xmlDocPtr
14110 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14111 int recovery, void *data) {
14112 xmlDocPtr ret;
14113 xmlParserCtxtPtr ctxt;
14115 xmlInitParser();
14117 ctxt = xmlCreateFileParserCtxt(filename);
14118 if (ctxt == NULL) {
14119 return(NULL);
14121 if (sax != NULL) {
14122 if (ctxt->sax != NULL)
14123 xmlFree(ctxt->sax);
14124 ctxt->sax = sax;
14126 xmlDetectSAX2(ctxt);
14127 if (data!=NULL) {
14128 ctxt->_private = data;
14131 if (ctxt->directory == NULL)
14132 ctxt->directory = xmlParserGetDirectory(filename);
14134 ctxt->recovery = recovery;
14136 xmlParseDocument(ctxt);
14138 if ((ctxt->wellFormed) || recovery) {
14139 ret = ctxt->myDoc;
14140 if ((ret != NULL) && (ctxt->input->buf != NULL)) {
14141 if (ctxt->input->buf->compressed > 0)
14142 ret->compression = 9;
14143 else
14144 ret->compression = ctxt->input->buf->compressed;
14147 else {
14148 ret = NULL;
14149 xmlFreeDoc(ctxt->myDoc);
14150 ctxt->myDoc = NULL;
14152 if (sax != NULL)
14153 ctxt->sax = NULL;
14154 xmlFreeParserCtxt(ctxt);
14156 return(ret);
14160 * xmlSAXParseFile:
14161 * @sax: the SAX handler block
14162 * @filename: the filename
14163 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14164 * documents
14166 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14167 * compressed document is provided by default if found at compile-time.
14168 * It use the given SAX function block to handle the parsing callback.
14169 * If sax is NULL, fallback to the default DOM tree building routines.
14171 * Returns the resulting document tree
14174 xmlDocPtr
14175 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14176 int recovery) {
14177 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14181 * xmlRecoverDoc:
14182 * @cur: a pointer to an array of xmlChar
14184 * parse an XML in-memory document and build a tree.
14185 * In the case the document is not Well Formed, a attempt to build a
14186 * tree is tried anyway
14188 * Returns the resulting document tree or NULL in case of failure
14191 xmlDocPtr
14192 xmlRecoverDoc(const xmlChar *cur) {
14193 return(xmlSAXParseDoc(NULL, cur, 1));
14197 * xmlParseFile:
14198 * @filename: the filename
14200 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14201 * compressed document is provided by default if found at compile-time.
14203 * Returns the resulting document tree if the file was wellformed,
14204 * NULL otherwise.
14207 xmlDocPtr
14208 xmlParseFile(const char *filename) {
14209 return(xmlSAXParseFile(NULL, filename, 0));
14213 * xmlRecoverFile:
14214 * @filename: the filename
14216 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14217 * compressed document is provided by default if found at compile-time.
14218 * In the case the document is not Well Formed, it attempts to build
14219 * a tree anyway
14221 * Returns the resulting document tree or NULL in case of failure
14224 xmlDocPtr
14225 xmlRecoverFile(const char *filename) {
14226 return(xmlSAXParseFile(NULL, filename, 1));
14231 * xmlSetupParserForBuffer:
14232 * @ctxt: an XML parser context
14233 * @buffer: a xmlChar * buffer
14234 * @filename: a file name
14236 * Setup the parser context to parse a new buffer; Clears any prior
14237 * contents from the parser context. The buffer parameter must not be
14238 * NULL, but the filename parameter can be
14240 void
14241 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14242 const char* filename)
14244 xmlParserInputPtr input;
14246 if ((ctxt == NULL) || (buffer == NULL))
14247 return;
14249 input = xmlNewInputStream(ctxt);
14250 if (input == NULL) {
14251 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14252 xmlClearParserCtxt(ctxt);
14253 return;
14256 xmlClearParserCtxt(ctxt);
14257 if (filename != NULL)
14258 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14259 input->base = buffer;
14260 input->cur = buffer;
14261 input->end = &buffer[xmlStrlen(buffer)];
14262 inputPush(ctxt, input);
14266 * xmlSAXUserParseFile:
14267 * @sax: a SAX handler
14268 * @user_data: The user data returned on SAX callbacks
14269 * @filename: a file name
14271 * parse an XML file and call the given SAX handler routines.
14272 * Automatic support for ZLIB/Compress compressed document is provided
14274 * Returns 0 in case of success or a error number otherwise
14277 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14278 const char *filename) {
14279 int ret = 0;
14280 xmlParserCtxtPtr ctxt;
14282 ctxt = xmlCreateFileParserCtxt(filename);
14283 if (ctxt == NULL) return -1;
14284 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14285 xmlFree(ctxt->sax);
14286 ctxt->sax = sax;
14287 xmlDetectSAX2(ctxt);
14289 if (user_data != NULL)
14290 ctxt->userData = user_data;
14292 xmlParseDocument(ctxt);
14294 if (ctxt->wellFormed)
14295 ret = 0;
14296 else {
14297 if (ctxt->errNo != 0)
14298 ret = ctxt->errNo;
14299 else
14300 ret = -1;
14302 if (sax != NULL)
14303 ctxt->sax = NULL;
14304 if (ctxt->myDoc != NULL) {
14305 xmlFreeDoc(ctxt->myDoc);
14306 ctxt->myDoc = NULL;
14308 xmlFreeParserCtxt(ctxt);
14310 return ret;
14312 #endif /* LIBXML_SAX1_ENABLED */
14314 /************************************************************************
14316 * Front ends when parsing from memory *
14318 ************************************************************************/
14321 * xmlCreateMemoryParserCtxt:
14322 * @buffer: a pointer to a char array
14323 * @size: the size of the array
14325 * Create a parser context for an XML in-memory document.
14327 * Returns the new parser context or NULL
14329 xmlParserCtxtPtr
14330 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14331 xmlParserCtxtPtr ctxt;
14332 xmlParserInputPtr input;
14333 xmlParserInputBufferPtr buf;
14335 if (buffer == NULL)
14336 return(NULL);
14337 if (size <= 0)
14338 return(NULL);
14340 ctxt = xmlNewParserCtxt();
14341 if (ctxt == NULL)
14342 return(NULL);
14344 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14345 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14346 if (buf == NULL) {
14347 xmlFreeParserCtxt(ctxt);
14348 return(NULL);
14351 input = xmlNewInputStream(ctxt);
14352 if (input == NULL) {
14353 xmlFreeParserInputBuffer(buf);
14354 xmlFreeParserCtxt(ctxt);
14355 return(NULL);
14358 input->filename = NULL;
14359 input->buf = buf;
14360 xmlBufResetInput(input->buf->buffer, input);
14362 inputPush(ctxt, input);
14363 return(ctxt);
14366 #ifdef LIBXML_SAX1_ENABLED
14368 * xmlSAXParseMemoryWithData:
14369 * @sax: the SAX handler block
14370 * @buffer: an pointer to a char array
14371 * @size: the size of the array
14372 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14373 * documents
14374 * @data: the userdata
14376 * parse an XML in-memory block and use the given SAX function block
14377 * to handle the parsing callback. If sax is NULL, fallback to the default
14378 * DOM tree building routines.
14380 * User data (void *) is stored within the parser context in the
14381 * context's _private member, so it is available nearly everywhere in libxml
14383 * Returns the resulting document tree
14386 xmlDocPtr
14387 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14388 int size, int recovery, void *data) {
14389 xmlDocPtr ret;
14390 xmlParserCtxtPtr ctxt;
14392 xmlInitParser();
14394 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14395 if (ctxt == NULL) return(NULL);
14396 if (sax != NULL) {
14397 if (ctxt->sax != NULL)
14398 xmlFree(ctxt->sax);
14399 ctxt->sax = sax;
14401 xmlDetectSAX2(ctxt);
14402 if (data!=NULL) {
14403 ctxt->_private=data;
14406 ctxt->recovery = recovery;
14408 xmlParseDocument(ctxt);
14410 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14411 else {
14412 ret = NULL;
14413 xmlFreeDoc(ctxt->myDoc);
14414 ctxt->myDoc = NULL;
14416 if (sax != NULL)
14417 ctxt->sax = NULL;
14418 xmlFreeParserCtxt(ctxt);
14420 return(ret);
14424 * xmlSAXParseMemory:
14425 * @sax: the SAX handler block
14426 * @buffer: an pointer to a char array
14427 * @size: the size of the array
14428 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14429 * documents
14431 * parse an XML in-memory block and use the given SAX function block
14432 * to handle the parsing callback. If sax is NULL, fallback to the default
14433 * DOM tree building routines.
14435 * Returns the resulting document tree
14437 xmlDocPtr
14438 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14439 int size, int recovery) {
14440 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14444 * xmlParseMemory:
14445 * @buffer: an pointer to a char array
14446 * @size: the size of the array
14448 * parse an XML in-memory block and build a tree.
14450 * Returns the resulting document tree
14453 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14454 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14458 * xmlRecoverMemory:
14459 * @buffer: an pointer to a char array
14460 * @size: the size of the array
14462 * parse an XML in-memory block and build a tree.
14463 * In the case the document is not Well Formed, an attempt to
14464 * build a tree is tried anyway
14466 * Returns the resulting document tree or NULL in case of error
14469 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14470 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14474 * xmlSAXUserParseMemory:
14475 * @sax: a SAX handler
14476 * @user_data: The user data returned on SAX callbacks
14477 * @buffer: an in-memory XML document input
14478 * @size: the length of the XML document in bytes
14480 * A better SAX parsing routine.
14481 * parse an XML in-memory buffer and call the given SAX handler routines.
14483 * Returns 0 in case of success or a error number otherwise
14485 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14486 const char *buffer, int size) {
14487 int ret = 0;
14488 xmlParserCtxtPtr ctxt;
14490 xmlInitParser();
14492 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14493 if (ctxt == NULL) return -1;
14494 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14495 xmlFree(ctxt->sax);
14496 ctxt->sax = sax;
14497 xmlDetectSAX2(ctxt);
14499 if (user_data != NULL)
14500 ctxt->userData = user_data;
14502 xmlParseDocument(ctxt);
14504 if (ctxt->wellFormed)
14505 ret = 0;
14506 else {
14507 if (ctxt->errNo != 0)
14508 ret = ctxt->errNo;
14509 else
14510 ret = -1;
14512 if (sax != NULL)
14513 ctxt->sax = NULL;
14514 if (ctxt->myDoc != NULL) {
14515 xmlFreeDoc(ctxt->myDoc);
14516 ctxt->myDoc = NULL;
14518 xmlFreeParserCtxt(ctxt);
14520 return ret;
14522 #endif /* LIBXML_SAX1_ENABLED */
14525 * xmlCreateDocParserCtxt:
14526 * @cur: a pointer to an array of xmlChar
14528 * Creates a parser context for an XML in-memory document.
14530 * Returns the new parser context or NULL
14532 xmlParserCtxtPtr
14533 xmlCreateDocParserCtxt(const xmlChar *cur) {
14534 int len;
14536 if (cur == NULL)
14537 return(NULL);
14538 len = xmlStrlen(cur);
14539 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14542 #ifdef LIBXML_SAX1_ENABLED
14544 * xmlSAXParseDoc:
14545 * @sax: the SAX handler block
14546 * @cur: a pointer to an array of xmlChar
14547 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14548 * documents
14550 * parse an XML in-memory document and build a tree.
14551 * It use the given SAX function block to handle the parsing callback.
14552 * If sax is NULL, fallback to the default DOM tree building routines.
14554 * Returns the resulting document tree
14557 xmlDocPtr
14558 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14559 xmlDocPtr ret;
14560 xmlParserCtxtPtr ctxt;
14561 xmlSAXHandlerPtr oldsax = NULL;
14563 if (cur == NULL) return(NULL);
14566 ctxt = xmlCreateDocParserCtxt(cur);
14567 if (ctxt == NULL) return(NULL);
14568 if (sax != NULL) {
14569 oldsax = ctxt->sax;
14570 ctxt->sax = sax;
14571 ctxt->userData = NULL;
14573 xmlDetectSAX2(ctxt);
14575 xmlParseDocument(ctxt);
14576 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14577 else {
14578 ret = NULL;
14579 xmlFreeDoc(ctxt->myDoc);
14580 ctxt->myDoc = NULL;
14582 if (sax != NULL)
14583 ctxt->sax = oldsax;
14584 xmlFreeParserCtxt(ctxt);
14586 return(ret);
14590 * xmlParseDoc:
14591 * @cur: a pointer to an array of xmlChar
14593 * parse an XML in-memory document and build a tree.
14595 * Returns the resulting document tree
14598 xmlDocPtr
14599 xmlParseDoc(const xmlChar *cur) {
14600 return(xmlSAXParseDoc(NULL, cur, 0));
14602 #endif /* LIBXML_SAX1_ENABLED */
14604 #ifdef LIBXML_LEGACY_ENABLED
14605 /************************************************************************
14607 * Specific function to keep track of entities references *
14608 * and used by the XSLT debugger *
14610 ************************************************************************/
14612 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14615 * xmlAddEntityReference:
14616 * @ent : A valid entity
14617 * @firstNode : A valid first node for children of entity
14618 * @lastNode : A valid last node of children entity
14620 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14622 static void
14623 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14624 xmlNodePtr lastNode)
14626 if (xmlEntityRefFunc != NULL) {
14627 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14633 * xmlSetEntityReferenceFunc:
14634 * @func: A valid function
14636 * Set the function to call call back when a xml reference has been made
14638 void
14639 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14641 xmlEntityRefFunc = func;
14643 #endif /* LIBXML_LEGACY_ENABLED */
14645 /************************************************************************
14647 * Miscellaneous *
14649 ************************************************************************/
14651 #ifdef LIBXML_XPATH_ENABLED
14652 #include <libxml/xpath.h>
14653 #endif
14655 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14656 static int xmlParserInitialized = 0;
14659 * xmlInitParser:
14661 * Initialization function for the XML parser.
14662 * This is not reentrant. Call once before processing in case of
14663 * use in multithreaded programs.
14666 void
14667 xmlInitParser(void) {
14668 if (xmlParserInitialized != 0)
14669 return;
14671 #if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14672 if (xmlFree == free)
14673 atexit(xmlCleanupParser);
14674 #endif
14676 #ifdef LIBXML_THREAD_ENABLED
14677 __xmlGlobalInitMutexLock();
14678 if (xmlParserInitialized == 0) {
14679 #endif
14680 xmlInitThreads();
14681 xmlInitGlobals();
14682 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14683 (xmlGenericError == NULL))
14684 initGenericErrorDefaultFunc(NULL);
14685 xmlInitMemory();
14686 xmlInitializeDict();
14687 xmlInitCharEncodingHandlers();
14688 xmlDefaultSAXHandlerInit();
14689 xmlRegisterDefaultInputCallbacks();
14690 #ifdef LIBXML_OUTPUT_ENABLED
14691 xmlRegisterDefaultOutputCallbacks();
14692 #endif /* LIBXML_OUTPUT_ENABLED */
14693 #ifdef LIBXML_HTML_ENABLED
14694 htmlInitAutoClose();
14695 htmlDefaultSAXHandlerInit();
14696 #endif
14697 #ifdef LIBXML_XPATH_ENABLED
14698 xmlXPathInit();
14699 #endif
14700 xmlParserInitialized = 1;
14701 #ifdef LIBXML_THREAD_ENABLED
14703 __xmlGlobalInitMutexUnlock();
14704 #endif
14708 * xmlCleanupParser:
14710 * This function name is somewhat misleading. It does not clean up
14711 * parser state, it cleans up memory allocated by the library itself.
14712 * It is a cleanup function for the XML library. It tries to reclaim all
14713 * related global memory allocated for the library processing.
14714 * It doesn't deallocate any document related memory. One should
14715 * call xmlCleanupParser() only when the process has finished using
14716 * the library and all XML/HTML documents built with it.
14717 * See also xmlInitParser() which has the opposite function of preparing
14718 * the library for operations.
14720 * WARNING: if your application is multithreaded or has plugin support
14721 * calling this may crash the application if another thread or
14722 * a plugin is still using libxml2. It's sometimes very hard to
14723 * guess if libxml2 is in use in the application, some libraries
14724 * or plugins may use it without notice. In case of doubt abstain
14725 * from calling this function or do it just before calling exit()
14726 * to avoid leak reports from valgrind !
14729 void
14730 xmlCleanupParser(void) {
14731 if (!xmlParserInitialized)
14732 return;
14734 xmlCleanupCharEncodingHandlers();
14735 #ifdef LIBXML_CATALOG_ENABLED
14736 xmlCatalogCleanup();
14737 #endif
14738 xmlDictCleanup();
14739 xmlCleanupInputCallbacks();
14740 #ifdef LIBXML_OUTPUT_ENABLED
14741 xmlCleanupOutputCallbacks();
14742 #endif
14743 #ifdef LIBXML_SCHEMAS_ENABLED
14744 xmlSchemaCleanupTypes();
14745 xmlRelaxNGCleanupTypes();
14746 #endif
14747 xmlCleanupGlobals();
14748 xmlCleanupThreads(); /* must be last if called not from the main thread */
14749 xmlCleanupMemory();
14750 xmlParserInitialized = 0;
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Function tagged with ATTRIBUTE_DESTRUCTOR that runs the library
 * cleanup, but only while the default deallocator is installed.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void)
{
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14767 /************************************************************************
14769 * New set (2.6.0) of simpler and more flexible APIs *
14771 ************************************************************************/
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is expanded; a NULL "dict" means the string is always freed.
 *
 * The expansion is wrapped in do { } while (0) and carries no trailing
 * semicolon, so that "DICT_FREE(x);" behaves as one statement and stays
 * safe inside an unbraced if/else. Note that @str is evaluated more
 * than once.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
14786 * xmlCtxtReset:
14787 * @ctxt: an XML parser context
14789 * Reset a parser context
14791 void
14792 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14794 xmlParserInputPtr input;
14795 xmlDictPtr dict;
14797 if (ctxt == NULL)
14798 return;
14800 dict = ctxt->dict;
14802 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14803 xmlFreeInputStream(input);
14805 ctxt->inputNr = 0;
14806 ctxt->input = NULL;
14808 ctxt->spaceNr = 0;
14809 if (ctxt->spaceTab != NULL) {
14810 ctxt->spaceTab[0] = -1;
14811 ctxt->space = &ctxt->spaceTab[0];
14812 } else {
14813 ctxt->space = NULL;
14817 ctxt->nodeNr = 0;
14818 ctxt->node = NULL;
14820 ctxt->nameNr = 0;
14821 ctxt->name = NULL;
14823 ctxt->nsNr = 0;
14825 DICT_FREE(ctxt->version);
14826 ctxt->version = NULL;
14827 DICT_FREE(ctxt->encoding);
14828 ctxt->encoding = NULL;
14829 DICT_FREE(ctxt->directory);
14830 ctxt->directory = NULL;
14831 DICT_FREE(ctxt->extSubURI);
14832 ctxt->extSubURI = NULL;
14833 DICT_FREE(ctxt->extSubSystem);
14834 ctxt->extSubSystem = NULL;
14835 if (ctxt->myDoc != NULL)
14836 xmlFreeDoc(ctxt->myDoc);
14837 ctxt->myDoc = NULL;
14839 ctxt->standalone = -1;
14840 ctxt->hasExternalSubset = 0;
14841 ctxt->hasPErefs = 0;
14842 ctxt->html = 0;
14843 ctxt->external = 0;
14844 ctxt->instate = XML_PARSER_START;
14845 ctxt->token = 0;
14847 ctxt->wellFormed = 1;
14848 ctxt->nsWellFormed = 1;
14849 ctxt->disableSAX = 0;
14850 ctxt->valid = 1;
14851 #if 0
14852 ctxt->vctxt.userData = ctxt;
14853 ctxt->vctxt.error = xmlParserValidityError;
14854 ctxt->vctxt.warning = xmlParserValidityWarning;
14855 #endif
14856 ctxt->record_info = 0;
14857 ctxt->checkIndex = 0;
14858 ctxt->inSubset = 0;
14859 ctxt->errNo = XML_ERR_OK;
14860 ctxt->depth = 0;
14861 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14862 ctxt->catalogs = NULL;
14863 ctxt->nbentities = 0;
14864 ctxt->sizeentities = 0;
14865 ctxt->sizeentcopy = 0;
14866 xmlInitNodeInfoSeq(&ctxt->node_seq);
14868 if (ctxt->attsDefault != NULL) {
14869 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14870 ctxt->attsDefault = NULL;
14872 if (ctxt->attsSpecial != NULL) {
14873 xmlHashFree(ctxt->attsSpecial, NULL);
14874 ctxt->attsSpecial = NULL;
14877 #ifdef LIBXML_CATALOG_ENABLED
14878 if (ctxt->catalogs != NULL)
14879 xmlCatalogFreeLocal(ctxt->catalogs);
14880 #endif
14881 if (ctxt->lastError.code != XML_ERR_OK)
14882 xmlResetError(&ctxt->lastError);
14886 * xmlCtxtResetPush:
14887 * @ctxt: an XML parser context
14888 * @chunk: a pointer to an array of chars
14889 * @size: number of chars in the array
14890 * @filename: an optional file name or URI
14891 * @encoding: the document encoding, or NULL
14893 * Reset a push parser context
14895 * Returns 0 in case of success and 1 in case of error
14898 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14899 int size, const char *filename, const char *encoding)
14901 xmlParserInputPtr inputStream;
14902 xmlParserInputBufferPtr buf;
14903 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14905 if (ctxt == NULL)
14906 return(1);
14908 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14909 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14911 buf = xmlAllocParserInputBuffer(enc);
14912 if (buf == NULL)
14913 return(1);
14915 if (ctxt == NULL) {
14916 xmlFreeParserInputBuffer(buf);
14917 return(1);
14920 xmlCtxtReset(ctxt);
14922 if (filename == NULL) {
14923 ctxt->directory = NULL;
14924 } else {
14925 ctxt->directory = xmlParserGetDirectory(filename);
14928 inputStream = xmlNewInputStream(ctxt);
14929 if (inputStream == NULL) {
14930 xmlFreeParserInputBuffer(buf);
14931 return(1);
14934 if (filename == NULL)
14935 inputStream->filename = NULL;
14936 else
14937 inputStream->filename = (char *)
14938 xmlCanonicPath((const xmlChar *) filename);
14939 inputStream->buf = buf;
14940 xmlBufResetInput(buf->buffer, inputStream);
14942 inputPush(ctxt, inputStream);
14944 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14945 (ctxt->input->buf != NULL)) {
14946 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14947 size_t cur = ctxt->input->cur - ctxt->input->base;
14949 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14951 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14952 #ifdef DEBUG_PUSH
14953 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14954 #endif
14957 if (encoding != NULL) {
14958 xmlCharEncodingHandlerPtr hdlr;
14960 if (ctxt->encoding != NULL)
14961 xmlFree((xmlChar *) ctxt->encoding);
14962 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14964 hdlr = xmlFindCharEncodingHandler(encoding);
14965 if (hdlr != NULL) {
14966 xmlSwitchToEncoding(ctxt, hdlr);
14967 } else {
14968 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14969 "Unsupported encoding %s\n", BAD_CAST encoding);
14971 } else if (enc != XML_CHAR_ENCODING_NONE) {
14972 xmlSwitchEncoding(ctxt, enc);
14975 return(0);
14980 * xmlCtxtUseOptionsInternal:
14981 * @ctxt: an XML parser context
14982 * @options: a combination of xmlParserOption
14983 * @encoding: the user provided encoding to use
14985 * Applies the options to the parser context
14987 * Returns 0 in case of success, the set of unknown or unimplemented options
14988 * in case of error.
14990 static int
14991 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14993 if (ctxt == NULL)
14994 return(-1);
14995 if (encoding != NULL) {
14996 if (ctxt->encoding != NULL)
14997 xmlFree((xmlChar *) ctxt->encoding);
14998 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15000 if (options & XML_PARSE_RECOVER) {
15001 ctxt->recovery = 1;
15002 options -= XML_PARSE_RECOVER;
15003 ctxt->options |= XML_PARSE_RECOVER;
15004 } else
15005 ctxt->recovery = 0;
15006 if (options & XML_PARSE_DTDLOAD) {
15007 ctxt->loadsubset = XML_DETECT_IDS;
15008 options -= XML_PARSE_DTDLOAD;
15009 ctxt->options |= XML_PARSE_DTDLOAD;
15010 } else
15011 ctxt->loadsubset = 0;
15012 if (options & XML_PARSE_DTDATTR) {
15013 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15014 options -= XML_PARSE_DTDATTR;
15015 ctxt->options |= XML_PARSE_DTDATTR;
15017 if (options & XML_PARSE_NOENT) {
15018 ctxt->replaceEntities = 1;
15019 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15020 options -= XML_PARSE_NOENT;
15021 ctxt->options |= XML_PARSE_NOENT;
15022 } else
15023 ctxt->replaceEntities = 0;
15024 if (options & XML_PARSE_PEDANTIC) {
15025 ctxt->pedantic = 1;
15026 options -= XML_PARSE_PEDANTIC;
15027 ctxt->options |= XML_PARSE_PEDANTIC;
15028 } else
15029 ctxt->pedantic = 0;
15030 if (options & XML_PARSE_NOBLANKS) {
15031 ctxt->keepBlanks = 0;
15032 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15033 options -= XML_PARSE_NOBLANKS;
15034 ctxt->options |= XML_PARSE_NOBLANKS;
15035 } else
15036 ctxt->keepBlanks = 1;
15037 if (options & XML_PARSE_DTDVALID) {
15038 ctxt->validate = 1;
15039 if (options & XML_PARSE_NOWARNING)
15040 ctxt->vctxt.warning = NULL;
15041 if (options & XML_PARSE_NOERROR)
15042 ctxt->vctxt.error = NULL;
15043 options -= XML_PARSE_DTDVALID;
15044 ctxt->options |= XML_PARSE_DTDVALID;
15045 } else
15046 ctxt->validate = 0;
15047 if (options & XML_PARSE_NOWARNING) {
15048 ctxt->sax->warning = NULL;
15049 options -= XML_PARSE_NOWARNING;
15051 if (options & XML_PARSE_NOERROR) {
15052 ctxt->sax->error = NULL;
15053 ctxt->sax->fatalError = NULL;
15054 options -= XML_PARSE_NOERROR;
15056 #ifdef LIBXML_SAX1_ENABLED
15057 if (options & XML_PARSE_SAX1) {
15058 ctxt->sax->startElement = xmlSAX2StartElement;
15059 ctxt->sax->endElement = xmlSAX2EndElement;
15060 ctxt->sax->startElementNs = NULL;
15061 ctxt->sax->endElementNs = NULL;
15062 ctxt->sax->initialized = 1;
15063 options -= XML_PARSE_SAX1;
15064 ctxt->options |= XML_PARSE_SAX1;
15066 #endif /* LIBXML_SAX1_ENABLED */
15067 if (options & XML_PARSE_NODICT) {
15068 ctxt->dictNames = 0;
15069 options -= XML_PARSE_NODICT;
15070 ctxt->options |= XML_PARSE_NODICT;
15071 } else {
15072 ctxt->dictNames = 1;
15074 if (options & XML_PARSE_NOCDATA) {
15075 ctxt->sax->cdataBlock = NULL;
15076 options -= XML_PARSE_NOCDATA;
15077 ctxt->options |= XML_PARSE_NOCDATA;
15079 if (options & XML_PARSE_NSCLEAN) {
15080 ctxt->options |= XML_PARSE_NSCLEAN;
15081 options -= XML_PARSE_NSCLEAN;
15083 if (options & XML_PARSE_NONET) {
15084 ctxt->options |= XML_PARSE_NONET;
15085 options -= XML_PARSE_NONET;
15087 if (options & XML_PARSE_COMPACT) {
15088 ctxt->options |= XML_PARSE_COMPACT;
15089 options -= XML_PARSE_COMPACT;
15091 if (options & XML_PARSE_OLD10) {
15092 ctxt->options |= XML_PARSE_OLD10;
15093 options -= XML_PARSE_OLD10;
15095 if (options & XML_PARSE_NOBASEFIX) {
15096 ctxt->options |= XML_PARSE_NOBASEFIX;
15097 options -= XML_PARSE_NOBASEFIX;
15099 if (options & XML_PARSE_HUGE) {
15100 ctxt->options |= XML_PARSE_HUGE;
15101 options -= XML_PARSE_HUGE;
15102 if (ctxt->dict != NULL)
15103 xmlDictSetLimit(ctxt->dict, 0);
15105 if (options & XML_PARSE_OLDSAX) {
15106 ctxt->options |= XML_PARSE_OLDSAX;
15107 options -= XML_PARSE_OLDSAX;
15109 if (options & XML_PARSE_IGNORE_ENC) {
15110 ctxt->options |= XML_PARSE_IGNORE_ENC;
15111 options -= XML_PARSE_IGNORE_ENC;
15113 if (options & XML_PARSE_BIG_LINES) {
15114 ctxt->options |= XML_PARSE_BIG_LINES;
15115 options -= XML_PARSE_BIG_LINES;
15117 ctxt->linenumbers = 1;
15118 return (options);
15122 * xmlCtxtUseOptions:
15123 * @ctxt: an XML parser context
15124 * @options: a combination of xmlParserOption
15126 * Applies the options to the parser context
15128 * Returns 0 in case of success, the set of unknown or unimplemented options
15129 * in case of error.
15132 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15134 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15138 * xmlDoRead:
15139 * @ctxt: an XML parser context
15140 * @URL: the base URL to use for the document
15141 * @encoding: the document encoding, or NULL
15142 * @options: a combination of xmlParserOption
15143 * @reuse: keep the context for reuse
15145 * Common front-end for the xmlRead functions
15147 * Returns the resulting document tree or NULL
15149 static xmlDocPtr
15150 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15151 int options, int reuse)
15153 xmlDocPtr ret;
15155 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15156 if (encoding != NULL) {
15157 xmlCharEncodingHandlerPtr hdlr;
15159 hdlr = xmlFindCharEncodingHandler(encoding);
15160 if (hdlr != NULL)
15161 xmlSwitchToEncoding(ctxt, hdlr);
15163 if ((URL != NULL) && (ctxt->input != NULL) &&
15164 (ctxt->input->filename == NULL))
15165 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15166 xmlParseDocument(ctxt);
15167 if ((ctxt->wellFormed) || ctxt->recovery)
15168 ret = ctxt->myDoc;
15169 else {
15170 ret = NULL;
15171 if (ctxt->myDoc != NULL) {
15172 xmlFreeDoc(ctxt->myDoc);
15175 ctxt->myDoc = NULL;
15176 if (!reuse) {
15177 xmlFreeParserCtxt(ctxt);
15180 return (ret);
15184 * xmlReadDoc:
15185 * @cur: a pointer to a zero terminated string
15186 * @URL: the base URL to use for the document
15187 * @encoding: the document encoding, or NULL
15188 * @options: a combination of xmlParserOption
15190 * parse an XML in-memory document and build a tree.
15192 * Returns the resulting document tree
15194 xmlDocPtr
15195 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15197 xmlParserCtxtPtr ctxt;
15199 if (cur == NULL)
15200 return (NULL);
15201 xmlInitParser();
15203 ctxt = xmlCreateDocParserCtxt(cur);
15204 if (ctxt == NULL)
15205 return (NULL);
15206 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15210 * xmlReadFile:
15211 * @filename: a file or URL
15212 * @encoding: the document encoding, or NULL
15213 * @options: a combination of xmlParserOption
15215 * parse an XML file from the filesystem or the network.
15217 * Returns the resulting document tree
15219 xmlDocPtr
15220 xmlReadFile(const char *filename, const char *encoding, int options)
15222 xmlParserCtxtPtr ctxt;
15224 xmlInitParser();
15225 ctxt = xmlCreateURLParserCtxt(filename, options);
15226 if (ctxt == NULL)
15227 return (NULL);
15228 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15232 * xmlReadMemory:
15233 * @buffer: a pointer to a char array
15234 * @size: the size of the array
15235 * @URL: the base URL to use for the document
15236 * @encoding: the document encoding, or NULL
15237 * @options: a combination of xmlParserOption
15239 * parse an XML in-memory document and build a tree.
15241 * Returns the resulting document tree
15243 xmlDocPtr
15244 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15246 xmlParserCtxtPtr ctxt;
15248 xmlInitParser();
15249 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15250 if (ctxt == NULL)
15251 return (NULL);
15252 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15256 * xmlReadFd:
15257 * @fd: an open file descriptor
15258 * @URL: the base URL to use for the document
15259 * @encoding: the document encoding, or NULL
15260 * @options: a combination of xmlParserOption
15262 * parse an XML from a file descriptor and build a tree.
15263 * NOTE that the file descriptor will not be closed when the
15264 * reader is closed or reset.
15266 * Returns the resulting document tree
15268 xmlDocPtr
15269 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15271 xmlParserCtxtPtr ctxt;
15272 xmlParserInputBufferPtr input;
15273 xmlParserInputPtr stream;
15275 if (fd < 0)
15276 return (NULL);
15277 xmlInitParser();
15279 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15280 if (input == NULL)
15281 return (NULL);
15282 input->closecallback = NULL;
15283 ctxt = xmlNewParserCtxt();
15284 if (ctxt == NULL) {
15285 xmlFreeParserInputBuffer(input);
15286 return (NULL);
15288 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15289 if (stream == NULL) {
15290 xmlFreeParserInputBuffer(input);
15291 xmlFreeParserCtxt(ctxt);
15292 return (NULL);
15294 inputPush(ctxt, stream);
15295 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15299 * xmlReadIO:
15300 * @ioread: an I/O read function
15301 * @ioclose: an I/O close function
15302 * @ioctx: an I/O handler
15303 * @URL: the base URL to use for the document
15304 * @encoding: the document encoding, or NULL
15305 * @options: a combination of xmlParserOption
15307 * parse an XML document from I/O functions and source and build a tree.
15309 * Returns the resulting document tree
15311 xmlDocPtr
15312 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15313 void *ioctx, const char *URL, const char *encoding, int options)
15315 xmlParserCtxtPtr ctxt;
15316 xmlParserInputBufferPtr input;
15317 xmlParserInputPtr stream;
15319 if (ioread == NULL)
15320 return (NULL);
15321 xmlInitParser();
15323 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15324 XML_CHAR_ENCODING_NONE);
15325 if (input == NULL) {
15326 if (ioclose != NULL)
15327 ioclose(ioctx);
15328 return (NULL);
15330 ctxt = xmlNewParserCtxt();
15331 if (ctxt == NULL) {
15332 xmlFreeParserInputBuffer(input);
15333 return (NULL);
15335 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15336 if (stream == NULL) {
15337 xmlFreeParserInputBuffer(input);
15338 xmlFreeParserCtxt(ctxt);
15339 return (NULL);
15341 inputPush(ctxt, stream);
15342 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15346 * xmlCtxtReadDoc:
15347 * @ctxt: an XML parser context
15348 * @cur: a pointer to a zero terminated string
15349 * @URL: the base URL to use for the document
15350 * @encoding: the document encoding, or NULL
15351 * @options: a combination of xmlParserOption
15353 * parse an XML in-memory document and build a tree.
15354 * This reuses the existing @ctxt parser context
15356 * Returns the resulting document tree
15358 xmlDocPtr
15359 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15360 const char *URL, const char *encoding, int options)
15362 xmlParserInputPtr stream;
15364 if (cur == NULL)
15365 return (NULL);
15366 if (ctxt == NULL)
15367 return (NULL);
15368 xmlInitParser();
15370 xmlCtxtReset(ctxt);
15372 stream = xmlNewStringInputStream(ctxt, cur);
15373 if (stream == NULL) {
15374 return (NULL);
15376 inputPush(ctxt, stream);
15377 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15381 * xmlCtxtReadFile:
15382 * @ctxt: an XML parser context
15383 * @filename: a file or URL
15384 * @encoding: the document encoding, or NULL
15385 * @options: a combination of xmlParserOption
15387 * parse an XML file from the filesystem or the network.
15388 * This reuses the existing @ctxt parser context
15390 * Returns the resulting document tree
15392 xmlDocPtr
15393 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15394 const char *encoding, int options)
15396 xmlParserInputPtr stream;
15398 if (filename == NULL)
15399 return (NULL);
15400 if (ctxt == NULL)
15401 return (NULL);
15402 xmlInitParser();
15404 xmlCtxtReset(ctxt);
15406 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15407 if (stream == NULL) {
15408 return (NULL);
15410 inputPush(ctxt, stream);
15411 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15415 * xmlCtxtReadMemory:
15416 * @ctxt: an XML parser context
15417 * @buffer: a pointer to a char array
15418 * @size: the size of the array
15419 * @URL: the base URL to use for the document
15420 * @encoding: the document encoding, or NULL
15421 * @options: a combination of xmlParserOption
15423 * parse an XML in-memory document and build a tree.
15424 * This reuses the existing @ctxt parser context
15426 * Returns the resulting document tree
15428 xmlDocPtr
15429 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15430 const char *URL, const char *encoding, int options)
15432 xmlParserInputBufferPtr input;
15433 xmlParserInputPtr stream;
15435 if (ctxt == NULL)
15436 return (NULL);
15437 if (buffer == NULL)
15438 return (NULL);
15439 xmlInitParser();
15441 xmlCtxtReset(ctxt);
15443 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15444 if (input == NULL) {
15445 return(NULL);
15448 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15449 if (stream == NULL) {
15450 xmlFreeParserInputBuffer(input);
15451 return(NULL);
15454 inputPush(ctxt, stream);
15455 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15459 * xmlCtxtReadFd:
15460 * @ctxt: an XML parser context
15461 * @fd: an open file descriptor
15462 * @URL: the base URL to use for the document
15463 * @encoding: the document encoding, or NULL
15464 * @options: a combination of xmlParserOption
15466 * parse an XML from a file descriptor and build a tree.
15467 * This reuses the existing @ctxt parser context
15468 * NOTE that the file descriptor will not be closed when the
15469 * reader is closed or reset.
15471 * Returns the resulting document tree
15473 xmlDocPtr
15474 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15475 const char *URL, const char *encoding, int options)
15477 xmlParserInputBufferPtr input;
15478 xmlParserInputPtr stream;
15480 if (fd < 0)
15481 return (NULL);
15482 if (ctxt == NULL)
15483 return (NULL);
15484 xmlInitParser();
15486 xmlCtxtReset(ctxt);
15489 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15490 if (input == NULL)
15491 return (NULL);
15492 input->closecallback = NULL;
15493 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15494 if (stream == NULL) {
15495 xmlFreeParserInputBuffer(input);
15496 return (NULL);
15498 inputPush(ctxt, stream);
15499 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15503 * xmlCtxtReadIO:
15504 * @ctxt: an XML parser context
15505 * @ioread: an I/O read function
15506 * @ioclose: an I/O close function
15507 * @ioctx: an I/O handler
15508 * @URL: the base URL to use for the document
15509 * @encoding: the document encoding, or NULL
15510 * @options: a combination of xmlParserOption
15512 * parse an XML document from I/O functions and source and build a tree.
15513 * This reuses the existing @ctxt parser context
15515 * Returns the resulting document tree
15517 xmlDocPtr
15518 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15519 xmlInputCloseCallback ioclose, void *ioctx,
15520 const char *URL,
15521 const char *encoding, int options)
15523 xmlParserInputBufferPtr input;
15524 xmlParserInputPtr stream;
15526 if (ioread == NULL)
15527 return (NULL);
15528 if (ctxt == NULL)
15529 return (NULL);
15530 xmlInitParser();
15532 xmlCtxtReset(ctxt);
15534 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15535 XML_CHAR_ENCODING_NONE);
15536 if (input == NULL) {
15537 if (ioclose != NULL)
15538 ioclose(ioctx);
15539 return (NULL);
15541 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15542 if (stream == NULL) {
15543 xmlFreeParserInputBuffer(input);
15544 return (NULL);
15546 inputPush(ctxt, stream);
15547 return (xmlDoRead(ctxt, URL, encoding, options, 1));