mscms: Fix double free on error path in EnumColorProfilesA (scan-build).
[wine.git] / libs / xml2 / parser.c
blobd3f30b2a767aa2a11bb8d41116a03017fea72bd5
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
38 #define IN_LIBXML
39 #include "libxml.h"
41 #if defined(_WIN32)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <ctype.h>
53 #include <stdlib.h>
54 #include <libxml/xmlmemory.h>
55 #include <libxml/threads.h>
56 #include <libxml/globals.h>
57 #include <libxml/tree.h>
58 #include <libxml/parser.h>
59 #include <libxml/parserInternals.h>
60 #include <libxml/HTMLparser.h>
61 #include <libxml/valid.h>
62 #include <libxml/entities.h>
63 #include <libxml/xmlerror.h>
64 #include <libxml/encoding.h>
65 #include <libxml/xmlIO.h>
66 #include <libxml/uri.h>
67 #ifdef LIBXML_CATALOG_ENABLED
68 #include <libxml/catalog.h>
69 #endif
70 #ifdef LIBXML_SCHEMAS_ENABLED
71 #include <libxml/xmlschemastypes.h>
72 #include <libxml/relaxng.h>
73 #endif
74 #if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75 #include <libxml/xpath.h>
76 #endif
78 #include "private/buf.h"
79 #include "private/dict.h"
80 #include "private/enc.h"
81 #include "private/entities.h"
82 #include "private/error.h"
83 #include "private/globals.h"
84 #include "private/html.h"
85 #include "private/io.h"
86 #include "private/memory.h"
87 #include "private/parser.h"
88 #include "private/threads.h"
89 #include "private/xpath.h"
91 struct _xmlStartTag {
92 const xmlChar *prefix;
93 const xmlChar *URI;
94 int line;
95 int nsNr;
98 static xmlParserCtxtPtr
99 xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100 const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101 xmlParserCtxtPtr pctx);
103 static int
104 xmlParseElementStart(xmlParserCtxtPtr ctxt);
106 static void
107 xmlParseElementEnd(xmlParserCtxtPtr ctxt);
/************************************************************************
 *                                                                      *
 *      Arbitrary limits set in the parser. See XML_PARSE_HUGE          *
 *                                                                      *
 ************************************************************************/

#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * Constants for protection against abusive entity expansion
 * ("billion laughs").
 */

/*
 * XML_PARSER_NON_LINEAR is roughly the maximum allowed amplification factor
 * of serialized output after entity expansion.
 */
#define XML_PARSER_NON_LINEAR 5

/*
 * A certain amount is always allowed.
 */
#define XML_PARSER_ALLOWED_EXPANSION 1000000

/*
 * Fixed cost for each entity reference. This crudely models processing time
 * as well to protect, for example, against exponential expansion of empty
 * or very short entities.
 */
#define XML_ENT_FIXED_COST 20
/**
 * xmlParserMaxDepth:
 *
 * arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW that's the minimal amount of data
 * the parser expected to have received. It is not a hard
 * limit but an optimization when reading strings like Names
 * It is not strictly needed as long as inputs available characters
 * are followed by 0, which should be provided by the I/O level
 */
#define XML_PARSER_CHUNK_SIZE 100
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */
static const char* const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
180 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
181 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
182 const xmlChar **str);
184 static xmlParserErrors
185 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
186 xmlSAXHandlerPtr sax,
187 void *user_data, int depth, const xmlChar *URL,
188 const xmlChar *ID, xmlNodePtr *list);
190 static int
191 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
192 const char *encoding);
193 #ifdef LIBXML_LEGACY_ENABLED
194 static void
195 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
196 xmlNodePtr lastNode);
197 #endif /* LIBXML_LEGACY_ENABLED */
199 static xmlParserErrors
200 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
201 const xmlChar *string, void *user_data, xmlNodePtr *lst);
203 static int
204 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
206 /************************************************************************
208 * Some factorized error routines *
210 ************************************************************************/
213 * xmlErrAttributeDup:
214 * @ctxt: an XML parser context
215 * @prefix: the attribute prefix
216 * @localname: the attribute localname
218 * Handle a redefinition of attribute error
220 static void
221 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
222 const xmlChar * localname)
224 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
225 (ctxt->instate == XML_PARSER_EOF))
226 return;
227 if (ctxt != NULL)
228 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
230 if (prefix == NULL)
231 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
232 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
233 (const char *) localname, NULL, NULL, 0, 0,
234 "Attribute %s redefined\n", localname);
235 else
236 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
237 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
238 (const char *) prefix, (const char *) localname,
239 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
240 localname);
241 if (ctxt != NULL) {
242 ctxt->wellFormed = 0;
243 if (ctxt->recovery == 0)
244 ctxt->disableSAX = 1;
249 * xmlFatalErr:
250 * @ctxt: an XML parser context
251 * @error: the error number
252 * @extra: extra information string
254 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
256 static void
257 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
259 const char *errmsg;
261 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
262 (ctxt->instate == XML_PARSER_EOF))
263 return;
264 switch (error) {
265 case XML_ERR_INVALID_HEX_CHARREF:
266 errmsg = "CharRef: invalid hexadecimal value";
267 break;
268 case XML_ERR_INVALID_DEC_CHARREF:
269 errmsg = "CharRef: invalid decimal value";
270 break;
271 case XML_ERR_INVALID_CHARREF:
272 errmsg = "CharRef: invalid value";
273 break;
274 case XML_ERR_INTERNAL_ERROR:
275 errmsg = "internal error";
276 break;
277 case XML_ERR_PEREF_AT_EOF:
278 errmsg = "PEReference at end of document";
279 break;
280 case XML_ERR_PEREF_IN_PROLOG:
281 errmsg = "PEReference in prolog";
282 break;
283 case XML_ERR_PEREF_IN_EPILOG:
284 errmsg = "PEReference in epilog";
285 break;
286 case XML_ERR_PEREF_NO_NAME:
287 errmsg = "PEReference: no name";
288 break;
289 case XML_ERR_PEREF_SEMICOL_MISSING:
290 errmsg = "PEReference: expecting ';'";
291 break;
292 case XML_ERR_ENTITY_LOOP:
293 errmsg = "Detected an entity reference loop";
294 break;
295 case XML_ERR_ENTITY_NOT_STARTED:
296 errmsg = "EntityValue: \" or ' expected";
297 break;
298 case XML_ERR_ENTITY_PE_INTERNAL:
299 errmsg = "PEReferences forbidden in internal subset";
300 break;
301 case XML_ERR_ENTITY_NOT_FINISHED:
302 errmsg = "EntityValue: \" or ' expected";
303 break;
304 case XML_ERR_ATTRIBUTE_NOT_STARTED:
305 errmsg = "AttValue: \" or ' expected";
306 break;
307 case XML_ERR_LT_IN_ATTRIBUTE:
308 errmsg = "Unescaped '<' not allowed in attributes values";
309 break;
310 case XML_ERR_LITERAL_NOT_STARTED:
311 errmsg = "SystemLiteral \" or ' expected";
312 break;
313 case XML_ERR_LITERAL_NOT_FINISHED:
314 errmsg = "Unfinished System or Public ID \" or ' expected";
315 break;
316 case XML_ERR_MISPLACED_CDATA_END:
317 errmsg = "Sequence ']]>' not allowed in content";
318 break;
319 case XML_ERR_URI_REQUIRED:
320 errmsg = "SYSTEM or PUBLIC, the URI is missing";
321 break;
322 case XML_ERR_PUBID_REQUIRED:
323 errmsg = "PUBLIC, the Public Identifier is missing";
324 break;
325 case XML_ERR_HYPHEN_IN_COMMENT:
326 errmsg = "Comment must not contain '--' (double-hyphen)";
327 break;
328 case XML_ERR_PI_NOT_STARTED:
329 errmsg = "xmlParsePI : no target name";
330 break;
331 case XML_ERR_RESERVED_XML_NAME:
332 errmsg = "Invalid PI name";
333 break;
334 case XML_ERR_NOTATION_NOT_STARTED:
335 errmsg = "NOTATION: Name expected here";
336 break;
337 case XML_ERR_NOTATION_NOT_FINISHED:
338 errmsg = "'>' required to close NOTATION declaration";
339 break;
340 case XML_ERR_VALUE_REQUIRED:
341 errmsg = "Entity value required";
342 break;
343 case XML_ERR_URI_FRAGMENT:
344 errmsg = "Fragment not allowed";
345 break;
346 case XML_ERR_ATTLIST_NOT_STARTED:
347 errmsg = "'(' required to start ATTLIST enumeration";
348 break;
349 case XML_ERR_NMTOKEN_REQUIRED:
350 errmsg = "NmToken expected in ATTLIST enumeration";
351 break;
352 case XML_ERR_ATTLIST_NOT_FINISHED:
353 errmsg = "')' required to finish ATTLIST enumeration";
354 break;
355 case XML_ERR_MIXED_NOT_STARTED:
356 errmsg = "MixedContentDecl : '|' or ')*' expected";
357 break;
358 case XML_ERR_PCDATA_REQUIRED:
359 errmsg = "MixedContentDecl : '#PCDATA' expected";
360 break;
361 case XML_ERR_ELEMCONTENT_NOT_STARTED:
362 errmsg = "ContentDecl : Name or '(' expected";
363 break;
364 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
365 errmsg = "ContentDecl : ',' '|' or ')' expected";
366 break;
367 case XML_ERR_PEREF_IN_INT_SUBSET:
368 errmsg =
369 "PEReference: forbidden within markup decl in internal subset";
370 break;
371 case XML_ERR_GT_REQUIRED:
372 errmsg = "expected '>'";
373 break;
374 case XML_ERR_CONDSEC_INVALID:
375 errmsg = "XML conditional section '[' expected";
376 break;
377 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
378 errmsg = "Content error in the external subset";
379 break;
380 case XML_ERR_CONDSEC_INVALID_KEYWORD:
381 errmsg =
382 "conditional section INCLUDE or IGNORE keyword expected";
383 break;
384 case XML_ERR_CONDSEC_NOT_FINISHED:
385 errmsg = "XML conditional section not closed";
386 break;
387 case XML_ERR_XMLDECL_NOT_STARTED:
388 errmsg = "Text declaration '<?xml' required";
389 break;
390 case XML_ERR_XMLDECL_NOT_FINISHED:
391 errmsg = "parsing XML declaration: '?>' expected";
392 break;
393 case XML_ERR_EXT_ENTITY_STANDALONE:
394 errmsg = "external parsed entities cannot be standalone";
395 break;
396 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
397 errmsg = "EntityRef: expecting ';'";
398 break;
399 case XML_ERR_DOCTYPE_NOT_FINISHED:
400 errmsg = "DOCTYPE improperly terminated";
401 break;
402 case XML_ERR_LTSLASH_REQUIRED:
403 errmsg = "EndTag: '</' not found";
404 break;
405 case XML_ERR_EQUAL_REQUIRED:
406 errmsg = "expected '='";
407 break;
408 case XML_ERR_STRING_NOT_CLOSED:
409 errmsg = "String not closed expecting \" or '";
410 break;
411 case XML_ERR_STRING_NOT_STARTED:
412 errmsg = "String not started expecting ' or \"";
413 break;
414 case XML_ERR_ENCODING_NAME:
415 errmsg = "Invalid XML encoding name";
416 break;
417 case XML_ERR_STANDALONE_VALUE:
418 errmsg = "standalone accepts only 'yes' or 'no'";
419 break;
420 case XML_ERR_DOCUMENT_EMPTY:
421 errmsg = "Document is empty";
422 break;
423 case XML_ERR_DOCUMENT_END:
424 errmsg = "Extra content at the end of the document";
425 break;
426 case XML_ERR_NOT_WELL_BALANCED:
427 errmsg = "chunk is not well balanced";
428 break;
429 case XML_ERR_EXTRA_CONTENT:
430 errmsg = "extra content at the end of well balanced chunk";
431 break;
432 case XML_ERR_VERSION_MISSING:
433 errmsg = "Malformed declaration expecting version";
434 break;
435 case XML_ERR_NAME_TOO_LONG:
436 errmsg = "Name too long";
437 break;
438 #if 0
439 case:
440 errmsg = "";
441 break;
442 #endif
443 default:
444 errmsg = "Unregistered error message";
446 if (ctxt != NULL)
447 ctxt->errNo = error;
448 if (info == NULL) {
449 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
450 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
451 errmsg);
452 } else {
453 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
454 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
455 errmsg, info);
457 if (ctxt != NULL) {
458 ctxt->wellFormed = 0;
459 if (ctxt->recovery == 0)
460 ctxt->disableSAX = 1;
465 * xmlFatalErrMsg:
466 * @ctxt: an XML parser context
467 * @error: the error number
468 * @msg: the error message
470 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
472 static void LIBXML_ATTR_FORMAT(3,0)
473 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
474 const char *msg)
476 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
477 (ctxt->instate == XML_PARSER_EOF))
478 return;
479 if (ctxt != NULL)
480 ctxt->errNo = error;
481 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
482 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
483 if (ctxt != NULL) {
484 ctxt->wellFormed = 0;
485 if (ctxt->recovery == 0)
486 ctxt->disableSAX = 1;
491 * xmlWarningMsg:
492 * @ctxt: an XML parser context
493 * @error: the error number
494 * @msg: the error message
495 * @str1: extra data
496 * @str2: extra data
498 * Handle a warning.
500 static void LIBXML_ATTR_FORMAT(3,0)
501 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
502 const char *msg, const xmlChar *str1, const xmlChar *str2)
504 xmlStructuredErrorFunc schannel = NULL;
506 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
507 (ctxt->instate == XML_PARSER_EOF))
508 return;
509 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
510 (ctxt->sax->initialized == XML_SAX2_MAGIC))
511 schannel = ctxt->sax->serror;
512 if (ctxt != NULL) {
513 __xmlRaiseError(schannel,
514 (ctxt->sax) ? ctxt->sax->warning : NULL,
515 ctxt->userData,
516 ctxt, NULL, XML_FROM_PARSER, error,
517 XML_ERR_WARNING, NULL, 0,
518 (const char *) str1, (const char *) str2, NULL, 0, 0,
519 msg, (const char *) str1, (const char *) str2);
520 } else {
521 __xmlRaiseError(schannel, NULL, NULL,
522 ctxt, NULL, XML_FROM_PARSER, error,
523 XML_ERR_WARNING, NULL, 0,
524 (const char *) str1, (const char *) str2, NULL, 0, 0,
525 msg, (const char *) str1, (const char *) str2);
530 * xmlValidityError:
531 * @ctxt: an XML parser context
532 * @error: the error number
533 * @msg: the error message
534 * @str1: extra data
536 * Handle a validity error.
538 static void LIBXML_ATTR_FORMAT(3,0)
539 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
540 const char *msg, const xmlChar *str1, const xmlChar *str2)
542 xmlStructuredErrorFunc schannel = NULL;
544 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
545 (ctxt->instate == XML_PARSER_EOF))
546 return;
547 if (ctxt != NULL) {
548 ctxt->errNo = error;
549 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
550 schannel = ctxt->sax->serror;
552 if (ctxt != NULL) {
553 __xmlRaiseError(schannel,
554 ctxt->vctxt.error, ctxt->vctxt.userData,
555 ctxt, NULL, XML_FROM_DTD, error,
556 XML_ERR_ERROR, NULL, 0, (const char *) str1,
557 (const char *) str2, NULL, 0, 0,
558 msg, (const char *) str1, (const char *) str2);
559 ctxt->valid = 0;
560 } else {
561 __xmlRaiseError(schannel, NULL, NULL,
562 ctxt, NULL, XML_FROM_DTD, error,
563 XML_ERR_ERROR, NULL, 0, (const char *) str1,
564 (const char *) str2, NULL, 0, 0,
565 msg, (const char *) str1, (const char *) str2);
570 * xmlFatalErrMsgInt:
571 * @ctxt: an XML parser context
572 * @error: the error number
573 * @msg: the error message
574 * @val: an integer value
576 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
578 static void LIBXML_ATTR_FORMAT(3,0)
579 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
580 const char *msg, int val)
582 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
583 (ctxt->instate == XML_PARSER_EOF))
584 return;
585 if (ctxt != NULL)
586 ctxt->errNo = error;
587 __xmlRaiseError(NULL, NULL, NULL,
588 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
589 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
590 if (ctxt != NULL) {
591 ctxt->wellFormed = 0;
592 if (ctxt->recovery == 0)
593 ctxt->disableSAX = 1;
598 * xmlFatalErrMsgStrIntStr:
599 * @ctxt: an XML parser context
600 * @error: the error number
601 * @msg: the error message
602 * @str1: an string info
603 * @val: an integer value
604 * @str2: an string info
606 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
608 static void LIBXML_ATTR_FORMAT(3,0)
609 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
610 const char *msg, const xmlChar *str1, int val,
611 const xmlChar *str2)
613 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
614 (ctxt->instate == XML_PARSER_EOF))
615 return;
616 if (ctxt != NULL)
617 ctxt->errNo = error;
618 __xmlRaiseError(NULL, NULL, NULL,
619 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
620 NULL, 0, (const char *) str1, (const char *) str2,
621 NULL, val, 0, msg, str1, val, str2);
622 if (ctxt != NULL) {
623 ctxt->wellFormed = 0;
624 if (ctxt->recovery == 0)
625 ctxt->disableSAX = 1;
630 * xmlFatalErrMsgStr:
631 * @ctxt: an XML parser context
632 * @error: the error number
633 * @msg: the error message
634 * @val: a string value
636 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
638 static void LIBXML_ATTR_FORMAT(3,0)
639 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
640 const char *msg, const xmlChar * val)
642 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
643 (ctxt->instate == XML_PARSER_EOF))
644 return;
645 if (ctxt != NULL)
646 ctxt->errNo = error;
647 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
648 XML_FROM_PARSER, error, XML_ERR_FATAL,
649 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
650 val);
651 if (ctxt != NULL) {
652 ctxt->wellFormed = 0;
653 if (ctxt->recovery == 0)
654 ctxt->disableSAX = 1;
659 * xmlErrMsgStr:
660 * @ctxt: an XML parser context
661 * @error: the error number
662 * @msg: the error message
663 * @val: a string value
665 * Handle a non fatal parser error
667 static void LIBXML_ATTR_FORMAT(3,0)
668 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
669 const char *msg, const xmlChar * val)
671 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672 (ctxt->instate == XML_PARSER_EOF))
673 return;
674 if (ctxt != NULL)
675 ctxt->errNo = error;
676 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
677 XML_FROM_PARSER, error, XML_ERR_ERROR,
678 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
679 val);
683 * xmlNsErr:
684 * @ctxt: an XML parser context
685 * @error: the error number
686 * @msg: the message
687 * @info1: extra information string
688 * @info2: extra information string
690 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
692 static void LIBXML_ATTR_FORMAT(3,0)
693 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
694 const char *msg,
695 const xmlChar * info1, const xmlChar * info2,
696 const xmlChar * info3)
698 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
699 (ctxt->instate == XML_PARSER_EOF))
700 return;
701 if (ctxt != NULL)
702 ctxt->errNo = error;
703 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
704 XML_ERR_ERROR, NULL, 0, (const char *) info1,
705 (const char *) info2, (const char *) info3, 0, 0, msg,
706 info1, info2, info3);
707 if (ctxt != NULL)
708 ctxt->nsWellFormed = 0;
712 * xmlNsWarn
713 * @ctxt: an XML parser context
714 * @error: the error number
715 * @msg: the message
716 * @info1: extra information string
717 * @info2: extra information string
719 * Handle a namespace warning error
721 static void LIBXML_ATTR_FORMAT(3,0)
722 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
723 const char *msg,
724 const xmlChar * info1, const xmlChar * info2,
725 const xmlChar * info3)
727 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
728 (ctxt->instate == XML_PARSER_EOF))
729 return;
730 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
731 XML_ERR_WARNING, NULL, 0, (const char *) info1,
732 (const char *) info2, (const char *) info3, 0, 0, msg,
733 info1, info2, info3);
/**
 * xmlSaturatedAdd:
 * @dst:  accumulator, updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of wrapping
 * around on overflow.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += val;
}
/**
 * xmlSaturatedAddSizeT:
 * @dst:  accumulator, updated in place
 * @val:  amount to add, as a size_t
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of wrapping
 * around on overflow.
 *
 * @val is taken as size_t so that a value wider than unsigned long (size_t
 * is 64-bit while unsigned long is 32-bit on LLP64 targets) is compared at
 * full width and saturates, rather than being truncated at the call.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;
}
753 * xmlParserEntityCheck:
754 * @ctxt: parser context
755 * @extra: sum of unexpanded entity sizes
757 * Check for non-linear entity expansion behaviour.
759 * In some cases like xmlStringDecodeEntities, this function is called
760 * for each, possibly nested entity and its unexpanded content length.
762 * In other cases like xmlParseReference, it's only called for each
763 * top-level entity with its unexpanded content length plus the sum of
764 * the unexpanded content lengths (plus fixed cost) of all nested
765 * entities.
767 * Summing the unexpanded lengths also adds the length of the reference.
768 * This is by design. Taking the length of the entity name into account
769 * discourages attacks that try to waste CPU time with abusively long
770 * entity names. See test/recurse/lol6.xml for example. Each call also
771 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
772 * short entities.
774 * Returns 1 on error, 0 on success.
776 static int
777 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
779 unsigned long consumed;
780 xmlParserInputPtr input = ctxt->input;
781 xmlEntityPtr entity = input->entity;
784 * Compute total consumed bytes so far, including input streams of
785 * external entities.
787 consumed = input->parentConsumed;
788 if ((entity == NULL) ||
789 ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
790 ((entity->flags & XML_ENT_PARSED) == 0))) {
791 xmlSaturatedAdd(&consumed, input->consumed);
792 xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
794 xmlSaturatedAdd(&consumed, ctxt->sizeentities);
797 * Add extra cost and some fixed cost.
799 xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
800 xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
803 * It's important to always use saturation arithmetic when tracking
804 * entity sizes to make the size checks reliable. If "sizeentcopy"
805 * overflows, we have to abort.
807 if ((ctxt->sizeentcopy > XML_PARSER_ALLOWED_EXPANSION) &&
808 ((ctxt->sizeentcopy >= ULONG_MAX) ||
809 (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
810 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
811 "Maximum entity amplification factor exceeded");
812 xmlHaltParser(ctxt);
813 return(1);
816 return(0);
819 /************************************************************************
821 * Library wide options *
823 ************************************************************************/
826 * xmlHasFeature:
827 * @feature: the feature to be examined
829 * Examines if the library has been compiled with a given feature.
831 * Returns a non-zero value if the feature exist, otherwise zero.
832 * Returns zero (0) if the feature does not exist or an unknown
833 * unknown feature is requested, non-zero otherwise.
836 xmlHasFeature(xmlFeature feature)
838 switch (feature) {
839 case XML_WITH_THREAD:
840 #ifdef LIBXML_THREAD_ENABLED
841 return(1);
842 #else
843 return(0);
844 #endif
845 case XML_WITH_TREE:
846 #ifdef LIBXML_TREE_ENABLED
847 return(1);
848 #else
849 return(0);
850 #endif
851 case XML_WITH_OUTPUT:
852 #ifdef LIBXML_OUTPUT_ENABLED
853 return(1);
854 #else
855 return(0);
856 #endif
857 case XML_WITH_PUSH:
858 #ifdef LIBXML_PUSH_ENABLED
859 return(1);
860 #else
861 return(0);
862 #endif
863 case XML_WITH_READER:
864 #ifdef LIBXML_READER_ENABLED
865 return(1);
866 #else
867 return(0);
868 #endif
869 case XML_WITH_PATTERN:
870 #ifdef LIBXML_PATTERN_ENABLED
871 return(1);
872 #else
873 return(0);
874 #endif
875 case XML_WITH_WRITER:
876 #ifdef LIBXML_WRITER_ENABLED
877 return(1);
878 #else
879 return(0);
880 #endif
881 case XML_WITH_SAX1:
882 #ifdef LIBXML_SAX1_ENABLED
883 return(1);
884 #else
885 return(0);
886 #endif
887 case XML_WITH_FTP:
888 #ifdef LIBXML_FTP_ENABLED
889 return(1);
890 #else
891 return(0);
892 #endif
893 case XML_WITH_HTTP:
894 #ifdef LIBXML_HTTP_ENABLED
895 return(1);
896 #else
897 return(0);
898 #endif
899 case XML_WITH_VALID:
900 #ifdef LIBXML_VALID_ENABLED
901 return(1);
902 #else
903 return(0);
904 #endif
905 case XML_WITH_HTML:
906 #ifdef LIBXML_HTML_ENABLED
907 return(1);
908 #else
909 return(0);
910 #endif
911 case XML_WITH_LEGACY:
912 #ifdef LIBXML_LEGACY_ENABLED
913 return(1);
914 #else
915 return(0);
916 #endif
917 case XML_WITH_C14N:
918 #ifdef LIBXML_C14N_ENABLED
919 return(1);
920 #else
921 return(0);
922 #endif
923 case XML_WITH_CATALOG:
924 #ifdef LIBXML_CATALOG_ENABLED
925 return(1);
926 #else
927 return(0);
928 #endif
929 case XML_WITH_XPATH:
930 #ifdef LIBXML_XPATH_ENABLED
931 return(1);
932 #else
933 return(0);
934 #endif
935 case XML_WITH_XPTR:
936 #ifdef LIBXML_XPTR_ENABLED
937 return(1);
938 #else
939 return(0);
940 #endif
941 case XML_WITH_XINCLUDE:
942 #ifdef LIBXML_XINCLUDE_ENABLED
943 return(1);
944 #else
945 return(0);
946 #endif
947 case XML_WITH_ICONV:
948 #ifdef LIBXML_ICONV_ENABLED
949 return(1);
950 #else
951 return(0);
952 #endif
953 case XML_WITH_ISO8859X:
954 #ifdef LIBXML_ISO8859X_ENABLED
955 return(1);
956 #else
957 return(0);
958 #endif
959 case XML_WITH_UNICODE:
960 #ifdef LIBXML_UNICODE_ENABLED
961 return(1);
962 #else
963 return(0);
964 #endif
965 case XML_WITH_REGEXP:
966 #ifdef LIBXML_REGEXP_ENABLED
967 return(1);
968 #else
969 return(0);
970 #endif
971 case XML_WITH_AUTOMATA:
972 #ifdef LIBXML_AUTOMATA_ENABLED
973 return(1);
974 #else
975 return(0);
976 #endif
977 case XML_WITH_EXPR:
978 #ifdef LIBXML_EXPR_ENABLED
979 return(1);
980 #else
981 return(0);
982 #endif
983 case XML_WITH_SCHEMAS:
984 #ifdef LIBXML_SCHEMAS_ENABLED
985 return(1);
986 #else
987 return(0);
988 #endif
989 case XML_WITH_SCHEMATRON:
990 #ifdef LIBXML_SCHEMATRON_ENABLED
991 return(1);
992 #else
993 return(0);
994 #endif
995 case XML_WITH_MODULES:
996 #ifdef LIBXML_MODULES_ENABLED
997 return(1);
998 #else
999 return(0);
1000 #endif
1001 case XML_WITH_DEBUG:
1002 #ifdef LIBXML_DEBUG_ENABLED
1003 return(1);
1004 #else
1005 return(0);
1006 #endif
1007 case XML_WITH_DEBUG_MEM:
1008 #ifdef DEBUG_MEMORY_LOCATION
1009 return(1);
1010 #else
1011 return(0);
1012 #endif
1013 case XML_WITH_DEBUG_RUN:
1014 return(0);
1015 case XML_WITH_ZLIB:
1016 #ifdef LIBXML_ZLIB_ENABLED
1017 return(1);
1018 #else
1019 return(0);
1020 #endif
1021 case XML_WITH_LZMA:
1022 #ifdef LIBXML_LZMA_ENABLED
1023 return(1);
1024 #else
1025 return(0);
1026 #endif
1027 case XML_WITH_ICU:
1028 #ifdef LIBXML_ICU_ENABLED
1029 return(1);
1030 #else
1031 return(0);
1032 #endif
1033 default:
1034 break;
1036 return(0);
1039 /************************************************************************
1041 * SAX2 defaulted attributes handling *
1043 ************************************************************************/
1046 * xmlDetectSAX2:
1047 * @ctxt: an XML parser context
1049 * Do the SAX2 detection and specific initialization
1051 static void
1052 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1053 xmlSAXHandlerPtr sax;
1055 /* Avoid unused variable warning if features are disabled. */
1056 (void) sax;
1058 if (ctxt == NULL) return;
1059 sax = ctxt->sax;
1060 #ifdef LIBXML_SAX1_ENABLED
1061 if ((sax) && (sax->initialized == XML_SAX2_MAGIC) &&
1062 ((sax->startElementNs != NULL) ||
1063 (sax->endElementNs != NULL) ||
1064 ((sax->startElement == NULL) && (sax->endElement == NULL))))
1065 ctxt->sax2 = 1;
1066 #else
1067 ctxt->sax2 = 1;
1068 #endif /* LIBXML_SAX1_ENABLED */
1070 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1071 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1072 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1073 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1074 (ctxt->str_xml_ns == NULL)) {
1075 xmlErrMemory(ctxt, NULL);
1079 typedef struct _xmlDefAttrs xmlDefAttrs;
1080 typedef xmlDefAttrs *xmlDefAttrsPtr;
1081 struct _xmlDefAttrs {
1082 int nbAttrs; /* number of defaulted attributes on that element */
1083 int maxAttrs; /* the size of the array */
1084 #if __STDC_VERSION__ >= 199901L
1085 /* Using a C99 flexible array member avoids UBSan errors. */
1086 const xmlChar *values[]; /* array of localname/prefix/values/external */
1087 #else
1088 const xmlChar *values[5];
1089 #endif
1093 * xmlAttrNormalizeSpace:
1094 * @src: the source string
1095 * @dst: the target string
1097 * Normalize the space in non CDATA attribute values:
1098 * If the attribute type is not CDATA, then the XML processor MUST further
1099 * process the normalized attribute value by discarding any leading and
1100 * trailing space (#x20) characters, and by replacing sequences of space
1101 * (#x20) characters by a single space (#x20) character.
1102 * Note that the size of dst need to be at least src, and if one doesn't need
1103 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1104 * passing src as dst is just fine.
1106 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1107 * is needed.
1109 static xmlChar *
1110 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1112 if ((src == NULL) || (dst == NULL))
1113 return(NULL);
1115 while (*src == 0x20) src++;
1116 while (*src != 0) {
1117 if (*src == 0x20) {
1118 while (*src == 0x20) src++;
1119 if (*src != 0)
1120 *dst++ = 0x20;
1121 } else {
1122 *dst++ = *src++;
1125 *dst = 0;
1126 if (dst == src)
1127 return(NULL);
1128 return(dst);
1132 * xmlAttrNormalizeSpace2:
1133 * @src: the source string
1135 * Normalize the space in non CDATA attribute values, a slightly more complex
1136 * front end to avoid allocation problems when running on attribute values
1137 * coming from the input.
1139 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1140 * is needed.
1142 static const xmlChar *
1143 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1145 int i;
1146 int remove_head = 0;
1147 int need_realloc = 0;
1148 const xmlChar *cur;
1150 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1151 return(NULL);
1152 i = *len;
1153 if (i <= 0)
1154 return(NULL);
1156 cur = src;
1157 while (*cur == 0x20) {
1158 cur++;
1159 remove_head++;
1161 while (*cur != 0) {
1162 if (*cur == 0x20) {
1163 cur++;
1164 if ((*cur == 0x20) || (*cur == 0)) {
1165 need_realloc = 1;
1166 break;
1168 } else
1169 cur++;
1171 if (need_realloc) {
1172 xmlChar *ret;
1174 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1175 if (ret == NULL) {
1176 xmlErrMemory(ctxt, NULL);
1177 return(NULL);
1179 xmlAttrNormalizeSpace(ret, ret);
1180 *len = strlen((const char *)ret);
1181 return(ret);
1182 } else if (remove_head) {
1183 *len -= remove_head;
1184 memmove(src, src + remove_head, 1 + *len);
1185 return(src);
1187 return(NULL);
1191 * xmlAddDefAttrs:
1192 * @ctxt: an XML parser context
1193 * @fullname: the element fullname
1194 * @fullattr: the attribute fullname
1195 * @value: the attribute value
1197 * Add a defaulted attribute for an element
1199 static void
1200 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1201 const xmlChar *fullname,
1202 const xmlChar *fullattr,
1203 const xmlChar *value) {
1204 xmlDefAttrsPtr defaults;
1205 int len;
1206 const xmlChar *name;
1207 const xmlChar *prefix;
1210 * Allows to detect attribute redefinitions
1212 if (ctxt->attsSpecial != NULL) {
1213 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1214 return;
1217 if (ctxt->attsDefault == NULL) {
1218 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1219 if (ctxt->attsDefault == NULL)
1220 goto mem_error;
1224 * split the element name into prefix:localname , the string found
1225 * are within the DTD and then not associated to namespace names.
1227 name = xmlSplitQName3(fullname, &len);
1228 if (name == NULL) {
1229 name = xmlDictLookup(ctxt->dict, fullname, -1);
1230 prefix = NULL;
1231 } else {
1232 name = xmlDictLookup(ctxt->dict, name, -1);
1233 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1237 * make sure there is some storage
1239 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1240 if (defaults == NULL) {
1241 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1242 (4 * 5) * sizeof(const xmlChar *));
1243 if (defaults == NULL)
1244 goto mem_error;
1245 defaults->nbAttrs = 0;
1246 defaults->maxAttrs = 4;
1247 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1248 defaults, NULL) < 0) {
1249 xmlFree(defaults);
1250 goto mem_error;
1252 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1253 xmlDefAttrsPtr temp;
1255 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1256 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1257 if (temp == NULL)
1258 goto mem_error;
1259 defaults = temp;
1260 defaults->maxAttrs *= 2;
1261 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1262 defaults, NULL) < 0) {
1263 xmlFree(defaults);
1264 goto mem_error;
1269 * Split the element name into prefix:localname , the string found
1270 * are within the DTD and hen not associated to namespace names.
1272 name = xmlSplitQName3(fullattr, &len);
1273 if (name == NULL) {
1274 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1275 prefix = NULL;
1276 } else {
1277 name = xmlDictLookup(ctxt->dict, name, -1);
1278 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1281 defaults->values[5 * defaults->nbAttrs] = name;
1282 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1283 /* intern the string and precompute the end */
1284 len = xmlStrlen(value);
1285 value = xmlDictLookup(ctxt->dict, value, len);
1286 if (value == NULL)
1287 goto mem_error;
1288 defaults->values[5 * defaults->nbAttrs + 2] = value;
1289 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1290 if (ctxt->external)
1291 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1292 else
1293 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1294 defaults->nbAttrs++;
1296 return;
1298 mem_error:
1299 xmlErrMemory(ctxt, NULL);
1300 return;
1304 * xmlAddSpecialAttr:
1305 * @ctxt: an XML parser context
1306 * @fullname: the element fullname
1307 * @fullattr: the attribute fullname
1308 * @type: the attribute type
1310 * Register this attribute type
1312 static void
1313 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1314 const xmlChar *fullname,
1315 const xmlChar *fullattr,
1316 int type)
1318 if (ctxt->attsSpecial == NULL) {
1319 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1320 if (ctxt->attsSpecial == NULL)
1321 goto mem_error;
1324 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1325 return;
1327 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1328 (void *) (ptrdiff_t) type);
1329 return;
1331 mem_error:
1332 xmlErrMemory(ctxt, NULL);
1333 return;
1337 * xmlCleanSpecialAttrCallback:
1339 * Removes CDATA attributes from the special attribute table
1341 static void
1342 xmlCleanSpecialAttrCallback(void *payload, void *data,
1343 const xmlChar *fullname, const xmlChar *fullattr,
1344 const xmlChar *unused ATTRIBUTE_UNUSED) {
1345 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1347 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1348 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1353 * xmlCleanSpecialAttr:
1354 * @ctxt: an XML parser context
1356 * Trim the list of attributes defined to remove all those of type
1357 * CDATA as they are not special. This call should be done when finishing
1358 * to parse the DTD and before starting to parse the document root.
1360 static void
1361 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1363 if (ctxt->attsSpecial == NULL)
1364 return;
1366 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1368 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1369 xmlHashFree(ctxt->attsSpecial, NULL);
1370 ctxt->attsSpecial = NULL;
1372 return;
1376 * xmlCheckLanguageID:
1377 * @lang: pointer to the string value
1379 * DEPRECATED: Internal function, do not use.
1381 * Checks that the value conforms to the LanguageID production:
1383 * NOTE: this is somewhat deprecated, those productions were removed from
1384 * the XML Second edition.
1386 * [33] LanguageID ::= Langcode ('-' Subcode)*
1387 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1388 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1389 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1390 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1391 * [38] Subcode ::= ([a-z] | [A-Z])+
1393 * The current REC reference the successors of RFC 1766, currently 5646
1395 * http://www.rfc-editor.org/rfc/rfc5646.txt
1396 * langtag = language
1397 * ["-" script]
1398 * ["-" region]
1399 * *("-" variant)
1400 * *("-" extension)
1401 * ["-" privateuse]
1402 * language = 2*3ALPHA ; shortest ISO 639 code
1403 * ["-" extlang] ; sometimes followed by
1404 * ; extended language subtags
1405 * / 4ALPHA ; or reserved for future use
1406 * / 5*8ALPHA ; or registered language subtag
1408 * extlang = 3ALPHA ; selected ISO 639 codes
1409 * *2("-" 3ALPHA) ; permanently reserved
1411 * script = 4ALPHA ; ISO 15924 code
1413 * region = 2ALPHA ; ISO 3166-1 code
1414 * / 3DIGIT ; UN M.49 code
1416 * variant = 5*8alphanum ; registered variants
1417 * / (DIGIT 3alphanum)
1419 * extension = singleton 1*("-" (2*8alphanum))
1421 * ; Single alphanumerics
1422 * ; "x" reserved for private use
1423 * singleton = DIGIT ; 0 - 9
1424 * / %x41-57 ; A - W
1425 * / %x59-5A ; Y - Z
1426 * / %x61-77 ; a - w
1427 * / %x79-7A ; y - z
1429 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1430 * The parser below doesn't try to cope with extension or privateuse
1431 * that could be added but that's not interoperable anyway
1433 * Returns 1 if correct 0 otherwise
1436 xmlCheckLanguageID(const xmlChar * lang)
1438 const xmlChar *cur = lang, *nxt;
1440 if (cur == NULL)
1441 return (0);
1442 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1443 ((cur[0] == 'I') && (cur[1] == '-')) ||
1444 ((cur[0] == 'x') && (cur[1] == '-')) ||
1445 ((cur[0] == 'X') && (cur[1] == '-'))) {
1447 * Still allow IANA code and user code which were coming
1448 * from the previous version of the XML-1.0 specification
1449 * it's deprecated but we should not fail
1451 cur += 2;
1452 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1453 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1454 cur++;
1455 return(cur[0] == 0);
1457 nxt = cur;
1458 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1459 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1460 nxt++;
1461 if (nxt - cur >= 4) {
1463 * Reserved
1465 if ((nxt - cur > 8) || (nxt[0] != 0))
1466 return(0);
1467 return(1);
1469 if (nxt - cur < 2)
1470 return(0);
1471 /* we got an ISO 639 code */
1472 if (nxt[0] == 0)
1473 return(1);
1474 if (nxt[0] != '-')
1475 return(0);
1477 nxt++;
1478 cur = nxt;
1479 /* now we can have extlang or script or region or variant */
1480 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1481 goto region_m49;
1483 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1484 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1485 nxt++;
1486 if (nxt - cur == 4)
1487 goto script;
1488 if (nxt - cur == 2)
1489 goto region;
1490 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1491 goto variant;
1492 if (nxt - cur != 3)
1493 return(0);
1494 /* we parsed an extlang */
1495 if (nxt[0] == 0)
1496 return(1);
1497 if (nxt[0] != '-')
1498 return(0);
1500 nxt++;
1501 cur = nxt;
1502 /* now we can have script or region or variant */
1503 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1504 goto region_m49;
1506 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1507 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1508 nxt++;
1509 if (nxt - cur == 2)
1510 goto region;
1511 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1512 goto variant;
1513 if (nxt - cur != 4)
1514 return(0);
1515 /* we parsed a script */
1516 script:
1517 if (nxt[0] == 0)
1518 return(1);
1519 if (nxt[0] != '-')
1520 return(0);
1522 nxt++;
1523 cur = nxt;
1524 /* now we can have region or variant */
1525 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1526 goto region_m49;
1528 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1529 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1530 nxt++;
1532 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1533 goto variant;
1534 if (nxt - cur != 2)
1535 return(0);
1536 /* we parsed a region */
1537 region:
1538 if (nxt[0] == 0)
1539 return(1);
1540 if (nxt[0] != '-')
1541 return(0);
1543 nxt++;
1544 cur = nxt;
1545 /* now we can just have a variant */
1546 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1547 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1548 nxt++;
1550 if ((nxt - cur < 5) || (nxt - cur > 8))
1551 return(0);
1553 /* we parsed a variant */
1554 variant:
1555 if (nxt[0] == 0)
1556 return(1);
1557 if (nxt[0] != '-')
1558 return(0);
1559 /* extensions and private use subtags not checked */
1560 return (1);
1562 region_m49:
1563 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1564 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1565 nxt += 3;
1566 goto region;
1568 return(0);
1571 /************************************************************************
1573 * Parser stacks related functions and macros *
1575 ************************************************************************/
1577 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1578 const xmlChar ** str);
1580 #ifdef SAX2
1582 * nsPush:
1583 * @ctxt: an XML parser context
1584 * @prefix: the namespace prefix or NULL
1585 * @URL: the namespace name
1587 * Pushes a new parser namespace on top of the ns stack
1589 * Returns -1 in case of error, -2 if the namespace should be discarded
1590 * and the index in the stack otherwise.
1592 static int
1593 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1595 if (ctxt->options & XML_PARSE_NSCLEAN) {
1596 int i;
1597 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1598 if (ctxt->nsTab[i] == prefix) {
1599 /* in scope */
1600 if (ctxt->nsTab[i + 1] == URL)
1601 return(-2);
1602 /* out of scope keep it */
1603 break;
1607 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1608 ctxt->nsMax = 10;
1609 ctxt->nsNr = 0;
1610 ctxt->nsTab = (const xmlChar **)
1611 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1612 if (ctxt->nsTab == NULL) {
1613 xmlErrMemory(ctxt, NULL);
1614 ctxt->nsMax = 0;
1615 return (-1);
1617 } else if (ctxt->nsNr >= ctxt->nsMax) {
1618 const xmlChar ** tmp;
1619 ctxt->nsMax *= 2;
1620 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1621 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1622 if (tmp == NULL) {
1623 xmlErrMemory(ctxt, NULL);
1624 ctxt->nsMax /= 2;
1625 return (-1);
1627 ctxt->nsTab = tmp;
1629 ctxt->nsTab[ctxt->nsNr++] = prefix;
1630 ctxt->nsTab[ctxt->nsNr++] = URL;
1631 return (ctxt->nsNr);
1634 * nsPop:
1635 * @ctxt: an XML parser context
1636 * @nr: the number to pop
1638 * Pops the top @nr parser prefix/namespace from the ns stack
1640 * Returns the number of namespaces removed
1642 static int
1643 nsPop(xmlParserCtxtPtr ctxt, int nr)
1645 int i;
1647 if (ctxt->nsTab == NULL) return(0);
1648 if (ctxt->nsNr < nr) {
1649 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1650 nr = ctxt->nsNr;
1652 if (ctxt->nsNr <= 0)
1653 return (0);
1655 for (i = 0;i < nr;i++) {
1656 ctxt->nsNr--;
1657 ctxt->nsTab[ctxt->nsNr] = NULL;
1659 return(nr);
1661 #endif
1663 static int
1664 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1665 const xmlChar **atts;
1666 int *attallocs;
1667 int maxatts;
1669 if (nr + 5 > ctxt->maxatts) {
1670 maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1671 atts = (const xmlChar **) xmlMalloc(
1672 maxatts * sizeof(const xmlChar *));
1673 if (atts == NULL) goto mem_error;
1674 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1675 (maxatts / 5) * sizeof(int));
1676 if (attallocs == NULL) {
1677 xmlFree(atts);
1678 goto mem_error;
1680 if (ctxt->maxatts > 0)
1681 memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1682 xmlFree(ctxt->atts);
1683 ctxt->atts = atts;
1684 ctxt->attallocs = attallocs;
1685 ctxt->maxatts = maxatts;
1687 return(ctxt->maxatts);
1688 mem_error:
1689 xmlErrMemory(ctxt, NULL);
1690 return(-1);
1694 * inputPush:
1695 * @ctxt: an XML parser context
1696 * @value: the parser input
1698 * Pushes a new parser input on top of the input stack
1700 * Returns -1 in case of error, the index in the stack otherwise
1703 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1705 if ((ctxt == NULL) || (value == NULL))
1706 return(-1);
1707 if (ctxt->inputNr >= ctxt->inputMax) {
1708 size_t newSize = ctxt->inputMax * 2;
1709 xmlParserInputPtr *tmp;
1711 tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1712 newSize * sizeof(*tmp));
1713 if (tmp == NULL) {
1714 xmlErrMemory(ctxt, NULL);
1715 return (-1);
1717 ctxt->inputTab = tmp;
1718 ctxt->inputMax = newSize;
1720 ctxt->inputTab[ctxt->inputNr] = value;
1721 ctxt->input = value;
1722 return (ctxt->inputNr++);
1725 * inputPop:
1726 * @ctxt: an XML parser context
1728 * Pops the top parser input from the input stack
1730 * Returns the input just removed
1732 xmlParserInputPtr
1733 inputPop(xmlParserCtxtPtr ctxt)
1735 xmlParserInputPtr ret;
1737 if (ctxt == NULL)
1738 return(NULL);
1739 if (ctxt->inputNr <= 0)
1740 return (NULL);
1741 ctxt->inputNr--;
1742 if (ctxt->inputNr > 0)
1743 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1744 else
1745 ctxt->input = NULL;
1746 ret = ctxt->inputTab[ctxt->inputNr];
1747 ctxt->inputTab[ctxt->inputNr] = NULL;
1748 return (ret);
1751 * nodePush:
1752 * @ctxt: an XML parser context
1753 * @value: the element node
1755 * DEPRECATED: Internal function, do not use.
1757 * Pushes a new element node on top of the node stack
1759 * Returns -1 in case of error, the index in the stack otherwise
1762 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1764 if (ctxt == NULL) return(0);
1765 if (ctxt->nodeNr >= ctxt->nodeMax) {
1766 xmlNodePtr *tmp;
1768 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1769 ctxt->nodeMax * 2 *
1770 sizeof(ctxt->nodeTab[0]));
1771 if (tmp == NULL) {
1772 xmlErrMemory(ctxt, NULL);
1773 return (-1);
1775 ctxt->nodeTab = tmp;
1776 ctxt->nodeMax *= 2;
1778 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1779 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1780 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1781 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1782 xmlParserMaxDepth);
1783 xmlHaltParser(ctxt);
1784 return(-1);
1786 ctxt->nodeTab[ctxt->nodeNr] = value;
1787 ctxt->node = value;
1788 return (ctxt->nodeNr++);
1792 * nodePop:
1793 * @ctxt: an XML parser context
1795 * DEPRECATED: Internal function, do not use.
1797 * Pops the top element node from the node stack
1799 * Returns the node just removed
1801 xmlNodePtr
1802 nodePop(xmlParserCtxtPtr ctxt)
1804 xmlNodePtr ret;
1806 if (ctxt == NULL) return(NULL);
1807 if (ctxt->nodeNr <= 0)
1808 return (NULL);
1809 ctxt->nodeNr--;
1810 if (ctxt->nodeNr > 0)
1811 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1812 else
1813 ctxt->node = NULL;
1814 ret = ctxt->nodeTab[ctxt->nodeNr];
1815 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1816 return (ret);
1820 * nameNsPush:
1821 * @ctxt: an XML parser context
1822 * @value: the element name
1823 * @prefix: the element prefix
1824 * @URI: the element namespace name
1825 * @line: the current line number for error messages
1826 * @nsNr: the number of namespaces pushed on the namespace table
1828 * Pushes a new element name/prefix/URL on top of the name stack
1830 * Returns -1 in case of error, the index in the stack otherwise
1832 static int
1833 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1834 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1836 xmlStartTag *tag;
1838 if (ctxt->nameNr >= ctxt->nameMax) {
1839 const xmlChar * *tmp;
1840 xmlStartTag *tmp2;
1841 ctxt->nameMax *= 2;
1842 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1843 ctxt->nameMax *
1844 sizeof(ctxt->nameTab[0]));
1845 if (tmp == NULL) {
1846 ctxt->nameMax /= 2;
1847 goto mem_error;
1849 ctxt->nameTab = tmp;
1850 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1851 ctxt->nameMax *
1852 sizeof(ctxt->pushTab[0]));
1853 if (tmp2 == NULL) {
1854 ctxt->nameMax /= 2;
1855 goto mem_error;
1857 ctxt->pushTab = tmp2;
1858 } else if (ctxt->pushTab == NULL) {
1859 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1860 sizeof(ctxt->pushTab[0]));
1861 if (ctxt->pushTab == NULL)
1862 goto mem_error;
1864 ctxt->nameTab[ctxt->nameNr] = value;
1865 ctxt->name = value;
1866 tag = &ctxt->pushTab[ctxt->nameNr];
1867 tag->prefix = prefix;
1868 tag->URI = URI;
1869 tag->line = line;
1870 tag->nsNr = nsNr;
1871 return (ctxt->nameNr++);
1872 mem_error:
1873 xmlErrMemory(ctxt, NULL);
1874 return (-1);
1876 #ifdef LIBXML_PUSH_ENABLED
1878 * nameNsPop:
1879 * @ctxt: an XML parser context
1881 * Pops the top element/prefix/URI name from the name stack
1883 * Returns the name just removed
1885 static const xmlChar *
1886 nameNsPop(xmlParserCtxtPtr ctxt)
1888 const xmlChar *ret;
1890 if (ctxt->nameNr <= 0)
1891 return (NULL);
1892 ctxt->nameNr--;
1893 if (ctxt->nameNr > 0)
1894 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1895 else
1896 ctxt->name = NULL;
1897 ret = ctxt->nameTab[ctxt->nameNr];
1898 ctxt->nameTab[ctxt->nameNr] = NULL;
1899 return (ret);
1901 #endif /* LIBXML_PUSH_ENABLED */
1904 * namePush:
1905 * @ctxt: an XML parser context
1906 * @value: the element name
1908 * DEPRECATED: Internal function, do not use.
1910 * Pushes a new element name on top of the name stack
1912 * Returns -1 in case of error, the index in the stack otherwise
1915 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1917 if (ctxt == NULL) return (-1);
1919 if (ctxt->nameNr >= ctxt->nameMax) {
1920 const xmlChar * *tmp;
1921 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1922 ctxt->nameMax * 2 *
1923 sizeof(ctxt->nameTab[0]));
1924 if (tmp == NULL) {
1925 goto mem_error;
1927 ctxt->nameTab = tmp;
1928 ctxt->nameMax *= 2;
1930 ctxt->nameTab[ctxt->nameNr] = value;
1931 ctxt->name = value;
1932 return (ctxt->nameNr++);
1933 mem_error:
1934 xmlErrMemory(ctxt, NULL);
1935 return (-1);
1939 * namePop:
1940 * @ctxt: an XML parser context
1942 * DEPRECATED: Internal function, do not use.
1944 * Pops the top element name from the name stack
1946 * Returns the name just removed
1948 const xmlChar *
1949 namePop(xmlParserCtxtPtr ctxt)
1951 const xmlChar *ret;
1953 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1954 return (NULL);
1955 ctxt->nameNr--;
1956 if (ctxt->nameNr > 0)
1957 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1958 else
1959 ctxt->name = NULL;
1960 ret = ctxt->nameTab[ctxt->nameNr];
1961 ctxt->nameTab[ctxt->nameNr] = NULL;
1962 return (ret);
1965 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1966 if (ctxt->spaceNr >= ctxt->spaceMax) {
1967 int *tmp;
1969 ctxt->spaceMax *= 2;
1970 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1971 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1972 if (tmp == NULL) {
1973 xmlErrMemory(ctxt, NULL);
1974 ctxt->spaceMax /=2;
1975 return(-1);
1977 ctxt->spaceTab = tmp;
1979 ctxt->spaceTab[ctxt->spaceNr] = val;
1980 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1981 return(ctxt->spaceNr++);
1984 static int spacePop(xmlParserCtxtPtr ctxt) {
1985 int ret;
1986 if (ctxt->spaceNr <= 0) return(0);
1987 ctxt->spaceNr--;
1988 if (ctxt->spaceNr > 0)
1989 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1990 else
1991 ctxt->space = &ctxt->spaceTab[0];
1992 ret = ctxt->spaceTab[ctxt->spaceNr];
1993 ctxt->spaceTab[ctxt->spaceNr] = -1;
1994 return(ret);
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named `ctxt` (the parser
 * context) to be in scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 *                use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     expands to exactly the same expression as CUR here.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   CMPn(s, c1..cn) true when the first n bytes at s equal c1..cn.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser. n is evaluated
 *           twice.
 *   SKIPL(n) Skip n xmlChar one at a time, keeping line/col up to date
 *           across newlines.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser. Expands to a brace block.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character through xmlNextChar().
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers. Both expand to a complete
 *           `if (...) call;` statement, so they must not be used as the
 *           body of an if that has an else.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlParserShrink(ctxt);

#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);					\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/* wrapped so that it behaves as one statement in any if/else context */
#define COPY_BUF(l,b,i,v) do {						\
    if ((l) == 1) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v));			\
  } while (0)
2109 * xmlSkipBlankChars:
2110 * @ctxt: the XML parser context
2112 * DEPRECATED: Internal function, do not use.
2114 * skip all blanks character found at that point in the input streams.
2115 * It pops up finished entities in the process if allowable at that point.
2117 * Returns the number of space chars skipped
2121 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2122 int res = 0;
2125 * It's Okay to use CUR/NEXT here since all the blanks are on
2126 * the ASCII range.
2128 if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2129 (ctxt->instate == XML_PARSER_START)) {
2130 const xmlChar *cur;
2132 * if we are in the document content, go really fast
2134 cur = ctxt->input->cur;
2135 while (IS_BLANK_CH(*cur)) {
2136 if (*cur == '\n') {
2137 ctxt->input->line++; ctxt->input->col = 1;
2138 } else {
2139 ctxt->input->col++;
2141 cur++;
2142 if (res < INT_MAX)
2143 res++;
2144 if (*cur == 0) {
2145 ctxt->input->cur = cur;
2146 xmlParserGrow(ctxt);
2147 cur = ctxt->input->cur;
2150 ctxt->input->cur = cur;
2151 } else {
2152 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2154 while (ctxt->instate != XML_PARSER_EOF) {
2155 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2156 NEXT;
2157 } else if (CUR == '%') {
2159 * Need to handle support of entities branching here
2161 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2162 break;
2163 xmlParsePEReference(ctxt);
2164 } else if (CUR == 0) {
2165 unsigned long consumed;
2166 xmlEntityPtr ent;
2168 if (ctxt->inputNr <= 1)
2169 break;
2171 consumed = ctxt->input->consumed;
2172 xmlSaturatedAddSizeT(&consumed,
2173 ctxt->input->cur - ctxt->input->base);
2176 * Add to sizeentities when parsing an external entity
2177 * for the first time.
2179 ent = ctxt->input->entity;
2180 if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2181 ((ent->flags & XML_ENT_PARSED) == 0)) {
2182 ent->flags |= XML_ENT_PARSED;
2184 xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2187 xmlParserEntityCheck(ctxt, consumed);
2189 xmlPopInput(ctxt);
2190 } else {
2191 break;
2195 * Also increase the counter when entering or exiting a PERef.
2196 * The spec says: "When a parameter-entity reference is recognized
2197 * in the DTD and included, its replacement text MUST be enlarged
2198 * by the attachment of one leading and one following space (#x20)
2199 * character."
2201 if (res < INT_MAX)
2202 res++;
2205 return(res);
2208 /************************************************************************
2210 * Commodity functions to handle entities *
2212 ************************************************************************/
2215 * xmlPopInput:
2216 * @ctxt: an XML parser context
2218 * xmlPopInput: the current input pointed by ctxt->input came to an end
2219 * pop it and return the next char.
2221 * Returns the current xmlChar in the parser context
2223 xmlChar
2224 xmlPopInput(xmlParserCtxtPtr ctxt) {
2225 xmlParserInputPtr input;
2227 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2228 if (xmlParserDebugEntities)
2229 xmlGenericError(xmlGenericErrorContext,
2230 "Popping input %d\n", ctxt->inputNr);
2231 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2232 (ctxt->instate != XML_PARSER_EOF))
2233 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2234 "Unfinished entity outside the DTD");
2235 input = inputPop(ctxt);
2236 if (input->entity != NULL)
2237 input->entity->flags &= ~XML_ENT_EXPANDING;
2238 xmlFreeInputStream(input);
2239 if (*ctxt->input->cur == 0)
2240 xmlParserGrow(ctxt);
2241 return(CUR);
2245 * xmlPushInput:
2246 * @ctxt: an XML parser context
2247 * @input: an XML parser input fragment (entity, XML fragment ...).
2249 * xmlPushInput: switch to a new input stream which is stacked on top
2250 * of the previous one(s).
2251 * Returns -1 in case of error or the index in the input stack
2254 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2255 int ret;
2256 if (input == NULL) return(-1);
2258 if (xmlParserDebugEntities) {
2259 if ((ctxt->input != NULL) && (ctxt->input->filename))
2260 xmlGenericError(xmlGenericErrorContext,
2261 "%s(%d): ", ctxt->input->filename,
2262 ctxt->input->line);
2263 xmlGenericError(xmlGenericErrorContext,
2264 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2266 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2267 (ctxt->inputNr > 100)) {
2268 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2269 while (ctxt->inputNr > 1)
2270 xmlFreeInputStream(inputPop(ctxt));
2271 return(-1);
2273 ret = inputPush(ctxt, input);
2274 if (ctxt->instate == XML_PARSER_EOF)
2275 return(-1);
2276 GROW;
2277 return(ret);
2281 * xmlParseCharRef:
2282 * @ctxt: an XML parser context
2284 * DEPRECATED: Internal function, don't use.
2286 * Parse a numeric character reference. Always consumes '&'.
2288 * [66] CharRef ::= '&#' [0-9]+ ';' |
2289 * '&#x' [0-9a-fA-F]+ ';'
2291 * [ WFC: Legal Character ]
2292 * Characters referred to using character references must match the
2293 * production for Char.
2295 * Returns the value parsed (as an int), 0 in case of error
2298 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2299 int val = 0;
2300 int count = 0;
2303 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2305 if ((RAW == '&') && (NXT(1) == '#') &&
2306 (NXT(2) == 'x')) {
2307 SKIP(3);
2308 GROW;
2309 while (RAW != ';') { /* loop blocked by count */
2310 if (count++ > 20) {
2311 count = 0;
2312 GROW;
2313 if (ctxt->instate == XML_PARSER_EOF)
2314 return(0);
2316 if ((RAW >= '0') && (RAW <= '9'))
2317 val = val * 16 + (CUR - '0');
2318 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2319 val = val * 16 + (CUR - 'a') + 10;
2320 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2321 val = val * 16 + (CUR - 'A') + 10;
2322 else {
2323 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2324 val = 0;
2325 break;
2327 if (val > 0x110000)
2328 val = 0x110000;
2330 NEXT;
2331 count++;
2333 if (RAW == ';') {
2334 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2335 ctxt->input->col++;
2336 ctxt->input->cur++;
2338 } else if ((RAW == '&') && (NXT(1) == '#')) {
2339 SKIP(2);
2340 GROW;
2341 while (RAW != ';') { /* loop blocked by count */
2342 if (count++ > 20) {
2343 count = 0;
2344 GROW;
2345 if (ctxt->instate == XML_PARSER_EOF)
2346 return(0);
2348 if ((RAW >= '0') && (RAW <= '9'))
2349 val = val * 10 + (CUR - '0');
2350 else {
2351 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2352 val = 0;
2353 break;
2355 if (val > 0x110000)
2356 val = 0x110000;
2358 NEXT;
2359 count++;
2361 if (RAW == ';') {
2362 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2363 ctxt->input->col++;
2364 ctxt->input->cur++;
2366 } else {
2367 if (RAW == '&')
2368 SKIP(1);
2369 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2373 * [ WFC: Legal Character ]
2374 * Characters referred to using character references must match the
2375 * production for Char.
2377 if (val >= 0x110000) {
2378 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2379 "xmlParseCharRef: character reference out of bounds\n",
2380 val);
2381 } else if (IS_CHAR(val)) {
2382 return(val);
2383 } else {
2384 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2385 "xmlParseCharRef: invalid xmlChar value %d\n",
2386 val);
2388 return(0);
2392 * xmlParseStringCharRef:
2393 * @ctxt: an XML parser context
2394 * @str: a pointer to an index in the string
2396 * parse Reference declarations, variant parsing from a string rather
2397 * than an an input flow.
2399 * [66] CharRef ::= '&#' [0-9]+ ';' |
2400 * '&#x' [0-9a-fA-F]+ ';'
2402 * [ WFC: Legal Character ]
2403 * Characters referred to using character references must match the
2404 * production for Char.
2406 * Returns the value parsed (as an int), 0 in case of error, str will be
2407 * updated to the current value of the index
2409 static int
2410 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2411 const xmlChar *ptr;
2412 xmlChar cur;
2413 int val = 0;
2415 if ((str == NULL) || (*str == NULL)) return(0);
2416 ptr = *str;
2417 cur = *ptr;
2418 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2419 ptr += 3;
2420 cur = *ptr;
2421 while (cur != ';') { /* Non input consuming loop */
2422 if ((cur >= '0') && (cur <= '9'))
2423 val = val * 16 + (cur - '0');
2424 else if ((cur >= 'a') && (cur <= 'f'))
2425 val = val * 16 + (cur - 'a') + 10;
2426 else if ((cur >= 'A') && (cur <= 'F'))
2427 val = val * 16 + (cur - 'A') + 10;
2428 else {
2429 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2430 val = 0;
2431 break;
2433 if (val > 0x110000)
2434 val = 0x110000;
2436 ptr++;
2437 cur = *ptr;
2439 if (cur == ';')
2440 ptr++;
2441 } else if ((cur == '&') && (ptr[1] == '#')){
2442 ptr += 2;
2443 cur = *ptr;
2444 while (cur != ';') { /* Non input consuming loops */
2445 if ((cur >= '0') && (cur <= '9'))
2446 val = val * 10 + (cur - '0');
2447 else {
2448 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2449 val = 0;
2450 break;
2452 if (val > 0x110000)
2453 val = 0x110000;
2455 ptr++;
2456 cur = *ptr;
2458 if (cur == ';')
2459 ptr++;
2460 } else {
2461 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2462 return(0);
2464 *str = ptr;
2467 * [ WFC: Legal Character ]
2468 * Characters referred to using character references must match the
2469 * production for Char.
2471 if (val >= 0x110000) {
2472 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2473 "xmlParseStringCharRef: character reference out of bounds\n",
2474 val);
2475 } else if (IS_CHAR(val)) {
2476 return(val);
2477 } else {
2478 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2479 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2480 val);
2482 return(0);
2486 * xmlParserHandlePEReference:
2487 * @ctxt: the parser context
2489 * DEPRECATED: Internal function, do not use.
2491 * [69] PEReference ::= '%' Name ';'
2493 * [ WFC: No Recursion ]
2494 * A parsed entity must not contain a recursive
2495 * reference to itself, either directly or indirectly.
2497 * [ WFC: Entity Declared ]
2498 * In a document without any DTD, a document with only an internal DTD
2499 * subset which contains no parameter entity references, or a document
2500 * with "standalone='yes'", ... ... The declaration of a parameter
2501 * entity must precede any reference to it...
2503 * [ VC: Entity Declared ]
2504 * In a document with an external subset or external parameter entities
2505 * with "standalone='no'", ... ... The declaration of a parameter entity
2506 * must precede any reference to it...
2508 * [ WFC: In DTD ]
2509 * Parameter-entity references may only appear in the DTD.
2510 * NOTE: misleading but this is handled.
2512 * A PEReference may have been detected in the current input stream
2513 * the handling is done accordingly to
2514 * http://www.w3.org/TR/REC-xml#entproc
2515 * i.e.
2516 * - Included in literal in entity values
2517 * - Included as Parameter Entity reference within DTDs
2519 void
2520 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2521 switch(ctxt->instate) {
2522 case XML_PARSER_CDATA_SECTION:
2523 return;
2524 case XML_PARSER_COMMENT:
2525 return;
2526 case XML_PARSER_START_TAG:
2527 return;
2528 case XML_PARSER_END_TAG:
2529 return;
2530 case XML_PARSER_EOF:
2531 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2532 return;
2533 case XML_PARSER_PROLOG:
2534 case XML_PARSER_START:
2535 case XML_PARSER_MISC:
2536 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2537 return;
2538 case XML_PARSER_ENTITY_DECL:
2539 case XML_PARSER_CONTENT:
2540 case XML_PARSER_ATTRIBUTE_VALUE:
2541 case XML_PARSER_PI:
2542 case XML_PARSER_SYSTEM_LITERAL:
2543 case XML_PARSER_PUBLIC_LITERAL:
2544 /* we just ignore it there */
2545 return;
2546 case XML_PARSER_EPILOG:
2547 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2548 return;
2549 case XML_PARSER_ENTITY_VALUE:
2551 * NOTE: in the case of entity values, we don't do the
2552 * substitution here since we need the literal
2553 * entity value to be able to save the internal
2554 * subset of the document.
2555 * This will be handled by xmlStringDecodeEntities
2557 return;
2558 case XML_PARSER_DTD:
2560 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2561 * In the internal DTD subset, parameter-entity references
2562 * can occur only where markup declarations can occur, not
2563 * within markup declarations.
2564 * In that case this is handled in xmlParseMarkupDecl
2566 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2567 return;
2568 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2569 return;
2570 break;
2571 case XML_PARSER_IGNORE:
2572 return;
2575 xmlParsePEReference(ctxt);
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the current size plus @n. @n is parenthesized so
 * that any argument expression is added as a whole. On size overflow or
 * allocation failure the macro jumps to the caller's mem_error label and
 * leaves @buffer and buffer##_size unchanged.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2594 * xmlStringDecodeEntitiesInt:
2595 * @ctxt: the parser context
2596 * @str: the input string
2597 * @len: the string length
2598 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2599 * @end: an end marker xmlChar, 0 if none
2600 * @end2: an end marker xmlChar, 0 if none
2601 * @end3: an end marker xmlChar, 0 if none
2602 * @check: whether to perform entity checks
2604 static xmlChar *
2605 xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2606 int what, xmlChar end, xmlChar end2, xmlChar end3,
2607 int check) {
2608 xmlChar *buffer = NULL;
2609 size_t buffer_size = 0;
2610 size_t nbchars = 0;
2612 xmlChar *current = NULL;
2613 xmlChar *rep = NULL;
2614 const xmlChar *last;
2615 xmlEntityPtr ent;
2616 int c,l;
2618 if (str == NULL)
2619 return(NULL);
2620 last = str + len;
2622 if (((ctxt->depth > 40) &&
2623 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2624 (ctxt->depth > 100)) {
2625 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2626 "Maximum entity nesting depth exceeded");
2627 return(NULL);
2631 * allocate a translation buffer.
2633 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2634 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2635 if (buffer == NULL) goto mem_error;
2638 * OK loop until we reach one of the ending char or a size limit.
2639 * we are operating on already parsed values.
2641 if (str < last)
2642 c = CUR_SCHAR(str, l);
2643 else
2644 c = 0;
2645 while ((c != 0) && (c != end) && /* non input consuming loop */
2646 (c != end2) && (c != end3) &&
2647 (ctxt->instate != XML_PARSER_EOF)) {
2649 if (c == 0) break;
2650 if ((c == '&') && (str[1] == '#')) {
2651 int val = xmlParseStringCharRef(ctxt, &str);
2652 if (val == 0)
2653 goto int_error;
2654 COPY_BUF(0,buffer,nbchars,val);
2655 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2656 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2658 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2659 if (xmlParserDebugEntities)
2660 xmlGenericError(xmlGenericErrorContext,
2661 "String decoding Entity Reference: %.30s\n",
2662 str);
2663 ent = xmlParseStringEntityRef(ctxt, &str);
2664 if ((ent != NULL) &&
2665 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2666 if (ent->content != NULL) {
2667 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2668 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2669 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2671 } else {
2672 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2673 "predefined entity has no content\n");
2674 goto int_error;
2676 } else if ((ent != NULL) && (ent->content != NULL)) {
2677 if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2678 goto int_error;
2680 if (ent->flags & XML_ENT_EXPANDING) {
2681 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2682 xmlHaltParser(ctxt);
2683 ent->content[0] = 0;
2684 goto int_error;
2687 ent->flags |= XML_ENT_EXPANDING;
2688 ctxt->depth++;
2689 rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2690 ent->length, what, 0, 0, 0, check);
2691 ctxt->depth--;
2692 ent->flags &= ~XML_ENT_EXPANDING;
2694 if (rep == NULL) {
2695 ent->content[0] = 0;
2696 goto int_error;
2699 current = rep;
2700 while (*current != 0) { /* non input consuming loop */
2701 buffer[nbchars++] = *current++;
2702 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2703 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2706 xmlFree(rep);
2707 rep = NULL;
2708 } else if (ent != NULL) {
2709 int i = xmlStrlen(ent->name);
2710 const xmlChar *cur = ent->name;
2712 buffer[nbchars++] = '&';
2713 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2714 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2716 for (;i > 0;i--)
2717 buffer[nbchars++] = *cur++;
2718 buffer[nbchars++] = ';';
2720 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2721 if (xmlParserDebugEntities)
2722 xmlGenericError(xmlGenericErrorContext,
2723 "String decoding PE Reference: %.30s\n", str);
2724 ent = xmlParseStringPEReference(ctxt, &str);
2725 if (ent != NULL) {
2726 if (ent->content == NULL) {
2728 * Note: external parsed entities will not be loaded,
2729 * it is not required for a non-validating parser to
2730 * complete external PEReferences coming from the
2731 * internal subset
2733 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2734 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2735 (ctxt->validate != 0)) {
2736 xmlLoadEntityContent(ctxt, ent);
2737 } else {
2738 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2739 "not validating will not read content for PE entity %s\n",
2740 ent->name, NULL);
2744 if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2745 goto int_error;
2747 if (ent->flags & XML_ENT_EXPANDING) {
2748 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2749 xmlHaltParser(ctxt);
2750 if (ent->content != NULL)
2751 ent->content[0] = 0;
2752 goto int_error;
2755 ent->flags |= XML_ENT_EXPANDING;
2756 ctxt->depth++;
2757 rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2758 ent->length, what, 0, 0, 0, check);
2759 ctxt->depth--;
2760 ent->flags &= ~XML_ENT_EXPANDING;
2762 if (rep == NULL) {
2763 if (ent->content != NULL)
2764 ent->content[0] = 0;
2765 goto int_error;
2767 current = rep;
2768 while (*current != 0) { /* non input consuming loop */
2769 buffer[nbchars++] = *current++;
2770 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2771 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2774 xmlFree(rep);
2775 rep = NULL;
2777 } else {
2778 COPY_BUF(l,buffer,nbchars,c);
2779 str += l;
2780 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2781 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2784 if (str < last)
2785 c = CUR_SCHAR(str, l);
2786 else
2787 c = 0;
2789 buffer[nbchars] = 0;
2790 return(buffer);
2792 mem_error:
2793 xmlErrMemory(ctxt, NULL);
2794 int_error:
2795 if (rep != NULL)
2796 xmlFree(rep);
2797 if (buffer != NULL)
2798 xmlFree(buffer);
2799 return(NULL);
2803 * xmlStringLenDecodeEntities:
2804 * @ctxt: the parser context
2805 * @str: the input string
2806 * @len: the string length
2807 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2808 * @end: an end marker xmlChar, 0 if none
2809 * @end2: an end marker xmlChar, 0 if none
2810 * @end3: an end marker xmlChar, 0 if none
2812 * DEPRECATED: Internal function, don't use.
2814 * Takes a entity string content and process to do the adequate substitutions.
2816 * [67] Reference ::= EntityRef | CharRef
2818 * [69] PEReference ::= '%' Name ';'
2820 * Returns A newly allocated string with the substitution done. The caller
2821 * must deallocate it !
2823 xmlChar *
2824 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2825 int what, xmlChar end, xmlChar end2,
2826 xmlChar end3) {
2827 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2828 return(NULL);
2829 return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2830 end, end2, end3, 0));
2834 * xmlStringDecodeEntities:
2835 * @ctxt: the parser context
2836 * @str: the input string
2837 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2838 * @end: an end marker xmlChar, 0 if none
2839 * @end2: an end marker xmlChar, 0 if none
2840 * @end3: an end marker xmlChar, 0 if none
2842 * DEPRECATED: Internal function, don't use.
2844 * Takes a entity string content and process to do the adequate substitutions.
2846 * [67] Reference ::= EntityRef | CharRef
2848 * [69] PEReference ::= '%' Name ';'
2850 * Returns A newly allocated string with the substitution done. The caller
2851 * must deallocate it !
2853 xmlChar *
2854 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2855 xmlChar end, xmlChar end2, xmlChar end3) {
2856 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2857 return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2858 end, end2, end3, 0));
2861 /************************************************************************
2863 * Commodity functions, cleanup needed ? *
2865 ************************************************************************/
2868 * areBlanks:
2869 * @ctxt: an XML parser context
2870 * @str: a xmlChar *
2871 * @len: the size of @str
2872 * @blank_chars: we know the chars are blanks
2874 * Is this a sequence of blank chars that one can ignore ?
2876 * Returns 1 if ignorable 0 otherwise.
2879 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2880 int blank_chars) {
2881 int i, ret;
2882 xmlNodePtr lastChild;
2885 * Don't spend time trying to differentiate them, the same callback is
2886 * used !
2888 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2889 return(0);
2892 * Check for xml:space value.
2894 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2895 (*(ctxt->space) == -2))
2896 return(0);
2899 * Check that the string is made of blanks
2901 if (blank_chars == 0) {
2902 for (i = 0;i < len;i++)
2903 if (!(IS_BLANK_CH(str[i]))) return(0);
2907 * Look if the element is mixed content in the DTD if available
2909 if (ctxt->node == NULL) return(0);
2910 if (ctxt->myDoc != NULL) {
2911 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2912 if (ret == 0) return(1);
2913 if (ret == 1) return(0);
2917 * Otherwise, heuristic :-\
2919 if ((RAW != '<') && (RAW != 0xD)) return(0);
2920 if ((ctxt->node->children == NULL) &&
2921 (RAW == '<') && (NXT(1) == '/')) return(0);
2923 lastChild = xmlGetLastChild(ctxt->node);
2924 if (lastChild == NULL) {
2925 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2926 (ctxt->node->content != NULL)) return(0);
2927 } else if (xmlNodeIsText(lastChild))
2928 return(0);
2929 else if ((ctxt->node->children != NULL) &&
2930 (xmlNodeIsText(ctxt->node->children)))
2931 return(0);
2932 return(1);
2935 /************************************************************************
2937 * Extra stuff for namespace support *
2938 * Relates to http://www.w3.org/TR/WD-xml-names *
2940 ************************************************************************/
2943 * xmlSplitQName:
2944 * @ctxt: an XML parser context
2945 * @name: an XML parser context
2946 * @prefix: a xmlChar **
2948 * parse an UTF8 encoded XML qualified name string
2950 * [NS 5] QName ::= (Prefix ':')? LocalPart
2952 * [NS 6] Prefix ::= NCName
2954 * [NS 7] LocalPart ::= NCName
2956 * Returns the local part, and prefix is updated
2957 * to get the Prefix if any.
2960 xmlChar *
2961 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2962 xmlChar buf[XML_MAX_NAMELEN + 5];
2963 xmlChar *buffer = NULL;
2964 int len = 0;
2965 int max = XML_MAX_NAMELEN;
2966 xmlChar *ret = NULL;
2967 const xmlChar *cur = name;
2968 int c;
2970 if (prefix == NULL) return(NULL);
2971 *prefix = NULL;
2973 if (cur == NULL) return(NULL);
2975 #ifndef XML_XML_NAMESPACE
2976 /* xml: prefix is not really a namespace */
2977 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2978 (cur[2] == 'l') && (cur[3] == ':'))
2979 return(xmlStrdup(name));
2980 #endif
2982 /* nasty but well=formed */
2983 if (cur[0] == ':')
2984 return(xmlStrdup(name));
2986 c = *cur++;
2987 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2988 buf[len++] = c;
2989 c = *cur++;
2991 if (len >= max) {
2993 * Okay someone managed to make a huge name, so he's ready to pay
2994 * for the processing speed.
2996 max = len * 2;
2998 buffer = (xmlChar *) xmlMallocAtomic(max);
2999 if (buffer == NULL) {
3000 xmlErrMemory(ctxt, NULL);
3001 return(NULL);
3003 memcpy(buffer, buf, len);
3004 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3005 if (len + 10 > max) {
3006 xmlChar *tmp;
3008 max *= 2;
3009 tmp = (xmlChar *) xmlRealloc(buffer, max);
3010 if (tmp == NULL) {
3011 xmlFree(buffer);
3012 xmlErrMemory(ctxt, NULL);
3013 return(NULL);
3015 buffer = tmp;
3017 buffer[len++] = c;
3018 c = *cur++;
3020 buffer[len] = 0;
3023 if ((c == ':') && (*cur == 0)) {
3024 if (buffer != NULL)
3025 xmlFree(buffer);
3026 *prefix = NULL;
3027 return(xmlStrdup(name));
3030 if (buffer == NULL)
3031 ret = xmlStrndup(buf, len);
3032 else {
3033 ret = buffer;
3034 buffer = NULL;
3035 max = XML_MAX_NAMELEN;
3039 if (c == ':') {
3040 c = *cur;
3041 *prefix = ret;
3042 if (c == 0) {
3043 return(xmlStrndup(BAD_CAST "", 0));
3045 len = 0;
3048 * Check that the first character is proper to start
3049 * a new name
3051 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3052 ((c >= 0x41) && (c <= 0x5A)) ||
3053 (c == '_') || (c == ':'))) {
3054 int l;
3055 int first = CUR_SCHAR(cur, l);
3057 if (!IS_LETTER(first) && (first != '_')) {
3058 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3059 "Name %s is not XML Namespace compliant\n",
3060 name);
3063 cur++;
3065 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3066 buf[len++] = c;
3067 c = *cur++;
3069 if (len >= max) {
3071 * Okay someone managed to make a huge name, so he's ready to pay
3072 * for the processing speed.
3074 max = len * 2;
3076 buffer = (xmlChar *) xmlMallocAtomic(max);
3077 if (buffer == NULL) {
3078 xmlErrMemory(ctxt, NULL);
3079 return(NULL);
3081 memcpy(buffer, buf, len);
3082 while (c != 0) { /* tested bigname2.xml */
3083 if (len + 10 > max) {
3084 xmlChar *tmp;
3086 max *= 2;
3087 tmp = (xmlChar *) xmlRealloc(buffer, max);
3088 if (tmp == NULL) {
3089 xmlErrMemory(ctxt, NULL);
3090 xmlFree(buffer);
3091 return(NULL);
3093 buffer = tmp;
3095 buffer[len++] = c;
3096 c = *cur++;
3098 buffer[len] = 0;
3101 if (buffer == NULL)
3102 ret = xmlStrndup(buf, len);
3103 else {
3104 ret = buffer;
3108 return(ret);
3111 /************************************************************************
3113 * The parser itself *
3114 * Relates to http://www.w3.org/TR/REC-xml *
3116 ************************************************************************/
3118 /************************************************************************
3120 * Routines to parse Name, NCName and NmToken *
3122 ************************************************************************/
#ifdef DEBUG
/*
 * Counters compiled in DEBUG builds only. The name-parsing routines
 * increment their counter on entry (e.g. nbParseNameComplex in
 * xmlParseNameComplex, nbParseName in xmlParseName).
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3133 * The two following functions are related to the change of accepted
3134 * characters for Name and NmToken in the Revision 5 of XML-1.0
3135 * They correspond to the modified production [4] and the new production [4a]
3136 * changes in that revision. Also note that the macros used for the
3137 * productions Letter, Digit, CombiningChar and Extender are not needed
3138 * anymore.
3139 * We still keep compatibility to pre-revision5 parsing semantic if the
3140 * new XML_PARSE_OLD10 option is given to the parser.
3142 static int
3143 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3144 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3146 * Use the new checks of production [4] [4a] amd [5] of the
3147 * Update 5 of XML-1.0
3149 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3150 (((c >= 'a') && (c <= 'z')) ||
3151 ((c >= 'A') && (c <= 'Z')) ||
3152 (c == '_') || (c == ':') ||
3153 ((c >= 0xC0) && (c <= 0xD6)) ||
3154 ((c >= 0xD8) && (c <= 0xF6)) ||
3155 ((c >= 0xF8) && (c <= 0x2FF)) ||
3156 ((c >= 0x370) && (c <= 0x37D)) ||
3157 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3158 ((c >= 0x200C) && (c <= 0x200D)) ||
3159 ((c >= 0x2070) && (c <= 0x218F)) ||
3160 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3161 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3162 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3163 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3164 ((c >= 0x10000) && (c <= 0xEFFFF))))
3165 return(1);
3166 } else {
3167 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3168 return(1);
3170 return(0);
3173 static int
3174 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3175 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3177 * Use the new checks of production [4] [4a] amd [5] of the
3178 * Update 5 of XML-1.0
3180 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3181 (((c >= 'a') && (c <= 'z')) ||
3182 ((c >= 'A') && (c <= 'Z')) ||
3183 ((c >= '0') && (c <= '9')) || /* !start */
3184 (c == '_') || (c == ':') ||
3185 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3186 ((c >= 0xC0) && (c <= 0xD6)) ||
3187 ((c >= 0xD8) && (c <= 0xF6)) ||
3188 ((c >= 0xF8) && (c <= 0x2FF)) ||
3189 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3190 ((c >= 0x370) && (c <= 0x37D)) ||
3191 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3192 ((c >= 0x200C) && (c <= 0x200D)) ||
3193 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3194 ((c >= 0x2070) && (c <= 0x218F)) ||
3195 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3196 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3197 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3198 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3199 ((c >= 0x10000) && (c <= 0xEFFFF))))
3200 return(1);
3201 } else {
3202 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3203 (c == '.') || (c == '-') ||
3204 (c == '_') || (c == ':') ||
3205 (IS_COMBINING(c)) ||
3206 (IS_EXTENDER(c)))
3207 return(1);
3209 return(0);
/* Forward declaration; the definition is not in this part of the file. */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
3215 static const xmlChar *
3216 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3217 int len = 0, l;
3218 int c;
3219 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3220 XML_MAX_TEXT_LENGTH :
3221 XML_MAX_NAME_LENGTH;
3223 #ifdef DEBUG
3224 nbParseNameComplex++;
3225 #endif
3228 * Handler for more complex cases
3230 c = CUR_CHAR(l);
3231 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3233 * Use the new checks of production [4] [4a] amd [5] of the
3234 * Update 5 of XML-1.0
3236 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3237 (!(((c >= 'a') && (c <= 'z')) ||
3238 ((c >= 'A') && (c <= 'Z')) ||
3239 (c == '_') || (c == ':') ||
3240 ((c >= 0xC0) && (c <= 0xD6)) ||
3241 ((c >= 0xD8) && (c <= 0xF6)) ||
3242 ((c >= 0xF8) && (c <= 0x2FF)) ||
3243 ((c >= 0x370) && (c <= 0x37D)) ||
3244 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3245 ((c >= 0x200C) && (c <= 0x200D)) ||
3246 ((c >= 0x2070) && (c <= 0x218F)) ||
3247 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3248 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3249 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3250 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3251 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3252 return(NULL);
3254 len += l;
3255 NEXTL(l);
3256 c = CUR_CHAR(l);
3257 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3258 (((c >= 'a') && (c <= 'z')) ||
3259 ((c >= 'A') && (c <= 'Z')) ||
3260 ((c >= '0') && (c <= '9')) || /* !start */
3261 (c == '_') || (c == ':') ||
3262 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3263 ((c >= 0xC0) && (c <= 0xD6)) ||
3264 ((c >= 0xD8) && (c <= 0xF6)) ||
3265 ((c >= 0xF8) && (c <= 0x2FF)) ||
3266 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3267 ((c >= 0x370) && (c <= 0x37D)) ||
3268 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3269 ((c >= 0x200C) && (c <= 0x200D)) ||
3270 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3271 ((c >= 0x2070) && (c <= 0x218F)) ||
3272 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3273 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3274 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3275 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3276 ((c >= 0x10000) && (c <= 0xEFFFF))
3277 )) {
3278 if (len <= INT_MAX - l)
3279 len += l;
3280 NEXTL(l);
3281 c = CUR_CHAR(l);
3283 } else {
3284 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3285 (!IS_LETTER(c) && (c != '_') &&
3286 (c != ':'))) {
3287 return(NULL);
3289 len += l;
3290 NEXTL(l);
3291 c = CUR_CHAR(l);
3293 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3294 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3295 (c == '.') || (c == '-') ||
3296 (c == '_') || (c == ':') ||
3297 (IS_COMBINING(c)) ||
3298 (IS_EXTENDER(c)))) {
3299 if (len <= INT_MAX - l)
3300 len += l;
3301 NEXTL(l);
3302 c = CUR_CHAR(l);
3305 if (ctxt->instate == XML_PARSER_EOF)
3306 return(NULL);
3307 if (len > maxLength) {
3308 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3309 return(NULL);
3311 if (ctxt->input->cur - ctxt->input->base < len) {
3313 * There were a couple of bugs where PERefs lead to to a change
3314 * of the buffer. Check the buffer size to avoid passing an invalid
3315 * pointer to xmlDictLookup.
3317 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3318 "unexpected change of input buffer");
3319 return (NULL);
3321 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3322 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3323 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3327 * xmlParseName:
3328 * @ctxt: an XML parser context
3330 * DEPRECATED: Internal function, don't use.
3332 * parse an XML name.
3334 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3335 * CombiningChar | Extender
3337 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3339 * [6] Names ::= Name (#x20 Name)*
3341 * Returns the Name parsed or NULL
3344 const xmlChar *
3345 xmlParseName(xmlParserCtxtPtr ctxt) {
3346 const xmlChar *in;
3347 const xmlChar *ret;
3348 size_t count = 0;
3349 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3350 XML_MAX_TEXT_LENGTH :
3351 XML_MAX_NAME_LENGTH;
3353 GROW;
3354 if (ctxt->instate == XML_PARSER_EOF)
3355 return(NULL);
3357 #ifdef DEBUG
3358 nbParseName++;
3359 #endif
3362 * Accelerator for simple ASCII names
3364 in = ctxt->input->cur;
3365 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3366 ((*in >= 0x41) && (*in <= 0x5A)) ||
3367 (*in == '_') || (*in == ':')) {
3368 in++;
3369 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3370 ((*in >= 0x41) && (*in <= 0x5A)) ||
3371 ((*in >= 0x30) && (*in <= 0x39)) ||
3372 (*in == '_') || (*in == '-') ||
3373 (*in == ':') || (*in == '.'))
3374 in++;
3375 if ((*in > 0) && (*in < 0x80)) {
3376 count = in - ctxt->input->cur;
3377 if (count > maxLength) {
3378 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3379 return(NULL);
3381 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3382 ctxt->input->cur = in;
3383 ctxt->input->col += count;
3384 if (ret == NULL)
3385 xmlErrMemory(ctxt, NULL);
3386 return(ret);
3389 /* accelerator for special cases */
3390 return(xmlParseNameComplex(ctxt));
3393 static const xmlChar *
3394 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3395 int len = 0, l;
3396 int c;
3397 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3398 XML_MAX_TEXT_LENGTH :
3399 XML_MAX_NAME_LENGTH;
3400 size_t startPosition = 0;
3402 #ifdef DEBUG
3403 nbParseNCNameComplex++;
3404 #endif
3407 * Handler for more complex cases
3409 startPosition = CUR_PTR - BASE_PTR;
3410 c = CUR_CHAR(l);
3411 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3412 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3413 return(NULL);
3416 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3417 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3418 if (len <= INT_MAX - l)
3419 len += l;
3420 NEXTL(l);
3421 c = CUR_CHAR(l);
3423 if (ctxt->instate == XML_PARSER_EOF)
3424 return(NULL);
3425 if (len > maxLength) {
3426 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3427 return(NULL);
3429 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3433 * xmlParseNCName:
3434 * @ctxt: an XML parser context
3435 * @len: length of the string parsed
3437 * parse an XML name.
3439 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3440 * CombiningChar | Extender
3442 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3444 * Returns the Name parsed or NULL
3447 static const xmlChar *
3448 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3449 const xmlChar *in, *e;
3450 const xmlChar *ret;
3451 size_t count = 0;
3452 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3453 XML_MAX_TEXT_LENGTH :
3454 XML_MAX_NAME_LENGTH;
3456 #ifdef DEBUG
3457 nbParseNCName++;
3458 #endif
3461 * Accelerator for simple ASCII names
3463 in = ctxt->input->cur;
3464 e = ctxt->input->end;
3465 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3466 ((*in >= 0x41) && (*in <= 0x5A)) ||
3467 (*in == '_')) && (in < e)) {
3468 in++;
3469 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3470 ((*in >= 0x41) && (*in <= 0x5A)) ||
3471 ((*in >= 0x30) && (*in <= 0x39)) ||
3472 (*in == '_') || (*in == '-') ||
3473 (*in == '.')) && (in < e))
3474 in++;
3475 if (in >= e)
3476 goto complex;
3477 if ((*in > 0) && (*in < 0x80)) {
3478 count = in - ctxt->input->cur;
3479 if (count > maxLength) {
3480 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3481 return(NULL);
3483 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3484 ctxt->input->cur = in;
3485 ctxt->input->col += count;
3486 if (ret == NULL) {
3487 xmlErrMemory(ctxt, NULL);
3489 return(ret);
3492 complex:
3493 return(xmlParseNCNameComplex(ctxt));
3497 * xmlParseNameAndCompare:
3498 * @ctxt: an XML parser context
3500 * parse an XML name and compares for match
3501 * (specialized for endtag parsing)
3503 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3504 * and the name for mismatch
3507 static const xmlChar *
3508 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3509 register const xmlChar *cmp = other;
3510 register const xmlChar *in;
3511 const xmlChar *ret;
3513 GROW;
3514 if (ctxt->instate == XML_PARSER_EOF)
3515 return(NULL);
3517 in = ctxt->input->cur;
3518 while (*in != 0 && *in == *cmp) {
3519 ++in;
3520 ++cmp;
3522 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3523 /* success */
3524 ctxt->input->col += in - ctxt->input->cur;
3525 ctxt->input->cur = in;
3526 return (const xmlChar*) 1;
3528 /* failure (or end of input buffer), check with full function */
3529 ret = xmlParseName (ctxt);
3530 /* strings coming from the dictionary direct compare possible */
3531 if (ret == other) {
3532 return (const xmlChar*) 1;
3534 return ret;
3538 * xmlParseStringName:
3539 * @ctxt: an XML parser context
3540 * @str: a pointer to the string pointer (IN/OUT)
3542 * parse an XML name.
3544 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3545 * CombiningChar | Extender
3547 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3549 * [6] Names ::= Name (#x20 Name)*
3551 * Returns the Name parsed or NULL. The @str pointer
3552 * is updated to the current location in the string.
3555 static xmlChar *
3556 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3557 xmlChar buf[XML_MAX_NAMELEN + 5];
3558 const xmlChar *cur = *str;
3559 int len = 0, l;
3560 int c;
3561 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3562 XML_MAX_TEXT_LENGTH :
3563 XML_MAX_NAME_LENGTH;
3565 #ifdef DEBUG
3566 nbParseStringName++;
3567 #endif
3569 c = CUR_SCHAR(cur, l);
3570 if (!xmlIsNameStartChar(ctxt, c)) {
3571 return(NULL);
3574 COPY_BUF(l,buf,len,c);
3575 cur += l;
3576 c = CUR_SCHAR(cur, l);
3577 while (xmlIsNameChar(ctxt, c)) {
3578 COPY_BUF(l,buf,len,c);
3579 cur += l;
3580 c = CUR_SCHAR(cur, l);
3581 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3583 * Okay someone managed to make a huge name, so he's ready to pay
3584 * for the processing speed.
3586 xmlChar *buffer;
3587 int max = len * 2;
3589 buffer = (xmlChar *) xmlMallocAtomic(max);
3590 if (buffer == NULL) {
3591 xmlErrMemory(ctxt, NULL);
3592 return(NULL);
3594 memcpy(buffer, buf, len);
3595 while (xmlIsNameChar(ctxt, c)) {
3596 if (len + 10 > max) {
3597 xmlChar *tmp;
3599 max *= 2;
3600 tmp = (xmlChar *) xmlRealloc(buffer, max);
3601 if (tmp == NULL) {
3602 xmlErrMemory(ctxt, NULL);
3603 xmlFree(buffer);
3604 return(NULL);
3606 buffer = tmp;
3608 COPY_BUF(l,buffer,len,c);
3609 cur += l;
3610 c = CUR_SCHAR(cur, l);
3611 if (len > maxLength) {
3612 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3613 xmlFree(buffer);
3614 return(NULL);
3617 buffer[len] = 0;
3618 *str = cur;
3619 return(buffer);
3622 if (len > maxLength) {
3623 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3624 return(NULL);
3626 *str = cur;
3627 return(xmlStrndup(buf, len));
3631 * xmlParseNmtoken:
3632 * @ctxt: an XML parser context
3634 * DEPRECATED: Internal function, don't use.
3636 * parse an XML Nmtoken.
3638 * [7] Nmtoken ::= (NameChar)+
3640 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3642 * Returns the Nmtoken parsed or NULL
3645 xmlChar *
3646 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3647 xmlChar buf[XML_MAX_NAMELEN + 5];
3648 int len = 0, l;
3649 int c;
3650 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3651 XML_MAX_TEXT_LENGTH :
3652 XML_MAX_NAME_LENGTH;
3654 #ifdef DEBUG
3655 nbParseNmToken++;
3656 #endif
3658 c = CUR_CHAR(l);
3660 while (xmlIsNameChar(ctxt, c)) {
3661 COPY_BUF(l,buf,len,c);
3662 NEXTL(l);
3663 c = CUR_CHAR(l);
3664 if (len >= XML_MAX_NAMELEN) {
3666 * Okay someone managed to make a huge token, so he's ready to pay
3667 * for the processing speed.
3669 xmlChar *buffer;
3670 int max = len * 2;
3672 buffer = (xmlChar *) xmlMallocAtomic(max);
3673 if (buffer == NULL) {
3674 xmlErrMemory(ctxt, NULL);
3675 return(NULL);
3677 memcpy(buffer, buf, len);
3678 while (xmlIsNameChar(ctxt, c)) {
3679 if (len + 10 > max) {
3680 xmlChar *tmp;
3682 max *= 2;
3683 tmp = (xmlChar *) xmlRealloc(buffer, max);
3684 if (tmp == NULL) {
3685 xmlErrMemory(ctxt, NULL);
3686 xmlFree(buffer);
3687 return(NULL);
3689 buffer = tmp;
3691 COPY_BUF(l,buffer,len,c);
3692 if (len > maxLength) {
3693 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3694 xmlFree(buffer);
3695 return(NULL);
3697 NEXTL(l);
3698 c = CUR_CHAR(l);
3700 buffer[len] = 0;
3701 if (ctxt->instate == XML_PARSER_EOF) {
3702 xmlFree(buffer);
3703 return(NULL);
3705 return(buffer);
3708 if (ctxt->instate == XML_PARSER_EOF)
3709 return(NULL);
3710 if (len == 0)
3711 return(NULL);
3712 if (len > maxLength) {
3713 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3714 return(NULL);
3716 return(xmlStrndup(buf, len));
3720 * xmlParseEntityValue:
3721 * @ctxt: an XML parser context
3722 * @orig: if non-NULL store a copy of the original entity value
3724 * DEPRECATED: Internal function, don't use.
3726 * parse a value for ENTITY declarations
3728 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3729 * "'" ([^%&'] | PEReference | Reference)* "'"
3731 * Returns the EntityValue parsed with reference substituted or NULL
3734 xmlChar *
3735 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3736 xmlChar *buf = NULL;
3737 int len = 0;
3738 int size = XML_PARSER_BUFFER_SIZE;
3739 int c, l;
3740 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3741 XML_MAX_HUGE_LENGTH :
3742 XML_MAX_TEXT_LENGTH;
3743 xmlChar stop;
3744 xmlChar *ret = NULL;
3745 const xmlChar *cur = NULL;
3746 xmlParserInputPtr input;
3748 if (RAW == '"') stop = '"';
3749 else if (RAW == '\'') stop = '\'';
3750 else {
3751 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3752 return(NULL);
3754 buf = (xmlChar *) xmlMallocAtomic(size);
3755 if (buf == NULL) {
3756 xmlErrMemory(ctxt, NULL);
3757 return(NULL);
3761 * The content of the entity definition is copied in a buffer.
3764 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3765 input = ctxt->input;
3766 GROW;
3767 if (ctxt->instate == XML_PARSER_EOF)
3768 goto error;
3769 NEXT;
3770 c = CUR_CHAR(l);
3772 * NOTE: 4.4.5 Included in Literal
3773 * When a parameter entity reference appears in a literal entity
3774 * value, ... a single or double quote character in the replacement
3775 * text is always treated as a normal data character and will not
3776 * terminate the literal.
3777 * In practice it means we stop the loop only when back at parsing
3778 * the initial entity and the quote is found
3780 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3781 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3782 if (len + 5 >= size) {
3783 xmlChar *tmp;
3785 size *= 2;
3786 tmp = (xmlChar *) xmlRealloc(buf, size);
3787 if (tmp == NULL) {
3788 xmlErrMemory(ctxt, NULL);
3789 goto error;
3791 buf = tmp;
3793 COPY_BUF(l,buf,len,c);
3794 NEXTL(l);
3796 GROW;
3797 c = CUR_CHAR(l);
3798 if (c == 0) {
3799 GROW;
3800 c = CUR_CHAR(l);
3803 if (len > maxLength) {
3804 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3805 "entity value too long\n");
3806 goto error;
3809 buf[len] = 0;
3810 if (ctxt->instate == XML_PARSER_EOF)
3811 goto error;
3812 if (c != stop) {
3813 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3814 goto error;
3816 NEXT;
3819 * Raise problem w.r.t. '&' and '%' being used in non-entities
3820 * reference constructs. Note Charref will be handled in
3821 * xmlStringDecodeEntities()
3823 cur = buf;
3824 while (*cur != 0) { /* non input consuming */
3825 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3826 xmlChar *name;
3827 xmlChar tmp = *cur;
3828 int nameOk = 0;
3830 cur++;
3831 name = xmlParseStringName(ctxt, &cur);
3832 if (name != NULL) {
3833 nameOk = 1;
3834 xmlFree(name);
3836 if ((nameOk == 0) || (*cur != ';')) {
3837 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3838 "EntityValue: '%c' forbidden except for entities references\n",
3839 tmp);
3840 goto error;
3842 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3843 (ctxt->inputNr == 1)) {
3844 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3845 goto error;
3847 if (*cur == 0)
3848 break;
3850 cur++;
3854 * Then PEReference entities are substituted.
3856 * NOTE: 4.4.7 Bypassed
3857 * When a general entity reference appears in the EntityValue in
3858 * an entity declaration, it is bypassed and left as is.
3859 * so XML_SUBSTITUTE_REF is not set here.
3861 ++ctxt->depth;
3862 ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3863 0, 0, 0, /* check */ 1);
3864 --ctxt->depth;
3866 if (orig != NULL) {
3867 *orig = buf;
3868 buf = NULL;
3871 error:
3872 if (buf != NULL)
3873 xmlFree(buf);
3874 return(ret);
3878 * xmlParseAttValueComplex:
3879 * @ctxt: an XML parser context
3880 * @len: the resulting attribute len
3881 * @normalize: whether to apply the inner normalization
3883 * parse a value for an attribute, this is the fallback function
3884 * of xmlParseAttValue() when the attribute parsing requires handling
3885 * of non-ASCII characters, or normalization compaction.
3887 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3889 static xmlChar *
3890 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3891 xmlChar limit = 0;
3892 xmlChar *buf = NULL;
3893 xmlChar *rep = NULL;
3894 size_t len = 0;
3895 size_t buf_size = 0;
3896 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3897 XML_MAX_HUGE_LENGTH :
3898 XML_MAX_TEXT_LENGTH;
3899 int c, l, in_space = 0;
3900 xmlChar *current = NULL;
3901 xmlEntityPtr ent;
3903 if (NXT(0) == '"') {
3904 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3905 limit = '"';
3906 NEXT;
3907 } else if (NXT(0) == '\'') {
3908 limit = '\'';
3909 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3910 NEXT;
3911 } else {
3912 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3913 return(NULL);
3917 * allocate a translation buffer.
3919 buf_size = XML_PARSER_BUFFER_SIZE;
3920 buf = (xmlChar *) xmlMallocAtomic(buf_size);
3921 if (buf == NULL) goto mem_error;
3924 * OK loop until we reach one of the ending char or a size limit.
3926 c = CUR_CHAR(l);
3927 while (((NXT(0) != limit) && /* checked */
3928 (IS_CHAR(c)) && (c != '<')) &&
3929 (ctxt->instate != XML_PARSER_EOF)) {
3930 if (c == '&') {
3931 in_space = 0;
3932 if (NXT(1) == '#') {
3933 int val = xmlParseCharRef(ctxt);
3935 if (val == '&') {
3936 if (ctxt->replaceEntities) {
3937 if (len + 10 > buf_size) {
3938 growBuffer(buf, 10);
3940 buf[len++] = '&';
3941 } else {
3943 * The reparsing will be done in xmlStringGetNodeList()
3944 * called by the attribute() function in SAX.c
3946 if (len + 10 > buf_size) {
3947 growBuffer(buf, 10);
3949 buf[len++] = '&';
3950 buf[len++] = '#';
3951 buf[len++] = '3';
3952 buf[len++] = '8';
3953 buf[len++] = ';';
3955 } else if (val != 0) {
3956 if (len + 10 > buf_size) {
3957 growBuffer(buf, 10);
3959 len += xmlCopyChar(0, &buf[len], val);
3961 } else {
3962 ent = xmlParseEntityRef(ctxt);
3963 if ((ent != NULL) &&
3964 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3965 if (len + 10 > buf_size) {
3966 growBuffer(buf, 10);
3968 if ((ctxt->replaceEntities == 0) &&
3969 (ent->content[0] == '&')) {
3970 buf[len++] = '&';
3971 buf[len++] = '#';
3972 buf[len++] = '3';
3973 buf[len++] = '8';
3974 buf[len++] = ';';
3975 } else {
3976 buf[len++] = ent->content[0];
3978 } else if ((ent != NULL) &&
3979 (ctxt->replaceEntities != 0)) {
3980 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3981 if (xmlParserEntityCheck(ctxt, ent->length))
3982 goto error;
3984 ++ctxt->depth;
3985 rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
3986 ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
3987 /* check */ 1);
3988 --ctxt->depth;
3989 if (rep != NULL) {
3990 current = rep;
3991 while (*current != 0) { /* non input consuming */
3992 if ((*current == 0xD) || (*current == 0xA) ||
3993 (*current == 0x9)) {
3994 buf[len++] = 0x20;
3995 current++;
3996 } else
3997 buf[len++] = *current++;
3998 if (len + 10 > buf_size) {
3999 growBuffer(buf, 10);
4002 xmlFree(rep);
4003 rep = NULL;
4005 } else {
4006 if (len + 10 > buf_size) {
4007 growBuffer(buf, 10);
4009 if (ent->content != NULL)
4010 buf[len++] = ent->content[0];
4012 } else if (ent != NULL) {
4013 int i = xmlStrlen(ent->name);
4014 const xmlChar *cur = ent->name;
4017 * We also check for recursion and amplification
4018 * when entities are not substituted. They're
4019 * often expanded later.
4021 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4022 (ent->content != NULL)) {
4023 if ((ent->flags & XML_ENT_CHECKED) == 0) {
4024 unsigned long oldCopy = ctxt->sizeentcopy;
4026 ctxt->sizeentcopy = ent->length;
4028 ++ctxt->depth;
4029 rep = xmlStringDecodeEntitiesInt(ctxt,
4030 ent->content, ent->length,
4031 XML_SUBSTITUTE_REF, 0, 0, 0,
4032 /* check */ 1);
4033 --ctxt->depth;
4036 * If we're parsing DTD content, the entity
4037 * might reference other entities which
4038 * weren't defined yet, so the check isn't
4039 * reliable.
4041 if (ctxt->inSubset == 0) {
4042 ent->flags |= XML_ENT_CHECKED;
4043 ent->expandedSize = ctxt->sizeentcopy;
4046 if (rep != NULL) {
4047 xmlFree(rep);
4048 rep = NULL;
4049 } else {
4050 ent->content[0] = 0;
4053 if (xmlParserEntityCheck(ctxt, oldCopy))
4054 goto error;
4055 } else {
4056 if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4057 goto error;
4062 * Just output the reference
4064 buf[len++] = '&';
4065 while (len + i + 10 > buf_size) {
4066 growBuffer(buf, i + 10);
4068 for (;i > 0;i--)
4069 buf[len++] = *cur++;
4070 buf[len++] = ';';
4073 } else {
4074 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4075 if ((len != 0) || (!normalize)) {
4076 if ((!normalize) || (!in_space)) {
4077 COPY_BUF(l,buf,len,0x20);
4078 while (len + 10 > buf_size) {
4079 growBuffer(buf, 10);
4082 in_space = 1;
4084 } else {
4085 in_space = 0;
4086 COPY_BUF(l,buf,len,c);
4087 if (len + 10 > buf_size) {
4088 growBuffer(buf, 10);
4091 NEXTL(l);
4093 GROW;
4094 c = CUR_CHAR(l);
4095 if (len > maxLength) {
4096 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4097 "AttValue length too long\n");
4098 goto mem_error;
4101 if (ctxt->instate == XML_PARSER_EOF)
4102 goto error;
4104 if ((in_space) && (normalize)) {
4105 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4107 buf[len] = 0;
4108 if (RAW == '<') {
4109 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4110 } else if (RAW != limit) {
4111 if ((c != 0) && (!IS_CHAR(c))) {
4112 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4113 "invalid character in attribute value\n");
4114 } else {
4115 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4116 "AttValue: ' expected\n");
4118 } else
4119 NEXT;
4121 if (attlen != NULL) *attlen = len;
4122 return(buf);
4124 mem_error:
4125 xmlErrMemory(ctxt, NULL);
4126 error:
4127 if (buf != NULL)
4128 xmlFree(buf);
4129 if (rep != NULL)
4130 xmlFree(rep);
4131 return(NULL);
4135 * xmlParseAttValue:
4136 * @ctxt: an XML parser context
4138 * DEPRECATED: Internal function, don't use.
4140 * parse a value for an attribute
4141 * Note: the parser won't do substitution of entities here, this
4142 * will be handled later in xmlStringGetNodeList
4144 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4145 * "'" ([^<&'] | Reference)* "'"
4147 * 3.3.3 Attribute-Value Normalization:
4148 * Before the value of an attribute is passed to the application or
4149 * checked for validity, the XML processor must normalize it as follows:
4150 * - a character reference is processed by appending the referenced
4151 * character to the attribute value
4152 * - an entity reference is processed by recursively processing the
4153 * replacement text of the entity
4154 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4155 * appending #x20 to the normalized value, except that only a single
4156 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4157 * parsed entity or the literal entity value of an internal parsed entity
4158 * - other characters are processed by appending them to the normalized value
4159 * If the declared value is not CDATA, then the XML processor must further
4160 * process the normalized attribute value by discarding any leading and
4161 * trailing space (#x20) characters, and by replacing sequences of space
4162 * (#x20) characters by a single space (#x20) character.
4163 * All attributes for which no declaration has been read should be treated
4164 * by a non-validating parser as if declared CDATA.
4166 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4170 xmlChar *
4171 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4172 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4173 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4177 * xmlParseSystemLiteral:
4178 * @ctxt: an XML parser context
4180 * DEPRECATED: Internal function, don't use.
4182 * parse an XML Literal
4184 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4186 * Returns the SystemLiteral parsed or NULL
4189 xmlChar *
4190 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4191 xmlChar *buf = NULL;
4192 int len = 0;
4193 int size = XML_PARSER_BUFFER_SIZE;
4194 int cur, l;
4195 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4196 XML_MAX_TEXT_LENGTH :
4197 XML_MAX_NAME_LENGTH;
4198 xmlChar stop;
4199 int state = ctxt->instate;
4201 if (RAW == '"') {
4202 NEXT;
4203 stop = '"';
4204 } else if (RAW == '\'') {
4205 NEXT;
4206 stop = '\'';
4207 } else {
4208 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4209 return(NULL);
4212 buf = (xmlChar *) xmlMallocAtomic(size);
4213 if (buf == NULL) {
4214 xmlErrMemory(ctxt, NULL);
4215 return(NULL);
4217 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4218 cur = CUR_CHAR(l);
4219 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4220 if (len + 5 >= size) {
4221 xmlChar *tmp;
4223 size *= 2;
4224 tmp = (xmlChar *) xmlRealloc(buf, size);
4225 if (tmp == NULL) {
4226 xmlFree(buf);
4227 xmlErrMemory(ctxt, NULL);
4228 ctxt->instate = (xmlParserInputState) state;
4229 return(NULL);
4231 buf = tmp;
4233 COPY_BUF(l,buf,len,cur);
4234 if (len > maxLength) {
4235 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4236 xmlFree(buf);
4237 ctxt->instate = (xmlParserInputState) state;
4238 return(NULL);
4240 NEXTL(l);
4241 cur = CUR_CHAR(l);
4243 buf[len] = 0;
4244 if (ctxt->instate == XML_PARSER_EOF) {
4245 xmlFree(buf);
4246 return(NULL);
4248 ctxt->instate = (xmlParserInputState) state;
4249 if (!IS_CHAR(cur)) {
4250 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4251 } else {
4252 NEXT;
4254 return(buf);
4258 * xmlParsePubidLiteral:
4259 * @ctxt: an XML parser context
4261 * DEPRECATED: Internal function, don't use.
4263 * parse an XML public literal
4265 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4267 * Returns the PubidLiteral parsed or NULL.
4270 xmlChar *
4271 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4272 xmlChar *buf = NULL;
4273 int len = 0;
4274 int size = XML_PARSER_BUFFER_SIZE;
4275 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4276 XML_MAX_TEXT_LENGTH :
4277 XML_MAX_NAME_LENGTH;
4278 xmlChar cur;
4279 xmlChar stop;
4280 xmlParserInputState oldstate = ctxt->instate;
4282 if (RAW == '"') {
4283 NEXT;
4284 stop = '"';
4285 } else if (RAW == '\'') {
4286 NEXT;
4287 stop = '\'';
4288 } else {
4289 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4290 return(NULL);
4292 buf = (xmlChar *) xmlMallocAtomic(size);
4293 if (buf == NULL) {
4294 xmlErrMemory(ctxt, NULL);
4295 return(NULL);
4297 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4298 cur = CUR;
4299 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4300 if (len + 1 >= size) {
4301 xmlChar *tmp;
4303 size *= 2;
4304 tmp = (xmlChar *) xmlRealloc(buf, size);
4305 if (tmp == NULL) {
4306 xmlErrMemory(ctxt, NULL);
4307 xmlFree(buf);
4308 return(NULL);
4310 buf = tmp;
4312 buf[len++] = cur;
4313 if (len > maxLength) {
4314 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4315 xmlFree(buf);
4316 return(NULL);
4318 NEXT;
4319 cur = CUR;
4321 buf[len] = 0;
4322 if (ctxt->instate == XML_PARSER_EOF) {
4323 xmlFree(buf);
4324 return(NULL);
4326 if (cur != stop) {
4327 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4328 } else {
4329 NEXTL(1);
4331 ctxt->instate = oldstate;
4332 return(buf);
4335 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
/*
 * Lookup table used for the test in the inner loop of the char data
 * parsing.  It is indexed by a byte value: an entry is either the byte
 * itself (the byte can be skipped by the fast scan) or 0 (the scan must
 * stop).  The zero entries are the control characters other than 0x09
 * (so LF and CR are handled separately), '&', '<', ']' and every
 * non-ASCII byte.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    /* 0x88 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x98 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4376 * xmlParseCharDataInternal:
4377 * @ctxt: an XML parser context
4378 * @partial: buffer may contain partial UTF-8 sequences
4380 * Parse character data. Always makes progress if the first char isn't
4381 * '<' or '&'.
4383 * The right angle bracket (>) may be represented using the string "&gt;",
4384 * and must, for compatibility, be escaped using "&gt;" or a character
4385 * reference when it appears in the string "]]>" in content, when that
4386 * string is not marking the end of a CDATA section.
4388 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4390 static void
4391 xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4392 const xmlChar *in;
4393 int nbchar = 0;
4394 int line = ctxt->input->line;
4395 int col = ctxt->input->col;
4396 int ccol;
4398 GROW;
4400 * Accelerated common case where input don't need to be
4401 * modified before passing it to the handler.
4403 in = ctxt->input->cur;
4404 do {
4405 get_more_space:
4406 while (*in == 0x20) { in++; ctxt->input->col++; }
4407 if (*in == 0xA) {
4408 do {
4409 ctxt->input->line++; ctxt->input->col = 1;
4410 in++;
4411 } while (*in == 0xA);
4412 goto get_more_space;
4414 if (*in == '<') {
4415 nbchar = in - ctxt->input->cur;
4416 if (nbchar > 0) {
4417 const xmlChar *tmp = ctxt->input->cur;
4418 ctxt->input->cur = in;
4420 if ((ctxt->sax != NULL) &&
4421 (ctxt->sax->ignorableWhitespace !=
4422 ctxt->sax->characters)) {
4423 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4424 if (ctxt->sax->ignorableWhitespace != NULL)
4425 ctxt->sax->ignorableWhitespace(ctxt->userData,
4426 tmp, nbchar);
4427 } else {
4428 if (ctxt->sax->characters != NULL)
4429 ctxt->sax->characters(ctxt->userData,
4430 tmp, nbchar);
4431 if (*ctxt->space == -1)
4432 *ctxt->space = -2;
4434 } else if ((ctxt->sax != NULL) &&
4435 (ctxt->sax->characters != NULL)) {
4436 ctxt->sax->characters(ctxt->userData,
4437 tmp, nbchar);
4440 return;
4443 get_more:
4444 ccol = ctxt->input->col;
4445 while (test_char_data[*in]) {
4446 in++;
4447 ccol++;
4449 ctxt->input->col = ccol;
4450 if (*in == 0xA) {
4451 do {
4452 ctxt->input->line++; ctxt->input->col = 1;
4453 in++;
4454 } while (*in == 0xA);
4455 goto get_more;
4457 if (*in == ']') {
4458 if ((in[1] == ']') && (in[2] == '>')) {
4459 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4460 if (ctxt->instate != XML_PARSER_EOF)
4461 ctxt->input->cur = in + 1;
4462 return;
4464 in++;
4465 ctxt->input->col++;
4466 goto get_more;
4468 nbchar = in - ctxt->input->cur;
4469 if (nbchar > 0) {
4470 if ((ctxt->sax != NULL) &&
4471 (ctxt->sax->ignorableWhitespace !=
4472 ctxt->sax->characters) &&
4473 (IS_BLANK_CH(*ctxt->input->cur))) {
4474 const xmlChar *tmp = ctxt->input->cur;
4475 ctxt->input->cur = in;
4477 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4478 if (ctxt->sax->ignorableWhitespace != NULL)
4479 ctxt->sax->ignorableWhitespace(ctxt->userData,
4480 tmp, nbchar);
4481 } else {
4482 if (ctxt->sax->characters != NULL)
4483 ctxt->sax->characters(ctxt->userData,
4484 tmp, nbchar);
4485 if (*ctxt->space == -1)
4486 *ctxt->space = -2;
4488 line = ctxt->input->line;
4489 col = ctxt->input->col;
4490 } else if (ctxt->sax != NULL) {
4491 if (ctxt->sax->characters != NULL)
4492 ctxt->sax->characters(ctxt->userData,
4493 ctxt->input->cur, nbchar);
4494 line = ctxt->input->line;
4495 col = ctxt->input->col;
4498 ctxt->input->cur = in;
4499 if (*in == 0xD) {
4500 in++;
4501 if (*in == 0xA) {
4502 ctxt->input->cur = in;
4503 in++;
4504 ctxt->input->line++; ctxt->input->col = 1;
4505 continue; /* while */
4507 in--;
4509 if (*in == '<') {
4510 return;
4512 if (*in == '&') {
4513 return;
4515 SHRINK;
4516 GROW;
4517 if (ctxt->instate == XML_PARSER_EOF)
4518 return;
4519 in = ctxt->input->cur;
4520 } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4521 (*in == 0x09) || (*in == 0x0a));
4522 ctxt->input->line = line;
4523 ctxt->input->col = col;
4524 xmlParseCharDataComplex(ctxt, partial);
4528 * xmlParseCharDataComplex:
4529 * @ctxt: an XML parser context
4530 * @cdata: int indicating whether we are within a CDATA section
4532 * Always makes progress if the first char isn't '<' or '&'.
4534 * parse a CharData section.this is the fallback function
4535 * of xmlParseCharData() when the parsing requires handling
4536 * of non-ASCII characters.
4538 static void
4539 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4540 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4541 int nbchar = 0;
4542 int cur, l;
4544 cur = CUR_CHAR(l);
4545 while ((cur != '<') && /* checked */
4546 (cur != '&') &&
4547 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4548 if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4549 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4551 COPY_BUF(l,buf,nbchar,cur);
4552 /* move current position before possible calling of ctxt->sax->characters */
4553 NEXTL(l);
4554 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4555 buf[nbchar] = 0;
4558 * OK the segment is to be consumed as chars.
4560 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4561 if (areBlanks(ctxt, buf, nbchar, 0)) {
4562 if (ctxt->sax->ignorableWhitespace != NULL)
4563 ctxt->sax->ignorableWhitespace(ctxt->userData,
4564 buf, nbchar);
4565 } else {
4566 if (ctxt->sax->characters != NULL)
4567 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4568 if ((ctxt->sax->characters !=
4569 ctxt->sax->ignorableWhitespace) &&
4570 (*ctxt->space == -1))
4571 *ctxt->space = -2;
4574 nbchar = 0;
4575 /* something really bad happened in the SAX callback */
4576 if (ctxt->instate != XML_PARSER_CONTENT)
4577 return;
4578 SHRINK;
4580 cur = CUR_CHAR(l);
4582 if (ctxt->instate == XML_PARSER_EOF)
4583 return;
4584 if (nbchar != 0) {
4585 buf[nbchar] = 0;
4587 * OK the segment is to be consumed as chars.
4589 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4590 if (areBlanks(ctxt, buf, nbchar, 0)) {
4591 if (ctxt->sax->ignorableWhitespace != NULL)
4592 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4593 } else {
4594 if (ctxt->sax->characters != NULL)
4595 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4596 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4597 (*ctxt->space == -1))
4598 *ctxt->space = -2;
4603 * cur == 0 can mean
4605 * - XML_PARSER_EOF or memory error. This is checked above.
4606 * - An actual 0 character.
4607 * - End of buffer.
4608 * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4610 if (ctxt->input->cur < ctxt->input->end) {
4611 if ((cur == 0) && (CUR != 0)) {
4612 if (partial == 0) {
4613 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4614 "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4615 NEXTL(1);
4617 } else if ((cur != '<') && (cur != '&')) {
4618 /* Generate the error and skip the offending character */
4619 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4620 "PCDATA invalid Char value %d\n", cur);
4621 NEXTL(l);
4627 * xmlParseCharData:
4628 * @ctxt: an XML parser context
4629 * @cdata: unused
4631 * DEPRECATED: Internal function, don't use.
4633 void
4634 xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4635 xmlParseCharDataInternal(ctxt, 0);
4639 * xmlParseExternalID:
4640 * @ctxt: an XML parser context
4641 * @publicID: a xmlChar** receiving PubidLiteral
4642 * @strict: indicate whether we should restrict parsing to only
4643 * production [75], see NOTE below
4645 * DEPRECATED: Internal function, don't use.
4647 * Parse an External ID or a Public ID
4649 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4650 * 'PUBLIC' S PubidLiteral S SystemLiteral
4652 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4653 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4655 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4657 * Returns the function returns SystemLiteral and in the second
4658 * case publicID receives PubidLiteral, is strict is off
4659 * it is possible to return NULL and have publicID set.
4662 xmlChar *
4663 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4664 xmlChar *URI = NULL;
4666 *publicID = NULL;
4667 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4668 SKIP(6);
4669 if (SKIP_BLANKS == 0) {
4670 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4671 "Space required after 'SYSTEM'\n");
4673 URI = xmlParseSystemLiteral(ctxt);
4674 if (URI == NULL) {
4675 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4677 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4678 SKIP(6);
4679 if (SKIP_BLANKS == 0) {
4680 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4681 "Space required after 'PUBLIC'\n");
4683 *publicID = xmlParsePubidLiteral(ctxt);
4684 if (*publicID == NULL) {
4685 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4687 if (strict) {
4689 * We don't handle [83] so "S SystemLiteral" is required.
4691 if (SKIP_BLANKS == 0) {
4692 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4693 "Space required after the Public Identifier\n");
4695 } else {
4697 * We handle [83] so we return immediately, if
4698 * "S SystemLiteral" is not detected. We skip blanks if no
4699 * system literal was found, but this is harmless since we must
4700 * be at the end of a NotationDecl.
4702 if (SKIP_BLANKS == 0) return(NULL);
4703 if ((CUR != '\'') && (CUR != '"')) return(NULL);
4705 URI = xmlParseSystemLiteral(ctxt);
4706 if (URI == NULL) {
4707 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4710 return(URI);
4714 * xmlParseCommentComplex:
4715 * @ctxt: an XML parser context
4716 * @buf: the already parsed part of the buffer
4717 * @len: number of bytes in the buffer
4718 * @size: allocated size of the buffer
4720 * Skip an XML (SGML) comment <!-- .... -->
4721 * The spec says that "For compatibility, the string "--" (double-hyphen)
4722 * must not occur within comments. "
4723 * This is the slow routine in case the accelerator for ascii didn't work
4725 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4727 static void
4728 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4729 size_t len, size_t size) {
4730 int q, ql;
4731 int r, rl;
4732 int cur, l;
4733 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4734 XML_MAX_HUGE_LENGTH :
4735 XML_MAX_TEXT_LENGTH;
4736 int inputid;
4738 inputid = ctxt->input->id;
4740 if (buf == NULL) {
4741 len = 0;
4742 size = XML_PARSER_BUFFER_SIZE;
4743 buf = (xmlChar *) xmlMallocAtomic(size);
4744 if (buf == NULL) {
4745 xmlErrMemory(ctxt, NULL);
4746 return;
4749 q = CUR_CHAR(ql);
4750 if (q == 0)
4751 goto not_terminated;
4752 if (!IS_CHAR(q)) {
4753 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4754 "xmlParseComment: invalid xmlChar value %d\n",
4756 xmlFree (buf);
4757 return;
4759 NEXTL(ql);
4760 r = CUR_CHAR(rl);
4761 if (r == 0)
4762 goto not_terminated;
4763 if (!IS_CHAR(r)) {
4764 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4765 "xmlParseComment: invalid xmlChar value %d\n",
4767 xmlFree (buf);
4768 return;
4770 NEXTL(rl);
4771 cur = CUR_CHAR(l);
4772 if (cur == 0)
4773 goto not_terminated;
4774 while (IS_CHAR(cur) && /* checked */
4775 ((cur != '>') ||
4776 (r != '-') || (q != '-'))) {
4777 if ((r == '-') && (q == '-')) {
4778 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4780 if (len + 5 >= size) {
4781 xmlChar *new_buf;
4782 size_t new_size;
4784 new_size = size * 2;
4785 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4786 if (new_buf == NULL) {
4787 xmlFree (buf);
4788 xmlErrMemory(ctxt, NULL);
4789 return;
4791 buf = new_buf;
4792 size = new_size;
4794 COPY_BUF(ql,buf,len,q);
4795 if (len > maxLength) {
4796 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4797 "Comment too big found", NULL);
4798 xmlFree (buf);
4799 return;
4802 q = r;
4803 ql = rl;
4804 r = cur;
4805 rl = l;
4807 NEXTL(l);
4808 cur = CUR_CHAR(l);
4811 buf[len] = 0;
4812 if (ctxt->instate == XML_PARSER_EOF) {
4813 xmlFree(buf);
4814 return;
4816 if (cur == 0) {
4817 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4818 "Comment not terminated \n<!--%.50s\n", buf);
4819 } else if (!IS_CHAR(cur)) {
4820 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4821 "xmlParseComment: invalid xmlChar value %d\n",
4822 cur);
4823 } else {
4824 if (inputid != ctxt->input->id) {
4825 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4826 "Comment doesn't start and stop in the same"
4827 " entity\n");
4829 NEXT;
4830 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4831 (!ctxt->disableSAX))
4832 ctxt->sax->comment(ctxt->userData, buf);
4834 xmlFree(buf);
4835 return;
4836 not_terminated:
4837 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4838 "Comment not terminated\n", NULL);
4839 xmlFree(buf);
4840 return;
4844 * xmlParseComment:
4845 * @ctxt: an XML parser context
4847 * DEPRECATED: Internal function, don't use.
4849 * Parse an XML (SGML) comment. Always consumes '<!'.
4851 * The spec says that "For compatibility, the string "--" (double-hyphen)
4852 * must not occur within comments. "
4854 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4856 void
4857 xmlParseComment(xmlParserCtxtPtr ctxt) {
4858 xmlChar *buf = NULL;
4859 size_t size = XML_PARSER_BUFFER_SIZE;
4860 size_t len = 0;
4861 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4862 XML_MAX_HUGE_LENGTH :
4863 XML_MAX_TEXT_LENGTH;
4864 xmlParserInputState state;
4865 const xmlChar *in;
4866 size_t nbchar = 0;
4867 int ccol;
4868 int inputid;
4871 * Check that there is a comment right here.
4873 if ((RAW != '<') || (NXT(1) != '!'))
4874 return;
4875 SKIP(2);
4876 if ((RAW != '-') || (NXT(1) != '-'))
4877 return;
4878 state = ctxt->instate;
4879 ctxt->instate = XML_PARSER_COMMENT;
4880 inputid = ctxt->input->id;
4881 SKIP(2);
4882 GROW;
4885 * Accelerated common case where input don't need to be
4886 * modified before passing it to the handler.
4888 in = ctxt->input->cur;
4889 do {
4890 if (*in == 0xA) {
4891 do {
4892 ctxt->input->line++; ctxt->input->col = 1;
4893 in++;
4894 } while (*in == 0xA);
4896 get_more:
4897 ccol = ctxt->input->col;
4898 while (((*in > '-') && (*in <= 0x7F)) ||
4899 ((*in >= 0x20) && (*in < '-')) ||
4900 (*in == 0x09)) {
4901 in++;
4902 ccol++;
4904 ctxt->input->col = ccol;
4905 if (*in == 0xA) {
4906 do {
4907 ctxt->input->line++; ctxt->input->col = 1;
4908 in++;
4909 } while (*in == 0xA);
4910 goto get_more;
4912 nbchar = in - ctxt->input->cur;
4914 * save current set of data
4916 if (nbchar > 0) {
4917 if ((ctxt->sax != NULL) &&
4918 (ctxt->sax->comment != NULL)) {
4919 if (buf == NULL) {
4920 if ((*in == '-') && (in[1] == '-'))
4921 size = nbchar + 1;
4922 else
4923 size = XML_PARSER_BUFFER_SIZE + nbchar;
4924 buf = (xmlChar *) xmlMallocAtomic(size);
4925 if (buf == NULL) {
4926 xmlErrMemory(ctxt, NULL);
4927 ctxt->instate = state;
4928 return;
4930 len = 0;
4931 } else if (len + nbchar + 1 >= size) {
4932 xmlChar *new_buf;
4933 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4934 new_buf = (xmlChar *) xmlRealloc(buf, size);
4935 if (new_buf == NULL) {
4936 xmlFree (buf);
4937 xmlErrMemory(ctxt, NULL);
4938 ctxt->instate = state;
4939 return;
4941 buf = new_buf;
4943 memcpy(&buf[len], ctxt->input->cur, nbchar);
4944 len += nbchar;
4945 buf[len] = 0;
4948 if (len > maxLength) {
4949 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4950 "Comment too big found", NULL);
4951 xmlFree (buf);
4952 return;
4954 ctxt->input->cur = in;
4955 if (*in == 0xA) {
4956 in++;
4957 ctxt->input->line++; ctxt->input->col = 1;
4959 if (*in == 0xD) {
4960 in++;
4961 if (*in == 0xA) {
4962 ctxt->input->cur = in;
4963 in++;
4964 ctxt->input->line++; ctxt->input->col = 1;
4965 goto get_more;
4967 in--;
4969 SHRINK;
4970 GROW;
4971 if (ctxt->instate == XML_PARSER_EOF) {
4972 xmlFree(buf);
4973 return;
4975 in = ctxt->input->cur;
4976 if (*in == '-') {
4977 if (in[1] == '-') {
4978 if (in[2] == '>') {
4979 if (ctxt->input->id != inputid) {
4980 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4981 "comment doesn't start and stop in the"
4982 " same entity\n");
4984 SKIP(3);
4985 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4986 (!ctxt->disableSAX)) {
4987 if (buf != NULL)
4988 ctxt->sax->comment(ctxt->userData, buf);
4989 else
4990 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4992 if (buf != NULL)
4993 xmlFree(buf);
4994 if (ctxt->instate != XML_PARSER_EOF)
4995 ctxt->instate = state;
4996 return;
4998 if (buf != NULL) {
4999 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5000 "Double hyphen within comment: "
5001 "<!--%.50s\n",
5002 buf);
5003 } else
5004 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5005 "Double hyphen within comment\n", NULL);
5006 if (ctxt->instate == XML_PARSER_EOF) {
5007 xmlFree(buf);
5008 return;
5010 in++;
5011 ctxt->input->col++;
5013 in++;
5014 ctxt->input->col++;
5015 goto get_more;
5017 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5018 xmlParseCommentComplex(ctxt, buf, len, size);
5019 ctxt->instate = state;
5020 return;
5025 * xmlParsePITarget:
5026 * @ctxt: an XML parser context
5028 * DEPRECATED: Internal function, don't use.
5030 * parse the name of a PI
5032 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5034 * Returns the PITarget name or NULL
5037 const xmlChar *
5038 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5039 const xmlChar *name;
5041 name = xmlParseName(ctxt);
5042 if ((name != NULL) &&
5043 ((name[0] == 'x') || (name[0] == 'X')) &&
5044 ((name[1] == 'm') || (name[1] == 'M')) &&
5045 ((name[2] == 'l') || (name[2] == 'L'))) {
5046 int i;
5047 if ((name[0] == 'x') && (name[1] == 'm') &&
5048 (name[2] == 'l') && (name[3] == 0)) {
5049 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5050 "XML declaration allowed only at the start of the document\n");
5051 return(name);
5052 } else if (name[3] == 0) {
5053 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5054 return(name);
5056 for (i = 0;;i++) {
5057 if (xmlW3CPIs[i] == NULL) break;
5058 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5059 return(name);
5061 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5062 "xmlParsePITarget: invalid name prefix 'xml'\n",
5063 NULL, NULL);
5065 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5066 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5067 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5069 return(name);
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction of the form
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * The quoted URL is extracted from @catalog and added to the catalogs
 * of the parsing context through xmlCatalogAddLocal(), so it can be
 * used if there is a resolution need further down in the document.
 * Most syntax errors emit an XML_WAR_CATALOG_PI warning; a missing '='
 * after the "catalog" keyword makes the function return silently.
 */
static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *pos = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* Expect: S? "catalog" S? '=' S? quoted-URL S? */
    for (; IS_BLANK_CH(*pos); pos++)
        ;
    if (xmlStrncmp(pos, BAD_CAST"catalog", 7) != 0)
        goto error;
    pos += 7;

    for (; IS_BLANK_CH(*pos); pos++)
        ;
    if (*pos != '=')
        return;
    pos++;

    for (; IS_BLANK_CH(*pos); pos++)
        ;
    quote = *pos;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    pos++;

    /* Collect everything up to the matching quote. */
    start = pos;
    for (; (*pos != 0) && (*pos != quote); pos++)
        ;
    if (*pos == 0)
        goto error;
    URL = xmlStrndup(start, pos - start);
    pos++;

    /* Only blanks may follow the closing quote. */
    for (; IS_BLANK_CH(*pos); pos++)
        ;
    if (*pos != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5135 * xmlParsePI:
5136 * @ctxt: an XML parser context
5138 * DEPRECATED: Internal function, don't use.
5140 * parse an XML Processing Instruction.
5142 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5144 * The processing is transferred to SAX once parsed.
5147 void
5148 xmlParsePI(xmlParserCtxtPtr ctxt) {
5149 xmlChar *buf = NULL;
5150 size_t len = 0;
5151 size_t size = XML_PARSER_BUFFER_SIZE;
5152 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5153 XML_MAX_HUGE_LENGTH :
5154 XML_MAX_TEXT_LENGTH;
5155 int cur, l;
5156 const xmlChar *target;
5157 xmlParserInputState state;
5159 if ((RAW == '<') && (NXT(1) == '?')) {
5160 int inputid = ctxt->input->id;
5161 state = ctxt->instate;
5162 ctxt->instate = XML_PARSER_PI;
5164 * this is a Processing Instruction.
5166 SKIP(2);
5169 * Parse the target name and check for special support like
5170 * namespace.
5172 target = xmlParsePITarget(ctxt);
5173 if (target != NULL) {
5174 if ((RAW == '?') && (NXT(1) == '>')) {
5175 if (inputid != ctxt->input->id) {
5176 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5177 "PI declaration doesn't start and stop in"
5178 " the same entity\n");
5180 SKIP(2);
5183 * SAX: PI detected.
5185 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5186 (ctxt->sax->processingInstruction != NULL))
5187 ctxt->sax->processingInstruction(ctxt->userData,
5188 target, NULL);
5189 if (ctxt->instate != XML_PARSER_EOF)
5190 ctxt->instate = state;
5191 return;
5193 buf = (xmlChar *) xmlMallocAtomic(size);
5194 if (buf == NULL) {
5195 xmlErrMemory(ctxt, NULL);
5196 ctxt->instate = state;
5197 return;
5199 if (SKIP_BLANKS == 0) {
5200 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5201 "ParsePI: PI %s space expected\n", target);
5203 cur = CUR_CHAR(l);
5204 while (IS_CHAR(cur) && /* checked */
5205 ((cur != '?') || (NXT(1) != '>'))) {
5206 if (len + 5 >= size) {
5207 xmlChar *tmp;
5208 size_t new_size = size * 2;
5209 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5210 if (tmp == NULL) {
5211 xmlErrMemory(ctxt, NULL);
5212 xmlFree(buf);
5213 ctxt->instate = state;
5214 return;
5216 buf = tmp;
5217 size = new_size;
5219 COPY_BUF(l,buf,len,cur);
5220 if (len > maxLength) {
5221 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5222 "PI %s too big found", target);
5223 xmlFree(buf);
5224 ctxt->instate = state;
5225 return;
5227 NEXTL(l);
5228 cur = CUR_CHAR(l);
5230 buf[len] = 0;
5231 if (ctxt->instate == XML_PARSER_EOF) {
5232 xmlFree(buf);
5233 return;
5235 if (cur != '?') {
5236 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5237 "ParsePI: PI %s never end ...\n", target);
5238 } else {
5239 if (inputid != ctxt->input->id) {
5240 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5241 "PI declaration doesn't start and stop in"
5242 " the same entity\n");
5244 SKIP(2);
5246 #ifdef LIBXML_CATALOG_ENABLED
5247 if (((state == XML_PARSER_MISC) ||
5248 (state == XML_PARSER_START)) &&
5249 (xmlStrEqual(target, XML_CATALOG_PI))) {
5250 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5251 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5252 (allow == XML_CATA_ALLOW_ALL))
5253 xmlParseCatalogPI(ctxt, buf);
5255 #endif
5259 * SAX: PI detected.
5261 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5262 (ctxt->sax->processingInstruction != NULL))
5263 ctxt->sax->processingInstruction(ctxt->userData,
5264 target, buf);
5266 xmlFree(buf);
5267 } else {
5268 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5270 if (ctxt->instate != XML_PARSER_EOF)
5271 ctxt->instate = state;
5276 * xmlParseNotationDecl:
5277 * @ctxt: an XML parser context
5279 * DEPRECATED: Internal function, don't use.
5281 * Parse a notation declaration. Always consumes '<!'.
5283 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5285 * Hence there is actually 3 choices:
5286 * 'PUBLIC' S PubidLiteral
5287 * 'PUBLIC' S PubidLiteral S SystemLiteral
5288 * and 'SYSTEM' S SystemLiteral
5290 * See the NOTE on xmlParseExternalID().
5293 void
5294 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5295 const xmlChar *name;
5296 xmlChar *Pubid;
5297 xmlChar *Systemid;
5299 if ((CUR != '<') || (NXT(1) != '!'))
5300 return;
5301 SKIP(2);
5303 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5304 int inputid = ctxt->input->id;
5305 SKIP(8);
5306 if (SKIP_BLANKS == 0) {
5307 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5308 "Space required after '<!NOTATION'\n");
5309 return;
5312 name = xmlParseName(ctxt);
5313 if (name == NULL) {
5314 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5315 return;
5317 if (xmlStrchr(name, ':') != NULL) {
5318 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5319 "colons are forbidden from notation names '%s'\n",
5320 name, NULL, NULL);
5322 if (SKIP_BLANKS == 0) {
5323 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5324 "Space required after the NOTATION name'\n");
5325 return;
5329 * Parse the IDs.
5331 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5332 SKIP_BLANKS;
5334 if (RAW == '>') {
5335 if (inputid != ctxt->input->id) {
5336 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5337 "Notation declaration doesn't start and stop"
5338 " in the same entity\n");
5340 NEXT;
5341 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5342 (ctxt->sax->notationDecl != NULL))
5343 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5344 } else {
5345 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5347 if (Systemid != NULL) xmlFree(Systemid);
5348 if (Pubid != NULL) xmlFree(Pubid);
5353 * xmlParseEntityDecl:
5354 * @ctxt: an XML parser context
5356 * DEPRECATED: Internal function, don't use.
5358 * Parse an entity declaration. Always consumes '<!'.
5360 * [70] EntityDecl ::= GEDecl | PEDecl
5362 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5364 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5366 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5368 * [74] PEDef ::= EntityValue | ExternalID
5370 * [76] NDataDecl ::= S 'NDATA' S Name
5372 * [ VC: Notation Declared ]
5373 * The Name must match the declared name of a notation.
5376 void
5377 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5378 const xmlChar *name = NULL;
5379 xmlChar *value = NULL;
5380 xmlChar *URI = NULL, *literal = NULL;
5381 const xmlChar *ndata = NULL;
5382 int isParameter = 0;
5383 xmlChar *orig = NULL;
5385 if ((CUR != '<') || (NXT(1) != '!'))
5386 return;
5387 SKIP(2);
5389 /* GROW; done in the caller */
5390 if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5391 int inputid = ctxt->input->id;
5392 SKIP(6);
5393 if (SKIP_BLANKS == 0) {
5394 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5395 "Space required after '<!ENTITY'\n");
5398 if (RAW == '%') {
5399 NEXT;
5400 if (SKIP_BLANKS == 0) {
5401 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5402 "Space required after '%%'\n");
5404 isParameter = 1;
5407 name = xmlParseName(ctxt);
5408 if (name == NULL) {
5409 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5410 "xmlParseEntityDecl: no name\n");
5411 return;
5413 if (xmlStrchr(name, ':') != NULL) {
5414 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5415 "colons are forbidden from entities names '%s'\n",
5416 name, NULL, NULL);
5418 if (SKIP_BLANKS == 0) {
5419 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5420 "Space required after the entity name\n");
5423 ctxt->instate = XML_PARSER_ENTITY_DECL;
5425 * handle the various case of definitions...
5427 if (isParameter) {
5428 if ((RAW == '"') || (RAW == '\'')) {
5429 value = xmlParseEntityValue(ctxt, &orig);
5430 if (value) {
5431 if ((ctxt->sax != NULL) &&
5432 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5433 ctxt->sax->entityDecl(ctxt->userData, name,
5434 XML_INTERNAL_PARAMETER_ENTITY,
5435 NULL, NULL, value);
5437 } else {
5438 URI = xmlParseExternalID(ctxt, &literal, 1);
5439 if ((URI == NULL) && (literal == NULL)) {
5440 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5442 if (URI) {
5443 xmlURIPtr uri;
5445 uri = xmlParseURI((const char *) URI);
5446 if (uri == NULL) {
5447 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5448 "Invalid URI: %s\n", URI);
5450 * This really ought to be a well formedness error
5451 * but the XML Core WG decided otherwise c.f. issue
5452 * E26 of the XML erratas.
5454 } else {
5455 if (uri->fragment != NULL) {
5457 * Okay this is foolish to block those but not
5458 * invalid URIs.
5460 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5461 } else {
5462 if ((ctxt->sax != NULL) &&
5463 (!ctxt->disableSAX) &&
5464 (ctxt->sax->entityDecl != NULL))
5465 ctxt->sax->entityDecl(ctxt->userData, name,
5466 XML_EXTERNAL_PARAMETER_ENTITY,
5467 literal, URI, NULL);
5469 xmlFreeURI(uri);
5473 } else {
5474 if ((RAW == '"') || (RAW == '\'')) {
5475 value = xmlParseEntityValue(ctxt, &orig);
5476 if ((ctxt->sax != NULL) &&
5477 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5478 ctxt->sax->entityDecl(ctxt->userData, name,
5479 XML_INTERNAL_GENERAL_ENTITY,
5480 NULL, NULL, value);
5482 * For expat compatibility in SAX mode.
5484 if ((ctxt->myDoc == NULL) ||
5485 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5486 if (ctxt->myDoc == NULL) {
5487 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5488 if (ctxt->myDoc == NULL) {
5489 xmlErrMemory(ctxt, "New Doc failed");
5490 goto done;
5492 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5494 if (ctxt->myDoc->intSubset == NULL)
5495 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5496 BAD_CAST "fake", NULL, NULL);
5498 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5499 NULL, NULL, value);
5501 } else {
5502 URI = xmlParseExternalID(ctxt, &literal, 1);
5503 if ((URI == NULL) && (literal == NULL)) {
5504 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5506 if (URI) {
5507 xmlURIPtr uri;
5509 uri = xmlParseURI((const char *)URI);
5510 if (uri == NULL) {
5511 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5512 "Invalid URI: %s\n", URI);
5514 * This really ought to be a well formedness error
5515 * but the XML Core WG decided otherwise c.f. issue
5516 * E26 of the XML erratas.
5518 } else {
5519 if (uri->fragment != NULL) {
5521 * Okay this is foolish to block those but not
5522 * invalid URIs.
5524 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5526 xmlFreeURI(uri);
5529 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5530 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5531 "Space required before 'NDATA'\n");
5533 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5534 SKIP(5);
5535 if (SKIP_BLANKS == 0) {
5536 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5537 "Space required after 'NDATA'\n");
5539 ndata = xmlParseName(ctxt);
5540 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5541 (ctxt->sax->unparsedEntityDecl != NULL))
5542 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5543 literal, URI, ndata);
5544 } else {
5545 if ((ctxt->sax != NULL) &&
5546 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5547 ctxt->sax->entityDecl(ctxt->userData, name,
5548 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5549 literal, URI, NULL);
5551 * For expat compatibility in SAX mode.
5552 * assuming the entity replacement was asked for
5554 if ((ctxt->replaceEntities != 0) &&
5555 ((ctxt->myDoc == NULL) ||
5556 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5557 if (ctxt->myDoc == NULL) {
5558 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5559 if (ctxt->myDoc == NULL) {
5560 xmlErrMemory(ctxt, "New Doc failed");
5561 goto done;
5563 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5566 if (ctxt->myDoc->intSubset == NULL)
5567 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5568 BAD_CAST "fake", NULL, NULL);
5569 xmlSAX2EntityDecl(ctxt, name,
5570 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5571 literal, URI, NULL);
5576 if (ctxt->instate == XML_PARSER_EOF)
5577 goto done;
5578 SKIP_BLANKS;
5579 if (RAW != '>') {
5580 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5581 "xmlParseEntityDecl: entity %s not terminated\n", name);
5582 xmlHaltParser(ctxt);
5583 } else {
5584 if (inputid != ctxt->input->id) {
5585 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5586 "Entity declaration doesn't start and stop in"
5587 " the same entity\n");
5589 NEXT;
5591 if (orig != NULL) {
5593 * Ugly mechanism to save the raw entity value.
5595 xmlEntityPtr cur = NULL;
5597 if (isParameter) {
5598 if ((ctxt->sax != NULL) &&
5599 (ctxt->sax->getParameterEntity != NULL))
5600 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5601 } else {
5602 if ((ctxt->sax != NULL) &&
5603 (ctxt->sax->getEntity != NULL))
5604 cur = ctxt->sax->getEntity(ctxt->userData, name);
5605 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5606 cur = xmlSAX2GetEntity(ctxt, name);
5609 if ((cur != NULL) && (cur->orig == NULL)) {
5610 cur->orig = orig;
5611 orig = NULL;
5615 done:
5616 if (value != NULL) xmlFree(value);
5617 if (URI != NULL) xmlFree(URI);
5618 if (literal != NULL) xmlFree(literal);
5619 if (orig != NULL) xmlFree(orig);
5624 * xmlParseDefaultDecl:
5625 * @ctxt: an XML parser context
5626 * @value: Receive a possible fixed default value for the attribute
5628 * DEPRECATED: Internal function, don't use.
5630 * Parse an attribute default declaration
5632 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5634 * [ VC: Required Attribute ]
5635 * if the default declaration is the keyword #REQUIRED, then the
5636 * attribute must be specified for all elements of the type in the
5637 * attribute-list declaration.
5639 * [ VC: Attribute Default Legal ]
5640 * The declared default value must meet the lexical constraints of
5641 * the declared attribute type c.f. xmlValidateAttributeDecl()
5643 * [ VC: Fixed Attribute Default ]
5644 * if an attribute has a default value declared with the #FIXED
5645 * keyword, instances of that attribute must match the default value.
5647 * [ WFC: No < in Attribute Values ]
5648 * handled in xmlParseAttValue()
5650 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5651 * or XML_ATTRIBUTE_FIXED.
5655 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5656 int val;
5657 xmlChar *ret;
5659 *value = NULL;
5660 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5661 SKIP(9);
5662 return(XML_ATTRIBUTE_REQUIRED);
5664 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5665 SKIP(8);
5666 return(XML_ATTRIBUTE_IMPLIED);
5668 val = XML_ATTRIBUTE_NONE;
5669 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5670 SKIP(6);
5671 val = XML_ATTRIBUTE_FIXED;
5672 if (SKIP_BLANKS == 0) {
5673 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5674 "Space required after '#FIXED'\n");
5677 ret = xmlParseAttValue(ctxt);
5678 ctxt->instate = XML_PARSER_DTD;
5679 if (ret == NULL) {
5680 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5681 "Attribute default value declaration error\n");
5682 } else
5683 *value = ret;
5684 return(val);
5688 * xmlParseNotationType:
5689 * @ctxt: an XML parser context
5691 * DEPRECATED: Internal function, don't use.
5693 * parse an Notation attribute type.
5695 * Note: the leading 'NOTATION' S part has already being parsed...
5697 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5699 * [ VC: Notation Attributes ]
5700 * Values of this type must match one of the notation names included
5701 * in the declaration; all notation names in the declaration must be declared.
5703 * Returns: the notation attribute tree built while parsing
5706 xmlEnumerationPtr
5707 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5708 const xmlChar *name;
5709 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5711 if (RAW != '(') {
5712 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5713 return(NULL);
5715 do {
5716 NEXT;
5717 SKIP_BLANKS;
5718 name = xmlParseName(ctxt);
5719 if (name == NULL) {
5720 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5721 "Name expected in NOTATION declaration\n");
5722 xmlFreeEnumeration(ret);
5723 return(NULL);
5725 tmp = ret;
5726 while (tmp != NULL) {
5727 if (xmlStrEqual(name, tmp->name)) {
5728 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5729 "standalone: attribute notation value token %s duplicated\n",
5730 name, NULL);
5731 if (!xmlDictOwns(ctxt->dict, name))
5732 xmlFree((xmlChar *) name);
5733 break;
5735 tmp = tmp->next;
5737 if (tmp == NULL) {
5738 cur = xmlCreateEnumeration(name);
5739 if (cur == NULL) {
5740 xmlFreeEnumeration(ret);
5741 return(NULL);
5743 if (last == NULL) ret = last = cur;
5744 else {
5745 last->next = cur;
5746 last = cur;
5749 SKIP_BLANKS;
5750 } while (RAW == '|');
5751 if (RAW != ')') {
5752 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5753 xmlFreeEnumeration(ret);
5754 return(NULL);
5756 NEXT;
5757 return(ret);
5761 * xmlParseEnumerationType:
5762 * @ctxt: an XML parser context
5764 * DEPRECATED: Internal function, don't use.
5766 * parse an Enumeration attribute type.
5768 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5770 * [ VC: Enumeration ]
5771 * Values of this type must match one of the Nmtoken tokens in
5772 * the declaration
5774 * Returns: the enumeration attribute tree built while parsing
5777 xmlEnumerationPtr
5778 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5779 xmlChar *name;
5780 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5782 if (RAW != '(') {
5783 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5784 return(NULL);
5786 do {
5787 NEXT;
5788 SKIP_BLANKS;
5789 name = xmlParseNmtoken(ctxt);
5790 if (name == NULL) {
5791 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5792 return(ret);
5794 tmp = ret;
5795 while (tmp != NULL) {
5796 if (xmlStrEqual(name, tmp->name)) {
5797 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5798 "standalone: attribute enumeration value token %s duplicated\n",
5799 name, NULL);
5800 if (!xmlDictOwns(ctxt->dict, name))
5801 xmlFree(name);
5802 break;
5804 tmp = tmp->next;
5806 if (tmp == NULL) {
5807 cur = xmlCreateEnumeration(name);
5808 if (!xmlDictOwns(ctxt->dict, name))
5809 xmlFree(name);
5810 if (cur == NULL) {
5811 xmlFreeEnumeration(ret);
5812 return(NULL);
5814 if (last == NULL) ret = last = cur;
5815 else {
5816 last->next = cur;
5817 last = cur;
5820 SKIP_BLANKS;
5821 } while (RAW == '|');
5822 if (RAW != ')') {
5823 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5824 return(ret);
5826 NEXT;
5827 return(ret);
5831 * xmlParseEnumeratedType:
5832 * @ctxt: an XML parser context
5833 * @tree: the enumeration tree built while parsing
5835 * DEPRECATED: Internal function, don't use.
5837 * parse an Enumerated attribute type.
5839 * [57] EnumeratedType ::= NotationType | Enumeration
5841 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5844 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5848 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5849 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5850 SKIP(8);
5851 if (SKIP_BLANKS == 0) {
5852 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5853 "Space required after 'NOTATION'\n");
5854 return(0);
5856 *tree = xmlParseNotationType(ctxt);
5857 if (*tree == NULL) return(0);
5858 return(XML_ATTRIBUTE_NOTATION);
5860 *tree = xmlParseEnumerationType(ctxt);
5861 if (*tree == NULL) return(0);
5862 return(XML_ATTRIBUTE_ENUMERATION);
5866 * xmlParseAttributeType:
5867 * @ctxt: an XML parser context
5868 * @tree: the enumeration tree built while parsing
5870 * DEPRECATED: Internal function, don't use.
5872 * parse the Attribute list def for an element
5874 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5876 * [55] StringType ::= 'CDATA'
5878 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5879 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5881 * Validity constraints for attribute values syntax are checked in
5882 * xmlValidateAttributeValue()
5884 * [ VC: ID ]
5885 * Values of type ID must match the Name production. A name must not
5886 * appear more than once in an XML document as a value of this type;
5887 * i.e., ID values must uniquely identify the elements which bear them.
5889 * [ VC: One ID per Element Type ]
5890 * No element type may have more than one ID attribute specified.
5892 * [ VC: ID Attribute Default ]
5893 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5895 * [ VC: IDREF ]
5896 * Values of type IDREF must match the Name production, and values
5897 * of type IDREFS must match Names; each IDREF Name must match the value
5898 * of an ID attribute on some element in the XML document; i.e. IDREF
5899 * values must match the value of some ID attribute.
5901 * [ VC: Entity Name ]
5902 * Values of type ENTITY must match the Name production, values
5903 * of type ENTITIES must match Names; each Entity Name must match the
5904 * name of an unparsed entity declared in the DTD.
5906 * [ VC: Name Token ]
5907 * Values of type NMTOKEN must match the Nmtoken production; values
5908 * of type NMTOKENS must match Nmtokens.
5910 * Returns the attribute type
5913 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5914 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5915 SKIP(5);
5916 return(XML_ATTRIBUTE_CDATA);
5917 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5918 SKIP(6);
5919 return(XML_ATTRIBUTE_IDREFS);
5920 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5921 SKIP(5);
5922 return(XML_ATTRIBUTE_IDREF);
5923 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5924 SKIP(2);
5925 return(XML_ATTRIBUTE_ID);
5926 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5927 SKIP(6);
5928 return(XML_ATTRIBUTE_ENTITY);
5929 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5930 SKIP(8);
5931 return(XML_ATTRIBUTE_ENTITIES);
5932 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5933 SKIP(8);
5934 return(XML_ATTRIBUTE_NMTOKENS);
5935 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5936 SKIP(7);
5937 return(XML_ATTRIBUTE_NMTOKEN);
5939 return(xmlParseEnumeratedType(ctxt, tree));
5943 * xmlParseAttributeListDecl:
5944 * @ctxt: an XML parser context
5946 * DEPRECATED: Internal function, don't use.
5948 * Parse an attribute list declaration for an element. Always consumes '<!'.
5950 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5952 * [53] AttDef ::= S Name S AttType S DefaultDecl
5955 void
5956 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5957 const xmlChar *elemName;
5958 const xmlChar *attrName;
5959 xmlEnumerationPtr tree;
5961 if ((CUR != '<') || (NXT(1) != '!'))
5962 return;
5963 SKIP(2);
5965 if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5966 int inputid = ctxt->input->id;
5968 SKIP(7);
5969 if (SKIP_BLANKS == 0) {
5970 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5971 "Space required after '<!ATTLIST'\n");
5973 elemName = xmlParseName(ctxt);
5974 if (elemName == NULL) {
5975 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5976 "ATTLIST: no name for Element\n");
5977 return;
5979 SKIP_BLANKS;
5980 GROW;
5981 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5982 int type;
5983 int def;
5984 xmlChar *defaultValue = NULL;
5986 GROW;
5987 tree = NULL;
5988 attrName = xmlParseName(ctxt);
5989 if (attrName == NULL) {
5990 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5991 "ATTLIST: no name for Attribute\n");
5992 break;
5994 GROW;
5995 if (SKIP_BLANKS == 0) {
5996 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5997 "Space required after the attribute name\n");
5998 break;
6001 type = xmlParseAttributeType(ctxt, &tree);
6002 if (type <= 0) {
6003 break;
6006 GROW;
6007 if (SKIP_BLANKS == 0) {
6008 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6009 "Space required after the attribute type\n");
6010 if (tree != NULL)
6011 xmlFreeEnumeration(tree);
6012 break;
6015 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6016 if (def <= 0) {
6017 if (defaultValue != NULL)
6018 xmlFree(defaultValue);
6019 if (tree != NULL)
6020 xmlFreeEnumeration(tree);
6021 break;
6023 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6024 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6026 GROW;
6027 if (RAW != '>') {
6028 if (SKIP_BLANKS == 0) {
6029 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6030 "Space required after the attribute default value\n");
6031 if (defaultValue != NULL)
6032 xmlFree(defaultValue);
6033 if (tree != NULL)
6034 xmlFreeEnumeration(tree);
6035 break;
6038 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6039 (ctxt->sax->attributeDecl != NULL))
6040 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6041 type, def, defaultValue, tree);
6042 else if (tree != NULL)
6043 xmlFreeEnumeration(tree);
6045 if ((ctxt->sax2) && (defaultValue != NULL) &&
6046 (def != XML_ATTRIBUTE_IMPLIED) &&
6047 (def != XML_ATTRIBUTE_REQUIRED)) {
6048 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6050 if (ctxt->sax2) {
6051 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6053 if (defaultValue != NULL)
6054 xmlFree(defaultValue);
6055 GROW;
6057 if (RAW == '>') {
6058 if (inputid != ctxt->input->id) {
6059 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6060 "Attribute list declaration doesn't start and"
6061 " stop in the same entity\n");
6063 NEXT;
6069 * xmlParseElementMixedContentDecl:
6070 * @ctxt: an XML parser context
6071 * @inputchk: the input used for the current entity, needed for boundary checks
6073 * DEPRECATED: Internal function, don't use.
6075 * parse the declaration for a Mixed Element content
6076 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6078 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6079 * '(' S? '#PCDATA' S? ')'
6081 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6083 * [ VC: No Duplicate Types ]
6084 * The same name must not appear more than once in a single
6085 * mixed-content declaration.
6087 * returns: the list of the xmlElementContentPtr describing the element choices
6089 xmlElementContentPtr
6090 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6091 xmlElementContentPtr ret = NULL, cur = NULL, n;
6092 const xmlChar *elem = NULL;
6094 GROW;
6095 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6096 SKIP(7);
6097 SKIP_BLANKS;
6098 if (RAW == ')') {
6099 if (ctxt->input->id != inputchk) {
6100 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6101 "Element content declaration doesn't start and"
6102 " stop in the same entity\n");
6104 NEXT;
6105 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6106 if (ret == NULL)
6107 return(NULL);
6108 if (RAW == '*') {
6109 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6110 NEXT;
6112 return(ret);
6114 if ((RAW == '(') || (RAW == '|')) {
6115 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6116 if (ret == NULL) return(NULL);
6118 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6119 NEXT;
6120 if (elem == NULL) {
6121 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6122 if (ret == NULL) {
6123 xmlFreeDocElementContent(ctxt->myDoc, cur);
6124 return(NULL);
6126 ret->c1 = cur;
6127 if (cur != NULL)
6128 cur->parent = ret;
6129 cur = ret;
6130 } else {
6131 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6132 if (n == NULL) {
6133 xmlFreeDocElementContent(ctxt->myDoc, ret);
6134 return(NULL);
6136 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6137 if (n->c1 != NULL)
6138 n->c1->parent = n;
6139 cur->c2 = n;
6140 if (n != NULL)
6141 n->parent = cur;
6142 cur = n;
6144 SKIP_BLANKS;
6145 elem = xmlParseName(ctxt);
6146 if (elem == NULL) {
6147 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6148 "xmlParseElementMixedContentDecl : Name expected\n");
6149 xmlFreeDocElementContent(ctxt->myDoc, ret);
6150 return(NULL);
6152 SKIP_BLANKS;
6153 GROW;
6155 if ((RAW == ')') && (NXT(1) == '*')) {
6156 if (elem != NULL) {
6157 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6158 XML_ELEMENT_CONTENT_ELEMENT);
6159 if (cur->c2 != NULL)
6160 cur->c2->parent = cur;
6162 if (ret != NULL)
6163 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6164 if (ctxt->input->id != inputchk) {
6165 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6166 "Element content declaration doesn't start and"
6167 " stop in the same entity\n");
6169 SKIP(2);
6170 } else {
6171 xmlFreeDocElementContent(ctxt->myDoc, ret);
6172 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6173 return(NULL);
6176 } else {
6177 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6179 return(ret);
6183 * xmlParseElementChildrenContentDeclPriv:
6184 * @ctxt: an XML parser context
6185 * @inputchk: the input used for the current entity, needed for boundary checks
6186 * @depth: the level of recursion
6188 * parse the declaration for a Mixed Element content
6189 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6192 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6194 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6196 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6198 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6200 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6201 * TODO Parameter-entity replacement text must be properly nested
6202 * with parenthesized groups. That is to say, if either of the
6203 * opening or closing parentheses in a choice, seq, or Mixed
6204 * construct is contained in the replacement text for a parameter
6205 * entity, both must be contained in the same replacement text. For
6206 * interoperability, if a parameter-entity reference appears in a
6207 * choice, seq, or Mixed construct, its replacement text should not
6208 * be empty, and neither the first nor last non-blank character of
6209 * the replacement text should be a connector (| or ,).
6211 * Returns the tree of xmlElementContentPtr describing the element
6212 * hierarchy.
6214 static xmlElementContentPtr
6215 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6216 int depth) {
6217 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6218 const xmlChar *elem;
6219 xmlChar type = 0;
6221 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6222 (depth > 2048)) {
6223 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6224 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6225 depth);
6226 return(NULL);
6228 SKIP_BLANKS;
6229 GROW;
6230 if (RAW == '(') {
6231 int inputid = ctxt->input->id;
6233 /* Recurse on first child */
6234 NEXT;
6235 SKIP_BLANKS;
6236 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6237 depth + 1);
6238 if (cur == NULL)
6239 return(NULL);
6240 SKIP_BLANKS;
6241 GROW;
6242 } else {
6243 elem = xmlParseName(ctxt);
6244 if (elem == NULL) {
6245 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6246 return(NULL);
6248 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6249 if (cur == NULL) {
6250 xmlErrMemory(ctxt, NULL);
6251 return(NULL);
6253 GROW;
6254 if (RAW == '?') {
6255 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6256 NEXT;
6257 } else if (RAW == '*') {
6258 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6259 NEXT;
6260 } else if (RAW == '+') {
6261 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6262 NEXT;
6263 } else {
6264 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6266 GROW;
6268 SKIP_BLANKS;
6269 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6271 * Each loop we parse one separator and one element.
6273 if (RAW == ',') {
6274 if (type == 0) type = CUR;
6277 * Detect "Name | Name , Name" error
6279 else if (type != CUR) {
6280 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6281 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6282 type);
6283 if ((last != NULL) && (last != ret))
6284 xmlFreeDocElementContent(ctxt->myDoc, last);
6285 if (ret != NULL)
6286 xmlFreeDocElementContent(ctxt->myDoc, ret);
6287 return(NULL);
6289 NEXT;
6291 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6292 if (op == NULL) {
6293 if ((last != NULL) && (last != ret))
6294 xmlFreeDocElementContent(ctxt->myDoc, last);
6295 xmlFreeDocElementContent(ctxt->myDoc, ret);
6296 return(NULL);
6298 if (last == NULL) {
6299 op->c1 = ret;
6300 if (ret != NULL)
6301 ret->parent = op;
6302 ret = cur = op;
6303 } else {
6304 cur->c2 = op;
6305 if (op != NULL)
6306 op->parent = cur;
6307 op->c1 = last;
6308 if (last != NULL)
6309 last->parent = op;
6310 cur =op;
6311 last = NULL;
6313 } else if (RAW == '|') {
6314 if (type == 0) type = CUR;
6317 * Detect "Name , Name | Name" error
6319 else if (type != CUR) {
6320 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6321 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6322 type);
6323 if ((last != NULL) && (last != ret))
6324 xmlFreeDocElementContent(ctxt->myDoc, last);
6325 if (ret != NULL)
6326 xmlFreeDocElementContent(ctxt->myDoc, ret);
6327 return(NULL);
6329 NEXT;
6331 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6332 if (op == NULL) {
6333 if ((last != NULL) && (last != ret))
6334 xmlFreeDocElementContent(ctxt->myDoc, last);
6335 if (ret != NULL)
6336 xmlFreeDocElementContent(ctxt->myDoc, ret);
6337 return(NULL);
6339 if (last == NULL) {
6340 op->c1 = ret;
6341 if (ret != NULL)
6342 ret->parent = op;
6343 ret = cur = op;
6344 } else {
6345 cur->c2 = op;
6346 if (op != NULL)
6347 op->parent = cur;
6348 op->c1 = last;
6349 if (last != NULL)
6350 last->parent = op;
6351 cur =op;
6352 last = NULL;
6354 } else {
6355 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6356 if ((last != NULL) && (last != ret))
6357 xmlFreeDocElementContent(ctxt->myDoc, last);
6358 if (ret != NULL)
6359 xmlFreeDocElementContent(ctxt->myDoc, ret);
6360 return(NULL);
6362 GROW;
6363 SKIP_BLANKS;
6364 GROW;
6365 if (RAW == '(') {
6366 int inputid = ctxt->input->id;
6367 /* Recurse on second child */
6368 NEXT;
6369 SKIP_BLANKS;
6370 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6371 depth + 1);
6372 if (last == NULL) {
6373 if (ret != NULL)
6374 xmlFreeDocElementContent(ctxt->myDoc, ret);
6375 return(NULL);
6377 SKIP_BLANKS;
6378 } else {
6379 elem = xmlParseName(ctxt);
6380 if (elem == NULL) {
6381 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6382 if (ret != NULL)
6383 xmlFreeDocElementContent(ctxt->myDoc, ret);
6384 return(NULL);
6386 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6387 if (last == NULL) {
6388 if (ret != NULL)
6389 xmlFreeDocElementContent(ctxt->myDoc, ret);
6390 return(NULL);
6392 if (RAW == '?') {
6393 last->ocur = XML_ELEMENT_CONTENT_OPT;
6394 NEXT;
6395 } else if (RAW == '*') {
6396 last->ocur = XML_ELEMENT_CONTENT_MULT;
6397 NEXT;
6398 } else if (RAW == '+') {
6399 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6400 NEXT;
6401 } else {
6402 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6405 SKIP_BLANKS;
6406 GROW;
6408 if ((cur != NULL) && (last != NULL)) {
6409 cur->c2 = last;
6410 if (last != NULL)
6411 last->parent = cur;
6413 if (ctxt->input->id != inputchk) {
6414 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6415 "Element content declaration doesn't start and stop in"
6416 " the same entity\n");
6418 NEXT;
6419 if (RAW == '?') {
6420 if (ret != NULL) {
6421 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6422 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6423 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6424 else
6425 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6427 NEXT;
6428 } else if (RAW == '*') {
6429 if (ret != NULL) {
6430 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6431 cur = ret;
6433 * Some normalization:
6434 * (a | b* | c?)* == (a | b | c)*
6436 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6437 if ((cur->c1 != NULL) &&
6438 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6439 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6440 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6441 if ((cur->c2 != NULL) &&
6442 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6443 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6444 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6445 cur = cur->c2;
6448 NEXT;
6449 } else if (RAW == '+') {
6450 if (ret != NULL) {
6451 int found = 0;
6453 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6454 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6455 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6456 else
6457 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6459 * Some normalization:
6460 * (a | b*)+ == (a | b)*
6461 * (a | b?)+ == (a | b)*
6463 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6464 if ((cur->c1 != NULL) &&
6465 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6466 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6467 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6468 found = 1;
6470 if ((cur->c2 != NULL) &&
6471 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6472 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6473 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6474 found = 1;
6476 cur = cur->c2;
6478 if (found)
6479 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6481 NEXT;
6483 return(ret);
6487 * xmlParseElementChildrenContentDecl:
6488 * @ctxt: an XML parser context
6489 * @inputchk: the input used for the current entity, needed for boundary checks
6491 * DEPRECATED: Internal function, don't use.
6493 * parse the declaration for a Mixed Element content
6494 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6496 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6498 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6500 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6502 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6504 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6505 * TODO Parameter-entity replacement text must be properly nested
6506 * with parenthesized groups. That is to say, if either of the
6507 * opening or closing parentheses in a choice, seq, or Mixed
6508 * construct is contained in the replacement text for a parameter
6509 * entity, both must be contained in the same replacement text. For
6510 * interoperability, if a parameter-entity reference appears in a
6511 * choice, seq, or Mixed construct, its replacement text should not
6512 * be empty, and neither the first nor last non-blank character of
6513 * the replacement text should be a connector (| or ,).
6515 * Returns the tree of xmlElementContentPtr describing the element
6516 * hierarchy.
6518 xmlElementContentPtr
6519 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6520 /* stub left for API/ABI compat */
6521 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6525 * xmlParseElementContentDecl:
6526 * @ctxt: an XML parser context
6527 * @name: the name of the element being defined.
6528 * @result: the Element Content pointer will be stored here if any
6530 * DEPRECATED: Internal function, don't use.
6532 * parse the declaration for an Element content either Mixed or Children,
6533 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6535 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6537 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6541 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6542 xmlElementContentPtr *result) {
6544 xmlElementContentPtr tree = NULL;
6545 int inputid = ctxt->input->id;
6546 int res;
6548 *result = NULL;
6550 if (RAW != '(') {
6551 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6552 "xmlParseElementContentDecl : %s '(' expected\n", name);
6553 return(-1);
6555 NEXT;
6556 GROW;
6557 if (ctxt->instate == XML_PARSER_EOF)
6558 return(-1);
6559 SKIP_BLANKS;
6560 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6561 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6562 res = XML_ELEMENT_TYPE_MIXED;
6563 } else {
6564 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6565 res = XML_ELEMENT_TYPE_ELEMENT;
6567 SKIP_BLANKS;
6568 *result = tree;
6569 return(res);
6573 * xmlParseElementDecl:
6574 * @ctxt: an XML parser context
6576 * DEPRECATED: Internal function, don't use.
6578 * Parse an element declaration. Always consumes '<!'.
6580 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6582 * [ VC: Unique Element Type Declaration ]
6583 * No element type may be declared more than once
6585 * Returns the type of the element, or -1 in case of error
6588 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6589 const xmlChar *name;
6590 int ret = -1;
6591 xmlElementContentPtr content = NULL;
6593 if ((CUR != '<') || (NXT(1) != '!'))
6594 return(ret);
6595 SKIP(2);
6597 /* GROW; done in the caller */
6598 if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6599 int inputid = ctxt->input->id;
6601 SKIP(7);
6602 if (SKIP_BLANKS == 0) {
6603 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6604 "Space required after 'ELEMENT'\n");
6605 return(-1);
6607 name = xmlParseName(ctxt);
6608 if (name == NULL) {
6609 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6610 "xmlParseElementDecl: no name for Element\n");
6611 return(-1);
6613 if (SKIP_BLANKS == 0) {
6614 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6615 "Space required after the element name\n");
6617 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6618 SKIP(5);
6620 * Element must always be empty.
6622 ret = XML_ELEMENT_TYPE_EMPTY;
6623 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6624 (NXT(2) == 'Y')) {
6625 SKIP(3);
6627 * Element is a generic container.
6629 ret = XML_ELEMENT_TYPE_ANY;
6630 } else if (RAW == '(') {
6631 ret = xmlParseElementContentDecl(ctxt, name, &content);
6632 } else {
6634 * [ WFC: PEs in Internal Subset ] error handling.
6636 if ((RAW == '%') && (ctxt->external == 0) &&
6637 (ctxt->inputNr == 1)) {
6638 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6639 "PEReference: forbidden within markup decl in internal subset\n");
6640 } else {
6641 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6642 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6644 return(-1);
6647 SKIP_BLANKS;
6649 if (RAW != '>') {
6650 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6651 if (content != NULL) {
6652 xmlFreeDocElementContent(ctxt->myDoc, content);
6654 } else {
6655 if (inputid != ctxt->input->id) {
6656 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6657 "Element declaration doesn't start and stop in"
6658 " the same entity\n");
6661 NEXT;
6662 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6663 (ctxt->sax->elementDecl != NULL)) {
6664 if (content != NULL)
6665 content->parent = NULL;
6666 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6667 content);
6668 if ((content != NULL) && (content->parent == NULL)) {
6670 * this is a trick: if xmlAddElementDecl is called,
6671 * instead of copying the full tree it is plugged directly
6672 * if called from the parser. Avoid duplicating the
6673 * interfaces or change the API/ABI
6675 xmlFreeDocElementContent(ctxt->myDoc, content);
6677 } else if (content != NULL) {
6678 xmlFreeDocElementContent(ctxt->myDoc, content);
6682 return(ret);
6686 * xmlParseConditionalSections
6687 * @ctxt: an XML parser context
6689 * Parse a conditional section. Always consumes '<!['.
6691 * [61] conditionalSect ::= includeSect | ignoreSect
6692 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6693 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6694 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6695 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6698 static void
6699 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6700 int *inputIds = NULL;
6701 size_t inputIdsSize = 0;
6702 size_t depth = 0;
6704 while (ctxt->instate != XML_PARSER_EOF) {
6705 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6706 int id = ctxt->input->id;
6708 SKIP(3);
6709 SKIP_BLANKS;
6711 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6712 SKIP(7);
6713 SKIP_BLANKS;
6714 if (RAW != '[') {
6715 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6716 xmlHaltParser(ctxt);
6717 goto error;
6719 if (ctxt->input->id != id) {
6720 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6721 "All markup of the conditional section is"
6722 " not in the same entity\n");
6724 NEXT;
6726 if (inputIdsSize <= depth) {
6727 int *tmp;
6729 inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6730 tmp = (int *) xmlRealloc(inputIds,
6731 inputIdsSize * sizeof(int));
6732 if (tmp == NULL) {
6733 xmlErrMemory(ctxt, NULL);
6734 goto error;
6736 inputIds = tmp;
6738 inputIds[depth] = id;
6739 depth++;
6740 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6741 size_t ignoreDepth = 0;
6743 SKIP(6);
6744 SKIP_BLANKS;
6745 if (RAW != '[') {
6746 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6747 xmlHaltParser(ctxt);
6748 goto error;
6750 if (ctxt->input->id != id) {
6751 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6752 "All markup of the conditional section is"
6753 " not in the same entity\n");
6755 NEXT;
6757 while (RAW != 0) {
6758 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6759 SKIP(3);
6760 ignoreDepth++;
6761 /* Check for integer overflow */
6762 if (ignoreDepth == 0) {
6763 xmlErrMemory(ctxt, NULL);
6764 goto error;
6766 } else if ((RAW == ']') && (NXT(1) == ']') &&
6767 (NXT(2) == '>')) {
6768 if (ignoreDepth == 0)
6769 break;
6770 SKIP(3);
6771 ignoreDepth--;
6772 } else {
6773 NEXT;
6777 if (RAW == 0) {
6778 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6779 goto error;
6781 if (ctxt->input->id != id) {
6782 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6783 "All markup of the conditional section is"
6784 " not in the same entity\n");
6786 SKIP(3);
6787 } else {
6788 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6789 xmlHaltParser(ctxt);
6790 goto error;
6792 } else if ((depth > 0) &&
6793 (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6794 depth--;
6795 if (ctxt->input->id != inputIds[depth]) {
6796 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6797 "All markup of the conditional section is not"
6798 " in the same entity\n");
6800 SKIP(3);
6801 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6802 xmlParseMarkupDecl(ctxt);
6803 } else {
6804 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6805 xmlHaltParser(ctxt);
6806 goto error;
6809 if (depth == 0)
6810 break;
6812 SKIP_BLANKS;
6813 SHRINK;
6814 GROW;
6817 error:
6818 xmlFree(inputIds);
6822 * xmlParseMarkupDecl:
6823 * @ctxt: an XML parser context
6825 * DEPRECATED: Internal function, don't use.
6827 * Parse markup declarations. Always consumes '<!' or '<?'.
6829 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6830 * NotationDecl | PI | Comment
6832 * [ VC: Proper Declaration/PE Nesting ]
6833 * Parameter-entity replacement text must be properly nested with
6834 * markup declarations. That is to say, if either the first character
6835 * or the last character of a markup declaration (markupdecl above) is
6836 * contained in the replacement text for a parameter-entity reference,
6837 * both must be contained in the same replacement text.
6839 * [ WFC: PEs in Internal Subset ]
6840 * In the internal DTD subset, parameter-entity references can occur
6841 * only where markup declarations can occur, not within markup declarations.
6842 * (This does not apply to references that occur in external parameter
6843 * entities or to the external subset.)
6845 void
6846 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6847 GROW;
6848 if (CUR == '<') {
6849 if (NXT(1) == '!') {
6850 switch (NXT(2)) {
6851 case 'E':
6852 if (NXT(3) == 'L')
6853 xmlParseElementDecl(ctxt);
6854 else if (NXT(3) == 'N')
6855 xmlParseEntityDecl(ctxt);
6856 else
6857 SKIP(2);
6858 break;
6859 case 'A':
6860 xmlParseAttributeListDecl(ctxt);
6861 break;
6862 case 'N':
6863 xmlParseNotationDecl(ctxt);
6864 break;
6865 case '-':
6866 xmlParseComment(ctxt);
6867 break;
6868 default:
6869 /* there is an error but it will be detected later */
6870 SKIP(2);
6871 break;
6873 } else if (NXT(1) == '?') {
6874 xmlParsePI(ctxt);
6879 * detect requirement to exit there and act accordingly
6880 * and avoid having instate overridden later on
6882 if (ctxt->instate == XML_PARSER_EOF)
6883 return;
6885 ctxt->instate = XML_PARSER_DTD;
6889 * xmlParseTextDecl:
6890 * @ctxt: an XML parser context
6892 * DEPRECATED: Internal function, don't use.
6894 * parse an XML declaration header for external entities
6896 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6899 void
6900 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6901 xmlChar *version;
6902 const xmlChar *encoding;
6903 int oldstate;
6906 * We know that '<?xml' is here.
6908 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6909 SKIP(5);
6910 } else {
6911 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6912 return;
6915 /* Avoid expansion of parameter entities when skipping blanks. */
6916 oldstate = ctxt->instate;
6917 ctxt->instate = XML_PARSER_START;
6919 if (SKIP_BLANKS == 0) {
6920 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6921 "Space needed after '<?xml'\n");
6925 * We may have the VersionInfo here.
6927 version = xmlParseVersionInfo(ctxt);
6928 if (version == NULL)
6929 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6930 else {
6931 if (SKIP_BLANKS == 0) {
6932 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6933 "Space needed here\n");
6936 ctxt->input->version = version;
6939 * We must have the encoding declaration
6941 encoding = xmlParseEncodingDecl(ctxt);
6942 if (ctxt->instate == XML_PARSER_EOF)
6943 return;
6944 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6946 * The XML REC instructs us to stop parsing right here
6948 ctxt->instate = oldstate;
6949 return;
6951 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6952 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6953 "Missing encoding in text declaration\n");
6956 SKIP_BLANKS;
6957 if ((RAW == '?') && (NXT(1) == '>')) {
6958 SKIP(2);
6959 } else if (RAW == '>') {
6960 /* Deprecated old WD ... */
6961 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6962 NEXT;
6963 } else {
6964 int c;
6966 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6967 while ((c = CUR) != 0) {
6968 NEXT;
6969 if (c == '>')
6970 break;
6974 ctxt->instate = oldstate;
6978 * xmlParseExternalSubset:
6979 * @ctxt: an XML parser context
6980 * @ExternalID: the external identifier
6981 * @SystemID: the system identifier (or URL)
6983 * parse Markup declarations from an external subset
6985 * [30] extSubset ::= textDecl? extSubsetDecl
6987 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6989 void
6990 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6991 const xmlChar *SystemID) {
6992 xmlDetectSAX2(ctxt);
6993 GROW;
6995 if ((ctxt->encoding == NULL) &&
6996 (ctxt->input->end - ctxt->input->cur >= 4)) {
6997 xmlChar start[4];
6998 xmlCharEncoding enc;
7000 start[0] = RAW;
7001 start[1] = NXT(1);
7002 start[2] = NXT(2);
7003 start[3] = NXT(3);
7004 enc = xmlDetectCharEncoding(start, 4);
7005 if (enc != XML_CHAR_ENCODING_NONE)
7006 xmlSwitchEncoding(ctxt, enc);
7009 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7010 xmlParseTextDecl(ctxt);
7011 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7013 * The XML REC instructs us to stop parsing right here
7015 xmlHaltParser(ctxt);
7016 return;
7019 if (ctxt->myDoc == NULL) {
7020 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7021 if (ctxt->myDoc == NULL) {
7022 xmlErrMemory(ctxt, "New Doc failed");
7023 return;
7025 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7027 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7028 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7030 ctxt->instate = XML_PARSER_DTD;
7031 ctxt->external = 1;
7032 SKIP_BLANKS;
7033 while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7034 GROW;
7035 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7036 xmlParseConditionalSections(ctxt);
7037 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7038 xmlParseMarkupDecl(ctxt);
7039 } else {
7040 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7041 xmlHaltParser(ctxt);
7042 return;
7044 SKIP_BLANKS;
7045 SHRINK;
7048 if (RAW != 0) {
7049 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7055 * xmlParseReference:
7056 * @ctxt: an XML parser context
7058 * DEPRECATED: Internal function, don't use.
7060 * parse and handle entity references in content, depending on the SAX
7061 * interface, this may end-up in a call to character() if this is a
7062 * CharRef, a predefined entity, if there is no reference() callback.
7063 * or if the parser was asked to switch to that mode.
7065 * Always consumes '&'.
7067 * [67] Reference ::= EntityRef | CharRef
/*
 * xmlParseReference: handle one reference found at the current input
 * position and hand the result to the application.
 *
 * @ctxt: the parser context
 *
 * Nothing is done unless the current character is an ampersand.
 *  - When a hash sign follows, xmlParseCharRef() supplies the code point,
 *    which is reported through the SAX characters() callback, or through
 *    reference() when it does not fit the non-UTF-8 output buffer.
 *  - Otherwise xmlParseEntityRef() supplies the entity.  Entities without
 *    a name or of predefined type are reported via characters().  Other
 *    entities have their content parsed on first use (subject to the
 *    option test below), and are then either announced via reference()
 *    or linked/copied under ctxt->node when ctxt->replaceEntities is set.
 *
 * The function returns no value; failures are reported through the
 * parser error helpers and some of them halt the parser.
 */
7069 void
7070 xmlParseReference(xmlParserCtxtPtr ctxt) {
7071 xmlEntityPtr ent;
7072 xmlChar *val;
7073 int was_checked;
7074 xmlNodePtr list = NULL;
7075 xmlParserErrors ret = XML_ERR_OK;
7078 if (RAW != '&')
7079 return;
7082 * Simple case of a CharRef
7084 if (NXT(1) == '#') {
7085 int i = 0;
7086 xmlChar out[16];
/* Saved before xmlParseCharRef() runs so that the reference can be
 * rebuilt in the same notation (hexadecimal or decimal) further down. */
7087 int hex = NXT(2);
7088 int value = xmlParseCharRef(ctxt);
/* A zero result from xmlParseCharRef() means there is nothing to report. */
7090 if (value == 0)
7091 return;
7092 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7094 * So we are using non-UTF-8 buffers
7095 * Check that the char fit on 8bits, if not
7096 * generate a CharRef.
7098 if (value <= 0xFF) {
7099 out[0] = value;
7100 out[1] = 0;
7101 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7102 (!ctxt->disableSAX))
7103 ctxt->sax->characters(ctxt->userData, out, 1);
7104 } else {
7105 if ((hex == 'x') || (hex == 'X'))
7106 snprintf((char *)out, sizeof(out), "#x%X", value);
7107 else
7108 snprintf((char *)out, sizeof(out), "#%d", value);
7109 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7110 (!ctxt->disableSAX))
7111 ctxt->sax->reference(ctxt->userData, out);
7113 } else {
7115 * Just encode the value in UTF-8
7117 COPY_BUF(0 ,out, i, value);
7118 out[i] = 0;
7119 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7120 (!ctxt->disableSAX))
7121 ctxt->sax->characters(ctxt->userData, out, i);
7123 return;
7127 * We are seeing an entity reference
7129 ent = xmlParseEntityRef(ctxt);
7130 if (ent == NULL) return;
7131 if (!ctxt->wellFormed)
7132 return;
/* Non-zero when the entity already carried XML_ENT_PARSED on entry, i.e.
 * its content was parsed by an earlier reference. */
7133 was_checked = ent->flags & XML_ENT_PARSED;
7135 /* special case of predefined entities */
7136 if ((ent->name == NULL) ||
7137 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7138 val = ent->content;
7139 if (val == NULL) return;
7141 * inline the entity.
7143 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7144 (!ctxt->disableSAX))
7145 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7146 return;
7150 * The first reference to the entity trigger a parsing phase
7151 * where the ent->children is filled with the result from
7152 * the parsing.
7153 * Note: external parsed entities will not be loaded, it is not
7154 * required for a non-validating parser, unless the parsing option
7155 * of validating, or substituting entities were given. Doing so is
7156 * far more secure as the parser will only process data coming from
7157 * the document entity by default.
7159 if (((ent->flags & XML_ENT_PARSED) == 0) &&
7160 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7161 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
/* Kept so that the amplification check after parsing can be given the
 * value ctxt->sizeentcopy had before it is reset below. */
7162 unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7165 * This is a bit hackish but this seems the best
7166 * way to make sure both SAX and DOM entity support
7167 * behaves okay.
7169 void *user_data;
7170 if (ctxt->userData == ctxt)
7171 user_data = NULL;
7172 else
7173 user_data = ctxt->userData;
7175 /* Avoid overflow as much as possible */
7176 ctxt->sizeentcopy = 0;
/* The entity is referenced again while its own expansion is still in
 * progress: report an entity loop and halt the parser. */
7178 if (ent->flags & XML_ENT_EXPANDING) {
7179 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7180 xmlHaltParser(ctxt);
7181 return;
7184 ent->flags |= XML_ENT_EXPANDING;
7187 * Check that this entity is well formed
7188 * 4.3.2: An internal general parsed entity is well-formed
7189 * if its replacement text matches the production labeled
7190 * content.
7192 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7193 ctxt->depth++;
7194 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7195 user_data, &list);
7196 ctxt->depth--;
7198 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7199 ctxt->depth++;
7200 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7201 user_data, ctxt->depth, ent->URI,
7202 ent->ExternalID, &list);
7203 ctxt->depth--;
7204 } else {
7205 ret = XML_ERR_ENTITY_PE_INTERNAL;
7206 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7207 "invalid entity type found\n", NULL);
/* Parsing attempt is over: clear the in-progress flag, mark the entity
 * as parsed and checked, and store the size counted in sizeentcopy. */
7210 ent->flags &= ~XML_ENT_EXPANDING;
7211 ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7212 ent->expandedSize = ctxt->sizeentcopy;
7213 if (ret == XML_ERR_ENTITY_LOOP) {
7214 xmlHaltParser(ctxt);
7215 xmlFreeNodeList(list);
7216 return;
7218 if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7219 xmlFreeNodeList(list);
7220 return;
7223 if ((ret == XML_ERR_OK) && (list != NULL)) {
7224 ent->children = list;
7226 * Prune it directly in the generated document
7227 * except for single text nodes.
7229 if ((ctxt->replaceEntities == 0) ||
7230 (ctxt->parseMode == XML_PARSE_READER) ||
7231 ((list->type == XML_TEXT_NODE) &&
7232 (list->next == NULL))) {
/* The entity keeps the parsed nodes: they are re-parented to the entity
 * and list is reset so the code further down copies them instead. */
7233 ent->owner = 1;
7234 while (list != NULL) {
7235 list->parent = (xmlNodePtr) ent;
7236 if (list->doc != ent->doc)
7237 xmlSetTreeDoc(list, ent->doc);
7238 if (list->next == NULL)
7239 ent->last = list;
7240 list = list->next;
7242 list = NULL;
7243 } else {
/* The parsed nodes are destined for the document: they are re-parented
 * to ctxt->node and list is pointed back at the first of them. */
7244 ent->owner = 0;
7245 while (list != NULL) {
7246 list->parent = (xmlNodePtr) ctxt->node;
7247 list->doc = ctxt->myDoc;
7248 if (list->next == NULL)
7249 ent->last = list;
7250 list = list->next;
7252 list = ent->children;
7253 #ifdef LIBXML_LEGACY_ENABLED
7254 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7255 xmlAddEntityReference(ent, list, NULL);
7256 #endif /* LIBXML_LEGACY_ENABLED */
7258 } else if ((ret != XML_ERR_OK) &&
7259 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7260 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7261 "Entity '%s' failed to parse\n", ent->name);
/* The stored content is truncated to an empty string after a failed
 * parse. */
7262 if (ent->content != NULL)
7263 ent->content[0] = 0;
7264 } else if (list != NULL) {
7265 xmlFreeNodeList(list);
7266 list = NULL;
7269 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7270 was_checked = 0;
7274 * Now that the entity content has been gathered
7275 * provide it to the application, this can take different forms based
7276 * on the parsing modes.
7278 if (ent->children == NULL) {
7280 * Probably running in SAX mode and the callbacks don't
7281 * build the entity content. So unless we already went
7282 * though parsing for first checking go though the entity
7283 * content to generate callbacks associated to the entity
7285 if (was_checked != 0) {
7286 void *user_data;
7288 * This is a bit hackish but this seems the best
7289 * way to make sure both SAX and DOM entity support
7290 * behaves okay.
7292 if (ctxt->userData == ctxt)
7293 user_data = NULL;
7294 else
7295 user_data = ctxt->userData;
7297 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7298 ctxt->depth++;
7299 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7300 ent->content, user_data, NULL);
7301 ctxt->depth--;
7302 } else if (ent->etype ==
7303 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7304 unsigned long oldsizeentities = ctxt->sizeentities;
7306 ctxt->depth++;
7307 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7308 ctxt->sax, user_data, ctxt->depth,
7309 ent->URI, ent->ExternalID, NULL);
7310 ctxt->depth--;
7312 /* Undo the change to sizeentities */
7313 ctxt->sizeentities = oldsizeentities;
7314 } else {
7315 ret = XML_ERR_ENTITY_PE_INTERNAL;
7316 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7317 "invalid entity type found\n", NULL);
7319 if (ret == XML_ERR_ENTITY_LOOP) {
7320 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7321 return;
7323 if (xmlParserEntityCheck(ctxt, 0))
7324 return;
7326 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7327 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7329 * Entity reference callback comes second, it's somewhat
7330 * superfluous but a compatibility to historical behaviour
7332 ctxt->sax->reference(ctxt->userData, ent->name);
7334 return;
7338 * We also check for amplification if entities aren't substituted.
7339 * They might be expanded later.
7341 if ((was_checked != 0) &&
7342 (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7343 return;
7346 * If we didn't get any children for the entity being built
7348 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7349 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7351 * Create a node.
7353 ctxt->sax->reference(ctxt->userData, ent->name);
7354 return;
7357 if (ctxt->replaceEntities) {
7359 * There is a problem on the handling of _private for entities
7360 * (bug 155816): Should we copy the content of the field from
7361 * the entity (possibly overwriting some value set by the user
7362 * when a copy is created), should we leave it alone, or should
7363 * we try to take care of different situations? The problem
7364 * is exacerbated by the usage of this field by the xmlReader.
7365 * To fix this bug, we look at _private on the created node
7366 * and, if it's NULL, we copy in whatever was in the entity.
7367 * If it's not NULL we leave it alone. This is somewhat of a
7368 * hack - maybe we should have further tests to determine
7369 * what to do.
7371 if (ctxt->node != NULL) {
7373 * Seems we are generating the DOM content, do
7374 * a simple tree copy for all references except the first
7375 * In the first occurrence list contains the replacement.
7377 if (((list == NULL) && (ent->owner == 0)) ||
7378 (ctxt->parseMode == XML_PARSE_READER)) {
7379 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7382 * when operating on a reader, the entities definitions
7383 * are always owning the entities subtree.
7384 if (ctxt->parseMode == XML_PARSE_READER)
7385 ent->owner = 1;
/* Copy every node from ent->children up to ent->last into the document
 * and append each copy under ctxt->node. */
7388 cur = ent->children;
7389 while (cur != NULL) {
7390 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7391 if (nw != NULL) {
7392 if (nw->_private == NULL)
7393 nw->_private = cur->_private;
7394 if (firstChild == NULL){
7395 firstChild = nw;
7397 nw = xmlAddChild(ctxt->node, nw);
7399 if (cur == ent->last) {
7401 * needed to detect some strange empty
7402 * node cases in the reader tests
7404 if ((ctxt->parseMode == XML_PARSE_READER) &&
7405 (nw != NULL) &&
7406 (nw->type == XML_ELEMENT_NODE) &&
7407 (nw->children == NULL))
7408 nw->extra = 1;
7410 break;
7412 cur = cur->next;
7414 #ifdef LIBXML_LEGACY_ENABLED
7415 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7416 xmlAddEntityReference(ent, firstChild, nw);
7417 #endif /* LIBXML_LEGACY_ENABLED */
7418 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7419 xmlNodePtr nw = NULL, cur, next, last,
7420 firstChild = NULL;
7423 * Copy the entity child list and make it the new
7424 * entity child list. The goal is to make sure any
7425 * ID or REF referenced will be the one from the
7426 * document content and not the entity copy.
7428 cur = ent->children;
7429 ent->children = NULL;
7430 last = ent->last;
7431 ent->last = NULL;
/* In this loop the original node (cur) is moved under ctxt->node while
 * its copy (nw) is attached to the entity. */
7432 while (cur != NULL) {
7433 next = cur->next;
7434 cur->next = NULL;
7435 cur->parent = NULL;
7436 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7437 if (nw != NULL) {
7438 if (nw->_private == NULL)
7439 nw->_private = cur->_private;
7440 if (firstChild == NULL){
7441 firstChild = cur;
7443 xmlAddChild((xmlNodePtr) ent, nw);
7445 xmlAddChild(ctxt->node, cur);
7446 if (cur == last)
7447 break;
7448 cur = next;
7450 if (ent->owner == 0)
7451 ent->owner = 1;
7452 #ifdef LIBXML_LEGACY_ENABLED
7453 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7454 xmlAddEntityReference(ent, firstChild, nw);
7455 #endif /* LIBXML_LEGACY_ENABLED */
7456 } else {
7457 const xmlChar *nbktext;
7460 * the name change is to avoid coalescing of the
7461 * node with a possible previous text one which
7462 * would make ent->children a dangling pointer
7464 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7465 -1);
7466 if (ent->children->type == XML_TEXT_NODE)
7467 ent->children->name = nbktext;
7468 if ((ent->last != ent->children) &&
7469 (ent->last->type == XML_TEXT_NODE))
7470 ent->last->name = nbktext;
7471 xmlAddChildList(ctxt->node, ent->children);
7475 * This is to avoid a nasty side effect, see
7476 * characters() in SAX.c
7478 ctxt->nodemem = 0;
7479 ctxt->nodelen = 0;
7480 return;
7486 * xmlParseEntityRef:
7487 * @ctxt: an XML parser context
7489 * DEPRECATED: Internal function, don't use.
7491 * Parse an entity reference. Always consumes '&'.
7493 * [68] EntityRef ::= '&' Name ';'
7495 * [ WFC: Entity Declared ]
7496 * In a document without any DTD, a document with only an internal DTD
7497 * subset which contains no parameter entity references, or a document
7498 * with "standalone='yes'", the Name given in the entity reference
7499 * must match that in an entity declaration, except that well-formed
7500 * documents need not declare any of the following entities: amp, lt,
7501 * gt, apos, quot. The declaration of a parameter entity must precede
7502 * any reference to it. Similarly, the declaration of a general entity
7503 * must precede any reference to it which appears in a default value in an
7504 * attribute-list declaration. Note that if entities are declared in the
7505 * external subset or in external parameter entities, a non-validating
7506 * processor is not obligated to read and process their declarations;
7507 * for such documents, the rule that an entity must be declared is a
7508 * well-formedness constraint only if standalone='yes'.
7510 * [ WFC: Parsed Entity ]
7511 * An entity reference must not contain the name of an unparsed entity
7513 * Returns the xmlEntityPtr if found, or NULL otherwise.
7515 xmlEntityPtr
7516 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7517 const xmlChar *name;
7518 xmlEntityPtr ent = NULL;
7520 GROW;
7521 if (ctxt->instate == XML_PARSER_EOF)
7522 return(NULL);
7524 if (RAW != '&')
7525 return(NULL);
7526 NEXT;
7527 name = xmlParseName(ctxt);
7528 if (name == NULL) {
7529 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7530 "xmlParseEntityRef: no name\n");
7531 return(NULL);
7533 if (RAW != ';') {
7534 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7535 return(NULL);
7537 NEXT;
7540 * Predefined entities override any extra definition
7542 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7543 ent = xmlGetPredefinedEntity(name);
7544 if (ent != NULL)
7545 return(ent);
7549 * Ask first SAX for entity resolution, otherwise try the
7550 * entities which may have stored in the parser context.
7552 if (ctxt->sax != NULL) {
7553 if (ctxt->sax->getEntity != NULL)
7554 ent = ctxt->sax->getEntity(ctxt->userData, name);
7555 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7556 (ctxt->options & XML_PARSE_OLDSAX))
7557 ent = xmlGetPredefinedEntity(name);
7558 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7559 (ctxt->userData==ctxt)) {
7560 ent = xmlSAX2GetEntity(ctxt, name);
7563 if (ctxt->instate == XML_PARSER_EOF)
7564 return(NULL);
7566 * [ WFC: Entity Declared ]
7567 * In a document without any DTD, a document with only an
7568 * internal DTD subset which contains no parameter entity
7569 * references, or a document with "standalone='yes'", the
7570 * Name given in the entity reference must match that in an
7571 * entity declaration, except that well-formed documents
7572 * need not declare any of the following entities: amp, lt,
7573 * gt, apos, quot.
7574 * The declaration of a parameter entity must precede any
7575 * reference to it.
7576 * Similarly, the declaration of a general entity must
7577 * precede any reference to it which appears in a default
7578 * value in an attribute-list declaration. Note that if
7579 * entities are declared in the external subset or in
7580 * external parameter entities, a non-validating processor
7581 * is not obligated to read and process their declarations;
7582 * for such documents, the rule that an entity must be
7583 * declared is a well-formedness constraint only if
7584 * standalone='yes'.
7586 if (ent == NULL) {
7587 if ((ctxt->standalone == 1) ||
7588 ((ctxt->hasExternalSubset == 0) &&
7589 (ctxt->hasPErefs == 0))) {
7590 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7591 "Entity '%s' not defined\n", name);
7592 } else {
7593 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7594 "Entity '%s' not defined\n", name);
7595 if ((ctxt->inSubset == 0) &&
7596 (ctxt->sax != NULL) &&
7597 (ctxt->sax->reference != NULL)) {
7598 ctxt->sax->reference(ctxt->userData, name);
7601 ctxt->valid = 0;
7605 * [ WFC: Parsed Entity ]
7606 * An entity reference must not contain the name of an
7607 * unparsed entity
7609 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7610 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7611 "Entity reference to unparsed entity %s\n", name);
7615 * [ WFC: No External Entity References ]
7616 * Attribute values cannot contain direct or indirect
7617 * entity references to external entities.
7619 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7620 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7621 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7622 "Attribute references external entity '%s'\n", name);
7625 * [ WFC: No < in Attribute Values ]
7626 * The replacement text of any entity referred to directly or
7627 * indirectly in an attribute value (other than "&lt;") must
7628 * not contain a <.
7630 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7631 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7632 if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7633 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7634 ent->flags |= XML_ENT_CONTAINS_LT;
7635 ent->flags |= XML_ENT_CHECKED_LT;
7637 if (ent->flags & XML_ENT_CONTAINS_LT)
7638 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7639 "'<' in entity '%s' is not allowed in attributes "
7640 "values\n", name);
7644 * Internal check, no parameter entities here ...
7646 else {
7647 switch (ent->etype) {
7648 case XML_INTERNAL_PARAMETER_ENTITY:
7649 case XML_EXTERNAL_PARAMETER_ENTITY:
7650 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7651 "Attempt to reference the parameter entity '%s'\n",
7652 name);
7653 break;
7654 default:
7655 break;
7660 * [ WFC: No Recursion ]
7661 * A parsed entity must not contain a recursive reference
7662 * to itself, either directly or indirectly.
7663 * Done somewhere else
7665 return(ent);
7669 * xmlParseStringEntityRef:
7670 * @ctxt: an XML parser context
7671 * @str: a pointer to an index in the string
7673 * Parse an entity reference; unlike xmlParseEntityRef, this version
7674 * reads the reference from a string value.
7676 * [68] EntityRef ::= '&' Name ';'
7678 * [ WFC: Entity Declared ]
7679 * In a document without any DTD, a document with only an internal DTD
7680 * subset which contains no parameter entity references, or a document
7681 * with "standalone='yes'", the Name given in the entity reference
7682 * must match that in an entity declaration, except that well-formed
7683 * documents need not declare any of the following entities: amp, lt,
7684 * gt, apos, quot. The declaration of a parameter entity must precede
7685 * any reference to it. Similarly, the declaration of a general entity
7686 * must precede any reference to it which appears in a default value in an
7687 * attribute-list declaration. Note that if entities are declared in the
7688 * external subset or in external parameter entities, a non-validating
7689 * processor is not obligated to read and process their declarations;
7690 * for such documents, the rule that an entity must be declared is a
7691 * well-formedness constraint only if standalone='yes'.
7693 * [ WFC: Parsed Entity ]
7694 * An entity reference must not contain the name of an unparsed entity
7696 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7697 * is updated to the current location in the string.
7699 static xmlEntityPtr
7700 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7701 xmlChar *name;
7702 const xmlChar *ptr;
7703 xmlChar cur;
7704 xmlEntityPtr ent = NULL;
7706 if ((str == NULL) || (*str == NULL))
7707 return(NULL);
7708 ptr = *str;
7709 cur = *ptr;
7710 if (cur != '&')
7711 return(NULL);
7713 ptr++;
7714 name = xmlParseStringName(ctxt, &ptr);
7715 if (name == NULL) {
7716 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7717 "xmlParseStringEntityRef: no name\n");
7718 *str = ptr;
7719 return(NULL);
7721 if (*ptr != ';') {
7722 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7723 xmlFree(name);
7724 *str = ptr;
7725 return(NULL);
7727 ptr++;
7731 * Predefined entities override any extra definition
7733 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7734 ent = xmlGetPredefinedEntity(name);
7735 if (ent != NULL) {
7736 xmlFree(name);
7737 *str = ptr;
7738 return(ent);
7743 * Ask first SAX for entity resolution, otherwise try the
7744 * entities which may have stored in the parser context.
7746 if (ctxt->sax != NULL) {
7747 if (ctxt->sax->getEntity != NULL)
7748 ent = ctxt->sax->getEntity(ctxt->userData, name);
7749 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7750 ent = xmlGetPredefinedEntity(name);
7751 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7752 ent = xmlSAX2GetEntity(ctxt, name);
7755 if (ctxt->instate == XML_PARSER_EOF) {
7756 xmlFree(name);
7757 return(NULL);
7761 * [ WFC: Entity Declared ]
7762 * In a document without any DTD, a document with only an
7763 * internal DTD subset which contains no parameter entity
7764 * references, or a document with "standalone='yes'", the
7765 * Name given in the entity reference must match that in an
7766 * entity declaration, except that well-formed documents
7767 * need not declare any of the following entities: amp, lt,
7768 * gt, apos, quot.
7769 * The declaration of a parameter entity must precede any
7770 * reference to it.
7771 * Similarly, the declaration of a general entity must
7772 * precede any reference to it which appears in a default
7773 * value in an attribute-list declaration. Note that if
7774 * entities are declared in the external subset or in
7775 * external parameter entities, a non-validating processor
7776 * is not obligated to read and process their declarations;
7777 * for such documents, the rule that an entity must be
7778 * declared is a well-formedness constraint only if
7779 * standalone='yes'.
7781 if (ent == NULL) {
7782 if ((ctxt->standalone == 1) ||
7783 ((ctxt->hasExternalSubset == 0) &&
7784 (ctxt->hasPErefs == 0))) {
7785 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7786 "Entity '%s' not defined\n", name);
7787 } else {
7788 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7789 "Entity '%s' not defined\n",
7790 name);
7792 /* TODO ? check regressions ctxt->valid = 0; */
7796 * [ WFC: Parsed Entity ]
7797 * An entity reference must not contain the name of an
7798 * unparsed entity
7800 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7801 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7802 "Entity reference to unparsed entity %s\n", name);
7806 * [ WFC: No External Entity References ]
7807 * Attribute values cannot contain direct or indirect
7808 * entity references to external entities.
7810 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7811 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7812 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7813 "Attribute references external entity '%s'\n", name);
7816 * [ WFC: No < in Attribute Values ]
7817 * The replacement text of any entity referred to directly or
7818 * indirectly in an attribute value (other than "&lt;") must
7819 * not contain a <.
7821 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7822 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7823 if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7824 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7825 ent->flags |= XML_ENT_CONTAINS_LT;
7826 ent->flags |= XML_ENT_CHECKED_LT;
7828 if (ent->flags & XML_ENT_CONTAINS_LT)
7829 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7830 "'<' in entity '%s' is not allowed in attributes "
7831 "values\n", name);
7835 * Internal check, no parameter entities here ...
7837 else {
7838 switch (ent->etype) {
7839 case XML_INTERNAL_PARAMETER_ENTITY:
7840 case XML_EXTERNAL_PARAMETER_ENTITY:
7841 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7842 "Attempt to reference the parameter entity '%s'\n",
7843 name);
7844 break;
7845 default:
7846 break;
7851 * [ WFC: No Recursion ]
7852 * A parsed entity must not contain a recursive reference
7853 * to itself, either directly or indirectly.
7854 * Done somewhere else
7857 xmlFree(name);
7858 *str = ptr;
7859 return(ent);
7863 * xmlParsePEReference:
7864 * @ctxt: an XML parser context
7866 * DEPRECATED: Internal function, don't use.
7868 * Parse a parameter entity reference. Always consumes '%'.
7870 * The entity content is handled directly by pushing its content as
7871 * a new input stream.
7873 * [69] PEReference ::= '%' Name ';'
7875 * [ WFC: No Recursion ]
7876 * A parsed entity must not contain a recursive
7877 * reference to itself, either directly or indirectly.
7879 * [ WFC: Entity Declared ]
7880 * In a document without any DTD, a document with only an internal DTD
7881 * subset which contains no parameter entity references, or a document
7882 * with "standalone='yes'", ... ... The declaration of a parameter
7883 * entity must precede any reference to it...
7885 * [ VC: Entity Declared ]
7886 * In a document with an external subset or external parameter entities
7887 * with "standalone='no'", ... ... The declaration of a parameter entity
7888 * must precede any reference to it...
7890 * [ WFC: In DTD ]
7891 * Parameter-entity references may only appear in the DTD.
7892 * NOTE: misleading but this is handled.
7894 void
7895 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7897 const xmlChar *name;
7898 xmlEntityPtr entity = NULL;
7899 xmlParserInputPtr input;
7901 if (RAW != '%')
7902 return;
7903 NEXT;
7904 name = xmlParseName(ctxt);
7905 if (name == NULL) {
7906 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7907 return;
7909 if (xmlParserDebugEntities)
7910 xmlGenericError(xmlGenericErrorContext,
7911 "PEReference: %s\n", name);
7912 if (RAW != ';') {
7913 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7914 return;
7917 NEXT;
7920 * Request the entity from SAX
7922 if ((ctxt->sax != NULL) &&
7923 (ctxt->sax->getParameterEntity != NULL))
7924 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7925 if (ctxt->instate == XML_PARSER_EOF)
7926 return;
7927 if (entity == NULL) {
7929 * [ WFC: Entity Declared ]
7930 * In a document without any DTD, a document with only an
7931 * internal DTD subset which contains no parameter entity
7932 * references, or a document with "standalone='yes'", ...
7933 * ... The declaration of a parameter entity must precede
7934 * any reference to it...
7936 if ((ctxt->standalone == 1) ||
7937 ((ctxt->hasExternalSubset == 0) &&
7938 (ctxt->hasPErefs == 0))) {
7939 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7940 "PEReference: %%%s; not found\n",
7941 name);
7942 } else {
7944 * [ VC: Entity Declared ]
7945 * In a document with an external subset or external
7946 * parameter entities with "standalone='no'", ...
7947 * ... The declaration of a parameter entity must
7948 * precede any reference to it...
7950 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7951 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7952 "PEReference: %%%s; not found\n",
7953 name, NULL);
7954 } else
7955 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7956 "PEReference: %%%s; not found\n",
7957 name, NULL);
7958 ctxt->valid = 0;
7960 } else {
7962 * Internal checking in case the entity quest barfed
7964 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7965 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7966 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7967 "Internal: %%%s; is not a parameter entity\n",
7968 name, NULL);
7969 } else {
7970 xmlChar start[4];
7971 xmlCharEncoding enc;
7972 unsigned long parentConsumed;
7973 xmlEntityPtr oldEnt;
7975 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7976 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7977 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7978 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7979 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7980 (ctxt->replaceEntities == 0) &&
7981 (ctxt->validate == 0))
7982 return;
7984 if (entity->flags & XML_ENT_EXPANDING) {
7985 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7986 xmlHaltParser(ctxt);
7987 return;
7990 /* Must be computed from old input before pushing new input. */
7991 parentConsumed = ctxt->input->parentConsumed;
7992 oldEnt = ctxt->input->entity;
7993 if ((oldEnt == NULL) ||
7994 ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7995 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
7996 xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
7997 xmlSaturatedAddSizeT(&parentConsumed,
7998 ctxt->input->cur - ctxt->input->base);
8001 input = xmlNewEntityInputStream(ctxt, entity);
8002 if (xmlPushInput(ctxt, input) < 0) {
8003 xmlFreeInputStream(input);
8004 return;
8007 entity->flags |= XML_ENT_EXPANDING;
8009 input->parentConsumed = parentConsumed;
8011 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8013 * Get the 4 first bytes and decode the charset
8014 * if enc != XML_CHAR_ENCODING_NONE
8015 * plug some encoding conversion routines.
8016 * Note that, since we may have some non-UTF8
8017 * encoding (like UTF16, bug 135229), the 'length'
8018 * is not known, but we can calculate based upon
8019 * the amount of data in the buffer.
8021 GROW
8022 if (ctxt->instate == XML_PARSER_EOF)
8023 return;
8024 if ((ctxt->input->end - ctxt->input->cur)>=4) {
8025 start[0] = RAW;
8026 start[1] = NXT(1);
8027 start[2] = NXT(2);
8028 start[3] = NXT(3);
8029 enc = xmlDetectCharEncoding(start, 4);
8030 if (enc != XML_CHAR_ENCODING_NONE) {
8031 xmlSwitchEncoding(ctxt, enc);
8035 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8036 (IS_BLANK_CH(NXT(5)))) {
8037 xmlParseTextDecl(ctxt);
8042 ctxt->hasPErefs = 1;
8046 * xmlLoadEntityContent:
8047 * @ctxt: an XML parser context
8048 * @entity: an unloaded system entity
8050 * Load the original content of the given system entity from the
8051 * ExternalID/SystemID given. This is to be used for Included in Literal
8052 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8054 * Returns 0 in case of success and -1 in case of failure
8056 static int
8057 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8058 xmlParserInputPtr input;
8059 xmlBufferPtr buf;
8060 int l, c;
8062 if ((ctxt == NULL) || (entity == NULL) ||
8063 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8064 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8065 (entity->content != NULL)) {
8066 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8067 "xmlLoadEntityContent parameter error");
8068 return(-1);
8071 if (xmlParserDebugEntities)
8072 xmlGenericError(xmlGenericErrorContext,
8073 "Reading %s entity content input\n", entity->name);
8075 buf = xmlBufferCreate();
8076 if (buf == NULL) {
8077 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8078 "xmlLoadEntityContent parameter error");
8079 return(-1);
8081 xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8083 input = xmlNewEntityInputStream(ctxt, entity);
8084 if (input == NULL) {
8085 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8086 "xmlLoadEntityContent input error");
8087 xmlBufferFree(buf);
8088 return(-1);
8092 * Push the entity as the current input, read char by char
8093 * saving to the buffer until the end of the entity or an error
8095 if (xmlPushInput(ctxt, input) < 0) {
8096 xmlBufferFree(buf);
8097 xmlFreeInputStream(input);
8098 return(-1);
8101 GROW;
8102 c = CUR_CHAR(l);
8103 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8104 (IS_CHAR(c))) {
8105 xmlBufferAdd(buf, ctxt->input->cur, l);
8106 NEXTL(l);
8107 c = CUR_CHAR(l);
8109 if (ctxt->instate == XML_PARSER_EOF) {
8110 xmlBufferFree(buf);
8111 return(-1);
8114 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8115 xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8116 xmlPopInput(ctxt);
8117 } else if (!IS_CHAR(c)) {
8118 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8119 "xmlLoadEntityContent: invalid char value %d\n",
8121 xmlBufferFree(buf);
8122 return(-1);
8124 entity->content = buf->content;
8125 entity->length = buf->use;
8126 buf->content = NULL;
8127 xmlBufferFree(buf);
8129 return(0);
8133 * xmlParseStringPEReference:
8134 * @ctxt: an XML parser context
8135 * @str: a pointer to an index in the string
8137 * parse PEReference declarations
8139 * [69] PEReference ::= '%' Name ';'
8141 * [ WFC: No Recursion ]
8142 * A parsed entity must not contain a recursive
8143 * reference to itself, either directly or indirectly.
8145 * [ WFC: Entity Declared ]
8146 * In a document without any DTD, a document with only an internal DTD
8147 * subset which contains no parameter entity references, or a document
8148 * with "standalone='yes'", ... ... The declaration of a parameter
8149 * entity must precede any reference to it...
8151 * [ VC: Entity Declared ]
8152 * In a document with an external subset or external parameter entities
8153 * with "standalone='no'", ... ... The declaration of a parameter entity
8154 * must precede any reference to it...
8156 * [ WFC: In DTD ]
8157 * Parameter-entity references may only appear in the DTD.
8158 * NOTE: misleading but this is handled.
8160 * Returns the xmlEntityPtr if found, or NULL otherwise.
8161 * str is updated to the current value of the index
8163 static xmlEntityPtr
8164 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8165 const xmlChar *ptr;
8166 xmlChar cur;
8167 xmlChar *name;
8168 xmlEntityPtr entity = NULL;
8170 if ((str == NULL) || (*str == NULL)) return(NULL);
8171 ptr = *str;
8172 cur = *ptr;
8173 if (cur != '%')
8174 return(NULL);
8175 ptr++;
8176 name = xmlParseStringName(ctxt, &ptr);
8177 if (name == NULL) {
8178 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8179 "xmlParseStringPEReference: no name\n");
8180 *str = ptr;
8181 return(NULL);
8183 cur = *ptr;
8184 if (cur != ';') {
8185 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8186 xmlFree(name);
8187 *str = ptr;
8188 return(NULL);
8190 ptr++;
8193 * Request the entity from SAX
8195 if ((ctxt->sax != NULL) &&
8196 (ctxt->sax->getParameterEntity != NULL))
8197 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8198 if (ctxt->instate == XML_PARSER_EOF) {
8199 xmlFree(name);
8200 *str = ptr;
8201 return(NULL);
8203 if (entity == NULL) {
8205 * [ WFC: Entity Declared ]
8206 * In a document without any DTD, a document with only an
8207 * internal DTD subset which contains no parameter entity
8208 * references, or a document with "standalone='yes'", ...
8209 * ... The declaration of a parameter entity must precede
8210 * any reference to it...
8212 if ((ctxt->standalone == 1) ||
8213 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8214 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8215 "PEReference: %%%s; not found\n", name);
8216 } else {
8218 * [ VC: Entity Declared ]
8219 * In a document with an external subset or external
8220 * parameter entities with "standalone='no'", ...
8221 * ... The declaration of a parameter entity must
8222 * precede any reference to it...
8224 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8225 "PEReference: %%%s; not found\n",
8226 name, NULL);
8227 ctxt->valid = 0;
8229 } else {
8231 * Internal checking in case the entity quest barfed
8233 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8234 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8235 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8236 "%%%s; is not a parameter entity\n",
8237 name, NULL);
8240 ctxt->hasPErefs = 1;
8241 xmlFree(name);
8242 *str = ptr;
8243 return(entity);
8247 * xmlParseDocTypeDecl:
8248 * @ctxt: an XML parser context
8250 * DEPRECATED: Internal function, don't use.
8252 * parse a DOCTYPE declaration
8254 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8255 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8257 * [ VC: Root Element Type ]
8258 * The Name in the document type declaration must match the element
8259 * type of the root element.
8262 void
8263 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8264 const xmlChar *name = NULL;
8265 xmlChar *ExternalID = NULL;
8266 xmlChar *URI = NULL;
8269 * We know that '<!DOCTYPE' has been detected.
8271 SKIP(9);
8273 SKIP_BLANKS;
8276 * Parse the DOCTYPE name.
8278 name = xmlParseName(ctxt);
8279 if (name == NULL) {
8280 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8281 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8283 ctxt->intSubName = name;
8285 SKIP_BLANKS;
8288 * Check for SystemID and ExternalID
8290 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8292 if ((URI != NULL) || (ExternalID != NULL)) {
8293 ctxt->hasExternalSubset = 1;
8295 ctxt->extSubURI = URI;
8296 ctxt->extSubSystem = ExternalID;
8298 SKIP_BLANKS;
8301 * Create and update the internal subset.
8303 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8304 (!ctxt->disableSAX))
8305 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8306 if (ctxt->instate == XML_PARSER_EOF)
8307 return;
8310 * Is there any internal subset declarations ?
8311 * they are handled separately in xmlParseInternalSubset()
8313 if (RAW == '[')
8314 return;
8317 * We should be at the end of the DOCTYPE declaration.
8319 if (RAW != '>') {
8320 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8322 NEXT;
8326 * xmlParseInternalSubset:
8327 * @ctxt: an XML parser context
8329 * parse the internal subset declaration
8331 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8334 static void
8335 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8337 * Is there any DTD definition ?
8339 if (RAW == '[') {
8340 int baseInputNr = ctxt->inputNr;
8341 ctxt->instate = XML_PARSER_DTD;
8342 NEXT;
8344 * Parse the succession of Markup declarations and
8345 * PEReferences.
8346 * Subsequence (markupdecl | PEReference | S)*
8348 SKIP_BLANKS;
8349 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8350 (ctxt->instate != XML_PARSER_EOF)) {
8353 * Conditional sections are allowed from external entities included
8354 * by PE References in the internal subset.
8356 if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8357 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8358 xmlParseConditionalSections(ctxt);
8359 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8360 xmlParseMarkupDecl(ctxt);
8361 } else if (RAW == '%') {
8362 xmlParsePEReference(ctxt);
8363 } else {
8364 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8365 "xmlParseInternalSubset: error detected in"
8366 " Markup declaration\n");
8367 xmlHaltParser(ctxt);
8368 return;
8370 SKIP_BLANKS;
8371 SHRINK;
8372 GROW;
8374 if (RAW == ']') {
8375 NEXT;
8376 SKIP_BLANKS;
8381 * We should be at the end of the DOCTYPE declaration.
8383 if (RAW != '>') {
8384 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8385 return;
8387 NEXT;
8390 #ifdef LIBXML_SAX1_ENABLED
8392 * xmlParseAttribute:
8393 * @ctxt: an XML parser context
8394 * @value: a xmlChar ** used to store the value of the attribute
8396 * DEPRECATED: Internal function, don't use.
8398 * parse an attribute
8400 * [41] Attribute ::= Name Eq AttValue
8402 * [ WFC: No External Entity References ]
8403 * Attribute values cannot contain direct or indirect entity references
8404 * to external entities.
8406 * [ WFC: No < in Attribute Values ]
8407 * The replacement text of any entity referred to directly or indirectly in
8408 * an attribute value (other than "&lt;") must not contain a <.
8410 * [ VC: Attribute Value Type ]
8411 * The attribute must have been declared; the value must be of the type
8412 * declared for it.
8414 * [25] Eq ::= S? '=' S?
8416 * With namespace:
8418 * [NS 11] Attribute ::= QName Eq AttValue
8420 * Also the case QName == xmlns:??? is handled independently as a namespace
8421 * definition.
8423 * Returns the attribute name, and the value in *value.
8426 const xmlChar *
8427 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8428 const xmlChar *name;
8429 xmlChar *val;
8431 *value = NULL;
8432 GROW;
8433 name = xmlParseName(ctxt);
8434 if (name == NULL) {
8435 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8436 "error parsing attribute name\n");
8437 return(NULL);
8441 * read the value
8443 SKIP_BLANKS;
8444 if (RAW == '=') {
8445 NEXT;
8446 SKIP_BLANKS;
8447 val = xmlParseAttValue(ctxt);
8448 ctxt->instate = XML_PARSER_CONTENT;
8449 } else {
8450 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8451 "Specification mandates value for attribute %s\n", name);
8452 return(name);
8456 * Check that xml:lang conforms to the specification
8457 * No more registered as an error, just generate a warning now
8458 * since this was deprecated in XML second edition
8460 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8461 if (!xmlCheckLanguageID(val)) {
8462 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8463 "Malformed value for xml:lang : %s\n",
8464 val, NULL);
8469 * Check that xml:space conforms to the specification
8471 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8472 if (xmlStrEqual(val, BAD_CAST "default"))
8473 *(ctxt->space) = 0;
8474 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8475 *(ctxt->space) = 1;
8476 else {
8477 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8478 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8479 val, NULL);
8483 *value = val;
8484 return(name);
8488 * xmlParseStartTag:
8489 * @ctxt: an XML parser context
8491 * DEPRECATED: Internal function, don't use.
8493 * Parse a start tag. Always consumes '<'.
8495 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8497 * [ WFC: Unique Att Spec ]
8498 * No attribute name may appear more than once in the same start-tag or
8499 * empty-element tag.
8501 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8503 * [ WFC: Unique Att Spec ]
8504 * No attribute name may appear more than once in the same start-tag or
8505 * empty-element tag.
8507 * With namespace:
8509 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8511 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8513 * Returns the element name parsed
8516 const xmlChar *
8517 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8518 const xmlChar *name;
8519 const xmlChar *attname;
8520 xmlChar *attvalue;
8521 const xmlChar **atts = ctxt->atts;
8522 int nbatts = 0;
8523 int maxatts = ctxt->maxatts;
8524 int i;
8526 if (RAW != '<') return(NULL);
8527 NEXT1;
8529 name = xmlParseName(ctxt);
8530 if (name == NULL) {
8531 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8532 "xmlParseStartTag: invalid element name\n");
8533 return(NULL);
8537 * Now parse the attributes, it ends up with the ending
8539 * (S Attribute)* S?
8541 SKIP_BLANKS;
8542 GROW;
8544 while (((RAW != '>') &&
8545 ((RAW != '/') || (NXT(1) != '>')) &&
8546 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8547 attname = xmlParseAttribute(ctxt, &attvalue);
8548 if (attname == NULL) {
8549 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8550 "xmlParseStartTag: problem parsing attributes\n");
8551 break;
8553 if (attvalue != NULL) {
8555 * [ WFC: Unique Att Spec ]
8556 * No attribute name may appear more than once in the same
8557 * start-tag or empty-element tag.
8559 for (i = 0; i < nbatts;i += 2) {
8560 if (xmlStrEqual(atts[i], attname)) {
8561 xmlErrAttributeDup(ctxt, NULL, attname);
8562 xmlFree(attvalue);
8563 goto failed;
8567 * Add the pair to atts
8569 if (atts == NULL) {
8570 maxatts = 22; /* allow for 10 attrs by default */
8571 atts = (const xmlChar **)
8572 xmlMalloc(maxatts * sizeof(xmlChar *));
8573 if (atts == NULL) {
8574 xmlErrMemory(ctxt, NULL);
8575 if (attvalue != NULL)
8576 xmlFree(attvalue);
8577 goto failed;
8579 ctxt->atts = atts;
8580 ctxt->maxatts = maxatts;
8581 } else if (nbatts + 4 > maxatts) {
8582 const xmlChar **n;
8584 maxatts *= 2;
8585 n = (const xmlChar **) xmlRealloc((void *) atts,
8586 maxatts * sizeof(const xmlChar *));
8587 if (n == NULL) {
8588 xmlErrMemory(ctxt, NULL);
8589 if (attvalue != NULL)
8590 xmlFree(attvalue);
8591 goto failed;
8593 atts = n;
8594 ctxt->atts = atts;
8595 ctxt->maxatts = maxatts;
8597 atts[nbatts++] = attname;
8598 atts[nbatts++] = attvalue;
8599 atts[nbatts] = NULL;
8600 atts[nbatts + 1] = NULL;
8601 } else {
8602 if (attvalue != NULL)
8603 xmlFree(attvalue);
8606 failed:
8608 GROW
8609 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8610 break;
8611 if (SKIP_BLANKS == 0) {
8612 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8613 "attributes construct error\n");
8615 SHRINK;
8616 GROW;
8620 * SAX: Start of Element !
8622 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8623 (!ctxt->disableSAX)) {
8624 if (nbatts > 0)
8625 ctxt->sax->startElement(ctxt->userData, name, atts);
8626 else
8627 ctxt->sax->startElement(ctxt->userData, name, NULL);
8630 if (atts != NULL) {
8631 /* Free only the content strings */
8632 for (i = 1;i < nbatts;i+=2)
8633 if (atts[i] != NULL)
8634 xmlFree((xmlChar *) atts[i]);
8636 return(name);
8640 * xmlParseEndTag1:
8641 * @ctxt: an XML parser context
8642 * @line: line of the start tag
8643 * @nsNr: number of namespaces on the start tag
8645 * Parse an end tag. Always consumes '</'.
8647 * [42] ETag ::= '</' Name S? '>'
8649 * With namespace
8651 * [NS 9] ETag ::= '</' QName S? '>'
8654 static void
8655 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8656 const xmlChar *name;
8658 GROW;
8659 if ((RAW != '<') || (NXT(1) != '/')) {
8660 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8661 "xmlParseEndTag: '</' not found\n");
8662 return;
8664 SKIP(2);
8666 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8669 * We should definitely be at the ending "S? '>'" part
8671 GROW;
8672 SKIP_BLANKS;
8673 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8674 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8675 } else
8676 NEXT1;
8679 * [ WFC: Element Type Match ]
8680 * The Name in an element's end-tag must match the element type in the
8681 * start-tag.
8684 if (name != (xmlChar*)1) {
8685 if (name == NULL) name = BAD_CAST "unparsable";
8686 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8687 "Opening and ending tag mismatch: %s line %d and %s\n",
8688 ctxt->name, line, name);
8692 * SAX: End of Tag
8694 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8695 (!ctxt->disableSAX))
8696 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8698 namePop(ctxt);
8699 spacePop(ctxt);
8700 return;
8704 * xmlParseEndTag:
8705 * @ctxt: an XML parser context
8707 * DEPRECATED: Internal function, don't use.
8709 * parse an end of tag
8711 * [42] ETag ::= '</' Name S? '>'
8713 * With namespace
8715 * [NS 9] ETag ::= '</' QName S? '>'
8718 void
8719 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8720 xmlParseEndTag1(ctxt, 0);
8722 #endif /* LIBXML_SAX1_ENABLED */
8724 /************************************************************************
8726 * SAX 2 specific operations *
8728 ************************************************************************/
8731 * xmlGetNamespace:
8732 * @ctxt: an XML parser context
8733 * @prefix: the prefix to lookup
8735 * Lookup the namespace name for the @prefix (which ca be NULL)
8736 * The prefix must come from the @ctxt->dict dictionary
8738 * Returns the namespace name or NULL if not bound
8740 static const xmlChar *
8741 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8742 int i;
8744 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8745 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8746 if (ctxt->nsTab[i] == prefix) {
8747 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8748 return(NULL);
8749 return(ctxt->nsTab[i + 1]);
8751 return(NULL);
8755 * xmlParseQName:
8756 * @ctxt: an XML parser context
8757 * @prefix: pointer to store the prefix part
8759 * parse an XML Namespace QName
8761 * [6] QName ::= (Prefix ':')? LocalPart
8762 * [7] Prefix ::= NCName
8763 * [8] LocalPart ::= NCName
8765 * Returns the Name parsed or NULL
8768 static const xmlChar *
8769 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8770 const xmlChar *l, *p;
8772 GROW;
8773 if (ctxt->instate == XML_PARSER_EOF)
8774 return(NULL);
8776 l = xmlParseNCName(ctxt);
8777 if (l == NULL) {
8778 if (CUR == ':') {
8779 l = xmlParseName(ctxt);
8780 if (l != NULL) {
8781 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8782 "Failed to parse QName '%s'\n", l, NULL, NULL);
8783 *prefix = NULL;
8784 return(l);
8787 return(NULL);
8789 if (CUR == ':') {
8790 NEXT;
8791 p = l;
8792 l = xmlParseNCName(ctxt);
8793 if (l == NULL) {
8794 xmlChar *tmp;
8796 if (ctxt->instate == XML_PARSER_EOF)
8797 return(NULL);
8798 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8799 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8800 l = xmlParseNmtoken(ctxt);
8801 if (l == NULL) {
8802 if (ctxt->instate == XML_PARSER_EOF)
8803 return(NULL);
8804 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8805 } else {
8806 tmp = xmlBuildQName(l, p, NULL, 0);
8807 xmlFree((char *)l);
8809 p = xmlDictLookup(ctxt->dict, tmp, -1);
8810 if (tmp != NULL) xmlFree(tmp);
8811 *prefix = NULL;
8812 return(p);
8814 if (CUR == ':') {
8815 xmlChar *tmp;
8817 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8818 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8819 NEXT;
8820 tmp = (xmlChar *) xmlParseName(ctxt);
8821 if (tmp != NULL) {
8822 tmp = xmlBuildQName(tmp, l, NULL, 0);
8823 l = xmlDictLookup(ctxt->dict, tmp, -1);
8824 if (tmp != NULL) xmlFree(tmp);
8825 *prefix = p;
8826 return(l);
8828 if (ctxt->instate == XML_PARSER_EOF)
8829 return(NULL);
8830 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8831 l = xmlDictLookup(ctxt->dict, tmp, -1);
8832 if (tmp != NULL) xmlFree(tmp);
8833 *prefix = p;
8834 return(l);
8836 *prefix = p;
8837 } else
8838 *prefix = NULL;
8839 return(l);
8843 * xmlParseQNameAndCompare:
8844 * @ctxt: an XML parser context
8845 * @name: the localname
8846 * @prefix: the prefix, if any.
8848 * parse an XML name and compares for match
8849 * (specialized for endtag parsing)
8851 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8852 * and the name for mismatch
8855 static const xmlChar *
8856 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8857 xmlChar const *prefix) {
8858 const xmlChar *cmp;
8859 const xmlChar *in;
8860 const xmlChar *ret;
8861 const xmlChar *prefix2;
8863 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8865 GROW;
8866 in = ctxt->input->cur;
8868 cmp = prefix;
8869 while (*in != 0 && *in == *cmp) {
8870 ++in;
8871 ++cmp;
8873 if ((*cmp == 0) && (*in == ':')) {
8874 in++;
8875 cmp = name;
8876 while (*in != 0 && *in == *cmp) {
8877 ++in;
8878 ++cmp;
8880 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8881 /* success */
8882 ctxt->input->col += in - ctxt->input->cur;
8883 ctxt->input->cur = in;
8884 return((const xmlChar*) 1);
8888 * all strings coms from the dictionary, equality can be done directly
8890 ret = xmlParseQName (ctxt, &prefix2);
8891 if ((ret == name) && (prefix == prefix2))
8892 return((const xmlChar*) 1);
8893 return ret;
8897 * xmlParseAttValueInternal:
8898 * @ctxt: an XML parser context
8899 * @len: attribute len result
8900 * @alloc: whether the attribute was reallocated as a new string
8901 * @normalize: if 1 then further non-CDATA normalization must be done
8903 * parse a value for an attribute.
8904 * NOTE: if no normalization is needed, the routine will return pointers
8905 * directly from the data buffer.
8907 * 3.3.3 Attribute-Value Normalization:
8908 * Before the value of an attribute is passed to the application or
8909 * checked for validity, the XML processor must normalize it as follows:
8910 * - a character reference is processed by appending the referenced
8911 * character to the attribute value
8912 * - an entity reference is processed by recursively processing the
8913 * replacement text of the entity
8914 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8915 * appending #x20 to the normalized value, except that only a single
8916 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8917 * parsed entity or the literal entity value of an internal parsed entity
8918 * - other characters are processed by appending them to the normalized value
8919 * If the declared value is not CDATA, then the XML processor must further
8920 * process the normalized attribute value by discarding any leading and
8921 * trailing space (#x20) characters, and by replacing sequences of space
8922 * (#x20) characters by a single space (#x20) character.
8923 * All attributes for which no declaration has been read should be treated
8924 * by a non-validating parser as if declared CDATA.
8926 * Returns the AttValue parsed or NULL. The value has to be freed by the
8927 * caller if it was copied, this can be detected by val[*len] == 0.
8930 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8931 const xmlChar *oldbase = ctxt->input->base;\
8932 GROW;\
8933 if (ctxt->instate == XML_PARSER_EOF)\
8934 return(NULL);\
8935 if (oldbase != ctxt->input->base) {\
8936 ptrdiff_t delta = ctxt->input->base - oldbase;\
8937 start = start + delta;\
8938 in = in + delta;\
8940 end = ctxt->input->end;
8942 static xmlChar *
8943 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8944 int normalize)
8946 xmlChar limit = 0;
8947 const xmlChar *in = NULL, *start, *end, *last;
8948 xmlChar *ret = NULL;
8949 int line, col;
8950 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
8951 XML_MAX_HUGE_LENGTH :
8952 XML_MAX_TEXT_LENGTH;
8954 GROW;
8955 in = (xmlChar *) CUR_PTR;
8956 line = ctxt->input->line;
8957 col = ctxt->input->col;
8958 if (*in != '"' && *in != '\'') {
8959 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8960 return (NULL);
8962 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8965 * try to handle in this routine the most common case where no
8966 * allocation of a new string is required and where content is
8967 * pure ASCII.
8969 limit = *in++;
8970 col++;
8971 end = ctxt->input->end;
8972 start = in;
8973 if (in >= end) {
8974 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8976 if (normalize) {
8978 * Skip any leading spaces
8980 while ((in < end) && (*in != limit) &&
8981 ((*in == 0x20) || (*in == 0x9) ||
8982 (*in == 0xA) || (*in == 0xD))) {
8983 if (*in == 0xA) {
8984 line++; col = 1;
8985 } else {
8986 col++;
8988 in++;
8989 start = in;
8990 if (in >= end) {
8991 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8992 if ((in - start) > maxLength) {
8993 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8994 "AttValue length too long\n");
8995 return(NULL);
8999 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9000 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9001 col++;
9002 if ((*in++ == 0x20) && (*in == 0x20)) break;
9003 if (in >= end) {
9004 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9005 if ((in - start) > maxLength) {
9006 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9007 "AttValue length too long\n");
9008 return(NULL);
9012 last = in;
9014 * skip the trailing blanks
9016 while ((last[-1] == 0x20) && (last > start)) last--;
9017 while ((in < end) && (*in != limit) &&
9018 ((*in == 0x20) || (*in == 0x9) ||
9019 (*in == 0xA) || (*in == 0xD))) {
9020 if (*in == 0xA) {
9021 line++, col = 1;
9022 } else {
9023 col++;
9025 in++;
9026 if (in >= end) {
9027 const xmlChar *oldbase = ctxt->input->base;
9028 GROW;
9029 if (ctxt->instate == XML_PARSER_EOF)
9030 return(NULL);
9031 if (oldbase != ctxt->input->base) {
9032 ptrdiff_t delta = ctxt->input->base - oldbase;
9033 start = start + delta;
9034 in = in + delta;
9035 last = last + delta;
9037 end = ctxt->input->end;
9038 if ((in - start) > maxLength) {
9039 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9040 "AttValue length too long\n");
9041 return(NULL);
9045 if ((in - start) > maxLength) {
9046 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9047 "AttValue length too long\n");
9048 return(NULL);
9050 if (*in != limit) goto need_complex;
9051 } else {
9052 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9053 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9054 in++;
9055 col++;
9056 if (in >= end) {
9057 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9058 if ((in - start) > maxLength) {
9059 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9060 "AttValue length too long\n");
9061 return(NULL);
9065 last = in;
9066 if ((in - start) > maxLength) {
9067 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9068 "AttValue length too long\n");
9069 return(NULL);
9071 if (*in != limit) goto need_complex;
9073 in++;
9074 col++;
9075 if (len != NULL) {
9076 if (alloc) *alloc = 0;
9077 *len = last - start;
9078 ret = (xmlChar *) start;
9079 } else {
9080 if (alloc) *alloc = 1;
9081 ret = xmlStrndup(start, last - start);
9083 CUR_PTR = in;
9084 ctxt->input->line = line;
9085 ctxt->input->col = col;
9086 return ret;
9087 need_complex:
9088 if (alloc) *alloc = 1;
9089 return xmlParseAttValueComplex(ctxt, len, normalize);
9093 * xmlParseAttribute2:
9094 * @ctxt: an XML parser context
9095 * @pref: the element prefix
9096 * @elem: the element name
9097 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9098 * @value: a xmlChar ** used to store the value of the attribute
9099 * @len: an int * to save the length of the attribute
9100 * @alloc: an int * to indicate if the attribute was allocated
9102 * parse an attribute in the new SAX2 framework.
9104 * Returns the attribute name, and the value in *value, .
9107 static const xmlChar *
9108 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9109 const xmlChar * pref, const xmlChar * elem,
9110 const xmlChar ** prefix, xmlChar ** value,
9111 int *len, int *alloc)
9113 const xmlChar *name;
9114 xmlChar *val, *internal_val = NULL;
9115 int normalize = 0;
9117 *value = NULL;
9118 GROW;
9119 name = xmlParseQName(ctxt, prefix);
9120 if (name == NULL) {
9121 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9122 "error parsing attribute name\n");
9123 return (NULL);
9127 * get the type if needed
9129 if (ctxt->attsSpecial != NULL) {
9130 int type;
9132 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9133 pref, elem, *prefix, name);
9134 if (type != 0)
9135 normalize = 1;
9139 * read the value
9141 SKIP_BLANKS;
9142 if (RAW == '=') {
9143 NEXT;
9144 SKIP_BLANKS;
9145 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9146 if (val == NULL)
9147 return (NULL);
9148 if (normalize) {
9150 * Sometimes a second normalisation pass for spaces is needed
9151 * but that only happens if charrefs or entities references
9152 * have been used in the attribute value, i.e. the attribute
9153 * value have been extracted in an allocated string already.
9155 if (*alloc) {
9156 const xmlChar *val2;
9158 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9159 if ((val2 != NULL) && (val2 != val)) {
9160 xmlFree(val);
9161 val = (xmlChar *) val2;
9165 ctxt->instate = XML_PARSER_CONTENT;
9166 } else {
9167 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9168 "Specification mandates value for attribute %s\n",
9169 name);
9170 return (name);
9173 if (*prefix == ctxt->str_xml) {
9175 * Check that xml:lang conforms to the specification
9176 * No more registered as an error, just generate a warning now
9177 * since this was deprecated in XML second edition
9179 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9180 internal_val = xmlStrndup(val, *len);
9181 if (!xmlCheckLanguageID(internal_val)) {
9182 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9183 "Malformed value for xml:lang : %s\n",
9184 internal_val, NULL);
9189 * Check that xml:space conforms to the specification
9191 if (xmlStrEqual(name, BAD_CAST "space")) {
9192 internal_val = xmlStrndup(val, *len);
9193 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9194 *(ctxt->space) = 0;
9195 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9196 *(ctxt->space) = 1;
9197 else {
9198 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9199 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9200 internal_val, NULL);
9203 if (internal_val) {
9204 xmlFree(internal_val);
9208 *value = val;
9209 return (name);
9212 * xmlParseStartTag2:
9213 * @ctxt: an XML parser context
9215 * Parse a start tag. Always consumes '<'.
9217 * This routine is called when running SAX2 parsing
9219 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9221 * [ WFC: Unique Att Spec ]
9222 * No attribute name may appear more than once in the same start-tag or
9223 * empty-element tag.
9225 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9227 * [ WFC: Unique Att Spec ]
9228 * No attribute name may appear more than once in the same start-tag or
9229 * empty-element tag.
9231 * With namespace:
9233 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9235 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9237 * Returns the element name parsed
9240 static const xmlChar *
9241 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9242 const xmlChar **URI, int *tlen) {
9243 const xmlChar *localname;
9244 const xmlChar *prefix;
9245 const xmlChar *attname;
9246 const xmlChar *aprefix;
9247 const xmlChar *nsname;
9248 xmlChar *attvalue;
9249 const xmlChar **atts = ctxt->atts;
9250 int maxatts = ctxt->maxatts;
9251 int nratts, nbatts, nbdef, inputid;
9252 int i, j, nbNs, attval;
9253 size_t cur;
9254 int nsNr = ctxt->nsNr;
9256 if (RAW != '<') return(NULL);
9257 NEXT1;
9259 cur = ctxt->input->cur - ctxt->input->base;
9260 inputid = ctxt->input->id;
9261 nbatts = 0;
9262 nratts = 0;
9263 nbdef = 0;
9264 nbNs = 0;
9265 attval = 0;
9266 /* Forget any namespaces added during an earlier parse of this element. */
9267 ctxt->nsNr = nsNr;
9269 localname = xmlParseQName(ctxt, &prefix);
9270 if (localname == NULL) {
9271 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9272 "StartTag: invalid element name\n");
9273 return(NULL);
9275 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9278 * Now parse the attributes, it ends up with the ending
9280 * (S Attribute)* S?
9282 SKIP_BLANKS;
9283 GROW;
9285 while (((RAW != '>') &&
9286 ((RAW != '/') || (NXT(1) != '>')) &&
9287 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9288 int len = -1, alloc = 0;
9290 attname = xmlParseAttribute2(ctxt, prefix, localname,
9291 &aprefix, &attvalue, &len, &alloc);
9292 if (attname == NULL) {
9293 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9294 "xmlParseStartTag: problem parsing attributes\n");
9295 break;
9297 if (attvalue == NULL)
9298 goto next_attr;
9299 if (len < 0) len = xmlStrlen(attvalue);
9301 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9302 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9303 xmlURIPtr uri;
9305 if (URL == NULL) {
9306 xmlErrMemory(ctxt, "dictionary allocation failure");
9307 if ((attvalue != NULL) && (alloc != 0))
9308 xmlFree(attvalue);
9309 localname = NULL;
9310 goto done;
9312 if (*URL != 0) {
9313 uri = xmlParseURI((const char *) URL);
9314 if (uri == NULL) {
9315 xmlNsErr(ctxt, XML_WAR_NS_URI,
9316 "xmlns: '%s' is not a valid URI\n",
9317 URL, NULL, NULL);
9318 } else {
9319 if (uri->scheme == NULL) {
9320 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9321 "xmlns: URI %s is not absolute\n",
9322 URL, NULL, NULL);
9324 xmlFreeURI(uri);
9326 if (URL == ctxt->str_xml_ns) {
9327 if (attname != ctxt->str_xml) {
9328 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9329 "xml namespace URI cannot be the default namespace\n",
9330 NULL, NULL, NULL);
9332 goto next_attr;
9334 if ((len == 29) &&
9335 (xmlStrEqual(URL,
9336 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9337 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9338 "reuse of the xmlns namespace name is forbidden\n",
9339 NULL, NULL, NULL);
9340 goto next_attr;
9344 * check that it's not a defined namespace
9346 for (j = 1;j <= nbNs;j++)
9347 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9348 break;
9349 if (j <= nbNs)
9350 xmlErrAttributeDup(ctxt, NULL, attname);
9351 else
9352 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9354 } else if (aprefix == ctxt->str_xmlns) {
9355 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9356 xmlURIPtr uri;
9358 if (attname == ctxt->str_xml) {
9359 if (URL != ctxt->str_xml_ns) {
9360 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9361 "xml namespace prefix mapped to wrong URI\n",
9362 NULL, NULL, NULL);
9365 * Do not keep a namespace definition node
9367 goto next_attr;
9369 if (URL == ctxt->str_xml_ns) {
9370 if (attname != ctxt->str_xml) {
9371 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9372 "xml namespace URI mapped to wrong prefix\n",
9373 NULL, NULL, NULL);
9375 goto next_attr;
9377 if (attname == ctxt->str_xmlns) {
9378 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9379 "redefinition of the xmlns prefix is forbidden\n",
9380 NULL, NULL, NULL);
9381 goto next_attr;
9383 if ((len == 29) &&
9384 (xmlStrEqual(URL,
9385 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9386 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9387 "reuse of the xmlns namespace name is forbidden\n",
9388 NULL, NULL, NULL);
9389 goto next_attr;
9391 if ((URL == NULL) || (URL[0] == 0)) {
9392 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9393 "xmlns:%s: Empty XML namespace is not allowed\n",
9394 attname, NULL, NULL);
9395 goto next_attr;
9396 } else {
9397 uri = xmlParseURI((const char *) URL);
9398 if (uri == NULL) {
9399 xmlNsErr(ctxt, XML_WAR_NS_URI,
9400 "xmlns:%s: '%s' is not a valid URI\n",
9401 attname, URL, NULL);
9402 } else {
9403 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9404 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9405 "xmlns:%s: URI %s is not absolute\n",
9406 attname, URL, NULL);
9408 xmlFreeURI(uri);
9413 * check that it's not a defined namespace
9415 for (j = 1;j <= nbNs;j++)
9416 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9417 break;
9418 if (j <= nbNs)
9419 xmlErrAttributeDup(ctxt, aprefix, attname);
9420 else
9421 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9423 } else {
9425 * Add the pair to atts
9427 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9428 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9429 goto next_attr;
9431 maxatts = ctxt->maxatts;
9432 atts = ctxt->atts;
9434 ctxt->attallocs[nratts++] = alloc;
9435 atts[nbatts++] = attname;
9436 atts[nbatts++] = aprefix;
9438 * The namespace URI field is used temporarily to point at the
9439 * base of the current input buffer for non-alloced attributes.
9440 * When the input buffer is reallocated, all the pointers become
9441 * invalid, but they can be reconstructed later.
9443 if (alloc)
9444 atts[nbatts++] = NULL;
9445 else
9446 atts[nbatts++] = ctxt->input->base;
9447 atts[nbatts++] = attvalue;
9448 attvalue += len;
9449 atts[nbatts++] = attvalue;
9451 * tag if some deallocation is needed
9453 if (alloc != 0) attval = 1;
9454 attvalue = NULL; /* moved into atts */
9457 next_attr:
9458 if ((attvalue != NULL) && (alloc != 0)) {
9459 xmlFree(attvalue);
9460 attvalue = NULL;
9463 GROW
9464 if (ctxt->instate == XML_PARSER_EOF)
9465 break;
9466 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9467 break;
9468 if (SKIP_BLANKS == 0) {
9469 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9470 "attributes construct error\n");
9471 break;
9473 GROW;
9476 if (ctxt->input->id != inputid) {
9477 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9478 "Unexpected change of input\n");
9479 localname = NULL;
9480 goto done;
9483 /* Reconstruct attribute value pointers. */
9484 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9485 if (atts[i+2] != NULL) {
9487 * Arithmetic on dangling pointers is technically undefined
9488 * behavior, but well...
9490 const xmlChar *old = atts[i+2];
9491 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9492 atts[i+3] = ctxt->input->base + (atts[i+3] - old); /* value */
9493 atts[i+4] = ctxt->input->base + (atts[i+4] - old); /* valuend */
9498 * The attributes defaulting
9500 if (ctxt->attsDefault != NULL) {
9501 xmlDefAttrsPtr defaults;
9503 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9504 if (defaults != NULL) {
9505 for (i = 0;i < defaults->nbAttrs;i++) {
9506 attname = defaults->values[5 * i];
9507 aprefix = defaults->values[5 * i + 1];
9510 * special work for namespaces defaulted defs
9512 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9514 * check that it's not a defined namespace
9516 for (j = 1;j <= nbNs;j++)
9517 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9518 break;
9519 if (j <= nbNs) continue;
9521 nsname = xmlGetNamespace(ctxt, NULL);
9522 if (nsname != defaults->values[5 * i + 2]) {
9523 if (nsPush(ctxt, NULL,
9524 defaults->values[5 * i + 2]) > 0)
9525 nbNs++;
9527 } else if (aprefix == ctxt->str_xmlns) {
9529 * check that it's not a defined namespace
9531 for (j = 1;j <= nbNs;j++)
9532 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9533 break;
9534 if (j <= nbNs) continue;
9536 nsname = xmlGetNamespace(ctxt, attname);
9537 if (nsname != defaults->values[5 * i + 2]) {
9538 if (nsPush(ctxt, attname,
9539 defaults->values[5 * i + 2]) > 0)
9540 nbNs++;
9542 } else {
9544 * check that it's not a defined attribute
9546 for (j = 0;j < nbatts;j+=5) {
9547 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9548 break;
9550 if (j < nbatts) continue;
9552 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9553 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9554 localname = NULL;
9555 goto done;
9557 maxatts = ctxt->maxatts;
9558 atts = ctxt->atts;
9560 atts[nbatts++] = attname;
9561 atts[nbatts++] = aprefix;
9562 if (aprefix == NULL)
9563 atts[nbatts++] = NULL;
9564 else
9565 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9566 atts[nbatts++] = defaults->values[5 * i + 2];
9567 atts[nbatts++] = defaults->values[5 * i + 3];
9568 if ((ctxt->standalone == 1) &&
9569 (defaults->values[5 * i + 4] != NULL)) {
9570 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9571 "standalone: attribute %s on %s defaulted from external subset\n",
9572 attname, localname);
9574 nbdef++;
9581 * The attributes checkings
9583 for (i = 0; i < nbatts;i += 5) {
9585 * The default namespace does not apply to attribute names.
9587 if (atts[i + 1] != NULL) {
9588 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9589 if (nsname == NULL) {
9590 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9591 "Namespace prefix %s for %s on %s is not defined\n",
9592 atts[i + 1], atts[i], localname);
9594 atts[i + 2] = nsname;
9595 } else
9596 nsname = NULL;
9598 * [ WFC: Unique Att Spec ]
9599 * No attribute name may appear more than once in the same
9600 * start-tag or empty-element tag.
9601 * As extended by the Namespace in XML REC.
9603 for (j = 0; j < i;j += 5) {
9604 if (atts[i] == atts[j]) {
9605 if (atts[i+1] == atts[j+1]) {
9606 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9607 break;
9609 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9610 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9611 "Namespaced Attribute %s in '%s' redefined\n",
9612 atts[i], nsname, NULL);
9613 break;
9619 nsname = xmlGetNamespace(ctxt, prefix);
9620 if ((prefix != NULL) && (nsname == NULL)) {
9621 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9622 "Namespace prefix %s on %s is not defined\n",
9623 prefix, localname, NULL);
9625 *pref = prefix;
9626 *URI = nsname;
9629 * SAX: Start of Element !
9631 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9632 (!ctxt->disableSAX)) {
9633 if (nbNs > 0)
9634 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9635 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9636 nbatts / 5, nbdef, atts);
9637 else
9638 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9639 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9642 done:
9644 * Free up attribute allocated strings if needed
9646 if (attval != 0) {
9647 for (i = 3,j = 0; j < nratts;i += 5,j++)
9648 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9649 xmlFree((xmlChar *) atts[i]);
9652 return(localname);
9656 * xmlParseEndTag2:
9657 * @ctxt: an XML parser context
9658 * @line: line of the start tag
9659 * @nsNr: number of namespaces on the start tag
9661 * Parse an end tag. Always consumes '</'.
9663 * [42] ETag ::= '</' Name S? '>'
9665 * With namespace
9667 * [NS 9] ETag ::= '</' QName S? '>'
9670 static void
9671 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9672 const xmlChar *name;
9674 GROW;
9675 if ((RAW != '<') || (NXT(1) != '/')) {
9676 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9677 return;
9679 SKIP(2);
9681 if (tag->prefix == NULL)
9682 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9683 else
9684 name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9687 * We should definitely be at the ending "S? '>'" part
9689 GROW;
9690 if (ctxt->instate == XML_PARSER_EOF)
9691 return;
9692 SKIP_BLANKS;
9693 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9694 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9695 } else
9696 NEXT1;
9699 * [ WFC: Element Type Match ]
9700 * The Name in an element's end-tag must match the element type in the
9701 * start-tag.
9704 if (name != (xmlChar*)1) {
9705 if (name == NULL) name = BAD_CAST "unparsable";
9706 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9707 "Opening and ending tag mismatch: %s line %d and %s\n",
9708 ctxt->name, tag->line, name);
9712 * SAX: End of Tag
9714 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9715 (!ctxt->disableSAX))
9716 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9717 tag->URI);
9719 spacePop(ctxt);
9720 if (tag->nsNr != 0)
9721 nsPop(ctxt, tag->nsNr);
9725 * xmlParseCDSect:
9726 * @ctxt: an XML parser context
9728 * DEPRECATED: Internal function, don't use.
9730 * Parse escaped pure raw content. Always consumes '<!['.
9732 * [18] CDSect ::= CDStart CData CDEnd
9734 * [19] CDStart ::= '<![CDATA['
9736 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9738 * [21] CDEnd ::= ']]>'
9740 void
9741 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9742 xmlChar *buf = NULL;
9743 int len = 0;
9744 int size = XML_PARSER_BUFFER_SIZE;
9745 int r, rl;
9746 int s, sl;
9747 int cur, l;
9748 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9749 XML_MAX_HUGE_LENGTH :
9750 XML_MAX_TEXT_LENGTH;
9752 if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9753 return;
9754 SKIP(3);
9756 if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9757 return;
9758 SKIP(6);
9760 ctxt->instate = XML_PARSER_CDATA_SECTION;
9761 r = CUR_CHAR(rl);
9762 if (!IS_CHAR(r)) {
9763 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9764 goto out;
9766 NEXTL(rl);
9767 s = CUR_CHAR(sl);
9768 if (!IS_CHAR(s)) {
9769 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9770 goto out;
9772 NEXTL(sl);
9773 cur = CUR_CHAR(l);
9774 buf = (xmlChar *) xmlMallocAtomic(size);
9775 if (buf == NULL) {
9776 xmlErrMemory(ctxt, NULL);
9777 goto out;
9779 while (IS_CHAR(cur) &&
9780 ((r != ']') || (s != ']') || (cur != '>'))) {
9781 if (len + 5 >= size) {
9782 xmlChar *tmp;
9784 tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9785 if (tmp == NULL) {
9786 xmlErrMemory(ctxt, NULL);
9787 goto out;
9789 buf = tmp;
9790 size *= 2;
9792 COPY_BUF(rl,buf,len,r);
9793 if (len > maxLength) {
9794 xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9795 "CData section too big found\n");
9796 goto out;
9798 r = s;
9799 rl = sl;
9800 s = cur;
9801 sl = l;
9802 NEXTL(l);
9803 cur = CUR_CHAR(l);
9805 buf[len] = 0;
9806 if (ctxt->instate == XML_PARSER_EOF) {
9807 xmlFree(buf);
9808 return;
9810 if (cur != '>') {
9811 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9812 "CData section not finished\n%.50s\n", buf);
9813 goto out;
9815 NEXTL(l);
9818 * OK the buffer is to be consumed as cdata.
9820 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9821 if (ctxt->sax->cdataBlock != NULL)
9822 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9823 else if (ctxt->sax->characters != NULL)
9824 ctxt->sax->characters(ctxt->userData, buf, len);
9827 out:
9828 if (ctxt->instate != XML_PARSER_EOF)
9829 ctxt->instate = XML_PARSER_CONTENT;
9830 xmlFree(buf);
9834 * xmlParseContentInternal:
9835 * @ctxt: an XML parser context
9837 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9838 * unexpected EOF to the caller.
9841 static void
9842 xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9843 int nameNr = ctxt->nameNr;
9845 GROW;
9846 while ((RAW != 0) &&
9847 (ctxt->instate != XML_PARSER_EOF)) {
9848 const xmlChar *cur = ctxt->input->cur;
9851 * First case : a Processing Instruction.
9853 if ((*cur == '<') && (cur[1] == '?')) {
9854 xmlParsePI(ctxt);
9858 * Second case : a CDSection
9860 /* 2.6.0 test was *cur not RAW */
9861 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9862 xmlParseCDSect(ctxt);
9866 * Third case : a comment
9868 else if ((*cur == '<') && (NXT(1) == '!') &&
9869 (NXT(2) == '-') && (NXT(3) == '-')) {
9870 xmlParseComment(ctxt);
9871 ctxt->instate = XML_PARSER_CONTENT;
9875 * Fourth case : a sub-element.
9877 else if (*cur == '<') {
9878 if (NXT(1) == '/') {
9879 if (ctxt->nameNr <= nameNr)
9880 break;
9881 xmlParseElementEnd(ctxt);
9882 } else {
9883 xmlParseElementStart(ctxt);
9888 * Fifth case : a reference. If if has not been resolved,
9889 * parsing returns it's Name, create the node
9892 else if (*cur == '&') {
9893 xmlParseReference(ctxt);
9897 * Last case, text. Note that References are handled directly.
9899 else {
9900 xmlParseCharDataInternal(ctxt, 0);
9903 SHRINK;
9904 GROW;
9909 * xmlParseContent:
9910 * @ctxt: an XML parser context
9912 * Parse a content sequence. Stops at EOF or '</'.
9914 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9917 void
9918 xmlParseContent(xmlParserCtxtPtr ctxt) {
9919 int nameNr = ctxt->nameNr;
9921 xmlParseContentInternal(ctxt);
9923 if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9924 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9925 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9926 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9927 "Premature end of data in tag %s line %d\n",
9928 name, line, NULL);
9933 * xmlParseElement:
9934 * @ctxt: an XML parser context
9936 * DEPRECATED: Internal function, don't use.
9938 * parse an XML element
9940 * [39] element ::= EmptyElemTag | STag content ETag
9942 * [ WFC: Element Type Match ]
9943 * The Name in an element's end-tag must match the element type in the
9944 * start-tag.
9948 void
9949 xmlParseElement(xmlParserCtxtPtr ctxt) {
9950 if (xmlParseElementStart(ctxt) != 0)
9951 return;
9953 xmlParseContentInternal(ctxt);
9954 if (ctxt->instate == XML_PARSER_EOF)
9955 return;
9957 if (CUR == 0) {
9958 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9959 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9960 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9961 "Premature end of data in tag %s line %d\n",
9962 name, line, NULL);
9963 return;
9966 xmlParseElementEnd(ctxt);
9970 * xmlParseElementStart:
9971 * @ctxt: an XML parser context
9973 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9974 * opening tag was parsed, 1 if an empty element was parsed.
9976 * Always consumes '<'.
9978 static int
9979 xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9980 const xmlChar *name;
9981 const xmlChar *prefix = NULL;
9982 const xmlChar *URI = NULL;
9983 xmlParserNodeInfo node_info;
9984 int line, tlen = 0;
9985 xmlNodePtr cur;
9986 int nsNr = ctxt->nsNr;
9988 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9989 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9990 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9991 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9992 xmlParserMaxDepth);
9993 xmlHaltParser(ctxt);
9994 return(-1);
9997 /* Capture start position */
9998 if (ctxt->record_info) {
9999 node_info.begin_pos = ctxt->input->consumed +
10000 (CUR_PTR - ctxt->input->base);
10001 node_info.begin_line = ctxt->input->line;
10004 if (ctxt->spaceNr == 0)
10005 spacePush(ctxt, -1);
10006 else if (*ctxt->space == -2)
10007 spacePush(ctxt, -1);
10008 else
10009 spacePush(ctxt, *ctxt->space);
10011 line = ctxt->input->line;
10012 #ifdef LIBXML_SAX1_ENABLED
10013 if (ctxt->sax2)
10014 #endif /* LIBXML_SAX1_ENABLED */
10015 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10016 #ifdef LIBXML_SAX1_ENABLED
10017 else
10018 name = xmlParseStartTag(ctxt);
10019 #endif /* LIBXML_SAX1_ENABLED */
10020 if (ctxt->instate == XML_PARSER_EOF)
10021 return(-1);
10022 if (name == NULL) {
10023 spacePop(ctxt);
10024 return(-1);
10026 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10027 cur = ctxt->node;
10029 #ifdef LIBXML_VALID_ENABLED
10031 * [ VC: Root Element Type ]
10032 * The Name in the document type declaration must match the element
10033 * type of the root element.
10035 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10036 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10037 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10038 #endif /* LIBXML_VALID_ENABLED */
10041 * Check for an Empty Element.
10043 if ((RAW == '/') && (NXT(1) == '>')) {
10044 SKIP(2);
10045 if (ctxt->sax2) {
10046 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10047 (!ctxt->disableSAX))
10048 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10049 #ifdef LIBXML_SAX1_ENABLED
10050 } else {
10051 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10052 (!ctxt->disableSAX))
10053 ctxt->sax->endElement(ctxt->userData, name);
10054 #endif /* LIBXML_SAX1_ENABLED */
10056 namePop(ctxt);
10057 spacePop(ctxt);
10058 if (nsNr != ctxt->nsNr)
10059 nsPop(ctxt, ctxt->nsNr - nsNr);
10060 if (cur != NULL && ctxt->record_info) {
10061 node_info.node = cur;
10062 node_info.end_pos = ctxt->input->consumed +
10063 (CUR_PTR - ctxt->input->base);
10064 node_info.end_line = ctxt->input->line;
10065 xmlParserAddNodeInfo(ctxt, &node_info);
10067 return(1);
10069 if (RAW == '>') {
10070 NEXT1;
10071 if (cur != NULL && ctxt->record_info) {
10072 node_info.node = cur;
10073 node_info.end_pos = 0;
10074 node_info.end_line = 0;
10075 xmlParserAddNodeInfo(ctxt, &node_info);
10077 } else {
10078 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10079 "Couldn't find end of Start Tag %s line %d\n",
10080 name, line, NULL);
10083 * end of parsing of this node.
10085 nodePop(ctxt);
10086 namePop(ctxt);
10087 spacePop(ctxt);
10088 if (nsNr != ctxt->nsNr)
10089 nsPop(ctxt, ctxt->nsNr - nsNr);
10090 return(-1);
10093 return(0);
10097 * xmlParseElementEnd:
10098 * @ctxt: an XML parser context
10100 * Parse the end of an XML element. Always consumes '</'.
10102 static void
10103 xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10104 xmlNodePtr cur = ctxt->node;
10106 if (ctxt->nameNr <= 0) {
10107 if ((RAW == '<') && (NXT(1) == '/'))
10108 SKIP(2);
10109 return;
10113 * parse the end of tag: '</' should be here.
10115 if (ctxt->sax2) {
10116 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10117 namePop(ctxt);
10119 #ifdef LIBXML_SAX1_ENABLED
10120 else
10121 xmlParseEndTag1(ctxt, 0);
10122 #endif /* LIBXML_SAX1_ENABLED */
10125 * Capture end position
10127 if (cur != NULL && ctxt->record_info) {
10128 xmlParserNodeInfoPtr node_info;
10130 node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
10131 if (node_info != NULL) {
10132 node_info->end_pos = ctxt->input->consumed +
10133 (CUR_PTR - ctxt->input->base);
10134 node_info->end_line = ctxt->input->line;
10140 * xmlParseVersionNum:
10141 * @ctxt: an XML parser context
10143 * DEPRECATED: Internal function, don't use.
10145 * parse the XML version value.
10147 * [26] VersionNum ::= '1.' [0-9]+
10149 * In practice allow [0-9].[0-9]+ at that level
10151 * Returns the string giving the XML version number, or NULL
10153 xmlChar *
10154 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10155 xmlChar *buf = NULL;
10156 int len = 0;
10157 int size = 10;
10158 xmlChar cur;
10160 buf = (xmlChar *) xmlMallocAtomic(size);
10161 if (buf == NULL) {
10162 xmlErrMemory(ctxt, NULL);
10163 return(NULL);
10165 cur = CUR;
10166 if (!((cur >= '0') && (cur <= '9'))) {
10167 xmlFree(buf);
10168 return(NULL);
10170 buf[len++] = cur;
10171 NEXT;
10172 cur=CUR;
10173 if (cur != '.') {
10174 xmlFree(buf);
10175 return(NULL);
10177 buf[len++] = cur;
10178 NEXT;
10179 cur=CUR;
10180 while ((cur >= '0') && (cur <= '9')) {
10181 if (len + 1 >= size) {
10182 xmlChar *tmp;
10184 size *= 2;
10185 tmp = (xmlChar *) xmlRealloc(buf, size);
10186 if (tmp == NULL) {
10187 xmlFree(buf);
10188 xmlErrMemory(ctxt, NULL);
10189 return(NULL);
10191 buf = tmp;
10193 buf[len++] = cur;
10194 NEXT;
10195 cur=CUR;
10197 buf[len] = 0;
10198 return(buf);
10202 * xmlParseVersionInfo:
10203 * @ctxt: an XML parser context
10205 * DEPRECATED: Internal function, don't use.
10207 * parse the XML version.
10209 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10211 * [25] Eq ::= S? '=' S?
10213 * Returns the version string, e.g. "1.0"
10216 xmlChar *
10217 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10218 xmlChar *version = NULL;
10220 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10221 SKIP(7);
10222 SKIP_BLANKS;
10223 if (RAW != '=') {
10224 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10225 return(NULL);
10227 NEXT;
10228 SKIP_BLANKS;
10229 if (RAW == '"') {
10230 NEXT;
10231 version = xmlParseVersionNum(ctxt);
10232 if (RAW != '"') {
10233 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10234 } else
10235 NEXT;
10236 } else if (RAW == '\''){
10237 NEXT;
10238 version = xmlParseVersionNum(ctxt);
10239 if (RAW != '\'') {
10240 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10241 } else
10242 NEXT;
10243 } else {
10244 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10247 return(version);
10251 * xmlParseEncName:
10252 * @ctxt: an XML parser context
10254 * DEPRECATED: Internal function, don't use.
10256 * parse the XML encoding name
10258 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10260 * Returns the encoding name value or NULL
10262 xmlChar *
10263 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10264 xmlChar *buf = NULL;
10265 int len = 0;
10266 int size = 10;
10267 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10268 XML_MAX_TEXT_LENGTH :
10269 XML_MAX_NAME_LENGTH;
10270 xmlChar cur;
10272 cur = CUR;
10273 if (((cur >= 'a') && (cur <= 'z')) ||
10274 ((cur >= 'A') && (cur <= 'Z'))) {
10275 buf = (xmlChar *) xmlMallocAtomic(size);
10276 if (buf == NULL) {
10277 xmlErrMemory(ctxt, NULL);
10278 return(NULL);
10281 buf[len++] = cur;
10282 NEXT;
10283 cur = CUR;
10284 while (((cur >= 'a') && (cur <= 'z')) ||
10285 ((cur >= 'A') && (cur <= 'Z')) ||
10286 ((cur >= '0') && (cur <= '9')) ||
10287 (cur == '.') || (cur == '_') ||
10288 (cur == '-')) {
10289 if (len + 1 >= size) {
10290 xmlChar *tmp;
10292 size *= 2;
10293 tmp = (xmlChar *) xmlRealloc(buf, size);
10294 if (tmp == NULL) {
10295 xmlErrMemory(ctxt, NULL);
10296 xmlFree(buf);
10297 return(NULL);
10299 buf = tmp;
10301 buf[len++] = cur;
10302 if (len > maxLength) {
10303 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10304 xmlFree(buf);
10305 return(NULL);
10307 NEXT;
10308 cur = CUR;
10310 buf[len] = 0;
10311 } else {
10312 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10314 return(buf);
10318 * xmlParseEncodingDecl:
10319 * @ctxt: an XML parser context
10321 * DEPRECATED: Internal function, don't use.
10323 * parse the XML encoding declaration
10325 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10327 * this setups the conversion filters.
10329 * Returns the encoding value or NULL
10332 const xmlChar *
10333 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10334 xmlChar *encoding = NULL;
10336 SKIP_BLANKS;
10337 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10338 SKIP(8);
10339 SKIP_BLANKS;
10340 if (RAW != '=') {
10341 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10342 return(NULL);
10344 NEXT;
10345 SKIP_BLANKS;
10346 if (RAW == '"') {
10347 NEXT;
10348 encoding = xmlParseEncName(ctxt);
10349 if (RAW != '"') {
10350 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10351 xmlFree((xmlChar *) encoding);
10352 return(NULL);
10353 } else
10354 NEXT;
10355 } else if (RAW == '\''){
10356 NEXT;
10357 encoding = xmlParseEncName(ctxt);
10358 if (RAW != '\'') {
10359 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10360 xmlFree((xmlChar *) encoding);
10361 return(NULL);
10362 } else
10363 NEXT;
10364 } else {
10365 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10369 * Non standard parsing, allowing the user to ignore encoding
10371 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10372 xmlFree((xmlChar *) encoding);
10373 return(NULL);
10377 * UTF-16 encoding switch has already taken place at this stage,
10378 * more over the little-endian/big-endian selection is already done
10380 if ((encoding != NULL) &&
10381 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10382 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10384 * If no encoding was passed to the parser, that we are
10385 * using UTF-16 and no decoder is present i.e. the
10386 * document is apparently UTF-8 compatible, then raise an
10387 * encoding mismatch fatal error
10389 if ((ctxt->encoding == NULL) &&
10390 (ctxt->input->buf != NULL) &&
10391 (ctxt->input->buf->encoder == NULL)) {
10392 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10393 "Document labelled UTF-16 but has UTF-8 content\n");
10395 if (ctxt->encoding != NULL)
10396 xmlFree((xmlChar *) ctxt->encoding);
10397 ctxt->encoding = encoding;
10400 * UTF-8 encoding is handled natively
10402 else if ((encoding != NULL) &&
10403 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10404 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10405 /* TODO: Check for encoding mismatch. */
10406 if (ctxt->encoding != NULL)
10407 xmlFree((xmlChar *) ctxt->encoding);
10408 ctxt->encoding = encoding;
10410 else if (encoding != NULL) {
10411 xmlCharEncodingHandlerPtr handler;
10413 if (ctxt->input->encoding != NULL)
10414 xmlFree((xmlChar *) ctxt->input->encoding);
10415 ctxt->input->encoding = encoding;
10417 handler = xmlFindCharEncodingHandler((const char *) encoding);
10418 if (handler != NULL) {
10419 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10420 /* failed to convert */
10421 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10422 return(NULL);
10424 } else {
10425 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10426 "Unsupported encoding %s\n", encoding);
10427 return(NULL);
10431 return(encoding);
10435 * xmlParseSDDecl:
10436 * @ctxt: an XML parser context
10438 * DEPRECATED: Internal function, don't use.
10440 * parse the XML standalone declaration
10442 * [32] SDDecl ::= S 'standalone' Eq
10443 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10445 * [ VC: Standalone Document Declaration ]
10446 * TODO The standalone document declaration must have the value "no"
10447 * if any external markup declarations contain declarations of:
10448 * - attributes with default values, if elements to which these
10449 * attributes apply appear in the document without specifications
10450 * of values for these attributes, or
10451 * - entities (other than amp, lt, gt, apos, quot), if references
10452 * to those entities appear in the document, or
10453 * - attributes with values subject to normalization, where the
10454 * attribute appears in the document with a value which will change
10455 * as a result of normalization, or
10456 * - element types with element content, if white space occurs directly
10457 * within any instance of those types.
10459 * Returns:
10460 * 1 if standalone="yes"
10461 * 0 if standalone="no"
10462 * -2 if standalone attribute is missing or invalid
10463 * (A standalone value of -2 means that the XML declaration was found,
10464 * but no value was specified for the standalone attribute).
10468 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10469 int standalone = -2;
10471 SKIP_BLANKS;
10472 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10473 SKIP(10);
10474 SKIP_BLANKS;
10475 if (RAW != '=') {
10476 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10477 return(standalone);
10479 NEXT;
10480 SKIP_BLANKS;
10481 if (RAW == '\''){
10482 NEXT;
10483 if ((RAW == 'n') && (NXT(1) == 'o')) {
10484 standalone = 0;
10485 SKIP(2);
10486 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10487 (NXT(2) == 's')) {
10488 standalone = 1;
10489 SKIP(3);
10490 } else {
10491 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10493 if (RAW != '\'') {
10494 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10495 } else
10496 NEXT;
10497 } else if (RAW == '"'){
10498 NEXT;
10499 if ((RAW == 'n') && (NXT(1) == 'o')) {
10500 standalone = 0;
10501 SKIP(2);
10502 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10503 (NXT(2) == 's')) {
10504 standalone = 1;
10505 SKIP(3);
10506 } else {
10507 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10509 if (RAW != '"') {
10510 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10511 } else
10512 NEXT;
10513 } else {
10514 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10517 return(standalone);
10521 * xmlParseXMLDecl:
10522 * @ctxt: an XML parser context
10524 * DEPRECATED: Internal function, don't use.
10526 * parse an XML declaration header
10528 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10531 void
10532 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10533 xmlChar *version;
10536 * This value for standalone indicates that the document has an
10537 * XML declaration but it does not have a standalone attribute.
10538 * It will be overwritten later if a standalone attribute is found.
10540 ctxt->input->standalone = -2;
10543 * We know that '<?xml' is here.
10545 SKIP(5);
10547 if (!IS_BLANK_CH(RAW)) {
10548 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10549 "Blank needed after '<?xml'\n");
10551 SKIP_BLANKS;
10554 * We must have the VersionInfo here.
10556 version = xmlParseVersionInfo(ctxt);
10557 if (version == NULL) {
10558 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10559 } else {
10560 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10562 * Changed here for XML-1.0 5th edition
10564 if (ctxt->options & XML_PARSE_OLD10) {
10565 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10566 "Unsupported version '%s'\n",
10567 version);
10568 } else {
10569 if ((version[0] == '1') && ((version[1] == '.'))) {
10570 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10571 "Unsupported version '%s'\n",
10572 version, NULL);
10573 } else {
10574 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10575 "Unsupported version '%s'\n",
10576 version);
10580 if (ctxt->version != NULL)
10581 xmlFree((void *) ctxt->version);
10582 ctxt->version = version;
10586 * We may have the encoding declaration
10588 if (!IS_BLANK_CH(RAW)) {
10589 if ((RAW == '?') && (NXT(1) == '>')) {
10590 SKIP(2);
10591 return;
10593 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10595 xmlParseEncodingDecl(ctxt);
10596 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10597 (ctxt->instate == XML_PARSER_EOF)) {
10599 * The XML REC instructs us to stop parsing right here
10601 return;
10605 * We may have the standalone status.
10607 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10608 if ((RAW == '?') && (NXT(1) == '>')) {
10609 SKIP(2);
10610 return;
10612 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10616 * We can grow the input buffer freely at that point
10618 GROW;
10620 SKIP_BLANKS;
10621 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10623 SKIP_BLANKS;
10624 if ((RAW == '?') && (NXT(1) == '>')) {
10625 SKIP(2);
10626 } else if (RAW == '>') {
10627 /* Deprecated old WD ... */
10628 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10629 NEXT;
10630 } else {
10631 int c;
10633 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10634 while ((c = CUR) != 0) {
10635 NEXT;
10636 if (c == '>')
10637 break;
10643 * xmlParseMisc:
10644 * @ctxt: an XML parser context
10646 * DEPRECATED: Internal function, don't use.
10648 * parse an XML Misc* optional field.
10650 * [27] Misc ::= Comment | PI | S
10653 void
10654 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10655 while (ctxt->instate != XML_PARSER_EOF) {
10656 SKIP_BLANKS;
10657 GROW;
10658 if ((RAW == '<') && (NXT(1) == '?')) {
10659 xmlParsePI(ctxt);
10660 } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10661 xmlParseComment(ctxt);
10662 } else {
10663 break;
10669 * xmlParseDocument:
10670 * @ctxt: an XML parser context
10672 * parse an XML document (and build a tree if using the standard SAX
10673 * interface).
10675 * [1] document ::= prolog element Misc*
10677 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10679 * Returns 0, -1 in case of error. the parser context is augmented
10680 * as a result of the parsing.
10684 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10685 xmlChar start[4];
10686 xmlCharEncoding enc;
10688 xmlInitParser();
10690 if ((ctxt == NULL) || (ctxt->input == NULL))
10691 return(-1);
10693 GROW;
10696 * SAX: detecting the level.
10698 xmlDetectSAX2(ctxt);
10701 * SAX: beginning of the document processing.
10703 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10704 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10705 if (ctxt->instate == XML_PARSER_EOF)
10706 return(-1);
10708 if ((ctxt->encoding == NULL) &&
10709 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10711 * Get the 4 first bytes and decode the charset
10712 * if enc != XML_CHAR_ENCODING_NONE
10713 * plug some encoding conversion routines.
10715 start[0] = RAW;
10716 start[1] = NXT(1);
10717 start[2] = NXT(2);
10718 start[3] = NXT(3);
10719 enc = xmlDetectCharEncoding(&start[0], 4);
10720 if (enc != XML_CHAR_ENCODING_NONE) {
10721 xmlSwitchEncoding(ctxt, enc);
10726 if (CUR == 0) {
10727 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10728 return(-1);
10731 GROW;
10732 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10735 * Note that we will switch encoding on the fly.
10737 xmlParseXMLDecl(ctxt);
10738 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10739 (ctxt->instate == XML_PARSER_EOF)) {
10741 * The XML REC instructs us to stop parsing right here
10743 return(-1);
10745 ctxt->standalone = ctxt->input->standalone;
10746 SKIP_BLANKS;
10747 } else {
10748 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10750 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10751 ctxt->sax->startDocument(ctxt->userData);
10752 if (ctxt->instate == XML_PARSER_EOF)
10753 return(-1);
10754 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10755 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10756 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10760 * The Misc part of the Prolog
10762 xmlParseMisc(ctxt);
10765 * Then possibly doc type declaration(s) and more Misc
10766 * (doctypedecl Misc*)?
10768 GROW;
10769 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10771 ctxt->inSubset = 1;
10772 xmlParseDocTypeDecl(ctxt);
10773 if (RAW == '[') {
10774 ctxt->instate = XML_PARSER_DTD;
10775 xmlParseInternalSubset(ctxt);
10776 if (ctxt->instate == XML_PARSER_EOF)
10777 return(-1);
10781 * Create and update the external subset.
10783 ctxt->inSubset = 2;
10784 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10785 (!ctxt->disableSAX))
10786 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10787 ctxt->extSubSystem, ctxt->extSubURI);
10788 if (ctxt->instate == XML_PARSER_EOF)
10789 return(-1);
10790 ctxt->inSubset = 0;
10792 xmlCleanSpecialAttr(ctxt);
10794 ctxt->instate = XML_PARSER_PROLOG;
10795 xmlParseMisc(ctxt);
10799 * Time to start parsing the tree itself
10801 GROW;
10802 if (RAW != '<') {
10803 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10804 "Start tag expected, '<' not found\n");
10805 } else {
10806 ctxt->instate = XML_PARSER_CONTENT;
10807 xmlParseElement(ctxt);
10808 ctxt->instate = XML_PARSER_EPILOG;
10812 * The Misc part at the end
10814 xmlParseMisc(ctxt);
10816 if (RAW != 0) {
10817 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10819 ctxt->instate = XML_PARSER_EOF;
10823 * SAX: end of the document processing.
10825 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10826 ctxt->sax->endDocument(ctxt->userData);
10829 * Remove locally kept entity definitions if the tree was not built
10831 if ((ctxt->myDoc != NULL) &&
10832 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10833 xmlFreeDoc(ctxt->myDoc);
10834 ctxt->myDoc = NULL;
10837 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10838 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10839 if (ctxt->valid)
10840 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10841 if (ctxt->nsWellFormed)
10842 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10843 if (ctxt->options & XML_PARSE_OLD10)
10844 ctxt->myDoc->properties |= XML_DOC_OLD10;
10846 if (! ctxt->wellFormed) {
10847 ctxt->valid = 0;
10848 return(-1);
10850 return(0);
10854 * xmlParseExtParsedEnt:
10855 * @ctxt: an XML parser context
10857 * parse a general parsed entity
10858 * An external general parsed entity is well-formed if it matches the
10859 * production labeled extParsedEnt.
10861 * [78] extParsedEnt ::= TextDecl? content
10863 * Returns 0, -1 in case of error. the parser context is augmented
10864 * as a result of the parsing.
10868 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10869 xmlChar start[4];
10870 xmlCharEncoding enc;
10872 if ((ctxt == NULL) || (ctxt->input == NULL))
10873 return(-1);
10875 xmlDetectSAX2(ctxt);
10877 GROW;
10880 * SAX: beginning of the document processing.
10882 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10883 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10886 * Get the 4 first bytes and decode the charset
10887 * if enc != XML_CHAR_ENCODING_NONE
10888 * plug some encoding conversion routines.
10890 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10891 start[0] = RAW;
10892 start[1] = NXT(1);
10893 start[2] = NXT(2);
10894 start[3] = NXT(3);
10895 enc = xmlDetectCharEncoding(start, 4);
10896 if (enc != XML_CHAR_ENCODING_NONE) {
10897 xmlSwitchEncoding(ctxt, enc);
10902 if (CUR == 0) {
10903 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10907 * Check for the XMLDecl in the Prolog.
10909 GROW;
10910 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10913 * Note that we will switch encoding on the fly.
10915 xmlParseXMLDecl(ctxt);
10916 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10918 * The XML REC instructs us to stop parsing right here
10920 return(-1);
10922 SKIP_BLANKS;
10923 } else {
10924 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10926 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10927 ctxt->sax->startDocument(ctxt->userData);
10928 if (ctxt->instate == XML_PARSER_EOF)
10929 return(-1);
10932 * Doing validity checking on chunk doesn't make sense
10934 ctxt->instate = XML_PARSER_CONTENT;
10935 ctxt->validate = 0;
10936 ctxt->loadsubset = 0;
10937 ctxt->depth = 0;
10939 xmlParseContent(ctxt);
10940 if (ctxt->instate == XML_PARSER_EOF)
10941 return(-1);
10943 if ((RAW == '<') && (NXT(1) == '/')) {
10944 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10945 } else if (RAW != 0) {
10946 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10950 * SAX: end of the document processing.
10952 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10953 ctxt->sax->endDocument(ctxt->userData);
10955 if (! ctxt->wellFormed) return(-1);
10956 return(0);
10959 #ifdef LIBXML_PUSH_ENABLED
10960 /************************************************************************
10962 * Progressive parsing interfaces *
10964 ************************************************************************/
10967 * xmlParseLookupChar:
10968 * @ctxt: an XML parser context
10969 * @c: character
10971 * Check whether the input buffer contains a character.
10973 static int
10974 xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10975 const xmlChar *cur;
10977 if (ctxt->checkIndex == 0) {
10978 cur = ctxt->input->cur + 1;
10979 } else {
10980 cur = ctxt->input->cur + ctxt->checkIndex;
10983 if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10984 size_t index = ctxt->input->end - ctxt->input->cur;
10986 if (index > LONG_MAX) {
10987 ctxt->checkIndex = 0;
10988 return(1);
10990 ctxt->checkIndex = index;
10991 return(0);
10992 } else {
10993 ctxt->checkIndex = 0;
10994 return(1);
10999 * xmlParseLookupString:
11000 * @ctxt: an XML parser context
11001 * @startDelta: delta to apply at the start
11002 * @str: string
11003 * @strLen: length of string
11005 * Check whether the input buffer contains a string.
11007 static const xmlChar *
11008 xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11009 const char *str, size_t strLen) {
11010 const xmlChar *cur, *term;
11012 if (ctxt->checkIndex == 0) {
11013 cur = ctxt->input->cur + startDelta;
11014 } else {
11015 cur = ctxt->input->cur + ctxt->checkIndex;
11018 term = BAD_CAST strstr((const char *) cur, str);
11019 if (term == NULL) {
11020 const xmlChar *end = ctxt->input->end;
11021 size_t index;
11023 /* Rescan (strLen - 1) characters. */
11024 if ((size_t) (end - cur) < strLen)
11025 end = cur;
11026 else
11027 end -= strLen - 1;
11028 index = end - ctxt->input->cur;
11029 if (index > LONG_MAX) {
11030 ctxt->checkIndex = 0;
11031 return(ctxt->input->end - strLen);
11033 ctxt->checkIndex = index;
11034 } else {
11035 ctxt->checkIndex = 0;
11038 return(term);
11042 * xmlParseLookupCharData:
11043 * @ctxt: an XML parser context
11045 * Check whether the input buffer contains terminated char data.
11047 static int
11048 xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11049 const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11050 const xmlChar *end = ctxt->input->end;
11051 size_t index;
11053 while (cur < end) {
11054 if ((*cur == '<') || (*cur == '&')) {
11055 ctxt->checkIndex = 0;
11056 return(1);
11058 cur++;
11061 index = cur - ctxt->input->cur;
11062 if (index > LONG_MAX) {
11063 ctxt->checkIndex = 0;
11064 return(1);
11066 ctxt->checkIndex = index;
11067 return(0);
11071 * xmlParseLookupGt:
11072 * @ctxt: an XML parser context
11074 * Check whether there's enough data in the input buffer to finish parsing
11075 * a start tag. This has to take quotes into account.
11077 static int
11078 xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11079 const xmlChar *cur;
11080 const xmlChar *end = ctxt->input->end;
11081 int state = ctxt->endCheckState;
11082 size_t index;
11084 if (ctxt->checkIndex == 0)
11085 cur = ctxt->input->cur + 1;
11086 else
11087 cur = ctxt->input->cur + ctxt->checkIndex;
11089 while (cur < end) {
11090 if (state) {
11091 if (*cur == state)
11092 state = 0;
11093 } else if (*cur == '\'' || *cur == '"') {
11094 state = *cur;
11095 } else if (*cur == '>') {
11096 ctxt->checkIndex = 0;
11097 ctxt->endCheckState = 0;
11098 return(1);
11100 cur++;
11103 index = cur - ctxt->input->cur;
11104 if (index > LONG_MAX) {
11105 ctxt->checkIndex = 0;
11106 ctxt->endCheckState = 0;
11107 return(1);
11109 ctxt->checkIndex = index;
11110 ctxt->endCheckState = state;
11111 return(0);
11115 * xmlParseLookupInternalSubset:
11116 * @ctxt: an XML parser context
11118 * Check whether there's enough data in the input buffer to finish parsing
11119 * the internal subset.
11121 static int
11122 xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11124 * Sorry, but progressive parsing of the internal subset is not
11125 * supported. We first check that the full content of the internal
11126 * subset is available and parsing is launched only at that point.
11127 * Internal subset ends with "']' S? '>'" in an unescaped section and
11128 * not in a ']]>' sequence which are conditional sections.
11130 const xmlChar *cur, *start;
11131 const xmlChar *end = ctxt->input->end;
11132 int state = ctxt->endCheckState;
11133 size_t index;
11135 if (ctxt->checkIndex == 0) {
11136 cur = ctxt->input->cur + 1;
11137 } else {
11138 cur = ctxt->input->cur + ctxt->checkIndex;
11140 start = cur;
11142 while (cur < end) {
11143 if (state == '-') {
11144 if ((*cur == '-') &&
11145 (cur[1] == '-') &&
11146 (cur[2] == '>')) {
11147 state = 0;
11148 cur += 3;
11149 start = cur;
11150 continue;
11153 else if (state == ']') {
11154 if (*cur == '>') {
11155 ctxt->checkIndex = 0;
11156 ctxt->endCheckState = 0;
11157 return(1);
11159 if (IS_BLANK_CH(*cur)) {
11160 state = ' ';
11161 } else if (*cur != ']') {
11162 state = 0;
11163 start = cur;
11164 continue;
11167 else if (state == ' ') {
11168 if (*cur == '>') {
11169 ctxt->checkIndex = 0;
11170 ctxt->endCheckState = 0;
11171 return(1);
11173 if (!IS_BLANK_CH(*cur)) {
11174 state = 0;
11175 start = cur;
11176 continue;
11179 else if (state != 0) {
11180 if (*cur == state) {
11181 state = 0;
11182 start = cur + 1;
11185 else if (*cur == '<') {
11186 if ((cur[1] == '!') &&
11187 (cur[2] == '-') &&
11188 (cur[3] == '-')) {
11189 state = '-';
11190 cur += 4;
11191 /* Don't treat <!--> as comment */
11192 start = cur;
11193 continue;
11196 else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11197 state = *cur;
11200 cur++;
11204 * Rescan the three last characters to detect "<!--" and "-->"
11205 * split across chunks.
11207 if ((state == 0) || (state == '-')) {
11208 if (cur - start < 3)
11209 cur = start;
11210 else
11211 cur -= 3;
11213 index = cur - ctxt->input->cur;
11214 if (index > LONG_MAX) {
11215 ctxt->checkIndex = 0;
11216 ctxt->endCheckState = 0;
11217 return(1);
11219 ctxt->checkIndex = index;
11220 ctxt->endCheckState = state;
11221 return(0);
11225 * xmlCheckCdataPush:
11226 * @cur: pointer to the block of characters
11227 * @len: length of the block in bytes
11228 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11230 * Check that the block of characters is okay as SCdata content [20]
11232 * Returns the number of bytes to pass if okay, a negative index where an
11233 * UTF-8 error occurred otherwise
11235 static int
11236 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11237 int ix;
11238 unsigned char c;
11239 int codepoint;
11241 if ((utf == NULL) || (len <= 0))
11242 return(0);
11244 for (ix = 0; ix < len;) { /* string is 0-terminated */
11245 c = utf[ix];
11246 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11247 if (c >= 0x20)
11248 ix++;
11249 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11250 ix++;
11251 else
11252 return(-ix);
11253 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11254 if (ix + 2 > len) return(complete ? -ix : ix);
11255 if ((utf[ix+1] & 0xc0 ) != 0x80)
11256 return(-ix);
11257 codepoint = (utf[ix] & 0x1f) << 6;
11258 codepoint |= utf[ix+1] & 0x3f;
11259 if (!xmlIsCharQ(codepoint))
11260 return(-ix);
11261 ix += 2;
11262 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11263 if (ix + 3 > len) return(complete ? -ix : ix);
11264 if (((utf[ix+1] & 0xc0) != 0x80) ||
11265 ((utf[ix+2] & 0xc0) != 0x80))
11266 return(-ix);
11267 codepoint = (utf[ix] & 0xf) << 12;
11268 codepoint |= (utf[ix+1] & 0x3f) << 6;
11269 codepoint |= utf[ix+2] & 0x3f;
11270 if (!xmlIsCharQ(codepoint))
11271 return(-ix);
11272 ix += 3;
11273 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11274 if (ix + 4 > len) return(complete ? -ix : ix);
11275 if (((utf[ix+1] & 0xc0) != 0x80) ||
11276 ((utf[ix+2] & 0xc0) != 0x80) ||
11277 ((utf[ix+3] & 0xc0) != 0x80))
11278 return(-ix);
11279 codepoint = (utf[ix] & 0x7) << 18;
11280 codepoint |= (utf[ix+1] & 0x3f) << 12;
11281 codepoint |= (utf[ix+2] & 0x3f) << 6;
11282 codepoint |= utf[ix+3] & 0x3f;
11283 if (!xmlIsCharQ(codepoint))
11284 return(-ix);
11285 ix += 4;
11286 } else /* unknown encoding */
11287 return(-ix);
11289 return(ix);
11293 * xmlParseTryOrFinish:
11294 * @ctxt: an XML parser context
11295 * @terminate: last chunk indicator
11297 * Try to progress on parsing
11299 * Returns zero if no parsing was possible
11301 static int
11302 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11303 int ret = 0;
11304 int tlen;
11305 size_t avail;
11306 xmlChar cur, next;
11308 if (ctxt->input == NULL)
11309 return(0);
11311 #ifdef DEBUG_PUSH
11312 switch (ctxt->instate) {
11313 case XML_PARSER_EOF:
11314 xmlGenericError(xmlGenericErrorContext,
11315 "PP: try EOF\n"); break;
11316 case XML_PARSER_START:
11317 xmlGenericError(xmlGenericErrorContext,
11318 "PP: try START\n"); break;
11319 case XML_PARSER_MISC:
11320 xmlGenericError(xmlGenericErrorContext,
11321 "PP: try MISC\n");break;
11322 case XML_PARSER_COMMENT:
11323 xmlGenericError(xmlGenericErrorContext,
11324 "PP: try COMMENT\n");break;
11325 case XML_PARSER_PROLOG:
11326 xmlGenericError(xmlGenericErrorContext,
11327 "PP: try PROLOG\n");break;
11328 case XML_PARSER_START_TAG:
11329 xmlGenericError(xmlGenericErrorContext,
11330 "PP: try START_TAG\n");break;
11331 case XML_PARSER_CONTENT:
11332 xmlGenericError(xmlGenericErrorContext,
11333 "PP: try CONTENT\n");break;
11334 case XML_PARSER_CDATA_SECTION:
11335 xmlGenericError(xmlGenericErrorContext,
11336 "PP: try CDATA_SECTION\n");break;
11337 case XML_PARSER_END_TAG:
11338 xmlGenericError(xmlGenericErrorContext,
11339 "PP: try END_TAG\n");break;
11340 case XML_PARSER_ENTITY_DECL:
11341 xmlGenericError(xmlGenericErrorContext,
11342 "PP: try ENTITY_DECL\n");break;
11343 case XML_PARSER_ENTITY_VALUE:
11344 xmlGenericError(xmlGenericErrorContext,
11345 "PP: try ENTITY_VALUE\n");break;
11346 case XML_PARSER_ATTRIBUTE_VALUE:
11347 xmlGenericError(xmlGenericErrorContext,
11348 "PP: try ATTRIBUTE_VALUE\n");break;
11349 case XML_PARSER_DTD:
11350 xmlGenericError(xmlGenericErrorContext,
11351 "PP: try DTD\n");break;
11352 case XML_PARSER_EPILOG:
11353 xmlGenericError(xmlGenericErrorContext,
11354 "PP: try EPILOG\n");break;
11355 case XML_PARSER_PI:
11356 xmlGenericError(xmlGenericErrorContext,
11357 "PP: try PI\n");break;
11358 case XML_PARSER_IGNORE:
11359 xmlGenericError(xmlGenericErrorContext,
11360 "PP: try IGNORE\n");break;
11362 #endif
11364 if ((ctxt->input != NULL) &&
11365 (ctxt->input->cur - ctxt->input->base > 4096)) {
11366 xmlParserShrink(ctxt);
11369 while (ctxt->instate != XML_PARSER_EOF) {
11370 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11371 return(0);
11373 if (ctxt->input == NULL) break;
11374 if (ctxt->input->buf != NULL) {
11376 * If we are operating on converted input, try to flush
11377 * remaining chars to avoid them stalling in the non-converted
11378 * buffer.
11380 if ((ctxt->input->buf->raw != NULL) &&
11381 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11382 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11383 ctxt->input);
11384 size_t current = ctxt->input->cur - ctxt->input->base;
11386 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11387 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11388 base, current);
11391 avail = ctxt->input->end - ctxt->input->cur;
11392 if (avail < 1)
11393 goto done;
11394 switch (ctxt->instate) {
11395 case XML_PARSER_EOF:
11397 * Document parsing is done !
11399 goto done;
11400 case XML_PARSER_START:
11401 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11402 xmlChar start[4];
11403 xmlCharEncoding enc;
11406 * Very first chars read from the document flow.
11408 if (avail < 4)
11409 goto done;
11412 * Get the 4 first bytes and decode the charset
11413 * if enc != XML_CHAR_ENCODING_NONE
11414 * plug some encoding conversion routines,
11415 * else xmlSwitchEncoding will set to (default)
11416 * UTF8.
11418 start[0] = RAW;
11419 start[1] = NXT(1);
11420 start[2] = NXT(2);
11421 start[3] = NXT(3);
11422 enc = xmlDetectCharEncoding(start, 4);
11424 * We need more bytes to detect EBCDIC code pages.
11425 * See xmlDetectEBCDIC.
11427 if ((enc == XML_CHAR_ENCODING_EBCDIC) &&
11428 (!terminate) && (avail < 200))
11429 goto done;
11430 xmlSwitchEncoding(ctxt, enc);
11431 break;
11434 if (avail < 2)
11435 goto done;
11436 cur = ctxt->input->cur[0];
11437 next = ctxt->input->cur[1];
11438 if (cur == 0) {
11439 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11440 ctxt->sax->setDocumentLocator(ctxt->userData,
11441 &xmlDefaultSAXLocator);
11442 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11443 xmlHaltParser(ctxt);
11444 #ifdef DEBUG_PUSH
11445 xmlGenericError(xmlGenericErrorContext,
11446 "PP: entering EOF\n");
11447 #endif
11448 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11449 ctxt->sax->endDocument(ctxt->userData);
11450 goto done;
11452 if ((cur == '<') && (next == '?')) {
11453 /* PI or XML decl */
11454 if (avail < 5) goto done;
11455 if ((!terminate) &&
11456 (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11457 goto done;
11458 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11459 ctxt->sax->setDocumentLocator(ctxt->userData,
11460 &xmlDefaultSAXLocator);
11461 if ((ctxt->input->cur[2] == 'x') &&
11462 (ctxt->input->cur[3] == 'm') &&
11463 (ctxt->input->cur[4] == 'l') &&
11464 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11465 ret += 5;
11466 #ifdef DEBUG_PUSH
11467 xmlGenericError(xmlGenericErrorContext,
11468 "PP: Parsing XML Decl\n");
11469 #endif
11470 xmlParseXMLDecl(ctxt);
11471 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11473 * The XML REC instructs us to stop parsing right
11474 * here
11476 xmlHaltParser(ctxt);
11477 return(0);
11479 ctxt->standalone = ctxt->input->standalone;
11480 if ((ctxt->encoding == NULL) &&
11481 (ctxt->input->encoding != NULL))
11482 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11483 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11484 (!ctxt->disableSAX))
11485 ctxt->sax->startDocument(ctxt->userData);
11486 ctxt->instate = XML_PARSER_MISC;
11487 #ifdef DEBUG_PUSH
11488 xmlGenericError(xmlGenericErrorContext,
11489 "PP: entering MISC\n");
11490 #endif
11491 } else {
11492 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11493 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11494 (!ctxt->disableSAX))
11495 ctxt->sax->startDocument(ctxt->userData);
11496 ctxt->instate = XML_PARSER_MISC;
11497 #ifdef DEBUG_PUSH
11498 xmlGenericError(xmlGenericErrorContext,
11499 "PP: entering MISC\n");
11500 #endif
11502 } else {
11503 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11504 ctxt->sax->setDocumentLocator(ctxt->userData,
11505 &xmlDefaultSAXLocator);
11506 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11507 if (ctxt->version == NULL) {
11508 xmlErrMemory(ctxt, NULL);
11509 break;
11511 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11512 (!ctxt->disableSAX))
11513 ctxt->sax->startDocument(ctxt->userData);
11514 ctxt->instate = XML_PARSER_MISC;
11515 #ifdef DEBUG_PUSH
11516 xmlGenericError(xmlGenericErrorContext,
11517 "PP: entering MISC\n");
11518 #endif
11520 break;
11521 case XML_PARSER_START_TAG: {
11522 const xmlChar *name;
11523 const xmlChar *prefix = NULL;
11524 const xmlChar *URI = NULL;
11525 int line = ctxt->input->line;
11526 int nsNr = ctxt->nsNr;
11528 if ((avail < 2) && (ctxt->inputNr == 1))
11529 goto done;
11530 cur = ctxt->input->cur[0];
11531 if (cur != '<') {
11532 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11533 xmlHaltParser(ctxt);
11534 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11535 ctxt->sax->endDocument(ctxt->userData);
11536 goto done;
11538 if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11539 goto done;
11540 if (ctxt->spaceNr == 0)
11541 spacePush(ctxt, -1);
11542 else if (*ctxt->space == -2)
11543 spacePush(ctxt, -1);
11544 else
11545 spacePush(ctxt, *ctxt->space);
11546 #ifdef LIBXML_SAX1_ENABLED
11547 if (ctxt->sax2)
11548 #endif /* LIBXML_SAX1_ENABLED */
11549 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11550 #ifdef LIBXML_SAX1_ENABLED
11551 else
11552 name = xmlParseStartTag(ctxt);
11553 #endif /* LIBXML_SAX1_ENABLED */
11554 if (ctxt->instate == XML_PARSER_EOF)
11555 goto done;
11556 if (name == NULL) {
11557 spacePop(ctxt);
11558 xmlHaltParser(ctxt);
11559 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11560 ctxt->sax->endDocument(ctxt->userData);
11561 goto done;
11563 #ifdef LIBXML_VALID_ENABLED
11565 * [ VC: Root Element Type ]
11566 * The Name in the document type declaration must match
11567 * the element type of the root element.
11569 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11570 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11571 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11572 #endif /* LIBXML_VALID_ENABLED */
11575 * Check for an Empty Element.
11577 if ((RAW == '/') && (NXT(1) == '>')) {
11578 SKIP(2);
11580 if (ctxt->sax2) {
11581 if ((ctxt->sax != NULL) &&
11582 (ctxt->sax->endElementNs != NULL) &&
11583 (!ctxt->disableSAX))
11584 ctxt->sax->endElementNs(ctxt->userData, name,
11585 prefix, URI);
11586 if (ctxt->nsNr - nsNr > 0)
11587 nsPop(ctxt, ctxt->nsNr - nsNr);
11588 #ifdef LIBXML_SAX1_ENABLED
11589 } else {
11590 if ((ctxt->sax != NULL) &&
11591 (ctxt->sax->endElement != NULL) &&
11592 (!ctxt->disableSAX))
11593 ctxt->sax->endElement(ctxt->userData, name);
11594 #endif /* LIBXML_SAX1_ENABLED */
11596 if (ctxt->instate == XML_PARSER_EOF)
11597 goto done;
11598 spacePop(ctxt);
11599 if (ctxt->nameNr == 0) {
11600 ctxt->instate = XML_PARSER_EPILOG;
11601 } else {
11602 ctxt->instate = XML_PARSER_CONTENT;
11604 break;
11606 if (RAW == '>') {
11607 NEXT;
11608 } else {
11609 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11610 "Couldn't find end of Start Tag %s\n",
11611 name);
11612 nodePop(ctxt);
11613 spacePop(ctxt);
11615 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11617 ctxt->instate = XML_PARSER_CONTENT;
11618 break;
11620 case XML_PARSER_CONTENT: {
11621 if ((avail < 2) && (ctxt->inputNr == 1))
11622 goto done;
11623 cur = ctxt->input->cur[0];
11624 next = ctxt->input->cur[1];
11626 if ((cur == '<') && (next == '/')) {
11627 ctxt->instate = XML_PARSER_END_TAG;
11628 break;
11629 } else if ((cur == '<') && (next == '?')) {
11630 if ((!terminate) &&
11631 (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11632 goto done;
11633 xmlParsePI(ctxt);
11634 ctxt->instate = XML_PARSER_CONTENT;
11635 } else if ((cur == '<') && (next != '!')) {
11636 ctxt->instate = XML_PARSER_START_TAG;
11637 break;
11638 } else if ((cur == '<') && (next == '!') &&
11639 (ctxt->input->cur[2] == '-') &&
11640 (ctxt->input->cur[3] == '-')) {
11641 if ((!terminate) &&
11642 (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11643 goto done;
11644 xmlParseComment(ctxt);
11645 ctxt->instate = XML_PARSER_CONTENT;
11646 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11647 (ctxt->input->cur[2] == '[') &&
11648 (ctxt->input->cur[3] == 'C') &&
11649 (ctxt->input->cur[4] == 'D') &&
11650 (ctxt->input->cur[5] == 'A') &&
11651 (ctxt->input->cur[6] == 'T') &&
11652 (ctxt->input->cur[7] == 'A') &&
11653 (ctxt->input->cur[8] == '[')) {
11654 SKIP(9);
11655 ctxt->instate = XML_PARSER_CDATA_SECTION;
11656 break;
11657 } else if ((cur == '<') && (next == '!') &&
11658 (avail < 9)) {
11659 goto done;
11660 } else if (cur == '<') {
11661 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11662 "detected an error in element content\n");
11663 SKIP(1);
11664 } else if (cur == '&') {
11665 if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11666 goto done;
11667 xmlParseReference(ctxt);
11668 } else {
11669 /* TODO Avoid the extra copy, handle directly !!! */
11671 * Goal of the following test is:
11672 * - minimize calls to the SAX 'character' callback
11673 * when they are mergeable
11674 * - handle an problem for isBlank when we only parse
11675 * a sequence of blank chars and the next one is
11676 * not available to check against '<' presence.
11677 * - tries to homogenize the differences in SAX
11678 * callbacks between the push and pull versions
11679 * of the parser.
11681 if ((ctxt->inputNr == 1) &&
11682 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11683 if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11684 goto done;
11686 ctxt->checkIndex = 0;
11687 xmlParseCharDataInternal(ctxt, !terminate);
11689 break;
11691 case XML_PARSER_END_TAG:
11692 if (avail < 2)
11693 goto done;
11694 if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11695 goto done;
11696 if (ctxt->sax2) {
11697 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11698 nameNsPop(ctxt);
11700 #ifdef LIBXML_SAX1_ENABLED
11701 else
11702 xmlParseEndTag1(ctxt, 0);
11703 #endif /* LIBXML_SAX1_ENABLED */
11704 if (ctxt->instate == XML_PARSER_EOF) {
11705 /* Nothing */
11706 } else if (ctxt->nameNr == 0) {
11707 ctxt->instate = XML_PARSER_EPILOG;
11708 } else {
11709 ctxt->instate = XML_PARSER_CONTENT;
11711 break;
11712 case XML_PARSER_CDATA_SECTION: {
11714 * The Push mode need to have the SAX callback for
11715 * cdataBlock merge back contiguous callbacks.
11717 const xmlChar *term;
11719 if (terminate) {
11721 * Don't call xmlParseLookupString. If 'terminate'
11722 * is set, checkIndex is invalid.
11724 term = BAD_CAST strstr((const char *) ctxt->input->cur,
11725 "]]>");
11726 } else {
11727 term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11730 if (term == NULL) {
11731 int tmp, size;
11733 if (terminate) {
11734 /* Unfinished CDATA section */
11735 size = ctxt->input->end - ctxt->input->cur;
11736 } else {
11737 if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11738 goto done;
11739 ctxt->checkIndex = 0;
11740 /* XXX: Why don't we pass the full buffer? */
11741 size = XML_PARSER_BIG_BUFFER_SIZE;
11743 tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11744 if (tmp <= 0) {
11745 tmp = -tmp;
11746 ctxt->input->cur += tmp;
11747 goto encoding_error;
11749 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11750 if (ctxt->sax->cdataBlock != NULL)
11751 ctxt->sax->cdataBlock(ctxt->userData,
11752 ctxt->input->cur, tmp);
11753 else if (ctxt->sax->characters != NULL)
11754 ctxt->sax->characters(ctxt->userData,
11755 ctxt->input->cur, tmp);
11757 if (ctxt->instate == XML_PARSER_EOF)
11758 goto done;
11759 SKIPL(tmp);
11760 } else {
11761 int base = term - CUR_PTR;
11762 int tmp;
11764 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11765 if ((tmp < 0) || (tmp != base)) {
11766 tmp = -tmp;
11767 ctxt->input->cur += tmp;
11768 goto encoding_error;
11770 if ((ctxt->sax != NULL) && (base == 0) &&
11771 (ctxt->sax->cdataBlock != NULL) &&
11772 (!ctxt->disableSAX)) {
11774 * Special case to provide identical behaviour
11775 * between pull and push parsers on enpty CDATA
11776 * sections
11778 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11779 (!strncmp((const char *)&ctxt->input->cur[-9],
11780 "<![CDATA[", 9)))
11781 ctxt->sax->cdataBlock(ctxt->userData,
11782 BAD_CAST "", 0);
11783 } else if ((ctxt->sax != NULL) && (base > 0) &&
11784 (!ctxt->disableSAX)) {
11785 if (ctxt->sax->cdataBlock != NULL)
11786 ctxt->sax->cdataBlock(ctxt->userData,
11787 ctxt->input->cur, base);
11788 else if (ctxt->sax->characters != NULL)
11789 ctxt->sax->characters(ctxt->userData,
11790 ctxt->input->cur, base);
11792 if (ctxt->instate == XML_PARSER_EOF)
11793 goto done;
11794 SKIPL(base + 3);
11795 ctxt->instate = XML_PARSER_CONTENT;
11796 #ifdef DEBUG_PUSH
11797 xmlGenericError(xmlGenericErrorContext,
11798 "PP: entering CONTENT\n");
11799 #endif
11801 break;
11803 case XML_PARSER_MISC:
11804 case XML_PARSER_PROLOG:
11805 case XML_PARSER_EPILOG:
11806 SKIP_BLANKS;
11807 avail = ctxt->input->end - ctxt->input->cur;
11808 if (avail < 2)
11809 goto done;
11810 cur = ctxt->input->cur[0];
11811 next = ctxt->input->cur[1];
11812 if ((cur == '<') && (next == '?')) {
11813 if ((!terminate) &&
11814 (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11815 goto done;
11816 #ifdef DEBUG_PUSH
11817 xmlGenericError(xmlGenericErrorContext,
11818 "PP: Parsing PI\n");
11819 #endif
11820 xmlParsePI(ctxt);
11821 if (ctxt->instate == XML_PARSER_EOF)
11822 goto done;
11823 } else if ((cur == '<') && (next == '!') &&
11824 (ctxt->input->cur[2] == '-') &&
11825 (ctxt->input->cur[3] == '-')) {
11826 if ((!terminate) &&
11827 (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11828 goto done;
11829 #ifdef DEBUG_PUSH
11830 xmlGenericError(xmlGenericErrorContext,
11831 "PP: Parsing Comment\n");
11832 #endif
11833 xmlParseComment(ctxt);
11834 if (ctxt->instate == XML_PARSER_EOF)
11835 goto done;
11836 } else if ((ctxt->instate == XML_PARSER_MISC) &&
11837 (cur == '<') && (next == '!') &&
11838 (ctxt->input->cur[2] == 'D') &&
11839 (ctxt->input->cur[3] == 'O') &&
11840 (ctxt->input->cur[4] == 'C') &&
11841 (ctxt->input->cur[5] == 'T') &&
11842 (ctxt->input->cur[6] == 'Y') &&
11843 (ctxt->input->cur[7] == 'P') &&
11844 (ctxt->input->cur[8] == 'E')) {
11845 if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11846 goto done;
11847 #ifdef DEBUG_PUSH
11848 xmlGenericError(xmlGenericErrorContext,
11849 "PP: Parsing internal subset\n");
11850 #endif
11851 ctxt->inSubset = 1;
11852 xmlParseDocTypeDecl(ctxt);
11853 if (ctxt->instate == XML_PARSER_EOF)
11854 goto done;
11855 if (RAW == '[') {
11856 ctxt->instate = XML_PARSER_DTD;
11857 #ifdef DEBUG_PUSH
11858 xmlGenericError(xmlGenericErrorContext,
11859 "PP: entering DTD\n");
11860 #endif
11861 } else {
11863 * Create and update the external subset.
11865 ctxt->inSubset = 2;
11866 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11867 (ctxt->sax->externalSubset != NULL))
11868 ctxt->sax->externalSubset(ctxt->userData,
11869 ctxt->intSubName, ctxt->extSubSystem,
11870 ctxt->extSubURI);
11871 ctxt->inSubset = 0;
11872 xmlCleanSpecialAttr(ctxt);
11873 ctxt->instate = XML_PARSER_PROLOG;
11874 #ifdef DEBUG_PUSH
11875 xmlGenericError(xmlGenericErrorContext,
11876 "PP: entering PROLOG\n");
11877 #endif
11879 } else if ((cur == '<') && (next == '!') &&
11880 (avail <
11881 (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11882 goto done;
11883 } else if (ctxt->instate == XML_PARSER_EPILOG) {
11884 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11885 xmlHaltParser(ctxt);
11886 #ifdef DEBUG_PUSH
11887 xmlGenericError(xmlGenericErrorContext,
11888 "PP: entering EOF\n");
11889 #endif
11890 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11891 ctxt->sax->endDocument(ctxt->userData);
11892 goto done;
11893 } else {
11894 ctxt->instate = XML_PARSER_START_TAG;
11895 #ifdef DEBUG_PUSH
11896 xmlGenericError(xmlGenericErrorContext,
11897 "PP: entering START_TAG\n");
11898 #endif
11900 break;
11901 case XML_PARSER_DTD: {
11902 if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11903 goto done;
11904 xmlParseInternalSubset(ctxt);
11905 if (ctxt->instate == XML_PARSER_EOF)
11906 goto done;
11907 ctxt->inSubset = 2;
11908 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11909 (ctxt->sax->externalSubset != NULL))
11910 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11911 ctxt->extSubSystem, ctxt->extSubURI);
11912 ctxt->inSubset = 0;
11913 xmlCleanSpecialAttr(ctxt);
11914 if (ctxt->instate == XML_PARSER_EOF)
11915 goto done;
11916 ctxt->instate = XML_PARSER_PROLOG;
11917 #ifdef DEBUG_PUSH
11918 xmlGenericError(xmlGenericErrorContext,
11919 "PP: entering PROLOG\n");
11920 #endif
11921 break;
11923 case XML_PARSER_COMMENT:
11924 xmlGenericError(xmlGenericErrorContext,
11925 "PP: internal error, state == COMMENT\n");
11926 ctxt->instate = XML_PARSER_CONTENT;
11927 #ifdef DEBUG_PUSH
11928 xmlGenericError(xmlGenericErrorContext,
11929 "PP: entering CONTENT\n");
11930 #endif
11931 break;
11932 case XML_PARSER_IGNORE:
11933 xmlGenericError(xmlGenericErrorContext,
11934 "PP: internal error, state == IGNORE");
11935 ctxt->instate = XML_PARSER_DTD;
11936 #ifdef DEBUG_PUSH
11937 xmlGenericError(xmlGenericErrorContext,
11938 "PP: entering DTD\n");
11939 #endif
11940 break;
11941 case XML_PARSER_PI:
11942 xmlGenericError(xmlGenericErrorContext,
11943 "PP: internal error, state == PI\n");
11944 ctxt->instate = XML_PARSER_CONTENT;
11945 #ifdef DEBUG_PUSH
11946 xmlGenericError(xmlGenericErrorContext,
11947 "PP: entering CONTENT\n");
11948 #endif
11949 break;
11950 case XML_PARSER_ENTITY_DECL:
11951 xmlGenericError(xmlGenericErrorContext,
11952 "PP: internal error, state == ENTITY_DECL\n");
11953 ctxt->instate = XML_PARSER_DTD;
11954 #ifdef DEBUG_PUSH
11955 xmlGenericError(xmlGenericErrorContext,
11956 "PP: entering DTD\n");
11957 #endif
11958 break;
11959 case XML_PARSER_ENTITY_VALUE:
11960 xmlGenericError(xmlGenericErrorContext,
11961 "PP: internal error, state == ENTITY_VALUE\n");
11962 ctxt->instate = XML_PARSER_CONTENT;
11963 #ifdef DEBUG_PUSH
11964 xmlGenericError(xmlGenericErrorContext,
11965 "PP: entering DTD\n");
11966 #endif
11967 break;
11968 case XML_PARSER_ATTRIBUTE_VALUE:
11969 xmlGenericError(xmlGenericErrorContext,
11970 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11971 ctxt->instate = XML_PARSER_START_TAG;
11972 #ifdef DEBUG_PUSH
11973 xmlGenericError(xmlGenericErrorContext,
11974 "PP: entering START_TAG\n");
11975 #endif
11976 break;
11977 case XML_PARSER_SYSTEM_LITERAL:
11978 xmlGenericError(xmlGenericErrorContext,
11979 "PP: internal error, state == SYSTEM_LITERAL\n");
11980 ctxt->instate = XML_PARSER_START_TAG;
11981 #ifdef DEBUG_PUSH
11982 xmlGenericError(xmlGenericErrorContext,
11983 "PP: entering START_TAG\n");
11984 #endif
11985 break;
11986 case XML_PARSER_PUBLIC_LITERAL:
11987 xmlGenericError(xmlGenericErrorContext,
11988 "PP: internal error, state == PUBLIC_LITERAL\n");
11989 ctxt->instate = XML_PARSER_START_TAG;
11990 #ifdef DEBUG_PUSH
11991 xmlGenericError(xmlGenericErrorContext,
11992 "PP: entering START_TAG\n");
11993 #endif
11994 break;
11997 done:
11998 #ifdef DEBUG_PUSH
11999 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12000 #endif
12001 return(ret);
12002 encoding_error:
12003 if (ctxt->input->end - ctxt->input->cur < 4) {
12004 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12005 "Input is not proper UTF-8, indicate encoding !\n",
12006 NULL, NULL);
12007 } else {
12008 char buffer[150];
12010 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12011 ctxt->input->cur[0], ctxt->input->cur[1],
12012 ctxt->input->cur[2], ctxt->input->cur[3]);
12013 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12014 "Input is not proper UTF-8, indicate encoding !\n%s",
12015 BAD_CAST buffer, NULL);
12017 return(0);
12021 * xmlParseChunk:
12022 * @ctxt: an XML parser context
12023 * @chunk: an char array
12024 * @size: the size in byte of the chunk
12025 * @terminate: last chunk indicator
12027 * Parse a Chunk of memory
12029 * Returns zero if no error, the xmlParserErrors otherwise.
12032 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12033 int terminate) {
12034 int end_in_lf = 0;
12036 if (ctxt == NULL)
12037 return(XML_ERR_INTERNAL_ERROR);
12038 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12039 return(ctxt->errNo);
12040 if (ctxt->instate == XML_PARSER_EOF)
12041 return(-1);
12042 if (ctxt->input == NULL)
12043 return(-1);
12045 ctxt->progressive = 1;
12046 if (ctxt->instate == XML_PARSER_START)
12047 xmlDetectSAX2(ctxt);
12048 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12049 (chunk[size - 1] == '\r')) {
12050 end_in_lf = 1;
12051 size--;
12054 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12055 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12056 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12057 size_t cur = ctxt->input->cur - ctxt->input->base;
12058 int res;
12060 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12061 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12062 if (res < 0) {
12063 ctxt->errNo = XML_PARSER_EOF;
12064 xmlHaltParser(ctxt);
12065 return (XML_PARSER_EOF);
12067 #ifdef DEBUG_PUSH
12068 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12069 #endif
12071 } else if (ctxt->instate != XML_PARSER_EOF) {
12072 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12073 xmlParserInputBufferPtr in = ctxt->input->buf;
12074 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12075 (in->raw != NULL)) {
12076 int nbchars;
12077 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12078 size_t current = ctxt->input->cur - ctxt->input->base;
12080 nbchars = xmlCharEncInput(in, terminate);
12081 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12082 if (nbchars < 0) {
12083 /* TODO 2.6.0 */
12084 xmlGenericError(xmlGenericErrorContext,
12085 "xmlParseChunk: encoder error\n");
12086 xmlHaltParser(ctxt);
12087 return(XML_ERR_INVALID_ENCODING);
12093 xmlParseTryOrFinish(ctxt, terminate);
12094 if (ctxt->instate == XML_PARSER_EOF)
12095 return(ctxt->errNo);
12097 if ((ctxt->input != NULL) &&
12098 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12099 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12100 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12101 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12102 xmlHaltParser(ctxt);
12104 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12105 return(ctxt->errNo);
12107 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12108 (ctxt->input->buf != NULL)) {
12109 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12110 ctxt->input);
12111 size_t current = ctxt->input->cur - ctxt->input->base;
12113 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12115 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12116 base, current);
12118 if (terminate) {
12120 * Check for termination
12122 if ((ctxt->instate != XML_PARSER_EOF) &&
12123 (ctxt->instate != XML_PARSER_EPILOG)) {
12124 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12126 if ((ctxt->instate == XML_PARSER_EPILOG) &&
12127 (ctxt->input->cur < ctxt->input->end)) {
12128 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12130 if (ctxt->instate != XML_PARSER_EOF) {
12131 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12132 ctxt->sax->endDocument(ctxt->userData);
12134 ctxt->instate = XML_PARSER_EOF;
12136 if (ctxt->wellFormed == 0)
12137 return((xmlParserErrors) ctxt->errNo);
12138 else
12139 return(0);
12142 /************************************************************************
12144 * I/O front end functions to the parser *
12146 ************************************************************************/
12149 * xmlCreatePushParserCtxt:
12150 * @sax: a SAX handler
12151 * @user_data: The user data returned on SAX callbacks
12152 * @chunk: a pointer to an array of chars
12153 * @size: number of chars in the array
12154 * @filename: an optional file name or URI
12156 * Create a parser context for using the XML parser in push mode.
12157 * If @buffer and @size are non-NULL, the data is used to detect
12158 * the encoding. The remaining characters will be parsed so they
12159 * don't need to be fed in again through xmlParseChunk.
12160 * To allow content encoding detection, @size should be >= 4
12161 * The value of @filename is used for fetching external entities
12162 * and error/warning reports.
12164 * Returns the new parser context or NULL
12167 xmlParserCtxtPtr
12168 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12169 const char *chunk, int size, const char *filename) {
12170 xmlParserCtxtPtr ctxt;
12171 xmlParserInputPtr inputStream;
12172 xmlParserInputBufferPtr buf;
12174 buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
12175 if (buf == NULL) return(NULL);
12177 ctxt = xmlNewSAXParserCtxt(sax, user_data);
12178 if (ctxt == NULL) {
12179 xmlErrMemory(NULL, "creating parser: out of memory\n");
12180 xmlFreeParserInputBuffer(buf);
12181 return(NULL);
12183 ctxt->dictNames = 1;
12184 if (filename == NULL) {
12185 ctxt->directory = NULL;
12186 } else {
12187 ctxt->directory = xmlParserGetDirectory(filename);
12190 inputStream = xmlNewInputStream(ctxt);
12191 if (inputStream == NULL) {
12192 xmlFreeParserCtxt(ctxt);
12193 xmlFreeParserInputBuffer(buf);
12194 return(NULL);
12197 if (filename == NULL)
12198 inputStream->filename = NULL;
12199 else {
12200 inputStream->filename = (char *)
12201 xmlCanonicPath((const xmlChar *) filename);
12202 if (inputStream->filename == NULL) {
12203 xmlFreeInputStream(inputStream);
12204 xmlFreeParserCtxt(ctxt);
12205 xmlFreeParserInputBuffer(buf);
12206 return(NULL);
12209 inputStream->buf = buf;
12210 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12211 inputPush(ctxt, inputStream);
12214 * If the caller didn't provide an initial 'chunk' for determining
12215 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12216 * that it can be automatically determined later
12218 ctxt->charset = XML_CHAR_ENCODING_NONE;
12220 if ((size != 0) && (chunk != NULL) &&
12221 (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12222 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12223 size_t cur = ctxt->input->cur - ctxt->input->base;
12225 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12227 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12228 #ifdef DEBUG_PUSH
12229 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12230 #endif
12233 return(ctxt);
12235 #endif /* LIBXML_PUSH_ENABLED */
12238 * xmlStopParser:
12239 * @ctxt: an XML parser context
12241 * Blocks further parser processing
12243 void
12244 xmlStopParser(xmlParserCtxtPtr ctxt) {
12245 if (ctxt == NULL)
12246 return;
12247 xmlHaltParser(ctxt);
12248 ctxt->errNo = XML_ERR_USER_STOP;
12252 * xmlCreateIOParserCtxt:
12253 * @sax: a SAX handler
12254 * @user_data: The user data returned on SAX callbacks
12255 * @ioread: an I/O read function
12256 * @ioclose: an I/O close function
12257 * @ioctx: an I/O handler
12258 * @enc: the charset encoding if known
12260 * Create a parser context for using the XML parser with an existing
12261 * I/O stream
12263 * Returns the new parser context or NULL
12265 xmlParserCtxtPtr
12266 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12267 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12268 void *ioctx, xmlCharEncoding enc) {
12269 xmlParserCtxtPtr ctxt;
12270 xmlParserInputPtr inputStream;
12271 xmlParserInputBufferPtr buf;
12273 if (ioread == NULL) return(NULL);
12275 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12276 if (buf == NULL) {
12277 if (ioclose != NULL)
12278 ioclose(ioctx);
12279 return (NULL);
12282 ctxt = xmlNewSAXParserCtxt(sax, user_data);
12283 if (ctxt == NULL) {
12284 xmlFreeParserInputBuffer(buf);
12285 return(NULL);
12288 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12289 if (inputStream == NULL) {
12290 xmlFreeParserCtxt(ctxt);
12291 return(NULL);
12293 inputPush(ctxt, inputStream);
12295 return(ctxt);
12298 #ifdef LIBXML_VALID_ENABLED
12299 /************************************************************************
12301 * Front ends when parsing a DTD *
12303 ************************************************************************/
12306 * xmlIOParseDTD:
12307 * @sax: the SAX handler block or NULL
12308 * @input: an Input Buffer
12309 * @enc: the charset encoding if known
12311 * Load and parse a DTD
12313 * Returns the resulting xmlDtdPtr or NULL in case of error.
12314 * @input will be freed by the function in any case.
12317 xmlDtdPtr
12318 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12319 xmlCharEncoding enc) {
12320 xmlDtdPtr ret = NULL;
12321 xmlParserCtxtPtr ctxt;
12322 xmlParserInputPtr pinput = NULL;
12323 xmlChar start[4];
12325 if (input == NULL)
12326 return(NULL);
12328 ctxt = xmlNewSAXParserCtxt(sax, NULL);
12329 if (ctxt == NULL) {
12330 xmlFreeParserInputBuffer(input);
12331 return(NULL);
12334 /* We are loading a DTD */
12335 ctxt->options |= XML_PARSE_DTDLOAD;
12337 xmlDetectSAX2(ctxt);
12340 * generate a parser input from the I/O handler
12343 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12344 if (pinput == NULL) {
12345 xmlFreeParserInputBuffer(input);
12346 xmlFreeParserCtxt(ctxt);
12347 return(NULL);
12351 * plug some encoding conversion routines here.
12353 if (xmlPushInput(ctxt, pinput) < 0) {
12354 xmlFreeParserCtxt(ctxt);
12355 return(NULL);
12357 if (enc != XML_CHAR_ENCODING_NONE) {
12358 xmlSwitchEncoding(ctxt, enc);
12361 pinput->filename = NULL;
12362 pinput->line = 1;
12363 pinput->col = 1;
12364 pinput->base = ctxt->input->cur;
12365 pinput->cur = ctxt->input->cur;
12366 pinput->free = NULL;
12369 * let's parse that entity knowing it's an external subset.
12371 ctxt->inSubset = 2;
12372 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12373 if (ctxt->myDoc == NULL) {
12374 xmlErrMemory(ctxt, "New Doc failed");
12375 return(NULL);
12377 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12378 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12379 BAD_CAST "none", BAD_CAST "none");
12381 if ((enc == XML_CHAR_ENCODING_NONE) &&
12382 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12384 * Get the 4 first bytes and decode the charset
12385 * if enc != XML_CHAR_ENCODING_NONE
12386 * plug some encoding conversion routines.
12388 start[0] = RAW;
12389 start[1] = NXT(1);
12390 start[2] = NXT(2);
12391 start[3] = NXT(3);
12392 enc = xmlDetectCharEncoding(start, 4);
12393 if (enc != XML_CHAR_ENCODING_NONE) {
12394 xmlSwitchEncoding(ctxt, enc);
12398 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12400 if (ctxt->myDoc != NULL) {
12401 if (ctxt->wellFormed) {
12402 ret = ctxt->myDoc->extSubset;
12403 ctxt->myDoc->extSubset = NULL;
12404 if (ret != NULL) {
12405 xmlNodePtr tmp;
12407 ret->doc = NULL;
12408 tmp = ret->children;
12409 while (tmp != NULL) {
12410 tmp->doc = NULL;
12411 tmp = tmp->next;
12414 } else {
12415 ret = NULL;
12417 xmlFreeDoc(ctxt->myDoc);
12418 ctxt->myDoc = NULL;
12420 xmlFreeParserCtxt(ctxt);
12422 return(ret);
12426 * xmlSAXParseDTD:
12427 * @sax: the SAX handler block
12428 * @ExternalID: a NAME* containing the External ID of the DTD
12429 * @SystemID: a NAME* containing the URL to the DTD
12431 * DEPRECATED: Don't use.
12433 * Load and parse an external subset.
12435 * Returns the resulting xmlDtdPtr or NULL in case of error.
12438 xmlDtdPtr
12439 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12440 const xmlChar *SystemID) {
12441 xmlDtdPtr ret = NULL;
12442 xmlParserCtxtPtr ctxt;
12443 xmlParserInputPtr input = NULL;
12444 xmlCharEncoding enc;
12445 xmlChar* systemIdCanonic;
12447 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12449 ctxt = xmlNewSAXParserCtxt(sax, NULL);
12450 if (ctxt == NULL) {
12451 return(NULL);
12454 /* We are loading a DTD */
12455 ctxt->options |= XML_PARSE_DTDLOAD;
12458 * Canonicalise the system ID
12460 systemIdCanonic = xmlCanonicPath(SystemID);
12461 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12462 xmlFreeParserCtxt(ctxt);
12463 return(NULL);
12467 * Ask the Entity resolver to load the damn thing
12470 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12471 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12472 systemIdCanonic);
12473 if (input == NULL) {
12474 xmlFreeParserCtxt(ctxt);
12475 if (systemIdCanonic != NULL)
12476 xmlFree(systemIdCanonic);
12477 return(NULL);
12481 * plug some encoding conversion routines here.
12483 if (xmlPushInput(ctxt, input) < 0) {
12484 xmlFreeParserCtxt(ctxt);
12485 if (systemIdCanonic != NULL)
12486 xmlFree(systemIdCanonic);
12487 return(NULL);
12489 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12490 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12491 xmlSwitchEncoding(ctxt, enc);
12494 if (input->filename == NULL)
12495 input->filename = (char *) systemIdCanonic;
12496 else
12497 xmlFree(systemIdCanonic);
12498 input->line = 1;
12499 input->col = 1;
12500 input->base = ctxt->input->cur;
12501 input->cur = ctxt->input->cur;
12502 input->free = NULL;
12505 * let's parse that entity knowing it's an external subset.
12507 ctxt->inSubset = 2;
12508 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12509 if (ctxt->myDoc == NULL) {
12510 xmlErrMemory(ctxt, "New Doc failed");
12511 xmlFreeParserCtxt(ctxt);
12512 return(NULL);
12514 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12515 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12516 ExternalID, SystemID);
12517 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12519 if (ctxt->myDoc != NULL) {
12520 if (ctxt->wellFormed) {
12521 ret = ctxt->myDoc->extSubset;
12522 ctxt->myDoc->extSubset = NULL;
12523 if (ret != NULL) {
12524 xmlNodePtr tmp;
12526 ret->doc = NULL;
12527 tmp = ret->children;
12528 while (tmp != NULL) {
12529 tmp->doc = NULL;
12530 tmp = tmp->next;
12533 } else {
12534 ret = NULL;
12536 xmlFreeDoc(ctxt->myDoc);
12537 ctxt->myDoc = NULL;
12539 xmlFreeParserCtxt(ctxt);
12541 return(ret);
12546 * xmlParseDTD:
12547 * @ExternalID: a NAME* containing the External ID of the DTD
12548 * @SystemID: a NAME* containing the URL to the DTD
12550 * Load and parse an external subset.
12552 * Returns the resulting xmlDtdPtr or NULL in case of error.
12555 xmlDtdPtr
12556 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12557 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12559 #endif /* LIBXML_VALID_ENABLED */
12561 /************************************************************************
12563 * Front ends when parsing an Entity *
12565 ************************************************************************/
12568 * xmlParseCtxtExternalEntity:
12569 * @ctx: the existing parsing context
12570 * @URL: the URL for the entity to load
12571 * @ID: the System ID for the entity to load
12572 * @lst: the return value for the set of parsed nodes
12574 * Parse an external general entity within an existing parsing context
12575 * An external general parsed entity is well-formed if it matches the
12576 * production labeled extParsedEnt.
12578 * [78] extParsedEnt ::= TextDecl? content
12580 * Returns 0 if the entity is well formed, -1 in case of args problem and
12581 * the parser error code otherwise
12585 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12586 const xmlChar *ID, xmlNodePtr *lst) {
12587 void *userData;
12589 if (ctx == NULL) return(-1);
12591 * If the user provided their own SAX callbacks, then reuse the
12592 * userData callback field, otherwise the expected setup in a
12593 * DOM builder is to have userData == ctxt
12595 if (ctx->userData == ctx)
12596 userData = NULL;
12597 else
12598 userData = ctx->userData;
12599 return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12600 userData, ctx->depth + 1,
12601 URL, ID, lst);
12605 * xmlParseExternalEntityPrivate:
12606 * @doc: the document the chunk pertains to
12607 * @oldctxt: the previous parser context if available
12608 * @sax: the SAX handler block (possibly NULL)
12609 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12610 * @depth: Used for loop detection, use 0
12611 * @URL: the URL for the entity to load
12612 * @ID: the System ID for the entity to load
12613 * @list: the return value for the set of parsed nodes
12615 * Private version of xmlParseExternalEntity()
12617 * Returns 0 if the entity is well formed, -1 in case of args problem and
12618 * the parser error code otherwise
12621 static xmlParserErrors
12622 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12623 xmlSAXHandlerPtr sax,
12624 void *user_data, int depth, const xmlChar *URL,
12625 const xmlChar *ID, xmlNodePtr *list) {
12626 xmlParserCtxtPtr ctxt;
12627 xmlDocPtr newDoc;
12628 xmlNodePtr newRoot;
12629 xmlParserErrors ret = XML_ERR_OK;
12630 xmlChar start[4];
12631 xmlCharEncoding enc;
12633 if (((depth > 40) &&
12634 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12635 (depth > 100)) {
12636 xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12637 "Maximum entity nesting depth exceeded");
12638 return(XML_ERR_ENTITY_LOOP);
12641 if (list != NULL)
12642 *list = NULL;
12643 if ((URL == NULL) && (ID == NULL))
12644 return(XML_ERR_INTERNAL_ERROR);
12645 if (doc == NULL)
12646 return(XML_ERR_INTERNAL_ERROR);
12648 ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12649 oldctxt);
12650 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12651 if (oldctxt != NULL) {
12652 ctxt->nbErrors = oldctxt->nbErrors;
12653 ctxt->nbWarnings = oldctxt->nbWarnings;
12655 xmlDetectSAX2(ctxt);
12657 newDoc = xmlNewDoc(BAD_CAST "1.0");
12658 if (newDoc == NULL) {
12659 xmlFreeParserCtxt(ctxt);
12660 return(XML_ERR_INTERNAL_ERROR);
12662 newDoc->properties = XML_DOC_INTERNAL;
12663 if (doc) {
12664 newDoc->intSubset = doc->intSubset;
12665 newDoc->extSubset = doc->extSubset;
12666 if (doc->dict) {
12667 newDoc->dict = doc->dict;
12668 xmlDictReference(newDoc->dict);
12670 if (doc->URL != NULL) {
12671 newDoc->URL = xmlStrdup(doc->URL);
12674 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12675 if (newRoot == NULL) {
12676 if (sax != NULL)
12677 xmlFreeParserCtxt(ctxt);
12678 newDoc->intSubset = NULL;
12679 newDoc->extSubset = NULL;
12680 xmlFreeDoc(newDoc);
12681 return(XML_ERR_INTERNAL_ERROR);
12683 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12684 nodePush(ctxt, newDoc->children);
12685 if (doc == NULL) {
12686 ctxt->myDoc = newDoc;
12687 } else {
12688 ctxt->myDoc = doc;
12689 newRoot->doc = doc;
12693 * Get the 4 first bytes and decode the charset
12694 * if enc != XML_CHAR_ENCODING_NONE
12695 * plug some encoding conversion routines.
12697 GROW;
12698 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12699 start[0] = RAW;
12700 start[1] = NXT(1);
12701 start[2] = NXT(2);
12702 start[3] = NXT(3);
12703 enc = xmlDetectCharEncoding(start, 4);
12704 if (enc != XML_CHAR_ENCODING_NONE) {
12705 xmlSwitchEncoding(ctxt, enc);
12710 * Parse a possible text declaration first
12712 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12713 xmlParseTextDecl(ctxt);
12715 * An XML-1.0 document can't reference an entity not XML-1.0
12717 if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12718 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12719 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12720 "Version mismatch between document and entity\n");
12724 ctxt->instate = XML_PARSER_CONTENT;
12725 ctxt->depth = depth;
12726 if (oldctxt != NULL) {
12727 ctxt->_private = oldctxt->_private;
12728 ctxt->loadsubset = oldctxt->loadsubset;
12729 ctxt->validate = oldctxt->validate;
12730 ctxt->valid = oldctxt->valid;
12731 ctxt->replaceEntities = oldctxt->replaceEntities;
12732 if (oldctxt->validate) {
12733 ctxt->vctxt.error = oldctxt->vctxt.error;
12734 ctxt->vctxt.warning = oldctxt->vctxt.warning;
12735 ctxt->vctxt.userData = oldctxt->vctxt.userData;
12736 ctxt->vctxt.flags = oldctxt->vctxt.flags;
12738 ctxt->external = oldctxt->external;
12739 if (ctxt->dict) xmlDictFree(ctxt->dict);
12740 ctxt->dict = oldctxt->dict;
12741 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12742 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12743 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12744 ctxt->dictNames = oldctxt->dictNames;
12745 ctxt->attsDefault = oldctxt->attsDefault;
12746 ctxt->attsSpecial = oldctxt->attsSpecial;
12747 ctxt->linenumbers = oldctxt->linenumbers;
12748 ctxt->record_info = oldctxt->record_info;
12749 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12750 ctxt->node_seq.length = oldctxt->node_seq.length;
12751 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12752 } else {
12754 * Doing validity checking on chunk without context
12755 * doesn't make sense
12757 ctxt->_private = NULL;
12758 ctxt->validate = 0;
12759 ctxt->external = 2;
12760 ctxt->loadsubset = 0;
12763 xmlParseContent(ctxt);
12765 if ((RAW == '<') && (NXT(1) == '/')) {
12766 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12767 } else if (RAW != 0) {
12768 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12770 if (ctxt->node != newDoc->children) {
12771 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12774 if (!ctxt->wellFormed) {
12775 ret = (xmlParserErrors)ctxt->errNo;
12776 if (oldctxt != NULL) {
12777 oldctxt->errNo = ctxt->errNo;
12778 oldctxt->wellFormed = 0;
12779 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12781 } else {
12782 if (list != NULL) {
12783 xmlNodePtr cur;
12786 * Return the newly created nodeset after unlinking it from
12787 * they pseudo parent.
12789 cur = newDoc->children->children;
12790 *list = cur;
12791 while (cur != NULL) {
12792 cur->parent = NULL;
12793 cur = cur->next;
12795 newDoc->children->children = NULL;
12797 ret = XML_ERR_OK;
12801 * Also record the size of the entity parsed
12803 if (ctxt->input != NULL && oldctxt != NULL) {
12804 unsigned long consumed = ctxt->input->consumed;
12806 xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
12808 xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
12809 xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
12811 xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
12812 xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
12815 if (oldctxt != NULL) {
12816 ctxt->dict = NULL;
12817 ctxt->attsDefault = NULL;
12818 ctxt->attsSpecial = NULL;
12819 oldctxt->nbErrors = ctxt->nbErrors;
12820 oldctxt->nbWarnings = ctxt->nbWarnings;
12821 oldctxt->validate = ctxt->validate;
12822 oldctxt->valid = ctxt->valid;
12823 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12824 oldctxt->node_seq.length = ctxt->node_seq.length;
12825 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12827 ctxt->node_seq.maximum = 0;
12828 ctxt->node_seq.length = 0;
12829 ctxt->node_seq.buffer = NULL;
12830 xmlFreeParserCtxt(ctxt);
12831 newDoc->intSubset = NULL;
12832 newDoc->extSubset = NULL;
12833 xmlFreeDoc(newDoc);
12835 return(ret);
12838 #ifdef LIBXML_SAX1_ENABLED
12840 * xmlParseExternalEntity:
12841 * @doc: the document the chunk pertains to
12842 * @sax: the SAX handler block (possibly NULL)
12843 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12844 * @depth: Used for loop detection, use 0
12845 * @URL: the URL for the entity to load
12846 * @ID: the System ID for the entity to load
12847 * @lst: the return value for the set of parsed nodes
12849 * Parse an external general entity
12850 * An external general parsed entity is well-formed if it matches the
12851 * production labeled extParsedEnt.
12853 * [78] extParsedEnt ::= TextDecl? content
12855 * Returns 0 if the entity is well formed, -1 in case of args problem and
12856 * the parser error code otherwise
12860 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12861 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12862 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12863 ID, lst));
12867 * xmlParseBalancedChunkMemory:
12868 * @doc: the document the chunk pertains to (must not be NULL)
12869 * @sax: the SAX handler block (possibly NULL)
12870 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12871 * @depth: Used for loop detection, use 0
12872 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12873 * @lst: the return value for the set of parsed nodes
12875 * Parse a well-balanced chunk of an XML document
12876 * called by the parser
12877 * The allowed sequence for the Well Balanced Chunk is the one defined by
12878 * the content production in the XML grammar:
12880 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12882 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12883 * the parser error code otherwise
12887 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12888 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12889 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12890 depth, string, lst, 0 );
12892 #endif /* LIBXML_SAX1_ENABLED */
12895 * xmlParseBalancedChunkMemoryInternal:
12896 * @oldctxt: the existing parsing context
12897 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12898 * @user_data: the user data field for the parser context
12899 * @lst: the return value for the set of parsed nodes
12902 * Parse a well-balanced chunk of an XML document
12903 * called by the parser
12904 * The allowed sequence for the Well Balanced Chunk is the one defined by
12905 * the content production in the XML grammar:
12907 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12909 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12910 * error code otherwise
12912 * In case recover is set to 1, the nodelist will not be empty even if
12913 * the parsed chunk is not well balanced.
12915 static xmlParserErrors
12916 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12917 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12918 xmlParserCtxtPtr ctxt;
12919 xmlDocPtr newDoc = NULL;
12920 xmlNodePtr newRoot;
12921 xmlSAXHandlerPtr oldsax = NULL;
12922 xmlNodePtr content = NULL;
12923 xmlNodePtr last = NULL;
12924 int size;
12925 xmlParserErrors ret = XML_ERR_OK;
12926 #ifdef SAX2
12927 int i;
12928 #endif
12930 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12931 (oldctxt->depth > 100)) {
12932 xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12933 "Maximum entity nesting depth exceeded");
12934 return(XML_ERR_ENTITY_LOOP);
12938 if (lst != NULL)
12939 *lst = NULL;
12940 if (string == NULL)
12941 return(XML_ERR_INTERNAL_ERROR);
12943 size = xmlStrlen(string);
12945 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12946 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12947 ctxt->nbErrors = oldctxt->nbErrors;
12948 ctxt->nbWarnings = oldctxt->nbWarnings;
12949 if (user_data != NULL)
12950 ctxt->userData = user_data;
12951 else
12952 ctxt->userData = ctxt;
12953 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12954 ctxt->dict = oldctxt->dict;
12955 ctxt->input_id = oldctxt->input_id;
12956 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12957 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12958 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12960 #ifdef SAX2
12961 /* propagate namespaces down the entity */
12962 for (i = 0;i < oldctxt->nsNr;i += 2) {
12963 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12965 #endif
12967 oldsax = ctxt->sax;
12968 ctxt->sax = oldctxt->sax;
12969 xmlDetectSAX2(ctxt);
12970 ctxt->replaceEntities = oldctxt->replaceEntities;
12971 ctxt->options = oldctxt->options;
12973 ctxt->_private = oldctxt->_private;
12974 if (oldctxt->myDoc == NULL) {
12975 newDoc = xmlNewDoc(BAD_CAST "1.0");
12976 if (newDoc == NULL) {
12977 ctxt->sax = oldsax;
12978 ctxt->dict = NULL;
12979 xmlFreeParserCtxt(ctxt);
12980 return(XML_ERR_INTERNAL_ERROR);
12982 newDoc->properties = XML_DOC_INTERNAL;
12983 newDoc->dict = ctxt->dict;
12984 xmlDictReference(newDoc->dict);
12985 ctxt->myDoc = newDoc;
12986 } else {
12987 ctxt->myDoc = oldctxt->myDoc;
12988 content = ctxt->myDoc->children;
12989 last = ctxt->myDoc->last;
12991 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12992 if (newRoot == NULL) {
12993 ctxt->sax = oldsax;
12994 ctxt->dict = NULL;
12995 xmlFreeParserCtxt(ctxt);
12996 if (newDoc != NULL) {
12997 xmlFreeDoc(newDoc);
12999 return(XML_ERR_INTERNAL_ERROR);
13001 ctxt->myDoc->children = NULL;
13002 ctxt->myDoc->last = NULL;
13003 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13004 nodePush(ctxt, ctxt->myDoc->children);
13005 ctxt->instate = XML_PARSER_CONTENT;
13006 ctxt->depth = oldctxt->depth;
13008 ctxt->validate = 0;
13009 ctxt->loadsubset = oldctxt->loadsubset;
13010 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13012 * ID/IDREF registration will be done in xmlValidateElement below
13014 ctxt->loadsubset |= XML_SKIP_IDS;
13016 ctxt->dictNames = oldctxt->dictNames;
13017 ctxt->attsDefault = oldctxt->attsDefault;
13018 ctxt->attsSpecial = oldctxt->attsSpecial;
13020 xmlParseContent(ctxt);
13021 if ((RAW == '<') && (NXT(1) == '/')) {
13022 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13023 } else if (RAW != 0) {
13024 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13026 if (ctxt->node != ctxt->myDoc->children) {
13027 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13030 if (!ctxt->wellFormed) {
13031 ret = (xmlParserErrors)ctxt->errNo;
13032 oldctxt->errNo = ctxt->errNo;
13033 oldctxt->wellFormed = 0;
13034 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13035 } else {
13036 ret = XML_ERR_OK;
13039 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13040 xmlNodePtr cur;
13043 * Return the newly created nodeset after unlinking it from
13044 * they pseudo parent.
13046 cur = ctxt->myDoc->children->children;
13047 *lst = cur;
13048 while (cur != NULL) {
13049 #ifdef LIBXML_VALID_ENABLED
13050 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13051 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13052 (cur->type == XML_ELEMENT_NODE)) {
13053 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13054 oldctxt->myDoc, cur);
13056 #endif /* LIBXML_VALID_ENABLED */
13057 cur->parent = NULL;
13058 cur = cur->next;
13060 ctxt->myDoc->children->children = NULL;
13062 if (ctxt->myDoc != NULL) {
13063 xmlFreeNode(ctxt->myDoc->children);
13064 ctxt->myDoc->children = content;
13065 ctxt->myDoc->last = last;
13069 * Also record the size of the entity parsed
13071 if (ctxt->input != NULL && oldctxt != NULL) {
13072 unsigned long consumed = ctxt->input->consumed;
13074 xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13076 xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13077 xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13080 oldctxt->nbErrors = ctxt->nbErrors;
13081 oldctxt->nbWarnings = ctxt->nbWarnings;
13082 ctxt->sax = oldsax;
13083 ctxt->dict = NULL;
13084 ctxt->attsDefault = NULL;
13085 ctxt->attsSpecial = NULL;
13086 xmlFreeParserCtxt(ctxt);
13087 if (newDoc != NULL) {
13088 xmlFreeDoc(newDoc);
13091 return(ret);
13095 * xmlParseInNodeContext:
13096 * @node: the context node
13097 * @data: the input string
13098 * @datalen: the input string length in bytes
13099 * @options: a combination of xmlParserOption
13100 * @lst: the return value for the set of parsed nodes
13102 * Parse a well-balanced chunk of an XML document
13103 * within the context (DTD, namespaces, etc ...) of the given node.
13105 * The allowed sequence for the data is a Well Balanced Chunk defined by
13106 * the content production in the XML grammar:
13108 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13110 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13111 * error code otherwise
13113 xmlParserErrors
13114 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13115 int options, xmlNodePtr *lst) {
13116 #ifdef SAX2
13117 xmlParserCtxtPtr ctxt;
13118 xmlDocPtr doc = NULL;
13119 xmlNodePtr fake, cur;
13120 int nsnr = 0;
13122 xmlParserErrors ret = XML_ERR_OK;
13125 * check all input parameters, grab the document
13127 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13128 return(XML_ERR_INTERNAL_ERROR);
13129 switch (node->type) {
13130 case XML_ELEMENT_NODE:
13131 case XML_ATTRIBUTE_NODE:
13132 case XML_TEXT_NODE:
13133 case XML_CDATA_SECTION_NODE:
13134 case XML_ENTITY_REF_NODE:
13135 case XML_PI_NODE:
13136 case XML_COMMENT_NODE:
13137 case XML_DOCUMENT_NODE:
13138 case XML_HTML_DOCUMENT_NODE:
13139 break;
13140 default:
13141 return(XML_ERR_INTERNAL_ERROR);
13144 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13145 (node->type != XML_DOCUMENT_NODE) &&
13146 (node->type != XML_HTML_DOCUMENT_NODE))
13147 node = node->parent;
13148 if (node == NULL)
13149 return(XML_ERR_INTERNAL_ERROR);
13150 if (node->type == XML_ELEMENT_NODE)
13151 doc = node->doc;
13152 else
13153 doc = (xmlDocPtr) node;
13154 if (doc == NULL)
13155 return(XML_ERR_INTERNAL_ERROR);
13158 * allocate a context and set-up everything not related to the
13159 * node position in the tree
13161 if (doc->type == XML_DOCUMENT_NODE)
13162 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13163 #ifdef LIBXML_HTML_ENABLED
13164 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13165 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13167 * When parsing in context, it makes no sense to add implied
13168 * elements like html/body/etc...
13170 options |= HTML_PARSE_NOIMPLIED;
13172 #endif
13173 else
13174 return(XML_ERR_INTERNAL_ERROR);
13176 if (ctxt == NULL)
13177 return(XML_ERR_NO_MEMORY);
13180 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13181 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13182 * we must wait until the last moment to free the original one.
13184 if (doc->dict != NULL) {
13185 if (ctxt->dict != NULL)
13186 xmlDictFree(ctxt->dict);
13187 ctxt->dict = doc->dict;
13188 } else
13189 options |= XML_PARSE_NODICT;
13191 if (doc->encoding != NULL) {
13192 xmlCharEncodingHandlerPtr hdlr;
13194 if (ctxt->encoding != NULL)
13195 xmlFree((xmlChar *) ctxt->encoding);
13196 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13198 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13199 if (hdlr != NULL) {
13200 xmlSwitchToEncoding(ctxt, hdlr);
13201 } else {
13202 return(XML_ERR_UNSUPPORTED_ENCODING);
13206 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13207 xmlDetectSAX2(ctxt);
13208 ctxt->myDoc = doc;
13209 /* parsing in context, i.e. as within existing content */
13210 ctxt->input_id = 2;
13211 ctxt->instate = XML_PARSER_CONTENT;
13213 fake = xmlNewDocComment(node->doc, NULL);
13214 if (fake == NULL) {
13215 xmlFreeParserCtxt(ctxt);
13216 return(XML_ERR_NO_MEMORY);
13218 xmlAddChild(node, fake);
13220 if (node->type == XML_ELEMENT_NODE) {
13221 nodePush(ctxt, node);
13223 * initialize the SAX2 namespaces stack
13225 cur = node;
13226 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13227 xmlNsPtr ns = cur->nsDef;
13228 const xmlChar *iprefix, *ihref;
13230 while (ns != NULL) {
13231 if (ctxt->dict) {
13232 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13233 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13234 } else {
13235 iprefix = ns->prefix;
13236 ihref = ns->href;
13239 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13240 nsPush(ctxt, iprefix, ihref);
13241 nsnr++;
13243 ns = ns->next;
13245 cur = cur->parent;
13249 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13251 * ID/IDREF registration will be done in xmlValidateElement below
13253 ctxt->loadsubset |= XML_SKIP_IDS;
13256 #ifdef LIBXML_HTML_ENABLED
13257 if (doc->type == XML_HTML_DOCUMENT_NODE)
13258 __htmlParseContent(ctxt);
13259 else
13260 #endif
13261 xmlParseContent(ctxt);
13263 nsPop(ctxt, nsnr);
13264 if ((RAW == '<') && (NXT(1) == '/')) {
13265 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13266 } else if (RAW != 0) {
13267 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13269 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13270 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13271 ctxt->wellFormed = 0;
13274 if (!ctxt->wellFormed) {
13275 if (ctxt->errNo == 0)
13276 ret = XML_ERR_INTERNAL_ERROR;
13277 else
13278 ret = (xmlParserErrors)ctxt->errNo;
13279 } else {
13280 ret = XML_ERR_OK;
13284 * Return the newly created nodeset after unlinking it from
13285 * the pseudo sibling.
13288 cur = fake->next;
13289 fake->next = NULL;
13290 node->last = fake;
13292 if (cur != NULL) {
13293 cur->prev = NULL;
13296 *lst = cur;
13298 while (cur != NULL) {
13299 cur->parent = NULL;
13300 cur = cur->next;
13303 xmlUnlinkNode(fake);
13304 xmlFreeNode(fake);
13307 if (ret != XML_ERR_OK) {
13308 xmlFreeNodeList(*lst);
13309 *lst = NULL;
13312 if (doc->dict != NULL)
13313 ctxt->dict = NULL;
13314 xmlFreeParserCtxt(ctxt);
13316 return(ret);
13317 #else /* !SAX2 */
13318 return(XML_ERR_INTERNAL_ERROR);
13319 #endif
13322 #ifdef LIBXML_SAX1_ENABLED
13324 * xmlParseBalancedChunkMemoryRecover:
13325 * @doc: the document the chunk pertains to (must not be NULL)
13326 * @sax: the SAX handler block (possibly NULL)
13327 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13328 * @depth: Used for loop detection, use 0
13329 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13330 * @lst: the return value for the set of parsed nodes
13331 * @recover: return nodes even if the data is broken (use 0)
13334 * Parse a well-balanced chunk of an XML document
13335 * called by the parser
13336 * The allowed sequence for the Well Balanced Chunk is the one defined by
13337 * the content production in the XML grammar:
13339 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13341 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13342 * the parser error code otherwise
13344 * In case recover is set to 1, the nodelist will not be empty even if
13345 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13346 * some extent.
13349 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13350 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13351 int recover) {
13352 xmlParserCtxtPtr ctxt;
13353 xmlDocPtr newDoc;
13354 xmlSAXHandlerPtr oldsax = NULL;
13355 xmlNodePtr content, newRoot;
13356 int size;
13357 int ret = 0;
13359 if (depth > 40) {
13360 return(XML_ERR_ENTITY_LOOP);
13364 if (lst != NULL)
13365 *lst = NULL;
13366 if (string == NULL)
13367 return(-1);
13369 size = xmlStrlen(string);
13371 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13372 if (ctxt == NULL) return(-1);
13373 ctxt->userData = ctxt;
13374 if (sax != NULL) {
13375 oldsax = ctxt->sax;
13376 ctxt->sax = sax;
13377 if (user_data != NULL)
13378 ctxt->userData = user_data;
13380 newDoc = xmlNewDoc(BAD_CAST "1.0");
13381 if (newDoc == NULL) {
13382 xmlFreeParserCtxt(ctxt);
13383 return(-1);
13385 newDoc->properties = XML_DOC_INTERNAL;
13386 if ((doc != NULL) && (doc->dict != NULL)) {
13387 xmlDictFree(ctxt->dict);
13388 ctxt->dict = doc->dict;
13389 xmlDictReference(ctxt->dict);
13390 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13391 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13392 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13393 ctxt->dictNames = 1;
13394 } else {
13395 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13397 /* doc == NULL is only supported for historic reasons */
13398 if (doc != NULL) {
13399 newDoc->intSubset = doc->intSubset;
13400 newDoc->extSubset = doc->extSubset;
13402 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13403 if (newRoot == NULL) {
13404 if (sax != NULL)
13405 ctxt->sax = oldsax;
13406 xmlFreeParserCtxt(ctxt);
13407 newDoc->intSubset = NULL;
13408 newDoc->extSubset = NULL;
13409 xmlFreeDoc(newDoc);
13410 return(-1);
13412 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13413 nodePush(ctxt, newRoot);
13414 /* doc == NULL is only supported for historic reasons */
13415 if (doc == NULL) {
13416 ctxt->myDoc = newDoc;
13417 } else {
13418 ctxt->myDoc = newDoc;
13419 newDoc->children->doc = doc;
13420 /* Ensure that doc has XML spec namespace */
13421 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13422 newDoc->oldNs = doc->oldNs;
13424 ctxt->instate = XML_PARSER_CONTENT;
13425 ctxt->input_id = 2;
13426 ctxt->depth = depth;
13429 * Doing validity checking on chunk doesn't make sense
13431 ctxt->validate = 0;
13432 ctxt->loadsubset = 0;
13433 xmlDetectSAX2(ctxt);
13435 if ( doc != NULL ){
13436 content = doc->children;
13437 doc->children = NULL;
13438 xmlParseContent(ctxt);
13439 doc->children = content;
13441 else {
13442 xmlParseContent(ctxt);
13444 if ((RAW == '<') && (NXT(1) == '/')) {
13445 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13446 } else if (RAW != 0) {
13447 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13449 if (ctxt->node != newDoc->children) {
13450 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13453 if (!ctxt->wellFormed) {
13454 if (ctxt->errNo == 0)
13455 ret = 1;
13456 else
13457 ret = ctxt->errNo;
13458 } else {
13459 ret = 0;
13462 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13463 xmlNodePtr cur;
13466 * Return the newly created nodeset after unlinking it from
13467 * they pseudo parent.
13469 cur = newDoc->children->children;
13470 *lst = cur;
13471 while (cur != NULL) {
13472 xmlSetTreeDoc(cur, doc);
13473 cur->parent = NULL;
13474 cur = cur->next;
13476 newDoc->children->children = NULL;
13479 if (sax != NULL)
13480 ctxt->sax = oldsax;
13481 xmlFreeParserCtxt(ctxt);
13482 newDoc->intSubset = NULL;
13483 newDoc->extSubset = NULL;
13484 /* This leaks the namespace list if doc == NULL */
13485 newDoc->oldNs = NULL;
13486 xmlFreeDoc(newDoc);
13488 return(ret);
13492 * xmlSAXParseEntity:
13493 * @sax: the SAX handler block
13494 * @filename: the filename
13496 * DEPRECATED: Don't use.
13498 * parse an XML external entity out of context and build a tree.
13499 * It use the given SAX function block to handle the parsing callback.
13500 * If sax is NULL, fallback to the default DOM tree building routines.
13502 * [78] extParsedEnt ::= TextDecl? content
13504 * This correspond to a "Well Balanced" chunk
13506 * Returns the resulting document tree
13509 xmlDocPtr
13510 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13511 xmlDocPtr ret;
13512 xmlParserCtxtPtr ctxt;
13514 ctxt = xmlCreateFileParserCtxt(filename);
13515 if (ctxt == NULL) {
13516 return(NULL);
13518 if (sax != NULL) {
13519 if (ctxt->sax != NULL)
13520 xmlFree(ctxt->sax);
13521 ctxt->sax = sax;
13522 ctxt->userData = NULL;
13525 xmlParseExtParsedEnt(ctxt);
13527 if (ctxt->wellFormed)
13528 ret = ctxt->myDoc;
13529 else {
13530 ret = NULL;
13531 xmlFreeDoc(ctxt->myDoc);
13532 ctxt->myDoc = NULL;
13534 if (sax != NULL)
13535 ctxt->sax = NULL;
13536 xmlFreeParserCtxt(ctxt);
13538 return(ret);
13542 * xmlParseEntity:
13543 * @filename: the filename
13545 * parse an XML external entity out of context and build a tree.
13547 * [78] extParsedEnt ::= TextDecl? content
13549 * This correspond to a "Well Balanced" chunk
13551 * Returns the resulting document tree
13554 xmlDocPtr
13555 xmlParseEntity(const char *filename) {
13556 return(xmlSAXParseEntity(NULL, filename));
13558 #endif /* LIBXML_SAX1_ENABLED */
13561 * xmlCreateEntityParserCtxtInternal:
13562 * @URL: the entity URL
13563 * @ID: the entity PUBLIC ID
13564 * @base: a possible base for the target URI
13565 * @pctx: parser context used to set options on new context
13567 * Create a parser context for an external entity
13568 * Automatic support for ZLIB/Compress compressed document is provided
13569 * by default if found at compile-time.
13571 * Returns the new parser context or NULL
13573 static xmlParserCtxtPtr
13574 xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13575 const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13576 xmlParserCtxtPtr pctx) {
13577 xmlParserCtxtPtr ctxt;
13578 xmlParserInputPtr inputStream;
13579 char *directory = NULL;
13580 xmlChar *uri;
13582 ctxt = xmlNewSAXParserCtxt(sax, userData);
13583 if (ctxt == NULL) {
13584 return(NULL);
13587 if (pctx != NULL) {
13588 ctxt->options = pctx->options;
13589 ctxt->_private = pctx->_private;
13590 ctxt->input_id = pctx->input_id;
13593 /* Don't read from stdin. */
13594 if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13595 URL = BAD_CAST "./-";
13597 uri = xmlBuildURI(URL, base);
13599 if (uri == NULL) {
13600 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13601 if (inputStream == NULL) {
13602 xmlFreeParserCtxt(ctxt);
13603 return(NULL);
13606 inputPush(ctxt, inputStream);
13608 if ((ctxt->directory == NULL) && (directory == NULL))
13609 directory = xmlParserGetDirectory((char *)URL);
13610 if ((ctxt->directory == NULL) && (directory != NULL))
13611 ctxt->directory = directory;
13612 } else {
13613 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13614 if (inputStream == NULL) {
13615 xmlFree(uri);
13616 xmlFreeParserCtxt(ctxt);
13617 return(NULL);
13620 inputPush(ctxt, inputStream);
13622 if ((ctxt->directory == NULL) && (directory == NULL))
13623 directory = xmlParserGetDirectory((char *)uri);
13624 if ((ctxt->directory == NULL) && (directory != NULL))
13625 ctxt->directory = directory;
13626 xmlFree(uri);
13628 return(ctxt);
13632 * xmlCreateEntityParserCtxt:
13633 * @URL: the entity URL
13634 * @ID: the entity PUBLIC ID
13635 * @base: a possible base for the target URI
13637 * Create a parser context for an external entity
13638 * Automatic support for ZLIB/Compress compressed document is provided
13639 * by default if found at compile-time.
13641 * Returns the new parser context or NULL
13643 xmlParserCtxtPtr
13644 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13645 const xmlChar *base) {
13646 return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13650 /************************************************************************
13652 * Front ends when parsing from a file *
13654 ************************************************************************/
13657 * xmlCreateURLParserCtxt:
13658 * @filename: the filename or URL
13659 * @options: a combination of xmlParserOption
13661 * Create a parser context for a file or URL content.
13662 * Automatic support for ZLIB/Compress compressed document is provided
13663 * by default if found at compile-time and for file accesses
13665 * Returns the new parser context or NULL
13667 xmlParserCtxtPtr
13668 xmlCreateURLParserCtxt(const char *filename, int options)
13670 xmlParserCtxtPtr ctxt;
13671 xmlParserInputPtr inputStream;
13672 char *directory = NULL;
13674 ctxt = xmlNewParserCtxt();
13675 if (ctxt == NULL) {
13676 xmlErrMemory(NULL, "cannot allocate parser context");
13677 return(NULL);
13680 if (options)
13681 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13682 ctxt->linenumbers = 1;
13684 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13685 if (inputStream == NULL) {
13686 xmlFreeParserCtxt(ctxt);
13687 return(NULL);
13690 inputPush(ctxt, inputStream);
13691 if ((ctxt->directory == NULL) && (directory == NULL))
13692 directory = xmlParserGetDirectory(filename);
13693 if ((ctxt->directory == NULL) && (directory != NULL))
13694 ctxt->directory = directory;
13696 return(ctxt);
13700 * xmlCreateFileParserCtxt:
13701 * @filename: the filename
13703 * Create a parser context for a file content.
13704 * Automatic support for ZLIB/Compress compressed document is provided
13705 * by default if found at compile-time.
13707 * Returns the new parser context or NULL
13709 xmlParserCtxtPtr
13710 xmlCreateFileParserCtxt(const char *filename)
13712 return(xmlCreateURLParserCtxt(filename, 0));
13715 #ifdef LIBXML_SAX1_ENABLED
13717 * xmlSAXParseFileWithData:
13718 * @sax: the SAX handler block
13719 * @filename: the filename
13720 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13721 * documents
13722 * @data: the userdata
13724 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13726 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13727 * compressed document is provided by default if found at compile-time.
13728 * It use the given SAX function block to handle the parsing callback.
13729 * If sax is NULL, fallback to the default DOM tree building routines.
13731 * User data (void *) is stored within the parser context in the
13732 * context's _private member, so it is available nearly everywhere in libxml
13734 * Returns the resulting document tree
13737 xmlDocPtr
13738 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13739 int recovery, void *data) {
13740 xmlDocPtr ret;
13741 xmlParserCtxtPtr ctxt;
13743 xmlInitParser();
13745 ctxt = xmlCreateFileParserCtxt(filename);
13746 if (ctxt == NULL) {
13747 return(NULL);
13749 if (sax != NULL) {
13750 if (ctxt->sax != NULL)
13751 xmlFree(ctxt->sax);
13752 ctxt->sax = sax;
13754 xmlDetectSAX2(ctxt);
13755 if (data!=NULL) {
13756 ctxt->_private = data;
13759 if (ctxt->directory == NULL)
13760 ctxt->directory = xmlParserGetDirectory(filename);
13762 ctxt->recovery = recovery;
13764 xmlParseDocument(ctxt);
13766 if ((ctxt->wellFormed) || recovery) {
13767 ret = ctxt->myDoc;
13768 if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13769 if (ctxt->input->buf->compressed > 0)
13770 ret->compression = 9;
13771 else
13772 ret->compression = ctxt->input->buf->compressed;
13775 else {
13776 ret = NULL;
13777 xmlFreeDoc(ctxt->myDoc);
13778 ctxt->myDoc = NULL;
13780 if (sax != NULL)
13781 ctxt->sax = NULL;
13782 xmlFreeParserCtxt(ctxt);
13784 return(ret);
13788 * xmlSAXParseFile:
13789 * @sax: the SAX handler block
13790 * @filename: the filename
13791 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13792 * documents
13794 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13796 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13797 * compressed document is provided by default if found at compile-time.
13798 * It use the given SAX function block to handle the parsing callback.
13799 * If sax is NULL, fallback to the default DOM tree building routines.
13801 * Returns the resulting document tree
13804 xmlDocPtr
13805 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13806 int recovery) {
13807 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13811 * xmlRecoverDoc:
13812 * @cur: a pointer to an array of xmlChar
13814 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
13816 * parse an XML in-memory document and build a tree.
13817 * In the case the document is not Well Formed, a attempt to build a
13818 * tree is tried anyway
13820 * Returns the resulting document tree or NULL in case of failure
13823 xmlDocPtr
13824 xmlRecoverDoc(const xmlChar *cur) {
13825 return(xmlSAXParseDoc(NULL, cur, 1));
13829 * xmlParseFile:
13830 * @filename: the filename
13832 * DEPRECATED: Use xmlReadFile.
13834 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13835 * compressed document is provided by default if found at compile-time.
13837 * Returns the resulting document tree if the file was wellformed,
13838 * NULL otherwise.
13841 xmlDocPtr
13842 xmlParseFile(const char *filename) {
13843 return(xmlSAXParseFile(NULL, filename, 0));
13847 * xmlRecoverFile:
13848 * @filename: the filename
13850 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
13852 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13853 * compressed document is provided by default if found at compile-time.
13854 * In the case the document is not Well Formed, it attempts to build
13855 * a tree anyway
13857 * Returns the resulting document tree or NULL in case of failure
13860 xmlDocPtr
13861 xmlRecoverFile(const char *filename) {
13862 return(xmlSAXParseFile(NULL, filename, 1));
13867 * xmlSetupParserForBuffer:
13868 * @ctxt: an XML parser context
13869 * @buffer: a xmlChar * buffer
13870 * @filename: a file name
13872 * DEPRECATED: Don't use.
13874 * Setup the parser context to parse a new buffer; Clears any prior
13875 * contents from the parser context. The buffer parameter must not be
13876 * NULL, but the filename parameter can be
13878 void
13879 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13880 const char* filename)
13882 xmlParserInputPtr input;
13884 if ((ctxt == NULL) || (buffer == NULL))
13885 return;
13887 input = xmlNewInputStream(ctxt);
13888 if (input == NULL) {
13889 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13890 xmlClearParserCtxt(ctxt);
13891 return;
13894 xmlClearParserCtxt(ctxt);
13895 if (filename != NULL)
13896 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13897 input->base = buffer;
13898 input->cur = buffer;
13899 input->end = &buffer[xmlStrlen(buffer)];
13900 inputPush(ctxt, input);
13904 * xmlSAXUserParseFile:
13905 * @sax: a SAX handler
13906 * @user_data: The user data returned on SAX callbacks
13907 * @filename: a file name
13909 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13911 * parse an XML file and call the given SAX handler routines.
13912 * Automatic support for ZLIB/Compress compressed document is provided
13914 * Returns 0 in case of success or a error number otherwise
13917 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13918 const char *filename) {
13919 int ret = 0;
13920 xmlParserCtxtPtr ctxt;
13922 ctxt = xmlCreateFileParserCtxt(filename);
13923 if (ctxt == NULL) return -1;
13924 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13925 xmlFree(ctxt->sax);
13926 ctxt->sax = sax;
13927 xmlDetectSAX2(ctxt);
13929 if (user_data != NULL)
13930 ctxt->userData = user_data;
13932 xmlParseDocument(ctxt);
13934 if (ctxt->wellFormed)
13935 ret = 0;
13936 else {
13937 if (ctxt->errNo != 0)
13938 ret = ctxt->errNo;
13939 else
13940 ret = -1;
13942 if (sax != NULL)
13943 ctxt->sax = NULL;
13944 if (ctxt->myDoc != NULL) {
13945 xmlFreeDoc(ctxt->myDoc);
13946 ctxt->myDoc = NULL;
13948 xmlFreeParserCtxt(ctxt);
13950 return ret;
13952 #endif /* LIBXML_SAX1_ENABLED */
13954 /************************************************************************
13956 * Front ends when parsing from memory *
13958 ************************************************************************/
13961 * xmlCreateMemoryParserCtxt:
13962 * @buffer: a pointer to a char array
13963 * @size: the size of the array
13965 * Create a parser context for an XML in-memory document.
13967 * Returns the new parser context or NULL
13969 xmlParserCtxtPtr
13970 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13971 xmlParserCtxtPtr ctxt;
13972 xmlParserInputPtr input;
13973 xmlParserInputBufferPtr buf;
13975 if (buffer == NULL)
13976 return(NULL);
13977 if (size <= 0)
13978 return(NULL);
13980 ctxt = xmlNewParserCtxt();
13981 if (ctxt == NULL)
13982 return(NULL);
13984 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13985 if (buf == NULL) {
13986 xmlFreeParserCtxt(ctxt);
13987 return(NULL);
13990 input = xmlNewInputStream(ctxt);
13991 if (input == NULL) {
13992 xmlFreeParserInputBuffer(buf);
13993 xmlFreeParserCtxt(ctxt);
13994 return(NULL);
13997 input->filename = NULL;
13998 input->buf = buf;
13999 xmlBufResetInput(input->buf->buffer, input);
14001 inputPush(ctxt, input);
14002 return(ctxt);
14005 #ifdef LIBXML_SAX1_ENABLED
14007 * xmlSAXParseMemoryWithData:
14008 * @sax: the SAX handler block
14009 * @buffer: an pointer to a char array
14010 * @size: the size of the array
14011 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14012 * documents
14013 * @data: the userdata
14015 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14017 * parse an XML in-memory block and use the given SAX function block
14018 * to handle the parsing callback. If sax is NULL, fallback to the default
14019 * DOM tree building routines.
14021 * User data (void *) is stored within the parser context in the
14022 * context's _private member, so it is available nearly everywhere in libxml
14024 * Returns the resulting document tree
14027 xmlDocPtr
14028 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14029 int size, int recovery, void *data) {
14030 xmlDocPtr ret;
14031 xmlParserCtxtPtr ctxt;
14033 xmlInitParser();
14035 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14036 if (ctxt == NULL) return(NULL);
14037 if (sax != NULL) {
14038 if (ctxt->sax != NULL)
14039 xmlFree(ctxt->sax);
14040 ctxt->sax = sax;
14042 xmlDetectSAX2(ctxt);
14043 if (data!=NULL) {
14044 ctxt->_private=data;
14047 ctxt->recovery = recovery;
14049 xmlParseDocument(ctxt);
14051 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14052 else {
14053 ret = NULL;
14054 xmlFreeDoc(ctxt->myDoc);
14055 ctxt->myDoc = NULL;
14057 if (sax != NULL)
14058 ctxt->sax = NULL;
14059 xmlFreeParserCtxt(ctxt);
14061 return(ret);
14065 * xmlSAXParseMemory:
14066 * @sax: the SAX handler block
14067 * @buffer: an pointer to a char array
14068 * @size: the size of the array
14069 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14070 * documents
14072 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14074 * parse an XML in-memory block and use the given SAX function block
14075 * to handle the parsing callback. If sax is NULL, fallback to the default
14076 * DOM tree building routines.
14078 * Returns the resulting document tree
14080 xmlDocPtr
14081 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14082 int size, int recovery) {
14083 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14087 * xmlParseMemory:
14088 * @buffer: an pointer to a char array
14089 * @size: the size of the array
14091 * DEPRECATED: Use xmlReadMemory.
14093 * parse an XML in-memory block and build a tree.
14095 * Returns the resulting document tree
14098 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14099 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14103 * xmlRecoverMemory:
14104 * @buffer: an pointer to a char array
14105 * @size: the size of the array
14107 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14109 * parse an XML in-memory block and build a tree.
14110 * In the case the document is not Well Formed, an attempt to
14111 * build a tree is tried anyway
14113 * Returns the resulting document tree or NULL in case of error
14116 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14117 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14121 * xmlSAXUserParseMemory:
14122 * @sax: a SAX handler
14123 * @user_data: The user data returned on SAX callbacks
14124 * @buffer: an in-memory XML document input
14125 * @size: the length of the XML document in bytes
14127 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14129 * parse an XML in-memory buffer and call the given SAX handler routines.
14131 * Returns 0 in case of success or a error number otherwise
14133 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14134 const char *buffer, int size) {
14135 int ret = 0;
14136 xmlParserCtxtPtr ctxt;
14138 xmlInitParser();
14140 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14141 if (ctxt == NULL) return -1;
14142 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14143 xmlFree(ctxt->sax);
14144 ctxt->sax = sax;
14145 xmlDetectSAX2(ctxt);
14147 if (user_data != NULL)
14148 ctxt->userData = user_data;
14150 xmlParseDocument(ctxt);
14152 if (ctxt->wellFormed)
14153 ret = 0;
14154 else {
14155 if (ctxt->errNo != 0)
14156 ret = ctxt->errNo;
14157 else
14158 ret = -1;
14160 if (sax != NULL)
14161 ctxt->sax = NULL;
14162 if (ctxt->myDoc != NULL) {
14163 xmlFreeDoc(ctxt->myDoc);
14164 ctxt->myDoc = NULL;
14166 xmlFreeParserCtxt(ctxt);
14168 return ret;
14170 #endif /* LIBXML_SAX1_ENABLED */
14173 * xmlCreateDocParserCtxt:
14174 * @cur: a pointer to an array of xmlChar
14176 * Creates a parser context for an XML in-memory document.
14178 * Returns the new parser context or NULL
14180 xmlParserCtxtPtr
14181 xmlCreateDocParserCtxt(const xmlChar *cur) {
14182 int len;
14184 if (cur == NULL)
14185 return(NULL);
14186 len = xmlStrlen(cur);
14187 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14190 #ifdef LIBXML_SAX1_ENABLED
14192 * xmlSAXParseDoc:
14193 * @sax: the SAX handler block
14194 * @cur: a pointer to an array of xmlChar
14195 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14196 * documents
14198 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14200 * parse an XML in-memory document and build a tree.
14201 * It use the given SAX function block to handle the parsing callback.
14202 * If sax is NULL, fallback to the default DOM tree building routines.
14204 * Returns the resulting document tree
14207 xmlDocPtr
14208 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14209 xmlDocPtr ret;
14210 xmlParserCtxtPtr ctxt;
14211 xmlSAXHandlerPtr oldsax = NULL;
14213 if (cur == NULL) return(NULL);
14216 ctxt = xmlCreateDocParserCtxt(cur);
14217 if (ctxt == NULL) return(NULL);
14218 if (sax != NULL) {
14219 oldsax = ctxt->sax;
14220 ctxt->sax = sax;
14221 ctxt->userData = NULL;
14223 xmlDetectSAX2(ctxt);
14225 xmlParseDocument(ctxt);
14226 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14227 else {
14228 ret = NULL;
14229 xmlFreeDoc(ctxt->myDoc);
14230 ctxt->myDoc = NULL;
14232 if (sax != NULL)
14233 ctxt->sax = oldsax;
14234 xmlFreeParserCtxt(ctxt);
14236 return(ret);
14240 * xmlParseDoc:
14241 * @cur: a pointer to an array of xmlChar
14243 * DEPRECATED: Use xmlReadDoc.
14245 * parse an XML in-memory document and build a tree.
14247 * Returns the resulting document tree
14250 xmlDocPtr
14251 xmlParseDoc(const xmlChar *cur) {
14252 return(xmlSAXParseDoc(NULL, cur, 0));
14254 #endif /* LIBXML_SAX1_ENABLED */
14256 #ifdef LIBXML_LEGACY_ENABLED
14257 /************************************************************************
14259 * Specific function to keep track of entities references *
14260 * and used by the XSLT debugger *
14262 ************************************************************************/
14264 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14267 * xmlAddEntityReference:
14268 * @ent : A valid entity
14269 * @firstNode : A valid first node for children of entity
14270 * @lastNode : A valid last node of children entity
14272 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14274 static void
14275 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14276 xmlNodePtr lastNode)
14278 if (xmlEntityRefFunc != NULL) {
14279 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14285 * xmlSetEntityReferenceFunc:
14286 * @func: A valid function
14288 * Set the function to call call back when a xml reference has been made
14290 void
14291 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14293 xmlEntityRefFunc = func;
14295 #endif /* LIBXML_LEGACY_ENABLED */
14297 /************************************************************************
14299 * Miscellaneous *
14301 ************************************************************************/
/* Non-zero once xmlInitParser() has completed; cleared by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 */
void
xmlInitParser(void)
{
    /*
     * Note that the initialization code must not make memory allocations.
     */
    if (xmlParserInitialized)
        return;

#ifdef LIBXML_THREAD_ENABLED
    /* The flag is tested a second time once the global init mutex is held. */
    __xmlGlobalInitMutexLock();
    if (!xmlParserInitialized) {
#endif
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
        /* Only schedule the exit-time cleanup when the stock free() is in use. */
        if (xmlFree == free)
            atexit(xmlCleanupParser);
#endif

        xmlInitThreadsInternal();
        xmlInitGlobalsInternal();
        xmlInitMemoryInternal();
        __xmlInitializeDict();
        xmlInitEncodingInternal();
        xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
        xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
        xmlInitXPathInternal();
#endif
        xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14350 * xmlCleanupParser:
14352 * This function name is somewhat misleading. It does not clean up
14353 * parser state, it cleans up memory allocated by the library itself.
14354 * It is a cleanup function for the XML library. It tries to reclaim all
14355 * related global memory allocated for the library processing.
14356 * It doesn't deallocate any document related memory. One should
14357 * call xmlCleanupParser() only when the process has finished using
14358 * the library and all XML/HTML documents built with it.
14359 * See also xmlInitParser() which has the opposite function of preparing
14360 * the library for operations.
14362 * WARNING: if your application is multithreaded or has plugin support
14363 * calling this may crash the application if another thread or
14364 * a plugin is still using libxml2. It's sometimes very hard to
14365 * guess if libxml2 is in use in the application, some libraries
14366 * or plugins may use it without notice. In case of doubt abstain
14367 * from calling this function or do it just before calling exit()
14368 * to avoid leak reports from valgrind !
14371 void
14372 xmlCleanupParser(void) {
14373 if (!xmlParserInitialized)
14374 return;
14376 xmlCleanupCharEncodingHandlers();
14377 #ifdef LIBXML_CATALOG_ENABLED
14378 xmlCatalogCleanup();
14379 #endif
14380 xmlCleanupDictInternal();
14381 xmlCleanupInputCallbacks();
14382 #ifdef LIBXML_OUTPUT_ENABLED
14383 xmlCleanupOutputCallbacks();
14384 #endif
14385 #ifdef LIBXML_SCHEMAS_ENABLED
14386 xmlSchemaCleanupTypes();
14387 xmlRelaxNGCleanupTypes();
14388 #endif
14389 xmlCleanupGlobalsInternal();
14390 xmlCleanupThreadsInternal();
14391 xmlCleanupMemoryInternal();
14392 xmlParserInitialized = 0;
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * Function tagged with ATTRIBUTE_DESTRUCTOR that runs xmlCleanupParser(),
 * but only while xmlFree is still the C library's free().
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void)
{
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14409 /************************************************************************
14411 * New set (2.6.0) of simpler and more flexible APIs *
14413 ************************************************************************/
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible wherever the
 * macro is expanded; a NULL "dict" means the string is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as exactly one statement, including when it is the unbraced body of an
 * if/else. Invoke it with a trailing semicolon.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
14428 * xmlCtxtReset:
14429 * @ctxt: an XML parser context
14431 * Reset a parser context
14433 void
14434 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14436 xmlParserInputPtr input;
14437 xmlDictPtr dict;
14439 if (ctxt == NULL)
14440 return;
14442 dict = ctxt->dict;
14444 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14445 xmlFreeInputStream(input);
14447 ctxt->inputNr = 0;
14448 ctxt->input = NULL;
14450 ctxt->spaceNr = 0;
14451 if (ctxt->spaceTab != NULL) {
14452 ctxt->spaceTab[0] = -1;
14453 ctxt->space = &ctxt->spaceTab[0];
14454 } else {
14455 ctxt->space = NULL;
14459 ctxt->nodeNr = 0;
14460 ctxt->node = NULL;
14462 ctxt->nameNr = 0;
14463 ctxt->name = NULL;
14465 ctxt->nsNr = 0;
14467 DICT_FREE(ctxt->version);
14468 ctxt->version = NULL;
14469 DICT_FREE(ctxt->encoding);
14470 ctxt->encoding = NULL;
14471 DICT_FREE(ctxt->directory);
14472 ctxt->directory = NULL;
14473 DICT_FREE(ctxt->extSubURI);
14474 ctxt->extSubURI = NULL;
14475 DICT_FREE(ctxt->extSubSystem);
14476 ctxt->extSubSystem = NULL;
14477 if (ctxt->myDoc != NULL)
14478 xmlFreeDoc(ctxt->myDoc);
14479 ctxt->myDoc = NULL;
14481 ctxt->standalone = -1;
14482 ctxt->hasExternalSubset = 0;
14483 ctxt->hasPErefs = 0;
14484 ctxt->html = 0;
14485 ctxt->external = 0;
14486 ctxt->instate = XML_PARSER_START;
14487 ctxt->token = 0;
14489 ctxt->wellFormed = 1;
14490 ctxt->nsWellFormed = 1;
14491 ctxt->disableSAX = 0;
14492 ctxt->valid = 1;
14493 #if 0
14494 ctxt->vctxt.userData = ctxt;
14495 ctxt->vctxt.error = xmlParserValidityError;
14496 ctxt->vctxt.warning = xmlParserValidityWarning;
14497 #endif
14498 ctxt->record_info = 0;
14499 ctxt->checkIndex = 0;
14500 ctxt->endCheckState = 0;
14501 ctxt->inSubset = 0;
14502 ctxt->errNo = XML_ERR_OK;
14503 ctxt->depth = 0;
14504 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14505 ctxt->catalogs = NULL;
14506 ctxt->sizeentities = 0;
14507 ctxt->sizeentcopy = 0;
14508 xmlInitNodeInfoSeq(&ctxt->node_seq);
14510 if (ctxt->attsDefault != NULL) {
14511 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14512 ctxt->attsDefault = NULL;
14514 if (ctxt->attsSpecial != NULL) {
14515 xmlHashFree(ctxt->attsSpecial, NULL);
14516 ctxt->attsSpecial = NULL;
14519 #ifdef LIBXML_CATALOG_ENABLED
14520 if (ctxt->catalogs != NULL)
14521 xmlCatalogFreeLocal(ctxt->catalogs);
14522 #endif
14523 ctxt->nbErrors = 0;
14524 ctxt->nbWarnings = 0;
14525 if (ctxt->lastError.code != XML_ERR_OK)
14526 xmlResetError(&ctxt->lastError);
14530 * xmlCtxtResetPush:
14531 * @ctxt: an XML parser context
14532 * @chunk: a pointer to an array of chars
14533 * @size: number of chars in the array
14534 * @filename: an optional file name or URI
14535 * @encoding: the document encoding, or NULL
14537 * Reset a push parser context
14539 * Returns 0 in case of success and 1 in case of error
14542 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14543 int size, const char *filename, const char *encoding)
14545 xmlParserInputPtr inputStream;
14546 xmlParserInputBufferPtr buf;
14547 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14549 if (ctxt == NULL)
14550 return(1);
14552 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14553 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14555 buf = xmlAllocParserInputBuffer(enc);
14556 if (buf == NULL)
14557 return(1);
14559 if (ctxt == NULL) {
14560 xmlFreeParserInputBuffer(buf);
14561 return(1);
14564 xmlCtxtReset(ctxt);
14566 if (filename == NULL) {
14567 ctxt->directory = NULL;
14568 } else {
14569 ctxt->directory = xmlParserGetDirectory(filename);
14572 inputStream = xmlNewInputStream(ctxt);
14573 if (inputStream == NULL) {
14574 xmlFreeParserInputBuffer(buf);
14575 return(1);
14578 if (filename == NULL)
14579 inputStream->filename = NULL;
14580 else
14581 inputStream->filename = (char *)
14582 xmlCanonicPath((const xmlChar *) filename);
14583 inputStream->buf = buf;
14584 xmlBufResetInput(buf->buffer, inputStream);
14586 inputPush(ctxt, inputStream);
14588 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14589 (ctxt->input->buf != NULL)) {
14590 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14591 size_t cur = ctxt->input->cur - ctxt->input->base;
14593 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14595 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14596 #ifdef DEBUG_PUSH
14597 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14598 #endif
14601 if (encoding != NULL) {
14602 xmlCharEncodingHandlerPtr hdlr;
14604 if (ctxt->encoding != NULL)
14605 xmlFree((xmlChar *) ctxt->encoding);
14606 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14608 hdlr = xmlFindCharEncodingHandler(encoding);
14609 if (hdlr != NULL) {
14610 xmlSwitchToEncoding(ctxt, hdlr);
14611 } else {
14612 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14613 "Unsupported encoding %s\n", BAD_CAST encoding);
14615 } else if (enc != XML_CHAR_ENCODING_NONE) {
14616 xmlSwitchEncoding(ctxt, enc);
14619 return(0);
14624 * xmlCtxtUseOptionsInternal:
14625 * @ctxt: an XML parser context
14626 * @options: a combination of xmlParserOption
14627 * @encoding: the user provided encoding to use
14629 * Applies the options to the parser context
14631 * Returns 0 in case of success, the set of unknown or unimplemented options
14632 * in case of error.
14634 static int
14635 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14637 if (ctxt == NULL)
14638 return(-1);
14639 if (encoding != NULL) {
14640 if (ctxt->encoding != NULL)
14641 xmlFree((xmlChar *) ctxt->encoding);
14642 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14644 if (options & XML_PARSE_RECOVER) {
14645 ctxt->recovery = 1;
14646 options -= XML_PARSE_RECOVER;
14647 ctxt->options |= XML_PARSE_RECOVER;
14648 } else
14649 ctxt->recovery = 0;
14650 if (options & XML_PARSE_DTDLOAD) {
14651 ctxt->loadsubset = XML_DETECT_IDS;
14652 options -= XML_PARSE_DTDLOAD;
14653 ctxt->options |= XML_PARSE_DTDLOAD;
14654 } else
14655 ctxt->loadsubset = 0;
14656 if (options & XML_PARSE_DTDATTR) {
14657 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14658 options -= XML_PARSE_DTDATTR;
14659 ctxt->options |= XML_PARSE_DTDATTR;
14661 if (options & XML_PARSE_NOENT) {
14662 ctxt->replaceEntities = 1;
14663 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14664 options -= XML_PARSE_NOENT;
14665 ctxt->options |= XML_PARSE_NOENT;
14666 } else
14667 ctxt->replaceEntities = 0;
14668 if (options & XML_PARSE_PEDANTIC) {
14669 ctxt->pedantic = 1;
14670 options -= XML_PARSE_PEDANTIC;
14671 ctxt->options |= XML_PARSE_PEDANTIC;
14672 } else
14673 ctxt->pedantic = 0;
14674 if (options & XML_PARSE_NOBLANKS) {
14675 ctxt->keepBlanks = 0;
14676 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14677 options -= XML_PARSE_NOBLANKS;
14678 ctxt->options |= XML_PARSE_NOBLANKS;
14679 } else
14680 ctxt->keepBlanks = 1;
14681 if (options & XML_PARSE_DTDVALID) {
14682 ctxt->validate = 1;
14683 if (options & XML_PARSE_NOWARNING)
14684 ctxt->vctxt.warning = NULL;
14685 if (options & XML_PARSE_NOERROR)
14686 ctxt->vctxt.error = NULL;
14687 options -= XML_PARSE_DTDVALID;
14688 ctxt->options |= XML_PARSE_DTDVALID;
14689 } else
14690 ctxt->validate = 0;
14691 if (options & XML_PARSE_NOWARNING) {
14692 ctxt->sax->warning = NULL;
14693 options -= XML_PARSE_NOWARNING;
14695 if (options & XML_PARSE_NOERROR) {
14696 ctxt->sax->error = NULL;
14697 ctxt->sax->fatalError = NULL;
14698 options -= XML_PARSE_NOERROR;
14700 #ifdef LIBXML_SAX1_ENABLED
14701 if (options & XML_PARSE_SAX1) {
14702 ctxt->sax->startElement = xmlSAX2StartElement;
14703 ctxt->sax->endElement = xmlSAX2EndElement;
14704 ctxt->sax->startElementNs = NULL;
14705 ctxt->sax->endElementNs = NULL;
14706 ctxt->sax->initialized = 1;
14707 options -= XML_PARSE_SAX1;
14708 ctxt->options |= XML_PARSE_SAX1;
14710 #endif /* LIBXML_SAX1_ENABLED */
14711 if (options & XML_PARSE_NODICT) {
14712 ctxt->dictNames = 0;
14713 options -= XML_PARSE_NODICT;
14714 ctxt->options |= XML_PARSE_NODICT;
14715 } else {
14716 ctxt->dictNames = 1;
14718 if (options & XML_PARSE_NOCDATA) {
14719 ctxt->sax->cdataBlock = NULL;
14720 options -= XML_PARSE_NOCDATA;
14721 ctxt->options |= XML_PARSE_NOCDATA;
14723 if (options & XML_PARSE_NSCLEAN) {
14724 ctxt->options |= XML_PARSE_NSCLEAN;
14725 options -= XML_PARSE_NSCLEAN;
14727 if (options & XML_PARSE_NONET) {
14728 ctxt->options |= XML_PARSE_NONET;
14729 options -= XML_PARSE_NONET;
14731 if (options & XML_PARSE_COMPACT) {
14732 ctxt->options |= XML_PARSE_COMPACT;
14733 options -= XML_PARSE_COMPACT;
14735 if (options & XML_PARSE_OLD10) {
14736 ctxt->options |= XML_PARSE_OLD10;
14737 options -= XML_PARSE_OLD10;
14739 if (options & XML_PARSE_NOBASEFIX) {
14740 ctxt->options |= XML_PARSE_NOBASEFIX;
14741 options -= XML_PARSE_NOBASEFIX;
14743 if (options & XML_PARSE_HUGE) {
14744 ctxt->options |= XML_PARSE_HUGE;
14745 options -= XML_PARSE_HUGE;
14746 if (ctxt->dict != NULL)
14747 xmlDictSetLimit(ctxt->dict, 0);
14749 if (options & XML_PARSE_OLDSAX) {
14750 ctxt->options |= XML_PARSE_OLDSAX;
14751 options -= XML_PARSE_OLDSAX;
14753 if (options & XML_PARSE_IGNORE_ENC) {
14754 ctxt->options |= XML_PARSE_IGNORE_ENC;
14755 options -= XML_PARSE_IGNORE_ENC;
14757 if (options & XML_PARSE_BIG_LINES) {
14758 ctxt->options |= XML_PARSE_BIG_LINES;
14759 options -= XML_PARSE_BIG_LINES;
14761 ctxt->linenumbers = 1;
14762 return (options);
14766 * xmlCtxtUseOptions:
14767 * @ctxt: an XML parser context
14768 * @options: a combination of xmlParserOption
14770 * Applies the options to the parser context
14772 * Returns 0 in case of success, the set of unknown or unimplemented options
14773 * in case of error.
14776 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14778 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14782 * xmlDoRead:
14783 * @ctxt: an XML parser context
14784 * @URL: the base URL to use for the document
14785 * @encoding: the document encoding, or NULL
14786 * @options: a combination of xmlParserOption
14787 * @reuse: keep the context for reuse
14789 * Common front-end for the xmlRead functions
14791 * Returns the resulting document tree or NULL
14793 static xmlDocPtr
14794 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14795 int options, int reuse)
14797 xmlDocPtr ret;
14799 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14800 if (encoding != NULL) {
14801 xmlCharEncodingHandlerPtr hdlr;
14804 * TODO: We should consider to set XML_PARSE_IGNORE_ENC if the
14805 * caller provided an encoding. Otherwise, we might switch to
14806 * the encoding from the XML declaration which is likely to
14807 * break things. Also see xmlSwitchInputEncoding.
14809 hdlr = xmlFindCharEncodingHandler(encoding);
14810 if (hdlr != NULL)
14811 xmlSwitchToEncoding(ctxt, hdlr);
14813 if ((URL != NULL) && (ctxt->input != NULL) &&
14814 (ctxt->input->filename == NULL))
14815 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14816 xmlParseDocument(ctxt);
14817 if ((ctxt->wellFormed) || ctxt->recovery)
14818 ret = ctxt->myDoc;
14819 else {
14820 ret = NULL;
14821 if (ctxt->myDoc != NULL) {
14822 xmlFreeDoc(ctxt->myDoc);
14825 ctxt->myDoc = NULL;
14826 if (!reuse) {
14827 xmlFreeParserCtxt(ctxt);
14830 return (ret);
14834 * xmlReadDoc:
14835 * @cur: a pointer to a zero terminated string
14836 * @URL: the base URL to use for the document
14837 * @encoding: the document encoding, or NULL
14838 * @options: a combination of xmlParserOption
14840 * parse an XML in-memory document and build a tree.
14842 * Returns the resulting document tree
14844 xmlDocPtr
14845 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14847 xmlParserCtxtPtr ctxt;
14849 if (cur == NULL)
14850 return (NULL);
14851 xmlInitParser();
14853 ctxt = xmlCreateDocParserCtxt(cur);
14854 if (ctxt == NULL)
14855 return (NULL);
14856 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14860 * xmlReadFile:
14861 * @filename: a file or URL
14862 * @encoding: the document encoding, or NULL
14863 * @options: a combination of xmlParserOption
14865 * parse an XML file from the filesystem or the network.
14867 * Returns the resulting document tree
14869 xmlDocPtr
14870 xmlReadFile(const char *filename, const char *encoding, int options)
14872 xmlParserCtxtPtr ctxt;
14874 xmlInitParser();
14875 ctxt = xmlCreateURLParserCtxt(filename, options);
14876 if (ctxt == NULL)
14877 return (NULL);
14878 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14882 * xmlReadMemory:
14883 * @buffer: a pointer to a char array
14884 * @size: the size of the array
14885 * @URL: the base URL to use for the document
14886 * @encoding: the document encoding, or NULL
14887 * @options: a combination of xmlParserOption
14889 * parse an XML in-memory document and build a tree.
14891 * Returns the resulting document tree
14893 xmlDocPtr
14894 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14896 xmlParserCtxtPtr ctxt;
14898 xmlInitParser();
14899 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14900 if (ctxt == NULL)
14901 return (NULL);
14902 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14906 * xmlReadFd:
14907 * @fd: an open file descriptor
14908 * @URL: the base URL to use for the document
14909 * @encoding: the document encoding, or NULL
14910 * @options: a combination of xmlParserOption
14912 * parse an XML from a file descriptor and build a tree.
14913 * NOTE that the file descriptor will not be closed when the
14914 * reader is closed or reset.
14916 * Returns the resulting document tree
14918 xmlDocPtr
14919 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14921 xmlParserCtxtPtr ctxt;
14922 xmlParserInputBufferPtr input;
14923 xmlParserInputPtr stream;
14925 if (fd < 0)
14926 return (NULL);
14927 xmlInitParser();
14929 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14930 if (input == NULL)
14931 return (NULL);
14932 input->closecallback = NULL;
14933 ctxt = xmlNewParserCtxt();
14934 if (ctxt == NULL) {
14935 xmlFreeParserInputBuffer(input);
14936 return (NULL);
14938 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14939 if (stream == NULL) {
14940 xmlFreeParserInputBuffer(input);
14941 xmlFreeParserCtxt(ctxt);
14942 return (NULL);
14944 inputPush(ctxt, stream);
14945 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14949 * xmlReadIO:
14950 * @ioread: an I/O read function
14951 * @ioclose: an I/O close function
14952 * @ioctx: an I/O handler
14953 * @URL: the base URL to use for the document
14954 * @encoding: the document encoding, or NULL
14955 * @options: a combination of xmlParserOption
14957 * parse an XML document from I/O functions and source and build a tree.
14959 * Returns the resulting document tree
14961 xmlDocPtr
14962 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14963 void *ioctx, const char *URL, const char *encoding, int options)
14965 xmlParserCtxtPtr ctxt;
14966 xmlParserInputBufferPtr input;
14967 xmlParserInputPtr stream;
14969 if (ioread == NULL)
14970 return (NULL);
14971 xmlInitParser();
14973 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14974 XML_CHAR_ENCODING_NONE);
14975 if (input == NULL) {
14976 if (ioclose != NULL)
14977 ioclose(ioctx);
14978 return (NULL);
14980 ctxt = xmlNewParserCtxt();
14981 if (ctxt == NULL) {
14982 xmlFreeParserInputBuffer(input);
14983 return (NULL);
14985 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14986 if (stream == NULL) {
14987 xmlFreeParserInputBuffer(input);
14988 xmlFreeParserCtxt(ctxt);
14989 return (NULL);
14991 inputPush(ctxt, stream);
14992 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14996 * xmlCtxtReadDoc:
14997 * @ctxt: an XML parser context
14998 * @cur: a pointer to a zero terminated string
14999 * @URL: the base URL to use for the document
15000 * @encoding: the document encoding, or NULL
15001 * @options: a combination of xmlParserOption
15003 * parse an XML in-memory document and build a tree.
15004 * This reuses the existing @ctxt parser context
15006 * Returns the resulting document tree
15008 xmlDocPtr
15009 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15010 const char *URL, const char *encoding, int options)
15012 if (cur == NULL)
15013 return (NULL);
15014 return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15015 encoding, options));
15019 * xmlCtxtReadFile:
15020 * @ctxt: an XML parser context
15021 * @filename: a file or URL
15022 * @encoding: the document encoding, or NULL
15023 * @options: a combination of xmlParserOption
15025 * parse an XML file from the filesystem or the network.
15026 * This reuses the existing @ctxt parser context
15028 * Returns the resulting document tree
15030 xmlDocPtr
15031 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15032 const char *encoding, int options)
15034 xmlParserInputPtr stream;
15036 if (filename == NULL)
15037 return (NULL);
15038 if (ctxt == NULL)
15039 return (NULL);
15040 xmlInitParser();
15042 xmlCtxtReset(ctxt);
15044 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15045 if (stream == NULL) {
15046 return (NULL);
15048 inputPush(ctxt, stream);
15049 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15053 * xmlCtxtReadMemory:
15054 * @ctxt: an XML parser context
15055 * @buffer: a pointer to a char array
15056 * @size: the size of the array
15057 * @URL: the base URL to use for the document
15058 * @encoding: the document encoding, or NULL
15059 * @options: a combination of xmlParserOption
15061 * parse an XML in-memory document and build a tree.
15062 * This reuses the existing @ctxt parser context
15064 * Returns the resulting document tree
15066 xmlDocPtr
15067 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15068 const char *URL, const char *encoding, int options)
15070 xmlParserInputBufferPtr input;
15071 xmlParserInputPtr stream;
15073 if (ctxt == NULL)
15074 return (NULL);
15075 if (buffer == NULL)
15076 return (NULL);
15077 xmlInitParser();
15079 xmlCtxtReset(ctxt);
15081 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15082 if (input == NULL) {
15083 return(NULL);
15086 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15087 if (stream == NULL) {
15088 xmlFreeParserInputBuffer(input);
15089 return(NULL);
15092 inputPush(ctxt, stream);
15093 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15097 * xmlCtxtReadFd:
15098 * @ctxt: an XML parser context
15099 * @fd: an open file descriptor
15100 * @URL: the base URL to use for the document
15101 * @encoding: the document encoding, or NULL
15102 * @options: a combination of xmlParserOption
15104 * parse an XML from a file descriptor and build a tree.
15105 * This reuses the existing @ctxt parser context
15106 * NOTE that the file descriptor will not be closed when the
15107 * reader is closed or reset.
15109 * Returns the resulting document tree
15111 xmlDocPtr
15112 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15113 const char *URL, const char *encoding, int options)
15115 xmlParserInputBufferPtr input;
15116 xmlParserInputPtr stream;
15118 if (fd < 0)
15119 return (NULL);
15120 if (ctxt == NULL)
15121 return (NULL);
15122 xmlInitParser();
15124 xmlCtxtReset(ctxt);
15127 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15128 if (input == NULL)
15129 return (NULL);
15130 input->closecallback = NULL;
15131 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15132 if (stream == NULL) {
15133 xmlFreeParserInputBuffer(input);
15134 return (NULL);
15136 inputPush(ctxt, stream);
15137 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15141 * xmlCtxtReadIO:
15142 * @ctxt: an XML parser context
15143 * @ioread: an I/O read function
15144 * @ioclose: an I/O close function
15145 * @ioctx: an I/O handler
15146 * @URL: the base URL to use for the document
15147 * @encoding: the document encoding, or NULL
15148 * @options: a combination of xmlParserOption
15150 * parse an XML document from I/O functions and source and build a tree.
15151 * This reuses the existing @ctxt parser context
15153 * Returns the resulting document tree
15155 xmlDocPtr
15156 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15157 xmlInputCloseCallback ioclose, void *ioctx,
15158 const char *URL,
15159 const char *encoding, int options)
15161 xmlParserInputBufferPtr input;
15162 xmlParserInputPtr stream;
15164 if (ioread == NULL)
15165 return (NULL);
15166 if (ctxt == NULL)
15167 return (NULL);
15168 xmlInitParser();
15170 xmlCtxtReset(ctxt);
15172 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15173 XML_CHAR_ENCODING_NONE);
15174 if (input == NULL) {
15175 if (ioclose != NULL)
15176 ioclose(ioctx);
15177 return (NULL);
15179 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15180 if (stream == NULL) {
15181 xmlFreeParserInputBuffer(input);
15182 return (NULL);
15184 inputPush(ctxt, stream);
15185 return (xmlDoRead(ctxt, URL, encoding, options, 1));