libs/xml2/parser.c

   1 /*
   2  * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
   3  *            implemented on top of the SAX interfaces
   4  *
   5  * References:
   6  *   The XML specification:
   7  *     http://www.w3.org/TR/REC-xml
   8  *   Original 1.0 version:
   9  *     http://www.w3.org/TR/1998/REC-xml-19980210
  10  *   XML second edition working draft
  11  *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
  12  *
  13  * Okay this is a big file, the parser core is around 7000 lines, then it
  14  * is followed by the progressive parser top routines, then the various
  15  * high level APIs to call the parser and a few miscellaneous functions.
  16  * A number of helper functions and deprecated ones have been moved to
  17  * parserInternals.c to reduce this file size.
  18  * As much as possible the functions are associated with their relative
  19  * production in the XML specification. A few productions defining the
   20  * different ranges of character are actually implemented either in
  21  * parserInternals.h or parserInternals.c
  22  * The DOM tree build is realized from the default SAX callbacks in
  23  * the module SAX.c.
  24  * The routines doing the validation checks are in valid.c and called either
  25  * from the SAX callbacks or as standalone functions using a preparsed
  26  * document.
  27  *
  28  * See Copyright for the status of this software.
  29  *
  30  * daniel@veillard.com
  31  */
  32
  33 /* To avoid EBCDIC trouble when parsing on zOS */
  34 #if defined(__MVS__)
  35 #pragma convert("ISO8859-1")
  36 #endif
  37
  38 #define IN_LIBXML
  39 #include "libxml.h"
  40
  41 #if defined(_WIN32)
  42 #define XML_DIR_SEP '\\'
  43 #else
  44 #define XML_DIR_SEP '/'
  45 #endif
  46
  47 #include <stdlib.h>
  48 #include <limits.h>
  49 #include <string.h>
  50 #include <stdarg.h>
  51 #include <stddef.h>
  52 #include <ctype.h>
  53 #include <stdlib.h>
  54 #include <libxml/xmlmemory.h>
  55 #include <libxml/threads.h>
  56 #include <libxml/globals.h>
  57 #include <libxml/tree.h>
  58 #include <libxml/parser.h>
  59 #include <libxml/parserInternals.h>
  60 #include <libxml/valid.h>
  61 #include <libxml/entities.h>
  62 #include <libxml/xmlerror.h>
  63 #include <libxml/encoding.h>
  64 #include <libxml/xmlIO.h>
  65 #include <libxml/uri.h>
  66 #ifdef LIBXML_CATALOG_ENABLED
  67 #include <libxml/catalog.h>
  68 #endif
  69 #ifdef LIBXML_SCHEMAS_ENABLED
  70 #include <libxml/xmlschemastypes.h>
  71 #include <libxml/relaxng.h>
  72 #endif
  73
  74 #include "buf.h"
  75 #include "enc.h"
  76
  77 struct _xmlStartTag {
  78     const xmlChar *prefix;
  79     const xmlChar *URI;
  80     int line;
  81     int nsNr;
  82 };
  83
  84 static void
  85 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
  86
  87 static xmlParserCtxtPtr
  88 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
  89                           const xmlChar *base, xmlParserCtxtPtr pctx);
  90
  91 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
  92
  93 static int
  94 xmlParseElementStart(xmlParserCtxtPtr ctxt);
  95
  96 static void
  97 xmlParseElementEnd(xmlParserCtxtPtr ctxt);
  98
  99 /************************************************************************
 100  *                                                                      *
 101  *      Arbitrary limits set in the parser. See XML_PARSE_HUGE          *
 102  *                                                                      *
 103  ************************************************************************/
 104
 105 #define XML_PARSER_BIG_ENTITY 1000
 106 #define XML_PARSER_LOT_ENTITY 5000
 107
 108 /*
 109  * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 110  *    replacement over the size in byte of the input indicates that you have
 111  *    and exponential behaviour. A value of 10 correspond to at least 3 entity
 112  *    replacement per byte of input.
 113  */
 114 #define XML_PARSER_NON_LINEAR 10
 115
 116 /*
 117  * xmlParserEntityCheck
 118  *
 119  * Function to check non-linear entity expansion behaviour
 120  * This is here to detect and stop exponential linear entity expansion
 121  * This is not a limitation of the parser but a safety
 122  * boundary feature. It can be disabled with the XML_PARSE_HUGE
 123  * parser option.
 124  */
 125 static int
 126 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
 127                      xmlEntityPtr ent, size_t replacement)
 128 {
 129     size_t consumed = 0;
 130     int i;
 131
 132     if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
 133         return (0);
 134     if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
 135         return (1);
 136
 137     /*
 138      * This may look absurd but is needed to detect
 139      * entities problems
 140      */
 141     if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
 142         (ent->content != NULL) && (ent->checked == 0) &&
 143         (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
 144         unsigned long oldnbent = ctxt->nbentities, diff;
 145         xmlChar *rep;
 146
 147         ent->checked = 1;
 148
 149         ++ctxt->depth;
 150         rep = xmlStringDecodeEntities(ctxt, ent->content,
 151                                   XML_SUBSTITUTE_REF, 0, 0, 0);
 152         --ctxt->depth;
 153         if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
 154             ent->content[0] = 0;
 155         }
 156
 157         diff = ctxt->nbentities - oldnbent + 1;
 158         if (diff > INT_MAX / 2)
 159             diff = INT_MAX / 2;
 160         ent->checked = diff * 2;
 161         if (rep != NULL) {
 162             if (xmlStrchr(rep, '<'))
 163                 ent->checked |= 1;
 164             xmlFree(rep);
 165             rep = NULL;
 166         }
 167     }
 168
 169     /*
 170      * Prevent entity exponential check, not just replacement while
 171      * parsing the DTD
 172      * The check is potentially costly so do that only once in a thousand
 173      */
 174     if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
 175         (ctxt->nbentities % 1024 == 0)) {
 176         for (i = 0;i < ctxt->inputNr;i++) {
 177             consumed += ctxt->inputTab[i]->consumed +
 178                        (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
 179         }
 180         if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
 181             xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
 182             ctxt->instate = XML_PARSER_EOF;
 183             return (1);
 184         }
 185         consumed = 0;
 186     }
 187
 188
 189
 190     if (replacement != 0) {
 191         if (replacement < XML_MAX_TEXT_LENGTH)
 192             return(0);
 193
 194         /*
 195          * If the volume of entity copy reaches 10 times the
 196          * amount of parsed data and over the large text threshold
 197          * then that's very likely to be an abuse.
 198          */
 199         if (ctxt->input != NULL) {
 200             consumed = ctxt->input->consumed +
 201                        (ctxt->input->cur - ctxt->input->base);
 202         }
 203         consumed += ctxt->sizeentities;
 204
 205         if (replacement < XML_PARSER_NON_LINEAR * consumed)
 206             return(0);
 207     } else if (size != 0) {
 208         /*
 209          * Do the check based on the replacement size of the entity
 210          */
 211         if (size < XML_PARSER_BIG_ENTITY)
 212             return(0);
 213
 214         /*
 215          * A limit on the amount of text data reasonably used
 216          */
 217         if (ctxt->input != NULL) {
 218             consumed = ctxt->input->consumed +
 219                 (ctxt->input->cur - ctxt->input->base);
 220         }
 221         consumed += ctxt->sizeentities;
 222
 223         if ((size < XML_PARSER_NON_LINEAR * consumed) &&
 224             (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
 225             return (0);
 226     } else if (ent != NULL) {
 227         /*
 228          * use the number of parsed entities in the replacement
 229          */
 230         size = ent->checked / 2;
 231
 232         /*
 233          * The amount of data parsed counting entities size only once
 234          */
 235         if (ctxt->input != NULL) {
 236             consumed = ctxt->input->consumed +
 237                 (ctxt->input->cur - ctxt->input->base);
 238         }
 239         consumed += ctxt->sizeentities;
 240
 241         /*
 242          * Check the density of entities for the amount of data
 243          * knowing an entity reference will take at least 3 bytes
 244          */
 245         if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
 246             return (0);
 247     } else {
 248         /*
 249          * strange we got no data for checking
 250          */
 251         if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
 252              (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
 253             (ctxt->nbentities <= 10000))
 254             return (0);
 255     }
 256     xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
 257     return (1);
 258 }
 259
 260 /**
 261  * xmlParserMaxDepth:
 262  *
 263  * arbitrary depth limit for the XML documents that we allow to
 264  * process. This is not a limitation of the parser but a safety
 265  * boundary feature. It can be disabled with the XML_PARSE_HUGE
 266  * parser option.
 267  */
 268 unsigned int xmlParserMaxDepth = 256;
 269
 270
 271
 272 #define SAX2 1
 273 #define XML_PARSER_BIG_BUFFER_SIZE 300
 274 #define XML_PARSER_BUFFER_SIZE 100
 275 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
 276
 277 /**
 278  * XML_PARSER_CHUNK_SIZE
 279  *
 280  * When calling GROW that's the minimal amount of data
 281  * the parser expected to have received. It is not a hard
 282  * limit but an optimization when reading strings like Names
 283  * It is not strictly needed as long as inputs available characters
 284  * are followed by 0, which should be provided by the I/O level
 285  */
 286 #define XML_PARSER_CHUNK_SIZE 100
 287
 288 /*
 289  * List of XML prefixed PI allowed by W3C specs
 290  */
 291
 292 static const char* const xmlW3CPIs[] = {
 293     "xml-stylesheet",
 294     "xml-model",
 295     NULL
 296 };
 297
 298
 299 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
 300 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
 301                                               const xmlChar **str);
 302
 303 static xmlParserErrors
 304 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
 305                       xmlSAXHandlerPtr sax,
 306                       void *user_data, int depth, const xmlChar *URL,
 307                       const xmlChar *ID, xmlNodePtr *list);
 308
 309 static int
 310 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
 311                           const char *encoding);
 312 #ifdef LIBXML_LEGACY_ENABLED
 313 static void
 314 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
 315                       xmlNodePtr lastNode);
 316 #endif /* LIBXML_LEGACY_ENABLED */
 317
 318 static xmlParserErrors
 319 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
 320                       const xmlChar *string, void *user_data, xmlNodePtr *lst);
 321
 322 static int
 323 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
 324
 325 /************************************************************************
 326  *                                                                      *
 327  *              Some factorized error routines                          *
 328  *                                                                      *
 329  ************************************************************************/
 330
 331 /**
 332  * xmlErrAttributeDup:
 333  * @ctxt:  an XML parser context
 334  * @prefix:  the attribute prefix
 335  * @localname:  the attribute localname
 336  *
 337  * Handle a redefinition of attribute error
 338  */
 339 static void
 340 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
 341                    const xmlChar * localname)
 342 {
 343     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 344         (ctxt->instate == XML_PARSER_EOF))
 345         return;
 346     if (ctxt != NULL)
 347         ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
 348
 349     if (prefix == NULL)
 350         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
 351                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
 352                         (const char *) localname, NULL, NULL, 0, 0,
 353                         "Attribute %s redefined\n", localname);
 354     else
 355         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
 356                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
 357                         (const char *) prefix, (const char *) localname,
 358                         NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
 359                         localname);
 360     if (ctxt != NULL) {
 361         ctxt->wellFormed = 0;
 362         if (ctxt->recovery == 0)
 363             ctxt->disableSAX = 1;
 364     }
 365 }
 366
 367 /**
 368  * xmlFatalErr:
 369  * @ctxt:  an XML parser context
 370  * @error:  the error number
 371  * @extra:  extra information string
 372  *
 373  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 374  */
 375 static void
 376 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
 377 {
 378     const char *errmsg;
 379
 380     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 381         (ctxt->instate == XML_PARSER_EOF))
 382         return;
 383     switch (error) {
 384         case XML_ERR_INVALID_HEX_CHARREF:
 385             errmsg = "CharRef: invalid hexadecimal value";
 386             break;
 387         case XML_ERR_INVALID_DEC_CHARREF:
 388             errmsg = "CharRef: invalid decimal value";
 389             break;
 390         case XML_ERR_INVALID_CHARREF:
 391             errmsg = "CharRef: invalid value";
 392             break;
 393         case XML_ERR_INTERNAL_ERROR:
 394             errmsg = "internal error";
 395             break;
 396         case XML_ERR_PEREF_AT_EOF:
 397             errmsg = "PEReference at end of document";
 398             break;
 399         case XML_ERR_PEREF_IN_PROLOG:
 400             errmsg = "PEReference in prolog";
 401             break;
 402         case XML_ERR_PEREF_IN_EPILOG:
 403             errmsg = "PEReference in epilog";
 404             break;
 405         case XML_ERR_PEREF_NO_NAME:
 406             errmsg = "PEReference: no name";
 407             break;
 408         case XML_ERR_PEREF_SEMICOL_MISSING:
 409             errmsg = "PEReference: expecting ';'";
 410             break;
 411         case XML_ERR_ENTITY_LOOP:
 412             errmsg = "Detected an entity reference loop";
 413             break;
 414         case XML_ERR_ENTITY_NOT_STARTED:
 415             errmsg = "EntityValue: \" or ' expected";
 416             break;
 417         case XML_ERR_ENTITY_PE_INTERNAL:
 418             errmsg = "PEReferences forbidden in internal subset";
 419             break;
 420         case XML_ERR_ENTITY_NOT_FINISHED:
 421             errmsg = "EntityValue: \" or ' expected";
 422             break;
 423         case XML_ERR_ATTRIBUTE_NOT_STARTED:
 424             errmsg = "AttValue: \" or ' expected";
 425             break;
 426         case XML_ERR_LT_IN_ATTRIBUTE:
 427             errmsg = "Unescaped '<' not allowed in attributes values";
 428             break;
 429         case XML_ERR_LITERAL_NOT_STARTED:
 430             errmsg = "SystemLiteral \" or ' expected";
 431             break;
 432         case XML_ERR_LITERAL_NOT_FINISHED:
 433             errmsg = "Unfinished System or Public ID \" or ' expected";
 434             break;
 435         case XML_ERR_MISPLACED_CDATA_END:
 436             errmsg = "Sequence ']]>' not allowed in content";
 437             break;
 438         case XML_ERR_URI_REQUIRED:
 439             errmsg = "SYSTEM or PUBLIC, the URI is missing";
 440             break;
 441         case XML_ERR_PUBID_REQUIRED:
 442             errmsg = "PUBLIC, the Public Identifier is missing";
 443             break;
 444         case XML_ERR_HYPHEN_IN_COMMENT:
 445             errmsg = "Comment must not contain '--' (double-hyphen)";
 446             break;
 447         case XML_ERR_PI_NOT_STARTED:
 448             errmsg = "xmlParsePI : no target name";
 449             break;
 450         case XML_ERR_RESERVED_XML_NAME:
 451             errmsg = "Invalid PI name";
 452             break;
 453         case XML_ERR_NOTATION_NOT_STARTED:
 454             errmsg = "NOTATION: Name expected here";
 455             break;
 456         case XML_ERR_NOTATION_NOT_FINISHED:
 457             errmsg = "'>' required to close NOTATION declaration";
 458             break;
 459         case XML_ERR_VALUE_REQUIRED:
 460             errmsg = "Entity value required";
 461             break;
 462         case XML_ERR_URI_FRAGMENT:
 463             errmsg = "Fragment not allowed";
 464             break;
 465         case XML_ERR_ATTLIST_NOT_STARTED:
 466             errmsg = "'(' required to start ATTLIST enumeration";
 467             break;
 468         case XML_ERR_NMTOKEN_REQUIRED:
 469             errmsg = "NmToken expected in ATTLIST enumeration";
 470             break;
 471         case XML_ERR_ATTLIST_NOT_FINISHED:
 472             errmsg = "')' required to finish ATTLIST enumeration";
 473             break;
 474         case XML_ERR_MIXED_NOT_STARTED:
 475             errmsg = "MixedContentDecl : '|' or ')*' expected";
 476             break;
 477         case XML_ERR_PCDATA_REQUIRED:
 478             errmsg = "MixedContentDecl : '#PCDATA' expected";
 479             break;
 480         case XML_ERR_ELEMCONTENT_NOT_STARTED:
 481             errmsg = "ContentDecl : Name or '(' expected";
 482             break;
 483         case XML_ERR_ELEMCONTENT_NOT_FINISHED:
 484             errmsg = "ContentDecl : ',' '|' or ')' expected";
 485             break;
 486         case XML_ERR_PEREF_IN_INT_SUBSET:
 487             errmsg =
 488                 "PEReference: forbidden within markup decl in internal subset";
 489             break;
 490         case XML_ERR_GT_REQUIRED:
 491             errmsg = "expected '>'";
 492             break;
 493         case XML_ERR_CONDSEC_INVALID:
 494             errmsg = "XML conditional section '[' expected";
 495             break;
 496         case XML_ERR_EXT_SUBSET_NOT_FINISHED:
 497             errmsg = "Content error in the external subset";
 498             break;
 499         case XML_ERR_CONDSEC_INVALID_KEYWORD:
 500             errmsg =
 501                 "conditional section INCLUDE or IGNORE keyword expected";
 502             break;
 503         case XML_ERR_CONDSEC_NOT_FINISHED:
 504             errmsg = "XML conditional section not closed";
 505             break;
 506         case XML_ERR_XMLDECL_NOT_STARTED:
 507             errmsg = "Text declaration '<?xml' required";
 508             break;
 509         case XML_ERR_XMLDECL_NOT_FINISHED:
 510             errmsg = "parsing XML declaration: '?>' expected";
 511             break;
 512         case XML_ERR_EXT_ENTITY_STANDALONE:
 513             errmsg = "external parsed entities cannot be standalone";
 514             break;
 515         case XML_ERR_ENTITYREF_SEMICOL_MISSING:
 516             errmsg = "EntityRef: expecting ';'";
 517             break;
 518         case XML_ERR_DOCTYPE_NOT_FINISHED:
 519             errmsg = "DOCTYPE improperly terminated";
 520             break;
 521         case XML_ERR_LTSLASH_REQUIRED:
 522             errmsg = "EndTag: '</' not found";
 523             break;
 524         case XML_ERR_EQUAL_REQUIRED:
 525             errmsg = "expected '='";
 526             break;
 527         case XML_ERR_STRING_NOT_CLOSED:
 528             errmsg = "String not closed expecting \" or '";
 529             break;
 530         case XML_ERR_STRING_NOT_STARTED:
 531             errmsg = "String not started expecting ' or \"";
 532             break;
 533         case XML_ERR_ENCODING_NAME:
 534             errmsg = "Invalid XML encoding name";
 535             break;
 536         case XML_ERR_STANDALONE_VALUE:
 537             errmsg = "standalone accepts only 'yes' or 'no'";
 538             break;
 539         case XML_ERR_DOCUMENT_EMPTY:
 540             errmsg = "Document is empty";
 541             break;
 542         case XML_ERR_DOCUMENT_END:
 543             errmsg = "Extra content at the end of the document";
 544             break;
 545         case XML_ERR_NOT_WELL_BALANCED:
 546             errmsg = "chunk is not well balanced";
 547             break;
 548         case XML_ERR_EXTRA_CONTENT:
 549             errmsg = "extra content at the end of well balanced chunk";
 550             break;
 551         case XML_ERR_VERSION_MISSING:
 552             errmsg = "Malformed declaration expecting version";
 553             break;
 554         case XML_ERR_NAME_TOO_LONG:
 555             errmsg = "Name too long use XML_PARSE_HUGE option";
 556             break;
 557 #if 0
 558         case:
 559             errmsg = "";
 560             break;
 561 #endif
 562         default:
 563             errmsg = "Unregistered error message";
 564     }
 565     if (ctxt != NULL)
 566         ctxt->errNo = error;
 567     if (info == NULL) {
 568         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
 569                         XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
 570                         errmsg);
 571     } else {
 572         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
 573                         XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
 574                         errmsg, info);
 575     }
 576     if (ctxt != NULL) {
 577         ctxt->wellFormed = 0;
 578         if (ctxt->recovery == 0)
 579             ctxt->disableSAX = 1;
 580     }
 581 }
 582
 583 /**
 584  * xmlFatalErrMsg:
 585  * @ctxt:  an XML parser context
 586  * @error:  the error number
 587  * @msg:  the error message
 588  *
 589  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 590  */
 591 static void LIBXML_ATTR_FORMAT(3,0)
 592 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
 593                const char *msg)
 594 {
 595     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 596         (ctxt->instate == XML_PARSER_EOF))
 597         return;
 598     if (ctxt != NULL)
 599         ctxt->errNo = error;
 600     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
 601                     XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
 602     if (ctxt != NULL) {
 603         ctxt->wellFormed = 0;
 604         if (ctxt->recovery == 0)
 605             ctxt->disableSAX = 1;
 606     }
 607 }
 608
 609 /**
 610  * xmlWarningMsg:
 611  * @ctxt:  an XML parser context
 612  * @error:  the error number
 613  * @msg:  the error message
 614  * @str1:  extra data
 615  * @str2:  extra data
 616  *
 617  * Handle a warning.
 618  */
 619 static void LIBXML_ATTR_FORMAT(3,0)
 620 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
 621               const char *msg, const xmlChar *str1, const xmlChar *str2)
 622 {
 623     xmlStructuredErrorFunc schannel = NULL;
 624
 625     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 626         (ctxt->instate == XML_PARSER_EOF))
 627         return;
 628     if ((ctxt != NULL) && (ctxt->sax != NULL) &&
 629         (ctxt->sax->initialized == XML_SAX2_MAGIC))
 630         schannel = ctxt->sax->serror;
 631     if (ctxt != NULL) {
 632         __xmlRaiseError(schannel,
 633                     (ctxt->sax) ? ctxt->sax->warning : NULL,
 634                     ctxt->userData,
 635                     ctxt, NULL, XML_FROM_PARSER, error,
 636                     XML_ERR_WARNING, NULL, 0,
 637                     (const char *) str1, (const char *) str2, NULL, 0, 0,
 638                     msg, (const char *) str1, (const char *) str2);
 639     } else {
 640         __xmlRaiseError(schannel, NULL, NULL,
 641                     ctxt, NULL, XML_FROM_PARSER, error,
 642                     XML_ERR_WARNING, NULL, 0,
 643                     (const char *) str1, (const char *) str2, NULL, 0, 0,
 644                     msg, (const char *) str1, (const char *) str2);
 645     }
 646 }
 647
 648 /**
 649  * xmlValidityError:
 650  * @ctxt:  an XML parser context
 651  * @error:  the error number
 652  * @msg:  the error message
 653  * @str1:  extra data
 654  *
 655  * Handle a validity error.
 656  */
 657 static void LIBXML_ATTR_FORMAT(3,0)
 658 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
 659               const char *msg, const xmlChar *str1, const xmlChar *str2)
 660 {
 661     xmlStructuredErrorFunc schannel = NULL;
 662
 663     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 664         (ctxt->instate == XML_PARSER_EOF))
 665         return;
 666     if (ctxt != NULL) {
 667         ctxt->errNo = error;
 668         if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
 669             schannel = ctxt->sax->serror;
 670     }
 671     if (ctxt != NULL) {
 672         __xmlRaiseError(schannel,
 673                     ctxt->vctxt.error, ctxt->vctxt.userData,
 674                     ctxt, NULL, XML_FROM_DTD, error,
 675                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
 676                     (const char *) str2, NULL, 0, 0,
 677                     msg, (const char *) str1, (const char *) str2);
 678         ctxt->valid = 0;
 679     } else {
 680         __xmlRaiseError(schannel, NULL, NULL,
 681                     ctxt, NULL, XML_FROM_DTD, error,
 682                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
 683                     (const char *) str2, NULL, 0, 0,
 684                     msg, (const char *) str1, (const char *) str2);
 685     }
 686 }
 687
 688 /**
 689  * xmlFatalErrMsgInt:
 690  * @ctxt:  an XML parser context
 691  * @error:  the error number
 692  * @msg:  the error message
 693  * @val:  an integer value
 694  *
 695  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 696  */
 697 static void LIBXML_ATTR_FORMAT(3,0)
 698 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
 699                   const char *msg, int val)
 700 {
 701     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 702         (ctxt->instate == XML_PARSER_EOF))
 703         return;
 704     if (ctxt != NULL)
 705         ctxt->errNo = error;
 706     __xmlRaiseError(NULL, NULL, NULL,
 707                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
 708                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
 709     if (ctxt != NULL) {
 710         ctxt->wellFormed = 0;
 711         if (ctxt->recovery == 0)
 712             ctxt->disableSAX = 1;
 713     }
 714 }
 715
 716 /**
 717  * xmlFatalErrMsgStrIntStr:
 718  * @ctxt:  an XML parser context
 719  * @error:  the error number
 720  * @msg:  the error message
 721  * @str1:  an string info
 722  * @val:  an integer value
 723  * @str2:  an string info
 724  *
 725  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 726  */
 727 static void LIBXML_ATTR_FORMAT(3,0)
 728 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
 729                   const char *msg, const xmlChar *str1, int val,
 730                   const xmlChar *str2)
 731 {
 732     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 733         (ctxt->instate == XML_PARSER_EOF))
 734         return;
 735     if (ctxt != NULL)
 736         ctxt->errNo = error;
 737     __xmlRaiseError(NULL, NULL, NULL,
 738                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
 739                     NULL, 0, (const char *) str1, (const char *) str2,
 740                     NULL, val, 0, msg, str1, val, str2);
 741     if (ctxt != NULL) {
 742         ctxt->wellFormed = 0;
 743         if (ctxt->recovery == 0)
 744             ctxt->disableSAX = 1;
 745     }
 746 }
 747
 748 /**
 749  * xmlFatalErrMsgStr:
 750  * @ctxt:  an XML parser context
 751  * @error:  the error number
 752  * @msg:  the error message
 753  * @val:  a string value
 754  *
 755  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 756  */
 757 static void LIBXML_ATTR_FORMAT(3,0)
 758 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
 759                   const char *msg, const xmlChar * val)
 760 {
 761     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 762         (ctxt->instate == XML_PARSER_EOF))
 763         return;
 764     if (ctxt != NULL)
 765         ctxt->errNo = error;
 766     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
 767                     XML_FROM_PARSER, error, XML_ERR_FATAL,
 768                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
 769                     val);
 770     if (ctxt != NULL) {
 771         ctxt->wellFormed = 0;
 772         if (ctxt->recovery == 0)
 773             ctxt->disableSAX = 1;
 774     }
 775 }
 776
 777 /**
 778  * xmlErrMsgStr:
 779  * @ctxt:  an XML parser context
 780  * @error:  the error number
 781  * @msg:  the error message
 782  * @val:  a string value
 783  *
 784  * Handle a non fatal parser error
 785  */
 786 static void LIBXML_ATTR_FORMAT(3,0)
 787 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
 788                   const char *msg, const xmlChar * val)
 789 {
 790     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 791         (ctxt->instate == XML_PARSER_EOF))
 792         return;
 793     if (ctxt != NULL)
 794         ctxt->errNo = error;
 795     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
 796                     XML_FROM_PARSER, error, XML_ERR_ERROR,
 797                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
 798                     val);
 799 }
 800
 801 /**
 802  * xmlNsErr:
 803  * @ctxt:  an XML parser context
 804  * @error:  the error number
 805  * @msg:  the message
 806  * @info1:  extra information string
 807  * @info2:  extra information string
 808  *
 809  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 810  */
 811 static void LIBXML_ATTR_FORMAT(3,0)
 812 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
 813          const char *msg,
 814          const xmlChar * info1, const xmlChar * info2,
 815          const xmlChar * info3)
 816 {
 817     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 818         (ctxt->instate == XML_PARSER_EOF))
 819         return;
 820     if (ctxt != NULL)
 821         ctxt->errNo = error;
 822     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
 823                     XML_ERR_ERROR, NULL, 0, (const char *) info1,
 824                     (const char *) info2, (const char *) info3, 0, 0, msg,
 825                     info1, info2, info3);
 826     if (ctxt != NULL)
 827         ctxt->nsWellFormed = 0;
 828 }
 829
 830 /**
 831  * xmlNsWarn
 832  * @ctxt:  an XML parser context
 833  * @error:  the error number
 834  * @msg:  the message
 835  * @info1:  extra information string
 836  * @info2:  extra information string
 837  *
 838  * Handle a namespace warning error
 839  */
 840 static void LIBXML_ATTR_FORMAT(3,0)
 841 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
 842          const char *msg,
 843          const xmlChar * info1, const xmlChar * info2,
 844          const xmlChar * info3)
 845 {
 846     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 847         (ctxt->instate == XML_PARSER_EOF))
 848         return;
 849     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
 850                     XML_ERR_WARNING, NULL, 0, (const char *) info1,
 851                     (const char *) info2, (const char *) info3, 0, 0, msg,
 852                     info1, info2, info3);
 853 }
 854
 855 /************************************************************************
 856  *                                                                      *
 857  *              Library wide options                                    *
 858  *                                                                      *
 859  ************************************************************************/
 860
 861 /**
 862   * xmlHasFeature:
 863   * @feature: the feature to be examined
 864   *
 865   * Examines if the library has been compiled with a given feature.
 866   *
 867   * Returns a non-zero value if the feature exist, otherwise zero.
 868   * Returns zero (0) if the feature does not exist or an unknown
 869   * unknown feature is requested, non-zero otherwise.
 870   */
 871 int
 872 xmlHasFeature(xmlFeature feature)
 873 {
 874     switch (feature) {
 875         case XML_WITH_THREAD:
 876 #ifdef LIBXML_THREAD_ENABLED
 877             return(1);
 878 #else
 879             return(0);
 880 #endif
 881         case XML_WITH_TREE:
 882 #ifdef LIBXML_TREE_ENABLED
 883             return(1);
 884 #else
 885             return(0);
 886 #endif
 887         case XML_WITH_OUTPUT:
 888 #ifdef LIBXML_OUTPUT_ENABLED
 889             return(1);
 890 #else
 891             return(0);
 892 #endif
 893         case XML_WITH_PUSH:
 894 #ifdef LIBXML_PUSH_ENABLED
 895             return(1);
 896 #else
 897             return(0);
 898 #endif
 899         case XML_WITH_READER:
 900 #ifdef LIBXML_READER_ENABLED
 901             return(1);
 902 #else
 903             return(0);
 904 #endif
 905         case XML_WITH_PATTERN:
 906 #ifdef LIBXML_PATTERN_ENABLED
 907             return(1);
 908 #else
 909             return(0);
 910 #endif
 911         case XML_WITH_WRITER:
 912 #ifdef LIBXML_WRITER_ENABLED
 913             return(1);
 914 #else
 915             return(0);
 916 #endif
 917         case XML_WITH_SAX1:
 918 #ifdef LIBXML_SAX1_ENABLED
 919             return(1);
 920 #else
 921             return(0);
 922 #endif
 923         case XML_WITH_FTP:
 924 #ifdef LIBXML_FTP_ENABLED
 925             return(1);
 926 #else
 927             return(0);
 928 #endif
 929         case XML_WITH_HTTP:
 930 #ifdef LIBXML_HTTP_ENABLED
 931             return(1);
 932 #else
 933             return(0);
 934 #endif
 935         case XML_WITH_VALID:
 936 #ifdef LIBXML_VALID_ENABLED
 937             return(1);
 938 #else
 939             return(0);
 940 #endif
 941         case XML_WITH_HTML:
 942 #ifdef LIBXML_HTML_ENABLED
 943             return(1);
 944 #else
 945             return(0);
 946 #endif
 947         case XML_WITH_LEGACY:
 948 #ifdef LIBXML_LEGACY_ENABLED
 949             return(1);
 950 #else
 951             return(0);
 952 #endif
 953         case XML_WITH_C14N:
 954 #ifdef LIBXML_C14N_ENABLED
 955             return(1);
 956 #else
 957             return(0);
 958 #endif
 959         case XML_WITH_CATALOG:
 960 #ifdef LIBXML_CATALOG_ENABLED
 961             return(1);
 962 #else
 963             return(0);
 964 #endif
 965         case XML_WITH_XPATH:
 966 #ifdef LIBXML_XPATH_ENABLED
 967             return(1);
 968 #else
 969             return(0);
 970 #endif
 971         case XML_WITH_XPTR:
 972 #ifdef LIBXML_XPTR_ENABLED
 973             return(1);
 974 #else
 975             return(0);
 976 #endif
 977         case XML_WITH_XINCLUDE:
 978 #ifdef LIBXML_XINCLUDE_ENABLED
 979             return(1);
 980 #else
 981             return(0);
 982 #endif
 983         case XML_WITH_ICONV:
 984 #ifdef LIBXML_ICONV_ENABLED
 985             return(1);
 986 #else
 987             return(0);
 988 #endif
 989         case XML_WITH_ISO8859X:
 990 #ifdef LIBXML_ISO8859X_ENABLED
 991             return(1);
 992 #else
 993             return(0);
 994 #endif
 995         case XML_WITH_UNICODE:
 996 #ifdef LIBXML_UNICODE_ENABLED
 997             return(1);
 998 #else
 999             return(0);
1000 #endif
1001         case XML_WITH_REGEXP:
1002 #ifdef LIBXML_REGEXP_ENABLED
1003             return(1);
1004 #else
1005             return(0);
1006 #endif
1007         case XML_WITH_AUTOMATA:
1008 #ifdef LIBXML_AUTOMATA_ENABLED
1009             return(1);
1010 #else
1011             return(0);
1012 #endif
1013         case XML_WITH_EXPR:
1014 #ifdef LIBXML_EXPR_ENABLED
1015             return(1);
1016 #else
1017             return(0);
1018 #endif
1019         case XML_WITH_SCHEMAS:
1020 #ifdef LIBXML_SCHEMAS_ENABLED
1021             return(1);
1022 #else
1023             return(0);
1024 #endif
1025         case XML_WITH_SCHEMATRON:
1026 #ifdef LIBXML_SCHEMATRON_ENABLED
1027             return(1);
1028 #else
1029             return(0);
1030 #endif
1031         case XML_WITH_MODULES:
1032 #ifdef LIBXML_MODULES_ENABLED
1033             return(1);
1034 #else
1035             return(0);
1036 #endif
1037         case XML_WITH_DEBUG:
1038 #ifdef LIBXML_DEBUG_ENABLED
1039             return(1);
1040 #else
1041             return(0);
1042 #endif
1043         case XML_WITH_DEBUG_MEM:
1044 #ifdef DEBUG_MEMORY_LOCATION
1045             return(1);
1046 #else
1047             return(0);
1048 #endif
1049         case XML_WITH_DEBUG_RUN:
1050 #ifdef LIBXML_DEBUG_RUNTIME
1051             return(1);
1052 #else
1053             return(0);
1054 #endif
1055         case XML_WITH_ZLIB:
1056 #ifdef LIBXML_ZLIB_ENABLED
1057             return(1);
1058 #else
1059             return(0);
1060 #endif
1061         case XML_WITH_LZMA:
1062 #ifdef LIBXML_LZMA_ENABLED
1063             return(1);
1064 #else
1065             return(0);
1066 #endif
1067         case XML_WITH_ICU:
1068 #ifdef LIBXML_ICU_ENABLED
1069             return(1);
1070 #else
1071             return(0);
1072 #endif
1073         default:
1074             break;
1075      }
1076      return(0);
1077 }
1078
1079 /************************************************************************
1080  *                                                                      *
1081  *              SAX2 defaulted attributes handling                      *
1082  *                                                                      *
1083  ************************************************************************/
1084
1085 /**
1086  * xmlDetectSAX2:
1087  * @ctxt:  an XML parser context
1088  *
1089  * Do the SAX2 detection and specific initialization
1090  */
1091 static void
1092 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1093     xmlSAXHandlerPtr sax;
1094
1095     /* Avoid unused variable warning if features are disabled. */
1096     (void) sax;
1097
1098     if (ctxt == NULL) return;
1099     sax = ctxt->sax;
1100 #ifdef LIBXML_SAX1_ENABLED
1101     if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1102         ((sax->startElementNs != NULL) ||
1103          (sax->endElementNs != NULL) ||
1104          ((sax->startElement == NULL) && (sax->endElement == NULL))))
1105         ctxt->sax2 = 1;
1106 #else
1107     ctxt->sax2 = 1;
1108 #endif /* LIBXML_SAX1_ENABLED */
1109
1110     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1111     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1112     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1113     if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1114                 (ctxt->str_xml_ns == NULL)) {
1115         xmlErrMemory(ctxt, NULL);
1116     }
1117 }
1118
/*
 * Table of the defaulted attributes declared for one element.
 *
 * Each attribute occupies 5 consecutive slots of @values, filled in by
 * xmlAddDefAttrs(): local name, prefix (or NULL), value, pointer to the
 * end of the value, and either the string "external" or NULL.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;        /* number of defaulted attributes on that element */
    int maxAttrs;       /* capacity, counted in attributes (5 slots each) */
#if __STDC_VERSION__ >= 199901L
    /* Using a C99 flexible array member avoids UBSan errors. */
    const xmlChar *values[]; /* array of localname/prefix/values/external */
#else
    /* Pre-C99 fallback: declared with room for a single attribute. */
    const xmlChar *values[5];
#endif
};
1131
1132 /**
1133  * xmlAttrNormalizeSpace:
1134  * @src: the source string
1135  * @dst: the target string
1136  *
1137  * Normalize the space in non CDATA attribute values:
1138  * If the attribute type is not CDATA, then the XML processor MUST further
1139  * process the normalized attribute value by discarding any leading and
1140  * trailing space (#x20) characters, and by replacing sequences of space
1141  * (#x20) characters by a single space (#x20) character.
1142  * Note that the size of dst need to be at least src, and if one doesn't need
1143  * to preserve dst (and it doesn't come from a dictionary or read-only) then
1144  * passing src as dst is just fine.
1145  *
1146  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1147  *         is needed.
1148  */
1149 static xmlChar *
1150 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1151 {
1152     if ((src == NULL) || (dst == NULL))
1153         return(NULL);
1154
1155     while (*src == 0x20) src++;
1156     while (*src != 0) {
1157         if (*src == 0x20) {
1158             while (*src == 0x20) src++;
1159             if (*src != 0)
1160                 *dst++ = 0x20;
1161         } else {
1162             *dst++ = *src++;
1163         }
1164     }
1165     *dst = 0;
1166     if (dst == src)
1167        return(NULL);
1168     return(dst);
1169 }
1170
1171 /**
1172  * xmlAttrNormalizeSpace2:
1173  * @src: the source string
1174  *
1175  * Normalize the space in non CDATA attribute values, a slightly more complex
1176  * front end to avoid allocation problems when running on attribute values
1177  * coming from the input.
1178  *
1179  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1180  *         is needed.
1181  */
1182 static const xmlChar *
1183 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1184 {
1185     int i;
1186     int remove_head = 0;
1187     int need_realloc = 0;
1188     const xmlChar *cur;
1189
1190     if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1191         return(NULL);
1192     i = *len;
1193     if (i <= 0)
1194         return(NULL);
1195
1196     cur = src;
1197     while (*cur == 0x20) {
1198         cur++;
1199         remove_head++;
1200     }
1201     while (*cur != 0) {
1202         if (*cur == 0x20) {
1203             cur++;
1204             if ((*cur == 0x20) || (*cur == 0)) {
1205                 need_realloc = 1;
1206                 break;
1207             }
1208         } else
1209             cur++;
1210     }
1211     if (need_realloc) {
1212         xmlChar *ret;
1213
1214         ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1215         if (ret == NULL) {
1216             xmlErrMemory(ctxt, NULL);
1217             return(NULL);
1218         }
1219         xmlAttrNormalizeSpace(ret, ret);
1220         *len = (int) strlen((const char *)ret);
1221         return(ret);
1222     } else if (remove_head) {
1223         *len -= remove_head;
1224         memmove(src, src + remove_head, 1 + *len);
1225         return(src);
1226     }
1227     return(NULL);
1228 }
1229
1230 /**
1231  * xmlAddDefAttrs:
1232  * @ctxt:  an XML parser context
1233  * @fullname:  the element fullname
1234  * @fullattr:  the attribute fullname
1235  * @value:  the attribute value
1236  *
1237  * Add a defaulted attribute for an element
1238  */
1239 static void
1240 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1241                const xmlChar *fullname,
1242                const xmlChar *fullattr,
1243                const xmlChar *value) {
1244     xmlDefAttrsPtr defaults;
1245     int len;
1246     const xmlChar *name;
1247     const xmlChar *prefix;
1248
1249     /*
1250      * Allows to detect attribute redefinitions
1251      */
1252     if (ctxt->attsSpecial != NULL) {
1253         if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1254             return;
1255     }
1256
1257     if (ctxt->attsDefault == NULL) {
1258         ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1259         if (ctxt->attsDefault == NULL)
1260             goto mem_error;
1261     }
1262
1263     /*
1264      * split the element name into prefix:localname , the string found
1265      * are within the DTD and then not associated to namespace names.
1266      */
1267     name = xmlSplitQName3(fullname, &len);
1268     if (name == NULL) {
1269         name = xmlDictLookup(ctxt->dict, fullname, -1);
1270         prefix = NULL;
1271     } else {
1272         name = xmlDictLookup(ctxt->dict, name, -1);
1273         prefix = xmlDictLookup(ctxt->dict, fullname, len);
1274     }
1275
1276     /*
1277      * make sure there is some storage
1278      */
1279     defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1280     if (defaults == NULL) {
1281         defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1282                            (4 * 5) * sizeof(const xmlChar *));
1283         if (defaults == NULL)
1284             goto mem_error;
1285         defaults->nbAttrs = 0;
1286         defaults->maxAttrs = 4;
1287         if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1288                                 defaults, NULL) < 0) {
1289             xmlFree(defaults);
1290             goto mem_error;
1291         }
1292     } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1293         xmlDefAttrsPtr temp;
1294
1295         temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1296                        (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1297         if (temp == NULL)
1298             goto mem_error;
1299         defaults = temp;
1300         defaults->maxAttrs *= 2;
1301         if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1302                                 defaults, NULL) < 0) {
1303             xmlFree(defaults);
1304             goto mem_error;
1305         }
1306     }
1307
1308     /*
1309      * Split the element name into prefix:localname , the string found
1310      * are within the DTD and hen not associated to namespace names.
1311      */
1312     name = xmlSplitQName3(fullattr, &len);
1313     if (name == NULL) {
1314         name = xmlDictLookup(ctxt->dict, fullattr, -1);
1315         prefix = NULL;
1316     } else {
1317         name = xmlDictLookup(ctxt->dict, name, -1);
1318         prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1319     }
1320
1321     defaults->values[5 * defaults->nbAttrs] = name;
1322     defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1323     /* intern the string and precompute the end */
1324     len = xmlStrlen(value);
1325     value = xmlDictLookup(ctxt->dict, value, len);
1326     defaults->values[5 * defaults->nbAttrs + 2] = value;
1327     defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1328     if (ctxt->external)
1329         defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1330     else
1331         defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1332     defaults->nbAttrs++;
1333
1334     return;
1335
1336 mem_error:
1337     xmlErrMemory(ctxt, NULL);
1338     return;
1339 }
1340
1341 /**
1342  * xmlAddSpecialAttr:
1343  * @ctxt:  an XML parser context
1344  * @fullname:  the element fullname
1345  * @fullattr:  the attribute fullname
1346  * @type:  the attribute type
1347  *
1348  * Register this attribute type
1349  */
1350 static void
1351 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1352                   const xmlChar *fullname,
1353                   const xmlChar *fullattr,
1354                   int type)
1355 {
1356     if (ctxt->attsSpecial == NULL) {
1357         ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1358         if (ctxt->attsSpecial == NULL)
1359             goto mem_error;
1360     }
1361
1362     if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1363         return;
1364
1365     xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1366                      (void *) (ptrdiff_t) type);
1367     return;
1368
1369 mem_error:
1370     xmlErrMemory(ctxt, NULL);
1371     return;
1372 }
1373
1374 /**
1375  * xmlCleanSpecialAttrCallback:
1376  *
1377  * Removes CDATA attributes from the special attribute table
1378  */
1379 static void
1380 xmlCleanSpecialAttrCallback(void *payload, void *data,
1381                             const xmlChar *fullname, const xmlChar *fullattr,
1382                             const xmlChar *unused ATTRIBUTE_UNUSED) {
1383     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1384
1385     if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1386         xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1387     }
1388 }
1389
1390 /**
1391  * xmlCleanSpecialAttr:
1392  * @ctxt:  an XML parser context
1393  *
1394  * Trim the list of attributes defined to remove all those of type
1395  * CDATA as they are not special. This call should be done when finishing
1396  * to parse the DTD and before starting to parse the document root.
1397  */
1398 static void
1399 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1400 {
1401     if (ctxt->attsSpecial == NULL)
1402         return;
1403
1404     xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1405
1406     if (xmlHashSize(ctxt->attsSpecial) == 0) {
1407         xmlHashFree(ctxt->attsSpecial, NULL);
1408         ctxt->attsSpecial = NULL;
1409     }
1410     return;
1411 }
1412
1413 /**
1414  * xmlCheckLanguageID:
1415  * @lang:  pointer to the string value
1416  *
1417  * Checks that the value conforms to the LanguageID production:
1418  *
1419  * NOTE: this is somewhat deprecated, those productions were removed from
1420  *       the XML Second edition.
1421  *
1422  * [33] LanguageID ::= Langcode ('-' Subcode)*
1423  * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1424  * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1425  * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1426  * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1427  * [38] Subcode ::= ([a-z] | [A-Z])+
1428  *
1429  * The current REC reference the successors of RFC 1766, currently 5646
1430  *
1431  * http://www.rfc-editor.org/rfc/rfc5646.txt
1432  * langtag       = language
1433  *                 ["-" script]
1434  *                 ["-" region]
1435  *                 *("-" variant)
1436  *                 *("-" extension)
1437  *                 ["-" privateuse]
1438  * language      = 2*3ALPHA            ; shortest ISO 639 code
1439  *                 ["-" extlang]       ; sometimes followed by
1440  *                                     ; extended language subtags
1441  *               / 4ALPHA              ; or reserved for future use
1442  *               / 5*8ALPHA            ; or registered language subtag
1443  *
1444  * extlang       = 3ALPHA              ; selected ISO 639 codes
1445  *                 *2("-" 3ALPHA)      ; permanently reserved
1446  *
1447  * script        = 4ALPHA              ; ISO 15924 code
1448  *
1449  * region        = 2ALPHA              ; ISO 3166-1 code
1450  *               / 3DIGIT              ; UN M.49 code
1451  *
1452  * variant       = 5*8alphanum         ; registered variants
1453  *               / (DIGIT 3alphanum)
1454  *
1455  * extension     = singleton 1*("-" (2*8alphanum))
1456  *
1457  *                                     ; Single alphanumerics
1458  *                                     ; "x" reserved for private use
1459  * singleton     = DIGIT               ; 0 - 9
1460  *               / %x41-57             ; A - W
1461  *               / %x59-5A             ; Y - Z
1462  *               / %x61-77             ; a - w
1463  *               / %x79-7A             ; y - z
1464  *
1465  * it sounds right to still allow Irregular i-xxx IANA and user codes too
1466  * The parser below doesn't try to cope with extension or privateuse
1467  * that could be added but that's not interoperable anyway
1468  *
1469  * Returns 1 if correct 0 otherwise
1470  **/
1471 int
1472 xmlCheckLanguageID(const xmlChar * lang)
1473 {
1474     const xmlChar *cur = lang, *nxt;
1475
1476     if (cur == NULL)
1477         return (0);
1478     if (((cur[0] == 'i') && (cur[1] == '-')) ||
1479         ((cur[0] == 'I') && (cur[1] == '-')) ||
1480         ((cur[0] == 'x') && (cur[1] == '-')) ||
1481         ((cur[0] == 'X') && (cur[1] == '-'))) {
1482         /*
1483          * Still allow IANA code and user code which were coming
1484          * from the previous version of the XML-1.0 specification
1485          * it's deprecated but we should not fail
1486          */
1487         cur += 2;
1488         while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1489                ((cur[0] >= 'a') && (cur[0] <= 'z')))
1490             cur++;
1491         return(cur[0] == 0);
1492     }
1493     nxt = cur;
1494     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1495            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1496            nxt++;
1497     if (nxt - cur >= 4) {
1498         /*
1499          * Reserved
1500          */
1501         if ((nxt - cur > 8) || (nxt[0] != 0))
1502             return(0);
1503         return(1);
1504     }
1505     if (nxt - cur < 2)
1506         return(0);
1507     /* we got an ISO 639 code */
1508     if (nxt[0] == 0)
1509         return(1);
1510     if (nxt[0] != '-')
1511         return(0);
1512
1513     nxt++;
1514     cur = nxt;
1515     /* now we can have extlang or script or region or variant */
1516     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1517         goto region_m49;
1518
1519     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1520            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1521            nxt++;
1522     if (nxt - cur == 4)
1523         goto script;
1524     if (nxt - cur == 2)
1525         goto region;
1526     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1527         goto variant;
1528     if (nxt - cur != 3)
1529         return(0);
1530     /* we parsed an extlang */
1531     if (nxt[0] == 0)
1532         return(1);
1533     if (nxt[0] != '-')
1534         return(0);
1535
1536     nxt++;
1537     cur = nxt;
1538     /* now we can have script or region or variant */
1539     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1540         goto region_m49;
1541
1542     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1543            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1544            nxt++;
1545     if (nxt - cur == 2)
1546         goto region;
1547     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1548         goto variant;
1549     if (nxt - cur != 4)
1550         return(0);
1551     /* we parsed a script */
1552 script:
1553     if (nxt[0] == 0)
1554         return(1);
1555     if (nxt[0] != '-')
1556         return(0);
1557
1558     nxt++;
1559     cur = nxt;
1560     /* now we can have region or variant */
1561     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1562         goto region_m49;
1563
1564     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1565            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1566            nxt++;
1567
1568     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1569         goto variant;
1570     if (nxt - cur != 2)
1571         return(0);
1572     /* we parsed a region */
1573 region:
1574     if (nxt[0] == 0)
1575         return(1);
1576     if (nxt[0] != '-')
1577         return(0);
1578
1579     nxt++;
1580     cur = nxt;
1581     /* now we can just have a variant */
1582     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1583            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1584            nxt++;
1585
1586     if ((nxt - cur < 5) || (nxt - cur > 8))
1587         return(0);
1588
1589     /* we parsed a variant */
1590 variant:
1591     if (nxt[0] == 0)
1592         return(1);
1593     if (nxt[0] != '-')
1594         return(0);
1595     /* extensions and private use subtags not checked */
1596     return (1);
1597
1598 region_m49:
1599     if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1600         ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1601         nxt += 3;
1602         goto region;
1603     }
1604     return(0);
1605 }
1606
1607 /************************************************************************
1608  *                                                                      *
1609  *              Parser stacks related functions and macros              *
1610  *                                                                      *
1611  ************************************************************************/
1612
/*
 * Forward declaration of a file-local function, so that it can be
 * referenced before its definition.
 */
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                            const xmlChar ** str);
1615
1616 #ifdef SAX2
1617 /**
1618  * nsPush:
1619  * @ctxt:  an XML parser context
1620  * @prefix:  the namespace prefix or NULL
1621  * @URL:  the namespace name
1622  *
1623  * Pushes a new parser namespace on top of the ns stack
1624  *
1625  * Returns -1 in case of error, -2 if the namespace should be discarded
1626  *         and the index in the stack otherwise.
1627  */
1628 static int
1629 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1630 {
1631     if (ctxt->options & XML_PARSE_NSCLEAN) {
1632         int i;
1633         for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1634             if (ctxt->nsTab[i] == prefix) {
1635                 /* in scope */
1636                 if (ctxt->nsTab[i + 1] == URL)
1637                     return(-2);
1638                 /* out of scope keep it */
1639                 break;
1640             }
1641         }
1642     }
1643     if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1644         ctxt->nsMax = 10;
1645         ctxt->nsNr = 0;
1646         ctxt->nsTab = (const xmlChar **)
1647                       xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1648         if (ctxt->nsTab == NULL) {
1649             xmlErrMemory(ctxt, NULL);
1650             ctxt->nsMax = 0;
1651             return (-1);
1652         }
1653     } else if (ctxt->nsNr >= ctxt->nsMax) {
1654         const xmlChar ** tmp;
1655         ctxt->nsMax *= 2;
1656         tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1657                                     ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1658         if (tmp == NULL) {
1659             xmlErrMemory(ctxt, NULL);
1660             ctxt->nsMax /= 2;
1661             return (-1);
1662         }
1663         ctxt->nsTab = tmp;
1664     }
1665     ctxt->nsTab[ctxt->nsNr++] = prefix;
1666     ctxt->nsTab[ctxt->nsNr++] = URL;
1667     return (ctxt->nsNr);
1668 }
1669 /**
1670  * nsPop:
1671  * @ctxt: an XML parser context
1672  * @nr:  the number to pop
1673  *
1674  * Pops the top @nr parser prefix/namespace from the ns stack
1675  *
1676  * Returns the number of namespaces removed
1677  */
1678 static int
1679 nsPop(xmlParserCtxtPtr ctxt, int nr)
1680 {
1681     int i;
1682
1683     if (ctxt->nsTab == NULL) return(0);
1684     if (ctxt->nsNr < nr) {
1685         xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1686         nr = ctxt->nsNr;
1687     }
1688     if (ctxt->nsNr <= 0)
1689         return (0);
1690
1691     for (i = 0;i < nr;i++) {
1692          ctxt->nsNr--;
1693          ctxt->nsTab[ctxt->nsNr] = NULL;
1694     }
1695     return(nr);
1696 }
1697 #endif
1698
1699 static int
1700 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1701     const xmlChar **atts;
1702     int *attallocs;
1703     int maxatts;
1704
1705     if (ctxt->atts == NULL) {
1706         maxatts = 55; /* allow for 10 attrs by default */
1707         atts = (const xmlChar **)
1708                xmlMalloc(maxatts * sizeof(xmlChar *));
1709         if (atts == NULL) goto mem_error;
1710         ctxt->atts = atts;
1711         attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1712         if (attallocs == NULL) goto mem_error;
1713         ctxt->attallocs = attallocs;
1714         ctxt->maxatts = maxatts;
1715     } else if (nr + 5 > ctxt->maxatts) {
1716         maxatts = (nr + 5) * 2;
1717         atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1718                                      maxatts * sizeof(const xmlChar *));
1719         if (atts == NULL) goto mem_error;
1720         ctxt->atts = atts;
1721         attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1722                                      (maxatts / 5) * sizeof(int));
1723         if (attallocs == NULL) goto mem_error;
1724         ctxt->attallocs = attallocs;
1725         ctxt->maxatts = maxatts;
1726     }
1727     return(ctxt->maxatts);
1728 mem_error:
1729     xmlErrMemory(ctxt, NULL);
1730     return(-1);
1731 }
1732
1733 /**
1734  * inputPush:
1735  * @ctxt:  an XML parser context
1736  * @value:  the parser input
1737  *
1738  * Pushes a new parser input on top of the input stack
1739  *
1740  * Returns -1 in case of error, the index in the stack otherwise
1741  */
1742 int
1743 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1744 {
1745     if ((ctxt == NULL) || (value == NULL))
1746         return(-1);
1747     if (ctxt->inputNr >= ctxt->inputMax) {
1748         ctxt->inputMax *= 2;
1749         ctxt->inputTab =
1750             (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1751                                              ctxt->inputMax *
1752                                              sizeof(ctxt->inputTab[0]));
1753         if (ctxt->inputTab == NULL) {
1754             xmlErrMemory(ctxt, NULL);
1755             ctxt->inputMax /= 2;
1756             return (-1);
1757         }
1758     }
1759     ctxt->inputTab[ctxt->inputNr] = value;
1760     ctxt->input = value;
1761     return (ctxt->inputNr++);
1762 }
1763 /**
1764  * inputPop:
1765  * @ctxt: an XML parser context
1766  *
1767  * Pops the top parser input from the input stack
1768  *
1769  * Returns the input just removed
1770  */
1771 xmlParserInputPtr
1772 inputPop(xmlParserCtxtPtr ctxt)
1773 {
1774     xmlParserInputPtr ret;
1775
1776     if (ctxt == NULL)
1777         return(NULL);
1778     if (ctxt->inputNr <= 0)
1779         return (NULL);
1780     ctxt->inputNr--;
1781     if (ctxt->inputNr > 0)
1782         ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1783     else
1784         ctxt->input = NULL;
1785     ret = ctxt->inputTab[ctxt->inputNr];
1786     ctxt->inputTab[ctxt->inputNr] = NULL;
1787     return (ret);
1788 }
1789 /**
1790  * nodePush:
1791  * @ctxt:  an XML parser context
1792  * @value:  the element node
1793  *
1794  * Pushes a new element node on top of the node stack
1795  *
1796  * Returns -1 in case of error, the index in the stack otherwise
1797  */
1798 int
1799 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1800 {
1801     if (ctxt == NULL) return(0);
1802     if (ctxt->nodeNr >= ctxt->nodeMax) {
1803         xmlNodePtr *tmp;
1804
1805         tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1806                                       ctxt->nodeMax * 2 *
1807                                       sizeof(ctxt->nodeTab[0]));
1808         if (tmp == NULL) {
1809             xmlErrMemory(ctxt, NULL);
1810             return (-1);
1811         }
1812         ctxt->nodeTab = tmp;
1813         ctxt->nodeMax *= 2;
1814     }
1815     if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1816         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1817         xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1818                  "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1819                           xmlParserMaxDepth);
1820         xmlHaltParser(ctxt);
1821         return(-1);
1822     }
1823     ctxt->nodeTab[ctxt->nodeNr] = value;
1824     ctxt->node = value;
1825     return (ctxt->nodeNr++);
1826 }
1827
1828 /**
1829  * nodePop:
1830  * @ctxt: an XML parser context
1831  *
1832  * Pops the top element node from the node stack
1833  *
1834  * Returns the node just removed
1835  */
1836 xmlNodePtr
1837 nodePop(xmlParserCtxtPtr ctxt)
1838 {
1839     xmlNodePtr ret;
1840
1841     if (ctxt == NULL) return(NULL);
1842     if (ctxt->nodeNr <= 0)
1843         return (NULL);
1844     ctxt->nodeNr--;
1845     if (ctxt->nodeNr > 0)
1846         ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1847     else
1848         ctxt->node = NULL;
1849     ret = ctxt->nodeTab[ctxt->nodeNr];
1850     ctxt->nodeTab[ctxt->nodeNr] = NULL;
1851     return (ret);
1852 }
1853
1854 /**
1855  * nameNsPush:
1856  * @ctxt:  an XML parser context
1857  * @value:  the element name
1858  * @prefix:  the element prefix
1859  * @URI:  the element namespace name
1860  * @line:  the current line number for error messages
1861  * @nsNr:  the number of namespaces pushed on the namespace table
1862  *
1863  * Pushes a new element name/prefix/URL on top of the name stack
1864  *
1865  * Returns -1 in case of error, the index in the stack otherwise
1866  */
1867 static int
1868 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1869            const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1870 {
1871     xmlStartTag *tag;
1872
1873     if (ctxt->nameNr >= ctxt->nameMax) {
1874         const xmlChar * *tmp;
1875         xmlStartTag *tmp2;
1876         ctxt->nameMax *= 2;
1877         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1878                                     ctxt->nameMax *
1879                                     sizeof(ctxt->nameTab[0]));
1880         if (tmp == NULL) {
1881             ctxt->nameMax /= 2;
1882             goto mem_error;
1883         }
1884         ctxt->nameTab = tmp;
1885         tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1886                                     ctxt->nameMax *
1887                                     sizeof(ctxt->pushTab[0]));
1888         if (tmp2 == NULL) {
1889             ctxt->nameMax /= 2;
1890             goto mem_error;
1891         }
1892         ctxt->pushTab = tmp2;
1893     } else if (ctxt->pushTab == NULL) {
1894         ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1895                                             sizeof(ctxt->pushTab[0]));
1896         if (ctxt->pushTab == NULL)
1897             goto mem_error;
1898     }
1899     ctxt->nameTab[ctxt->nameNr] = value;
1900     ctxt->name = value;
1901     tag = &ctxt->pushTab[ctxt->nameNr];
1902     tag->prefix = prefix;
1903     tag->URI = URI;
1904     tag->line = line;
1905     tag->nsNr = nsNr;
1906     return (ctxt->nameNr++);
1907 mem_error:
1908     xmlErrMemory(ctxt, NULL);
1909     return (-1);
1910 }
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the entry on top of the name stack. The name below it, if
 * any, becomes the current name of the context; otherwise the current
 * name is reset to NULL.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *removed;
    int top;

    if (ctxt->nameNr <= 0)
        return (NULL);

    /* Slot index of the entry being removed. */
    top = --ctxt->nameNr;

    removed = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;
    return (removed);
}
#endif /* LIBXML_PUSH_ENABLED */
1937
1938 /**
1939  * namePush:
1940  * @ctxt:  an XML parser context
1941  * @value:  the element name
1942  *
1943  * Pushes a new element name on top of the name stack
1944  *
1945  * Returns -1 in case of error, the index in the stack otherwise
1946  */
1947 int
1948 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1949 {
1950     if (ctxt == NULL) return (-1);
1951
1952     if (ctxt->nameNr >= ctxt->nameMax) {
1953         const xmlChar * *tmp;
1954         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1955                                     ctxt->nameMax * 2 *
1956                                     sizeof(ctxt->nameTab[0]));
1957         if (tmp == NULL) {
1958             goto mem_error;
1959         }
1960         ctxt->nameTab = tmp;
1961         ctxt->nameMax *= 2;
1962     }
1963     ctxt->nameTab[ctxt->nameNr] = value;
1964     ctxt->name = value;
1965     return (ctxt->nameNr++);
1966 mem_error:
1967     xmlErrMemory(ctxt, NULL);
1968     return (-1);
1969 }
1970 /**
1971  * namePop:
1972  * @ctxt: an XML parser context
1973  *
1974  * Pops the top element name from the name stack
1975  *
1976  * Returns the name just removed
1977  */
1978 const xmlChar *
1979 namePop(xmlParserCtxtPtr ctxt)
1980 {
1981     const xmlChar *ret;
1982
1983     if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1984         return (NULL);
1985     ctxt->nameNr--;
1986     if (ctxt->nameNr > 0)
1987         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1988     else
1989         ctxt->name = NULL;
1990     ret = ctxt->nameTab[ctxt->nameNr];
1991     ctxt->nameTab[ctxt->nameNr] = NULL;
1992     return (ret);
1993 }
1994
1995 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1996     if (ctxt->spaceNr >= ctxt->spaceMax) {
1997         int *tmp;
1998
1999         ctxt->spaceMax *= 2;
2000         tmp = (int *) xmlRealloc(ctxt->spaceTab,
2001                                  ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
2002         if (tmp == NULL) {
2003             xmlErrMemory(ctxt, NULL);
2004             ctxt->spaceMax /=2;
2005             return(-1);
2006         }
2007         ctxt->spaceTab = tmp;
2008     }
2009     ctxt->spaceTab[ctxt->spaceNr] = val;
2010     ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
2011     return(ctxt->spaceNr++);
2012 }
2013
2014 static int spacePop(xmlParserCtxtPtr ctxt) {
2015     int ret;
2016     if (ctxt->spaceNr <= 0) return(0);
2017     ctxt->spaceNr--;
2018     if (ctxt->spaceNr > 0)
2019         ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
2020     else
2021         ctxt->space = &ctxt->spaceTab[0];
2022     ret = ctxt->spaceTab[ctxt->spaceNr];
2023     ctxt->spaceTab[ctxt->spaceNr] = -1;
2024     return(ret);
2025 }
2026
2027 /*
2028  * Macros for accessing the content. Those should be used only by the parser,
2029  * and not exported.
2030  *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
2033  *
2034  *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2035  *           To be used with extreme caution since operations consuming
2036  *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
2041  *   RAW     same as CUR but in the input buffer, bypass any token
2042  *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
2045  *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2046  *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
2049  * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2050  *
2051  *   NEXT    Skip to the next character, this does the proper decoding
2052  *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2053  *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2054  *   CUR_CHAR(l) returns the current unicode character (int), set l
2055  *           to the number of xmlChars used for the encoding [0-5].
2056  *   CUR_SCHAR  same but operate on a string instead of the context
2057  *   COPY_BUF  copy the current unicode char to the target buffer, increment
2058  *            the index
2059  *   GROW, SHRINK  handling of input buffers
2060  */
2061
/*
 * Raw accessors on the current input of the parser context. They all
 * expect a variable named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are exactly
 * c1..cn. Bytes are compared as unsigned char and evaluation stops at
 * the first mismatch. Every argument is parenthesized so that compound
 * expressions (pointer arithmetic, conditional expressions, ...) can be
 * passed safely.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2085
/*
 * SKIP(val): advance the cursor and the column counter by val xmlChars,
 * then call xmlParserInputGrow() if the cursor now sits on a 0 byte.
 * No newline accounting is done.
 */
#define SKIP(val) do {                                                  \
    ctxt->input->cur += (val),ctxt->input->col+=(val);                  \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
  } while (0)

/*
 * SKIPL(val): advance the cursor by val xmlChars one at a time, bumping
 * the line counter (and resetting the column to 1) on each '\n' and the
 * column counter otherwise, then call xmlParserInputGrow() if the cursor
 * now sits on a 0 byte. The argument is parenthesized so that a compound
 * expression can be passed as the count.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<(val); skipl++) {                                \
        if (*(ctxt->input->cur) == '\n') {                              \
            ctxt->input->line++; ctxt->input->col = 1;                  \
        } else ctxt->input->col++;                                      \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
  } while (0)
2103
2104 #define SHRINK if ((ctxt->progressive == 0) &&                          \
2105                    (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2106                    (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2107         xmlSHRINK (ctxt);
2108
2109 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2110     xmlParserInputShrink(ctxt->input);
2111     if (*ctxt->input->cur == 0)
2112         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2113 }
2114
2115 #define GROW if ((ctxt->progressive == 0) &&                            \
2116                  (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))   \
2117         xmlGROW (ctxt);
2118
2119 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2120     ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2121     ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2122
2123     if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2124          (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2125          ((ctxt->input->buf) &&
2126           (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
2127         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2128         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2129         xmlHaltParser(ctxt);
2130         return;
2131     }
2132     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2133     if ((ctxt->input->cur > ctxt->input->end) ||
2134         (ctxt->input->cur < ctxt->input->base)) {
2135         xmlHaltParser(ctxt);
2136         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2137         return;
2138     }
2139     if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2140         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2141 }
2142
/* SKIP_BLANKS: skip blanks at the cursor, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: move to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the cursor and the column counter by one xmlChar, then
 * call xmlParserInputGrow() if the cursor now sits on a 0 byte.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long.
 * A '\n' bumps the line counter and resets the column to 1, anything
 * else bumps the column counter by one.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
  } while (0)

/* CUR_CHAR / CUR_SCHAR: decode the current character, length goes to l. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. When l is 1 the value is stored as a single xmlChar,
 * otherwise it is written by xmlCopyCharMultiByte(). The macro expands
 * to an if/else statement and must be followed by a semicolon.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))

/* CUR_CONSUMED: input->consumed plus the cursor offset from base. */
#define CUR_CONSUMED \
    (ctxt->input->consumed + (ctxt->input->cur - ctxt->input->base))

2170
2171 /**
2172  * xmlSkipBlankChars:
2173  * @ctxt:  the XML parser context
2174  *
2175  * skip all blanks character found at that point in the input streams.
2176  * It pops up finished entities in the process if allowable at that point.
2177  *
2178  * Returns the number of space chars skipped
2179  */
2180
2181 int
2182 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2183     int res = 0;
2184
2185     /*
2186      * It's Okay to use CUR/NEXT here since all the blanks are on
2187      * the ASCII range.
2188      */
2189     if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2190         (ctxt->instate == XML_PARSER_START)) {
2191         const xmlChar *cur;
2192         /*
2193          * if we are in the document content, go really fast
2194          */
2195         cur = ctxt->input->cur;
2196         while (IS_BLANK_CH(*cur)) {
2197             if (*cur == '\n') {
2198                 ctxt->input->line++; ctxt->input->col = 1;
2199             } else {
2200                 ctxt->input->col++;
2201             }
2202             cur++;
2203             if (res < INT_MAX)
2204                 res++;
2205             if (*cur == 0) {
2206                 ctxt->input->cur = cur;
2207                 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2208                 cur = ctxt->input->cur;
2209             }
2210         }
2211         ctxt->input->cur = cur;
2212     } else {
2213         int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2214
2215         while (1) {
2216             if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2217                 NEXT;
2218             } else if (CUR == '%') {
2219                 /*
2220                  * Need to handle support of entities branching here
2221                  */
2222                 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2223                     break;
2224                 xmlParsePEReference(ctxt);
2225             } else if (CUR == 0) {
2226                 if (ctxt->inputNr <= 1)
2227                     break;
2228                 xmlPopInput(ctxt);
2229             } else {
2230                 break;
2231             }
2232
2233             /*
2234              * Also increase the counter when entering or exiting a PERef.
2235              * The spec says: "When a parameter-entity reference is recognized
2236              * in the DTD and included, its replacement text MUST be enlarged
2237              * by the attachment of one leading and one following space (#x20)
2238              * character."
2239              */
2240             if (res < INT_MAX)
2241                 res++;
2242         }
2243     }
2244     return(res);
2245 }
2246
2247 /************************************************************************
2248  *                                                                      *
2249  *              Commodity functions to handle entities                  *
2250  *                                                                      *
2251  ************************************************************************/
2252
2253 /**
2254  * xmlPopInput:
2255  * @ctxt:  an XML parser context
2256  *
2257  * xmlPopInput: the current input pointed by ctxt->input came to an end
2258  *          pop it and return the next char.
2259  *
2260  * Returns the current xmlChar in the parser context
2261  */
2262 xmlChar
2263 xmlPopInput(xmlParserCtxtPtr ctxt) {
2264     if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2265     if (xmlParserDebugEntities)
2266         xmlGenericError(xmlGenericErrorContext,
2267                 "Popping input %d\n", ctxt->inputNr);
2268     if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2269         (ctxt->instate != XML_PARSER_EOF))
2270         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2271                     "Unfinished entity outside the DTD");
2272     xmlFreeInputStream(inputPop(ctxt));
2273     if (*ctxt->input->cur == 0)
2274         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2275     return(CUR);
2276 }
2277
2278 /**
2279  * xmlPushInput:
2280  * @ctxt:  an XML parser context
2281  * @input:  an XML parser input fragment (entity, XML fragment ...).
2282  *
2283  * xmlPushInput: switch to a new input stream which is stacked on top
2284  *               of the previous one(s).
2285  * Returns -1 in case of error or the index in the input stack
2286  */
2287 int
2288 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2289     int ret;
2290     if (input == NULL) return(-1);
2291
2292     if (xmlParserDebugEntities) {
2293         if ((ctxt->input != NULL) && (ctxt->input->filename))
2294             xmlGenericError(xmlGenericErrorContext,
2295                     "%s(%d): ", ctxt->input->filename,
2296                     ctxt->input->line);
2297         xmlGenericError(xmlGenericErrorContext,
2298                 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2299     }
2300     if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2301         (ctxt->inputNr > 1024)) {
2302         xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2303         while (ctxt->inputNr > 1)
2304             xmlFreeInputStream(inputPop(ctxt));
2305         return(-1);
2306     }
2307     ret = inputPush(ctxt, input);
2308     if (ctxt->instate == XML_PARSER_EOF)
2309         return(-1);
2310     GROW;
2311     return(ret);
2312 }
2313
2314 /**
2315  * xmlParseCharRef:
2316  * @ctxt:  an XML parser context
2317  *
2318  * parse Reference declarations
2319  *
2320  * [66] CharRef ::= '&#' [0-9]+ ';' |
2321  *                  '&#x' [0-9a-fA-F]+ ';'
2322  *
2323  * [ WFC: Legal Character ]
2324  * Characters referred to using character references must match the
2325  * production for Char.
2326  *
2327  * Returns the value parsed (as an int), 0 in case of error
2328  */
2329 int
2330 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2331     int val = 0;
2332     int count = 0;
2333
2334     /*
2335      * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2336      */
2337     if ((RAW == '&') && (NXT(1) == '#') &&
2338         (NXT(2) == 'x')) {
2339         SKIP(3);
2340         GROW;
2341         while (RAW != ';') { /* loop blocked by count */
2342             if (count++ > 20) {
2343                 count = 0;
2344                 GROW;
2345                 if (ctxt->instate == XML_PARSER_EOF)
2346                     return(0);
2347             }
2348             if ((RAW >= '0') && (RAW <= '9'))
2349                 val = val * 16 + (CUR - '0');
2350             else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2351                 val = val * 16 + (CUR - 'a') + 10;
2352             else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2353                 val = val * 16 + (CUR - 'A') + 10;
2354             else {
2355                 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2356                 val = 0;
2357                 break;
2358             }
2359             if (val > 0x110000)
2360                 val = 0x110000;
2361
2362             NEXT;
2363             count++;
2364         }
2365         if (RAW == ';') {
2366             /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2367             ctxt->input->col++;
2368             ctxt->input->cur++;
2369         }
2370     } else if  ((RAW == '&') && (NXT(1) == '#')) {
2371         SKIP(2);
2372         GROW;
2373         while (RAW != ';') { /* loop blocked by count */
2374             if (count++ > 20) {
2375                 count = 0;
2376                 GROW;
2377                 if (ctxt->instate == XML_PARSER_EOF)
2378                     return(0);
2379             }
2380             if ((RAW >= '0') && (RAW <= '9'))
2381                 val = val * 10 + (CUR - '0');
2382             else {
2383                 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2384                 val = 0;
2385                 break;
2386             }
2387             if (val > 0x110000)
2388                 val = 0x110000;
2389
2390             NEXT;
2391             count++;
2392         }
2393         if (RAW == ';') {
2394             /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2395             ctxt->input->col++;
2396             ctxt->input->cur++;
2397         }
2398     } else {
2399         xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2400     }
2401
2402     /*
2403      * [ WFC: Legal Character ]
2404      * Characters referred to using character references must match the
2405      * production for Char.
2406      */
2407     if (val >= 0x110000) {
2408         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2409                 "xmlParseCharRef: character reference out of bounds\n",
2410                 val);
2411     } else if (IS_CHAR(val)) {
2412         return(val);
2413     } else {
2414         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2415                           "xmlParseCharRef: invalid xmlChar value %d\n",
2416                           val);
2417     }
2418     return(0);
2419 }
2420
2421 /**
2422  * xmlParseStringCharRef:
2423  * @ctxt:  an XML parser context
2424  * @str:  a pointer to an index in the string
2425  *
2426  * parse Reference declarations, variant parsing from a string rather
2427  * than an an input flow.
2428  *
2429  * [66] CharRef ::= '&#' [0-9]+ ';' |
2430  *                  '&#x' [0-9a-fA-F]+ ';'
2431  *
2432  * [ WFC: Legal Character ]
2433  * Characters referred to using character references must match the
2434  * production for Char.
2435  *
2436  * Returns the value parsed (as an int), 0 in case of error, str will be
2437  *         updated to the current value of the index
2438  */
2439 static int
2440 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2441     const xmlChar *ptr;
2442     xmlChar cur;
2443     int val = 0;
2444
2445     if ((str == NULL) || (*str == NULL)) return(0);
2446     ptr = *str;
2447     cur = *ptr;
2448     if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2449         ptr += 3;
2450         cur = *ptr;
2451         while (cur != ';') { /* Non input consuming loop */
2452             if ((cur >= '0') && (cur <= '9'))
2453                 val = val * 16 + (cur - '0');
2454             else if ((cur >= 'a') && (cur <= 'f'))
2455                 val = val * 16 + (cur - 'a') + 10;
2456             else if ((cur >= 'A') && (cur <= 'F'))
2457                 val = val * 16 + (cur - 'A') + 10;
2458             else {
2459                 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2460                 val = 0;
2461                 break;
2462             }
2463             if (val > 0x110000)
2464                 val = 0x110000;
2465
2466             ptr++;
2467             cur = *ptr;
2468         }
2469         if (cur == ';')
2470             ptr++;
2471     } else if  ((cur == '&') && (ptr[1] == '#')){
2472         ptr += 2;
2473         cur = *ptr;
2474         while (cur != ';') { /* Non input consuming loops */
2475             if ((cur >= '0') && (cur <= '9'))
2476                 val = val * 10 + (cur - '0');
2477             else {
2478                 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2479                 val = 0;
2480                 break;
2481             }
2482             if (val > 0x110000)
2483                 val = 0x110000;
2484
2485             ptr++;
2486             cur = *ptr;
2487         }
2488         if (cur == ';')
2489             ptr++;
2490     } else {
2491         xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2492         return(0);
2493     }
2494     *str = ptr;
2495
2496     /*
2497      * [ WFC: Legal Character ]
2498      * Characters referred to using character references must match the
2499      * production for Char.
2500      */
2501     if (val >= 0x110000) {
2502         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2503                 "xmlParseStringCharRef: character reference out of bounds\n",
2504                 val);
2505     } else if (IS_CHAR(val)) {
2506         return(val);
2507     } else {
2508         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2509                           "xmlParseStringCharRef: invalid xmlChar value %d\n",
2510                           val);
2511     }
2512     return(0);
2513 }
2514
2515 /**
2516  * xmlParserHandlePEReference:
2517  * @ctxt:  the parser context
2518  *
2519  * [69] PEReference ::= '%' Name ';'
2520  *
2521  * [ WFC: No Recursion ]
2522  * A parsed entity must not contain a recursive
2523  * reference to itself, either directly or indirectly.
2524  *
2525  * [ WFC: Entity Declared ]
2526  * In a document without any DTD, a document with only an internal DTD
2527  * subset which contains no parameter entity references, or a document
2528  * with "standalone='yes'", ...  ... The declaration of a parameter
2529  * entity must precede any reference to it...
2530  *
2531  * [ VC: Entity Declared ]
2532  * In a document with an external subset or external parameter entities
2533  * with "standalone='no'", ...  ... The declaration of a parameter entity
2534  * must precede any reference to it...
2535  *
2536  * [ WFC: In DTD ]
2537  * Parameter-entity references may only appear in the DTD.
2538  * NOTE: misleading but this is handled.
2539  *
2540  * A PEReference may have been detected in the current input stream
2541  * the handling is done accordingly to
2542  *      http://www.w3.org/TR/REC-xml#entproc
2543  * i.e.
2544  *   - Included in literal in entity values
2545  *   - Included as Parameter Entity reference within DTDs
2546  */
2547 void
2548 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2549     switch(ctxt->instate) {
2550         case XML_PARSER_CDATA_SECTION:
2551             return;
2552         case XML_PARSER_COMMENT:
2553             return;
2554         case XML_PARSER_START_TAG:
2555             return;
2556         case XML_PARSER_END_TAG:
2557             return;
2558         case XML_PARSER_EOF:
2559             xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2560             return;
2561         case XML_PARSER_PROLOG:
2562         case XML_PARSER_START:
2563         case XML_PARSER_MISC:
2564             xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2565             return;
2566         case XML_PARSER_ENTITY_DECL:
2567         case XML_PARSER_CONTENT:
2568         case XML_PARSER_ATTRIBUTE_VALUE:
2569         case XML_PARSER_PI:
2570         case XML_PARSER_SYSTEM_LITERAL:
2571         case XML_PARSER_PUBLIC_LITERAL:
2572             /* we just ignore it there */
2573             return;
2574         case XML_PARSER_EPILOG:
2575             xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2576             return;
2577         case XML_PARSER_ENTITY_VALUE:
2578             /*
2579              * NOTE: in the case of entity values, we don't do the
2580              *       substitution here since we need the literal
2581              *       entity value to be able to save the internal
2582              *       subset of the document.
2583              *       This will be handled by xmlStringDecodeEntities
2584              */
2585             return;
2586         case XML_PARSER_DTD:
2587             /*
2588              * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2589              * In the internal DTD subset, parameter-entity references
2590              * can occur only where markup declarations can occur, not
2591              * within markup declarations.
2592              * In that case this is handled in xmlParseMarkupDecl
2593              */
2594             if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2595                 return;
2596             if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2597                 return;
2598             break;
2599         case XML_PARSER_IGNORE:
2600             return;
2601     }
2602
2603     xmlParsePEReference(ctxt);
2604 }
2605
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current one plus n; n is parenthesized so
 * that a compound expression can be passed.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * also the target when the size computation wraps around. On failure
 * buffer and buffer##_size are left untouched.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2620
/**
 * xmlStringLenDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len: the string length
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes an entity string content and processes it to do the adequate
 * substitutions. Character references ("&#...") are handled regardless of
 * @what; general entity references are expanded only when @what contains
 * XML_SUBSTITUTE_REF and parameter entity references only when it contains
 * XML_SUBSTITUTE_PEREF. Scanning stops on a zero character, on any of
 * @end, @end2 or @end3, once @str has advanced to @str + @len, or when the
 * parser state becomes XML_PARSER_EOF.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done, or NULL on
 *      invalid arguments, excessive entity nesting, allocation failure or
 *      a decoding error. The caller must deallocate it !
 */
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
                      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
    xmlChar *buffer = NULL;       /* output buffer, returned on success */
    size_t buffer_size = 0;       /* allocated size of buffer */
    size_t nbchars = 0;           /* number of bytes written so far */

    xmlChar *current = NULL;      /* cursor used to copy an expansion */
    xmlChar *rep = NULL;          /* recursively decoded entity content */
    const xmlChar *last;          /* one past the last byte to consider */
    xmlEntityPtr ent;
    int c,l;

    if ((ctxt == NULL) || (str == NULL) || (len < 0))
        return(NULL);
    last = str + len;

    /*
     * Guard against runaway recursion: more than 40 levels is refused
     * unless XML_PARSE_HUGE is set, more than 1024 is always refused.
     */
    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (ctxt->depth > 1024)) {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     */
    if (str < last)
        c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
           (c != end2) && (c != end3) &&
           (ctxt->instate != XML_PARSER_EOF)) {

        /* Redundant with the loop condition above. */
        if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
            /*
             * Character reference: the helper receives &str so it can
             * advance the read position itself; a zero result aborts.
             */
            int val = xmlParseStringCharRef(ctxt, &str);
            if (val == 0)
                goto int_error;
            /* COPY_BUF presumably appends val and advances nbchars. */
            COPY_BUF(0,buffer,nbchars,val);
            /* Keep XML_PARSER_BUFFER_SIZE bytes of headroom in buffer. */
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
            /* General entity reference, substituted on request only. */
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding Entity Reference: %.30s\n",
                        str);
            ent = xmlParseStringEntityRef(ctxt, &str);
            /* The result of this check is not examined here. */
            xmlParserEntityCheck(ctxt, 0, ent, 0);
            if (ent != NULL)
                ctxt->nbentities += ent->checked / 2;
            if ((ent != NULL) &&
                (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                if (ent->content != NULL) {
                    /* Only the first byte of the content is copied. */
                    COPY_BUF(0,buffer,nbchars,ent->content[0]);
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                } else {
                    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
                            "predefined entity has no content\n");
                    goto int_error;
                }
            } else if ((ent != NULL) && (ent->content != NULL)) {
                /*
                 * Decode the entity content recursively, with the depth
                 * counter raised for the duration of the call.
                 */
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;
                if (rep == NULL) {
                    /* On failure the entity content is emptied in place. */
                    ent->content[0] = 0;
                    goto int_error;
                }

                /* Append the expansion byte by byte, growing as needed. */
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        /* A non-zero check result aborts the decoding. */
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
            } else if (ent != NULL) {
                /*
                 * Entity without content: write the reference back out
                 * as "&name;" instead of expanding it.
                 */
                int i = xmlStrlen(ent->name);
                const xmlChar *cur = ent->name;

                buffer[nbchars++] = '&';
                /* Make room for the whole name before copying it. */
                if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
                    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
                }
                for (;i > 0;i--)
                    buffer[nbchars++] = *cur++;
                buffer[nbchars++] = ';';
            }
        } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
            /* Parameter entity reference, substituted on request only. */
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding PE Reference: %.30s\n", str);
            ent = xmlParseStringPEReference(ctxt, &str);
            /* The result of this check is not examined here. */
            xmlParserEntityCheck(ctxt, 0, ent, 0);
            if (ent != NULL)
                ctxt->nbentities += ent->checked / 2;
            if (ent != NULL) {
                if (ent->content == NULL) {
                    /*
                     * Note: external parsed entities will not be loaded,
                     * it is not required for a non-validating parser to
                     * complete external PEReferences coming from the
                     * internal subset
                     */
                    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
                        ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
                        (ctxt->validate != 0)) {
                        xmlLoadEntityContent(ctxt, ent);
                    } else {
                        xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
                  "not validating will not read content for PE entity %s\n",
                                      ent->name, NULL);
                    }
                }
                /*
                 * ent->content may still be NULL here; the recursive call
                 * then returns NULL and the error path below is taken.
                 */
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;
                if (rep == NULL) {
                    /* On failure any existing content is emptied in place. */
                    if (ent->content != NULL)
                        ent->content[0] = 0;
                    goto int_error;
                }
                /* Append the expansion byte by byte, growing as needed. */
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        /* A non-zero check result aborts the decoding. */
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
            }
        } else {
            /* Ordinary character: copy it and step over its l bytes. */
            COPY_BUF(l,buffer,nbchars,c);
            str += l;
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        }
        /* Fetch the next character, or 0 once the length is exhausted. */
        if (str < last)
            c = CUR_SCHAR(str, l);
        else
            c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

/* Allocation failures report the error, then share the cleanup below. */
mem_error:
    xmlErrMemory(ctxt, NULL);
/* Common failure exit: release any pending expansion and the buffer. */
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}
2817
2818 /**
2819  * xmlStringDecodeEntities:
2820  * @ctxt:  the parser context
2821  * @str:  the input string
2822  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2823  * @end:  an end marker xmlChar, 0 if none
2824  * @end2:  an end marker xmlChar, 0 if none
2825  * @end3:  an end marker xmlChar, 0 if none
2826  *
2827  * Takes a entity string content and process to do the adequate substitutions.
2828  *
2829  * [67] Reference ::= EntityRef | CharRef
2830  *
2831  * [69] PEReference ::= '%' Name ';'
2832  *
2833  * Returns A newly allocated string with the substitution done. The caller
2834  *      must deallocate it !
2835  */
2836 xmlChar *
2837 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2838                         xmlChar end, xmlChar  end2, xmlChar end3) {
2839     if ((ctxt == NULL) || (str == NULL)) return(NULL);
2840     return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2841            end, end2, end3));
2842 }
2843
2844 /************************************************************************
2845  *                                                                      *
2846  *              Commodity functions, cleanup needed ?                   *
2847  *                                                                      *
2848  ************************************************************************/
2849
2850 /**
2851  * areBlanks:
2852  * @ctxt:  an XML parser context
2853  * @str:  a xmlChar *
2854  * @len:  the size of @str
2855  * @blank_chars: we know the chars are blanks
2856  *
2857  * Is this a sequence of blank chars that one can ignore ?
2858  *
2859  * Returns 1 if ignorable 0 otherwise.
2860  */
2861
2862 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2863                      int blank_chars) {
2864     int i, ret;
2865     xmlNodePtr lastChild;
2866
2867     /*
2868      * Don't spend time trying to differentiate them, the same callback is
2869      * used !
2870      */
2871     if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2872         return(0);
2873
2874     /*
2875      * Check for xml:space value.
2876      */
2877     if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2878         (*(ctxt->space) == -2))
2879         return(0);
2880
2881     /*
2882      * Check that the string is made of blanks
2883      */
2884     if (blank_chars == 0) {
2885         for (i = 0;i < len;i++)
2886             if (!(IS_BLANK_CH(str[i]))) return(0);
2887     }
2888
2889     /*
2890      * Look if the element is mixed content in the DTD if available
2891      */
2892     if (ctxt->node == NULL) return(0);
2893     if (ctxt->myDoc != NULL) {
2894         ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2895         if (ret == 0) return(1);
2896         if (ret == 1) return(0);
2897     }
2898
2899     /*
2900      * Otherwise, heuristic :-\
2901      */
2902     if ((RAW != '<') && (RAW != 0xD)) return(0);
2903     if ((ctxt->node->children == NULL) &&
2904         (RAW == '<') && (NXT(1) == '/')) return(0);
2905
2906     lastChild = xmlGetLastChild(ctxt->node);
2907     if (lastChild == NULL) {
2908         if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2909             (ctxt->node->content != NULL)) return(0);
2910     } else if (xmlNodeIsText(lastChild))
2911         return(0);
2912     else if ((ctxt->node->children != NULL) &&
2913              (xmlNodeIsText(ctxt->node->children)))
2914         return(0);
2915     return(1);
2916 }
2917
2918 /************************************************************************
2919  *                                                                      *
2920  *              Extra stuff for namespace support                       *
2921  *      Relates to http://www.w3.org/TR/WD-xml-names                    *
2922  *                                                                      *
2923  ************************************************************************/
2924
/**
 * xmlSplitQName:
 * @ctxt:  an XML parser context, used for error reporting
 * @name:  the qualified name string to split
 * @prefix:  a xmlChar **, receives the prefix or NULL
 *
 * parse an UTF8 encoded XML qualified name string
 *
 * [NS 5] QName ::= (Prefix ':')? LocalPart
 *
 * [NS 6] Prefix ::= NCName
 *
 * [NS 7] LocalPart ::= NCName
 *
 * The split happens at the first ':' found. A name starting with ':',
 * a name whose first ':' is its last character and, unless
 * XML_XML_NAMESPACE is defined, a name starting with "xml:" are all
 * returned whole with *@prefix left NULL.
 *
 * Returns the local part as a newly allocated string, and prefix is
 *   updated to get the Prefix (also newly allocated) if any. Returns
 *   NULL when @prefix or @name is NULL or when a buffer allocation fails.
 */

xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
    xmlChar buf[XML_MAX_NAMELEN + 5];   /* stack storage for short parts */
    xmlChar *buffer = NULL;             /* heap storage for long parts */
    int len = 0;
    int max = XML_MAX_NAMELEN;
    xmlChar *ret = NULL;
    const xmlChar *cur = name;
    int c;

    if (prefix == NULL) return(NULL);
    *prefix = NULL;

    if (cur == NULL) return(NULL);

#ifndef XML_XML_NAMESPACE
    /* xml: prefix is not really a namespace */
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
        (cur[2] == 'l') && (cur[3] == ':'))
        return(xmlStrdup(name));
#endif

    /* nasty but well=formed */
    if (cur[0] == ':')
        return(xmlStrdup(name));

    /* Collect bytes up to the first ':' or the end into the stack buffer. */
    c = *cur++;
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
        buf[len++] = c;
        c = *cur++;
    }
    if (len >= max) {
        /*
         * Okay someone managed to make a huge name, so he's ready to pay
         * for the processing speed.
         */
        max = len * 2;

        buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
        if (buffer == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(NULL);
        }
        /* Carry over what was gathered so far, then continue on the heap. */
        memcpy(buffer, buf, len);
        while ((c != 0) && (c != ':')) { /* tested bigname.xml */
            if (len + 10 > max) {
                xmlChar *tmp;

                max *= 2;
                tmp = (xmlChar *) xmlRealloc(buffer,
                                                max * sizeof(xmlChar));
                if (tmp == NULL) {
                    xmlFree(buffer);
                    xmlErrMemory(ctxt, NULL);
                    return(NULL);
                }
                buffer = tmp;
            }
            buffer[len++] = c;
            c = *cur++;
        }
        buffer[len] = 0;
    }

    /* "name:" with nothing after the colon: keep the string whole. */
    if ((c == ':') && (*cur == 0)) {
        if (buffer != NULL)
            xmlFree(buffer);
        *prefix = NULL;
        return(xmlStrdup(name));
    }

    /*
     * Turn the first part into an allocated string. When the heap buffer
     * is handed over, max goes back to the stack buffer limit so the
     * local part scan below starts in buf again.
     */
    if (buffer == NULL)
        ret = xmlStrndup(buf, len);
    else {
        ret = buffer;
        buffer = NULL;
        max = XML_MAX_NAMELEN;
    }


    if (c == ':') {
        /* What was collected is the prefix; now gather the local part. */
        c = *cur;
        *prefix = ret;
        /*
         * NOTE(review): this appears unreachable, a ':' followed by the
         * terminator already returned above.
         */
        if (c == 0) {
            return(xmlStrndup(BAD_CAST "", 0));
        }
        len = 0;

        /*
         * Check that the first character is proper to start
         * a new name
         */
        if (!(((c >= 0x61) && (c <= 0x7A)) ||
              ((c >= 0x41) && (c <= 0x5A)) ||
              (c == '_') || (c == ':'))) {
            int l;
            int first = CUR_SCHAR(cur, l);

            /* The error is reported but the split still goes on. */
            if (!IS_LETTER(first) && (first != '_')) {
                xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
                            "Name %s is not XML Namespace compliant\n",
                                  name);
            }
        }
        cur++;

        /* Everything after the first ':' belongs to the local part. */
        while ((c != 0) && (len < max)) { /* tested bigname2.xml */
            buf[len++] = c;
            c = *cur++;
        }
        if (len >= max) {
            /*
             * Okay someone managed to make a huge name, so he's ready to pay
             * for the processing speed.
             */
            max = len * 2;

            buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
            if (buffer == NULL) {
                /* *prefix keeps the prefix string on this failure path. */
                xmlErrMemory(ctxt, NULL);
                return(NULL);
            }
            memcpy(buffer, buf, len);
            while (c != 0) { /* tested bigname2.xml */
                if (len + 10 > max) {
                    xmlChar *tmp;

                    max *= 2;
                    tmp = (xmlChar *) xmlRealloc(buffer,
                                                    max * sizeof(xmlChar));
                    if (tmp == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        xmlFree(buffer);
                        return(NULL);
                    }
                    buffer = tmp;
                }
                buffer[len++] = c;
                c = *cur++;
            }
            buffer[len] = 0;
        }

        if (buffer == NULL)
            ret = xmlStrndup(buf, len);
        else {
            ret = buffer;
        }
    }

    return(ret);
}
3095
3096 /************************************************************************
3097  *                                                                      *
3098  *                      The parser itself                               *
3099  *      Relates to http://www.w3.org/TR/REC-xml                         *
3100  *                                                                      *
3101  ************************************************************************/
3102
3103 /************************************************************************
3104  *                                                                      *
3105  *      Routines to parse Name, NCName and NmToken                      *
3106  *                                                                      *
3107  ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, compiled only when DEBUG is defined. nbParseName,
 * nbParseNameComplex, nbParseNCName and nbParseNCNameComplex are
 * incremented on entry to the routines of the same name; nbParseNmToken
 * and nbParseStringName are presumably handled likewise by routines
 * located further down the file (TODO confirm).
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3116
3117 /*
3118  * The two following functions are related to the change of accepted
3119  * characters for Name and NmToken in the Revision 5 of XML-1.0
3120  * They correspond to the modified production [4] and the new production [4a]
3121  * changes in that revision. Also note that the macros used for the
3122  * productions Letter, Digit, CombiningChar and Extender are not needed
3123  * anymore.
3124  * We still keep compatibility to pre-revision5 parsing semantic if the
3125  * new XML_PARSE_OLD10 option is given to the parser.
3126  */
3127 static int
3128 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3129     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3130         /*
3131          * Use the new checks of production [4] [4a] amd [5] of the
3132          * Update 5 of XML-1.0
3133          */
3134         if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3135             (((c >= 'a') && (c <= 'z')) ||
3136              ((c >= 'A') && (c <= 'Z')) ||
3137              (c == '_') || (c == ':') ||
3138              ((c >= 0xC0) && (c <= 0xD6)) ||
3139              ((c >= 0xD8) && (c <= 0xF6)) ||
3140              ((c >= 0xF8) && (c <= 0x2FF)) ||
3141              ((c >= 0x370) && (c <= 0x37D)) ||
3142              ((c >= 0x37F) && (c <= 0x1FFF)) ||
3143              ((c >= 0x200C) && (c <= 0x200D)) ||
3144              ((c >= 0x2070) && (c <= 0x218F)) ||
3145              ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3146              ((c >= 0x3001) && (c <= 0xD7FF)) ||
3147              ((c >= 0xF900) && (c <= 0xFDCF)) ||
3148              ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3149              ((c >= 0x10000) && (c <= 0xEFFFF))))
3150             return(1);
3151     } else {
3152         if (IS_LETTER(c) || (c == '_') || (c == ':'))
3153             return(1);
3154     }
3155     return(0);
3156 }
3157
3158 static int
3159 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3160     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3161         /*
3162          * Use the new checks of production [4] [4a] amd [5] of the
3163          * Update 5 of XML-1.0
3164          */
3165         if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3166             (((c >= 'a') && (c <= 'z')) ||
3167              ((c >= 'A') && (c <= 'Z')) ||
3168              ((c >= '0') && (c <= '9')) || /* !start */
3169              (c == '_') || (c == ':') ||
3170              (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3171              ((c >= 0xC0) && (c <= 0xD6)) ||
3172              ((c >= 0xD8) && (c <= 0xF6)) ||
3173              ((c >= 0xF8) && (c <= 0x2FF)) ||
3174              ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3175              ((c >= 0x370) && (c <= 0x37D)) ||
3176              ((c >= 0x37F) && (c <= 0x1FFF)) ||
3177              ((c >= 0x200C) && (c <= 0x200D)) ||
3178              ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3179              ((c >= 0x2070) && (c <= 0x218F)) ||
3180              ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3181              ((c >= 0x3001) && (c <= 0xD7FF)) ||
3182              ((c >= 0xF900) && (c <= 0xFDCF)) ||
3183              ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3184              ((c >= 0x10000) && (c <= 0xEFFFF))))
3185              return(1);
3186     } else {
3187         if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3188             (c == '.') || (c == '-') ||
3189             (c == '_') || (c == ':') ||
3190             (IS_COMBINING(c)) ||
3191             (IS_EXTENDER(c)))
3192             return(1);
3193     }
3194     return(0);
3195 }
3196
/*
 * Forward declaration of a static helper so that it can be referenced
 * ahead of its definition (the definition is not part of this section
 * of the file).
 */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
3199
3200 static const xmlChar *
3201 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3202     int len = 0, l;
3203     int c;
3204     int count = 0;
3205
3206 #ifdef DEBUG
3207     nbParseNameComplex++;
3208 #endif
3209
3210     /*
3211      * Handler for more complex cases
3212      */
3213     GROW;
3214     if (ctxt->instate == XML_PARSER_EOF)
3215         return(NULL);
3216     c = CUR_CHAR(l);
3217     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3218         /*
3219          * Use the new checks of production [4] [4a] amd [5] of the
3220          * Update 5 of XML-1.0
3221          */
3222         if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3223             (!(((c >= 'a') && (c <= 'z')) ||
3224                ((c >= 'A') && (c <= 'Z')) ||
3225                (c == '_') || (c == ':') ||
3226                ((c >= 0xC0) && (c <= 0xD6)) ||
3227                ((c >= 0xD8) && (c <= 0xF6)) ||
3228                ((c >= 0xF8) && (c <= 0x2FF)) ||
3229                ((c >= 0x370) && (c <= 0x37D)) ||
3230                ((c >= 0x37F) && (c <= 0x1FFF)) ||
3231                ((c >= 0x200C) && (c <= 0x200D)) ||
3232                ((c >= 0x2070) && (c <= 0x218F)) ||
3233                ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3234                ((c >= 0x3001) && (c <= 0xD7FF)) ||
3235                ((c >= 0xF900) && (c <= 0xFDCF)) ||
3236                ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3237                ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3238             return(NULL);
3239         }
3240         len += l;
3241         NEXTL(l);
3242         c = CUR_CHAR(l);
3243         while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3244                (((c >= 'a') && (c <= 'z')) ||
3245                 ((c >= 'A') && (c <= 'Z')) ||
3246                 ((c >= '0') && (c <= '9')) || /* !start */
3247                 (c == '_') || (c == ':') ||
3248                 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3249                 ((c >= 0xC0) && (c <= 0xD6)) ||
3250                 ((c >= 0xD8) && (c <= 0xF6)) ||
3251                 ((c >= 0xF8) && (c <= 0x2FF)) ||
3252                 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3253                 ((c >= 0x370) && (c <= 0x37D)) ||
3254                 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3255                 ((c >= 0x200C) && (c <= 0x200D)) ||
3256                 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3257                 ((c >= 0x2070) && (c <= 0x218F)) ||
3258                 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3259                 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3260                 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3261                 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3262                 ((c >= 0x10000) && (c <= 0xEFFFF))
3263                 )) {
3264             if (count++ > XML_PARSER_CHUNK_SIZE) {
3265                 count = 0;
3266                 GROW;
3267                 if (ctxt->instate == XML_PARSER_EOF)
3268                     return(NULL);
3269             }
3270             len += l;
3271             NEXTL(l);
3272             c = CUR_CHAR(l);
3273         }
3274     } else {
3275         if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3276             (!IS_LETTER(c) && (c != '_') &&
3277              (c != ':'))) {
3278             return(NULL);
3279         }
3280         len += l;
3281         NEXTL(l);
3282         c = CUR_CHAR(l);
3283
3284         while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3285                ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3286                 (c == '.') || (c == '-') ||
3287                 (c == '_') || (c == ':') ||
3288                 (IS_COMBINING(c)) ||
3289                 (IS_EXTENDER(c)))) {
3290             if (count++ > XML_PARSER_CHUNK_SIZE) {
3291                 count = 0;
3292                 GROW;
3293                 if (ctxt->instate == XML_PARSER_EOF)
3294                     return(NULL);
3295             }
3296             len += l;
3297             NEXTL(l);
3298             c = CUR_CHAR(l);
3299         }
3300     }
3301     if ((len > XML_MAX_NAME_LENGTH) &&
3302         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3303         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3304         return(NULL);
3305     }
3306     if (ctxt->input->cur - ctxt->input->base < len) {
3307         /*
3308          * There were a couple of bugs where PERefs lead to to a change
3309          * of the buffer. Check the buffer size to avoid passing an invalid
3310          * pointer to xmlDictLookup.
3311          */
3312         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3313                     "unexpected change of input buffer");
3314         return (NULL);
3315     }
3316     if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3317         return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3318     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3319 }
3320
3321 /**
3322  * xmlParseName:
3323  * @ctxt:  an XML parser context
3324  *
3325  * parse an XML name.
3326  *
3327  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3328  *                  CombiningChar | Extender
3329  *
3330  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3331  *
3332  * [6] Names ::= Name (#x20 Name)*
3333  *
3334  * Returns the Name parsed or NULL
3335  */
3336
3337 const xmlChar *
3338 xmlParseName(xmlParserCtxtPtr ctxt) {
3339     const xmlChar *in;
3340     const xmlChar *ret;
3341     int count = 0;
3342
3343     GROW;
3344
3345 #ifdef DEBUG
3346     nbParseName++;
3347 #endif
3348
3349     /*
3350      * Accelerator for simple ASCII names
3351      */
3352     in = ctxt->input->cur;
3353     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3354         ((*in >= 0x41) && (*in <= 0x5A)) ||
3355         (*in == '_') || (*in == ':')) {
3356         in++;
3357         while (((*in >= 0x61) && (*in <= 0x7A)) ||
3358                ((*in >= 0x41) && (*in <= 0x5A)) ||
3359                ((*in >= 0x30) && (*in <= 0x39)) ||
3360                (*in == '_') || (*in == '-') ||
3361                (*in == ':') || (*in == '.'))
3362             in++;
3363         if ((*in > 0) && (*in < 0x80)) {
3364             count = in - ctxt->input->cur;
3365             if ((count > XML_MAX_NAME_LENGTH) &&
3366                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3367                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3368                 return(NULL);
3369             }
3370             ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3371             ctxt->input->cur = in;
3372             ctxt->input->col += count;
3373             if (ret == NULL)
3374                 xmlErrMemory(ctxt, NULL);
3375             return(ret);
3376         }
3377     }
3378     /* accelerator for special cases */
3379     return(xmlParseNameComplex(ctxt));
3380 }
3381
3382 static const xmlChar *
3383 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3384     int len = 0, l;
3385     int c;
3386     int count = 0;
3387     size_t startPosition = 0;
3388
3389 #ifdef DEBUG
3390     nbParseNCNameComplex++;
3391 #endif
3392
3393     /*
3394      * Handler for more complex cases
3395      */
3396     GROW;
3397     startPosition = CUR_PTR - BASE_PTR;
3398     c = CUR_CHAR(l);
3399     if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3400         (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3401         return(NULL);
3402     }
3403
3404     while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3405            (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3406         if (count++ > XML_PARSER_CHUNK_SIZE) {
3407             if ((len > XML_MAX_NAME_LENGTH) &&
3408                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3409                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3410                 return(NULL);
3411             }
3412             count = 0;
3413             GROW;
3414             if (ctxt->instate == XML_PARSER_EOF)
3415                 return(NULL);
3416         }
3417         len += l;
3418         NEXTL(l);
3419         c = CUR_CHAR(l);
3420         if (c == 0) {
3421             count = 0;
3422             /*
3423              * when shrinking to extend the buffer we really need to preserve
3424              * the part of the name we already parsed. Hence rolling back
3425              * by current length.
3426              */
3427             ctxt->input->cur -= l;
3428             GROW;
3429             if (ctxt->instate == XML_PARSER_EOF)
3430                 return(NULL);
3431             ctxt->input->cur += l;
3432             c = CUR_CHAR(l);
3433         }
3434     }
3435     if ((len > XML_MAX_NAME_LENGTH) &&
3436         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3437         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3438         return(NULL);
3439     }
3440     return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3441 }
3442
3443 /**
3444  * xmlParseNCName:
3445  * @ctxt:  an XML parser context
3446  * @len:  length of the string parsed
3447  *
3448  * parse an XML name.
3449  *
3450  * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3451  *                      CombiningChar | Extender
3452  *
3453  * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3454  *
3455  * Returns the Name parsed or NULL
3456  */
3457
3458 static const xmlChar *
3459 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3460     const xmlChar *in, *e;
3461     const xmlChar *ret;
3462     int count = 0;
3463
3464 #ifdef DEBUG
3465     nbParseNCName++;
3466 #endif
3467
3468     /*
3469      * Accelerator for simple ASCII names
3470      */
3471     in = ctxt->input->cur;
3472     e = ctxt->input->end;
3473     if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3474          ((*in >= 0x41) && (*in <= 0x5A)) ||
3475          (*in == '_')) && (in < e)) {
3476         in++;
3477         while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3478                 ((*in >= 0x41) && (*in <= 0x5A)) ||
3479                 ((*in >= 0x30) && (*in <= 0x39)) ||
3480                 (*in == '_') || (*in == '-') ||
3481                 (*in == '.')) && (in < e))
3482             in++;
3483         if (in >= e)
3484             goto complex;
3485         if ((*in > 0) && (*in < 0x80)) {
3486             count = in - ctxt->input->cur;
3487             if ((count > XML_MAX_NAME_LENGTH) &&
3488                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3489                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3490                 return(NULL);
3491             }
3492             ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3493             ctxt->input->cur = in;
3494             ctxt->input->col += count;
3495             if (ret == NULL) {
3496                 xmlErrMemory(ctxt, NULL);
3497             }
3498             return(ret);
3499         }
3500     }
3501 complex:
3502     return(xmlParseNCNameComplex(ctxt));
3503 }
3504
3505 /**
3506  * xmlParseNameAndCompare:
3507  * @ctxt:  an XML parser context
3508  *
3509  * parse an XML name and compares for match
3510  * (specialized for endtag parsing)
3511  *
3512  * Returns NULL for an illegal name, (xmlChar*) 1 for success
3513  * and the name for mismatch
3514  */
3515
3516 static const xmlChar *
3517 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3518     register const xmlChar *cmp = other;
3519     register const xmlChar *in;
3520     const xmlChar *ret;
3521
3522     GROW;
3523     if (ctxt->instate == XML_PARSER_EOF)
3524         return(NULL);
3525
3526     in = ctxt->input->cur;
3527     while (*in != 0 && *in == *cmp) {
3528         ++in;
3529         ++cmp;
3530     }
3531     if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3532         /* success */
3533         ctxt->input->col += in - ctxt->input->cur;
3534         ctxt->input->cur = in;
3535         return (const xmlChar*) 1;
3536     }
3537     /* failure (or end of input buffer), check with full function */
3538     ret = xmlParseName (ctxt);
3539     /* strings coming from the dictionary direct compare possible */
3540     if (ret == other) {
3541         return (const xmlChar*) 1;
3542     }
3543     return ret;
3544 }
3545
3546 /**
3547  * xmlParseStringName:
3548  * @ctxt:  an XML parser context
3549  * @str:  a pointer to the string pointer (IN/OUT)
3550  *
3551  * parse an XML name.
3552  *
3553  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3554  *                  CombiningChar | Extender
3555  *
3556  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3557  *
3558  * [6] Names ::= Name (#x20 Name)*
3559  *
3560  * Returns the Name parsed or NULL. The @str pointer
3561  * is updated to the current location in the string.
3562  */
3563
3564 static xmlChar *
3565 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3566     xmlChar buf[XML_MAX_NAMELEN + 5];
3567     const xmlChar *cur = *str;
3568     int len = 0, l;
3569     int c;
3570
3571 #ifdef DEBUG
3572     nbParseStringName++;
3573 #endif
3574
3575     c = CUR_SCHAR(cur, l);
3576     if (!xmlIsNameStartChar(ctxt, c)) {
3577         return(NULL);
3578     }
3579
3580     COPY_BUF(l,buf,len,c);
3581     cur += l;
3582     c = CUR_SCHAR(cur, l);
3583     while (xmlIsNameChar(ctxt, c)) {
3584         COPY_BUF(l,buf,len,c);
3585         cur += l;
3586         c = CUR_SCHAR(cur, l);
3587         if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3588             /*
3589              * Okay someone managed to make a huge name, so he's ready to pay
3590              * for the processing speed.
3591              */
3592             xmlChar *buffer;
3593             int max = len * 2;
3594
3595             buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3596             if (buffer == NULL) {
3597                 xmlErrMemory(ctxt, NULL);
3598                 return(NULL);
3599             }
3600             memcpy(buffer, buf, len);
3601             while (xmlIsNameChar(ctxt, c)) {
3602                 if (len + 10 > max) {
3603                     xmlChar *tmp;
3604
3605                     if ((len > XML_MAX_NAME_LENGTH) &&
3606                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3607                         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3608                         xmlFree(buffer);
3609                         return(NULL);
3610                     }
3611                     max *= 2;
3612                     tmp = (xmlChar *) xmlRealloc(buffer,
3613                                                     max * sizeof(xmlChar));
3614                     if (tmp == NULL) {
3615                         xmlErrMemory(ctxt, NULL);
3616                         xmlFree(buffer);
3617                         return(NULL);
3618                     }
3619                     buffer = tmp;
3620                 }
3621                 COPY_BUF(l,buffer,len,c);
3622                 cur += l;
3623                 c = CUR_SCHAR(cur, l);
3624             }
3625             buffer[len] = 0;
3626             *str = cur;
3627             return(buffer);
3628         }
3629     }
3630     if ((len > XML_MAX_NAME_LENGTH) &&
3631         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3632         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3633         return(NULL);
3634     }
3635     *str = cur;
3636     return(xmlStrndup(buf, len));
3637 }
3638
3639 /**
3640  * xmlParseNmtoken:
3641  * @ctxt:  an XML parser context
3642  *
3643  * parse an XML Nmtoken.
3644  *
3645  * [7] Nmtoken ::= (NameChar)+
3646  *
3647  * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3648  *
3649  * Returns the Nmtoken parsed or NULL
3650  */
3651
3652 xmlChar *
3653 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3654     xmlChar buf[XML_MAX_NAMELEN + 5];
3655     int len = 0, l;
3656     int c;
3657     int count = 0;
3658
3659 #ifdef DEBUG
3660     nbParseNmToken++;
3661 #endif
3662
3663     GROW;
3664     if (ctxt->instate == XML_PARSER_EOF)
3665         return(NULL);
3666     c = CUR_CHAR(l);
3667
3668     while (xmlIsNameChar(ctxt, c)) {
3669         if (count++ > XML_PARSER_CHUNK_SIZE) {
3670             count = 0;
3671             GROW;
3672         }
3673         COPY_BUF(l,buf,len,c);
3674         NEXTL(l);
3675         c = CUR_CHAR(l);
3676         if (c == 0) {
3677             count = 0;
3678             GROW;
3679             if (ctxt->instate == XML_PARSER_EOF)
3680                 return(NULL);
3681             c = CUR_CHAR(l);
3682         }
3683         if (len >= XML_MAX_NAMELEN) {
3684             /*
3685              * Okay someone managed to make a huge token, so he's ready to pay
3686              * for the processing speed.
3687              */
3688             xmlChar *buffer;
3689             int max = len * 2;
3690
3691             buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3692             if (buffer == NULL) {
3693                 xmlErrMemory(ctxt, NULL);
3694                 return(NULL);
3695             }
3696             memcpy(buffer, buf, len);
3697             while (xmlIsNameChar(ctxt, c)) {
3698                 if (count++ > XML_PARSER_CHUNK_SIZE) {
3699                     count = 0;
3700                     GROW;
3701                     if (ctxt->instate == XML_PARSER_EOF) {
3702                         xmlFree(buffer);
3703                         return(NULL);
3704                     }
3705                 }
3706                 if (len + 10 > max) {
3707                     xmlChar *tmp;
3708
3709                     if ((max > XML_MAX_NAME_LENGTH) &&
3710                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3711                         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3712                         xmlFree(buffer);
3713                         return(NULL);
3714                     }
3715                     max *= 2;
3716                     tmp = (xmlChar *) xmlRealloc(buffer,
3717                                                     max * sizeof(xmlChar));
3718                     if (tmp == NULL) {
3719                         xmlErrMemory(ctxt, NULL);
3720                         xmlFree(buffer);
3721                         return(NULL);
3722                     }
3723                     buffer = tmp;
3724                 }
3725                 COPY_BUF(l,buffer,len,c);
3726                 NEXTL(l);
3727                 c = CUR_CHAR(l);
3728             }
3729             buffer[len] = 0;
3730             return(buffer);
3731         }
3732     }
3733     if (len == 0)
3734         return(NULL);
3735     if ((len > XML_MAX_NAME_LENGTH) &&
3736         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3737         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3738         return(NULL);
3739     }
3740     return(xmlStrndup(buf, len));
3741 }
3742
3743 /**
3744  * xmlParseEntityValue:
3745  * @ctxt:  an XML parser context
3746  * @orig:  if non-NULL store a copy of the original entity value
3747  *
3748  * parse a value for ENTITY declarations
3749  *
3750  * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3751  *                     "'" ([^%&'] | PEReference | Reference)* "'"
3752  *
3753  * Returns the EntityValue parsed with reference substituted or NULL
3754  */
3755
3756 xmlChar *
3757 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3758     xmlChar *buf = NULL;
3759     int len = 0;
3760     int size = XML_PARSER_BUFFER_SIZE;
3761     int c, l;
3762     xmlChar stop;
3763     xmlChar *ret = NULL;
3764     const xmlChar *cur = NULL;
3765     xmlParserInputPtr input;
3766
3767     if (RAW == '"') stop = '"';
3768     else if (RAW == '\'') stop = '\'';
3769     else {
3770         xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3771         return(NULL);
3772     }
3773     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3774     if (buf == NULL) {
3775         xmlErrMemory(ctxt, NULL);
3776         return(NULL);
3777     }
3778
3779     /*
3780      * The content of the entity definition is copied in a buffer.
3781      */
3782
3783     ctxt->instate = XML_PARSER_ENTITY_VALUE;
3784     input = ctxt->input;
3785     GROW;
3786     if (ctxt->instate == XML_PARSER_EOF)
3787         goto error;
3788     NEXT;
3789     c = CUR_CHAR(l);
3790     /*
3791      * NOTE: 4.4.5 Included in Literal
3792      * When a parameter entity reference appears in a literal entity
3793      * value, ... a single or double quote character in the replacement
3794      * text is always treated as a normal data character and will not
3795      * terminate the literal.
3796      * In practice it means we stop the loop only when back at parsing
3797      * the initial entity and the quote is found
3798      */
3799     while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3800             (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3801         if (len + 5 >= size) {
3802             xmlChar *tmp;
3803
3804             size *= 2;
3805             tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3806             if (tmp == NULL) {
3807                 xmlErrMemory(ctxt, NULL);
3808                 goto error;
3809             }
3810             buf = tmp;
3811         }
3812         COPY_BUF(l,buf,len,c);
3813         NEXTL(l);
3814
3815         GROW;
3816         c = CUR_CHAR(l);
3817         if (c == 0) {
3818             GROW;
3819             c = CUR_CHAR(l);
3820         }
3821     }
3822     buf[len] = 0;
3823     if (ctxt->instate == XML_PARSER_EOF)
3824         goto error;
3825     if (c != stop) {
3826         xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3827         goto error;
3828     }
3829     NEXT;
3830
3831     /*
3832      * Raise problem w.r.t. '&' and '%' being used in non-entities
3833      * reference constructs. Note Charref will be handled in
3834      * xmlStringDecodeEntities()
3835      */
3836     cur = buf;
3837     while (*cur != 0) { /* non input consuming */
3838         if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3839             xmlChar *name;
3840             xmlChar tmp = *cur;
3841             int nameOk = 0;
3842
3843             cur++;
3844             name = xmlParseStringName(ctxt, &cur);
3845             if (name != NULL) {
3846                 nameOk = 1;
3847                 xmlFree(name);
3848             }
3849             if ((nameOk == 0) || (*cur != ';')) {
3850                 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3851             "EntityValue: '%c' forbidden except for entities references\n",
3852                                   tmp);
3853                 goto error;
3854             }
3855             if ((tmp == '%') && (ctxt->inSubset == 1) &&
3856                 (ctxt->inputNr == 1)) {
3857                 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3858                 goto error;
3859             }
3860             if (*cur == 0)
3861                 break;
3862         }
3863         cur++;
3864     }
3865
3866     /*
3867      * Then PEReference entities are substituted.
3868      *
3869      * NOTE: 4.4.7 Bypassed
3870      * When a general entity reference appears in the EntityValue in
3871      * an entity declaration, it is bypassed and left as is.
3872      * so XML_SUBSTITUTE_REF is not set here.
3873      */
3874     ++ctxt->depth;
3875     ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3876                                   0, 0, 0);
3877     --ctxt->depth;
3878     if (orig != NULL) {
3879         *orig = buf;
3880         buf = NULL;
3881     }
3882
3883 error:
3884     if (buf != NULL)
3885         xmlFree(buf);
3886     return(ret);
3887 }
3888
3889 /**
3890  * xmlParseAttValueComplex:
3891  * @ctxt:  an XML parser context
3892  * @len:   the resulting attribute len
3893  * @normalize:  whether to apply the inner normalization
3894  *
3895  * parse a value for an attribute, this is the fallback function
3896  * of xmlParseAttValue() when the attribute parsing requires handling
3897  * of non-ASCII characters, or normalization compaction.
3898  *
3899  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3900  */
3901 static xmlChar *
3902 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3903     xmlChar limit = 0;
3904     xmlChar *buf = NULL;
3905     xmlChar *rep = NULL;
3906     size_t len = 0;
3907     size_t buf_size = 0;
3908     int c, l, in_space = 0;
3909     xmlChar *current = NULL;
3910     xmlEntityPtr ent;
3911
3912     if (NXT(0) == '"') {
3913         ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3914         limit = '"';
3915         NEXT;
3916     } else if (NXT(0) == '\'') {
3917         limit = '\'';
3918         ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3919         NEXT;
3920     } else {
3921         xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3922         return(NULL);
3923     }
3924
3925     /*
3926      * allocate a translation buffer.
3927      */
3928     buf_size = XML_PARSER_BUFFER_SIZE;
3929     buf = (xmlChar *) xmlMallocAtomic(buf_size);
3930     if (buf == NULL) goto mem_error;
3931
3932     /*
3933      * OK loop until we reach one of the ending char or a size limit.
3934      */
3935     c = CUR_CHAR(l);
3936     while (((NXT(0) != limit) && /* checked */
3937             (IS_CHAR(c)) && (c != '<')) &&
3938             (ctxt->instate != XML_PARSER_EOF)) {
3939         /*
3940          * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3941          * special option is given
3942          */
3943         if ((len > XML_MAX_TEXT_LENGTH) &&
3944             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3945             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3946                            "AttValue length too long\n");
3947             goto mem_error;
3948         }
3949         if (c == '&') {
3950             in_space = 0;
3951             if (NXT(1) == '#') {
3952                 int val = xmlParseCharRef(ctxt);
3953
3954                 if (val == '&') {
3955                     if (ctxt->replaceEntities) {
3956                         if (len + 10 > buf_size) {
3957                             growBuffer(buf, 10);
3958                         }
3959                         buf[len++] = '&';
3960                     } else {
3961                         /*
3962                          * The reparsing will be done in xmlStringGetNodeList()
3963                          * called by the attribute() function in SAX.c
3964                          */
3965                         if (len + 10 > buf_size) {
3966                             growBuffer(buf, 10);
3967                         }
3968                         buf[len++] = '&';
3969                         buf[len++] = '#';
3970                         buf[len++] = '3';
3971                         buf[len++] = '8';
3972                         buf[len++] = ';';
3973                     }
3974                 } else if (val != 0) {
3975                     if (len + 10 > buf_size) {
3976                         growBuffer(buf, 10);
3977                     }
3978                     len += xmlCopyChar(0, &buf[len], val);
3979                 }
3980             } else {
3981                 ent = xmlParseEntityRef(ctxt);
3982                 ctxt->nbentities++;
3983                 if (ent != NULL)
3984                     ctxt->nbentities += ent->owner;
3985                 if ((ent != NULL) &&
3986                     (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3987                     if (len + 10 > buf_size) {
3988                         growBuffer(buf, 10);
3989                     }
3990                     if ((ctxt->replaceEntities == 0) &&
3991                         (ent->content[0] == '&')) {
3992                         buf[len++] = '&';
3993                         buf[len++] = '#';
3994                         buf[len++] = '3';
3995                         buf[len++] = '8';
3996                         buf[len++] = ';';
3997                     } else {
3998                         buf[len++] = ent->content[0];
3999                     }
4000                 } else if ((ent != NULL) &&
4001                            (ctxt->replaceEntities != 0)) {
4002                     if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4003                         ++ctxt->depth;
4004                         rep = xmlStringDecodeEntities(ctxt, ent->content,
4005                                                       XML_SUBSTITUTE_REF,
4006                                                       0, 0, 0);
4007                         --ctxt->depth;
4008                         if (rep != NULL) {
4009                             current = rep;
4010                             while (*current != 0) { /* non input consuming */
4011                                 if ((*current == 0xD) || (*current == 0xA) ||
4012                                     (*current == 0x9)) {
4013                                     buf[len++] = 0x20;
4014                                     current++;
4015                                 } else
4016                                     buf[len++] = *current++;
4017                                 if (len + 10 > buf_size) {
4018                                     growBuffer(buf, 10);
4019                                 }
4020                             }
4021                             xmlFree(rep);
4022                             rep = NULL;
4023                         }
4024                     } else {
4025                         if (len + 10 > buf_size) {
4026                             growBuffer(buf, 10);
4027                         }
4028                         if (ent->content != NULL)
4029                             buf[len++] = ent->content[0];
4030                     }
4031                 } else if (ent != NULL) {
4032                     int i = xmlStrlen(ent->name);
4033                     const xmlChar *cur = ent->name;
4034
4035                     /*
4036                      * This may look absurd but is needed to detect
4037                      * entities problems
4038                      */
4039                     if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4040                         (ent->content != NULL) && (ent->checked == 0)) {
4041                         unsigned long oldnbent = ctxt->nbentities, diff;
4042
4043                         ++ctxt->depth;
4044                         rep = xmlStringDecodeEntities(ctxt, ent->content,
4045                                                   XML_SUBSTITUTE_REF, 0, 0, 0);
4046                         --ctxt->depth;
4047
4048                         diff = ctxt->nbentities - oldnbent + 1;
4049                         if (diff > INT_MAX / 2)
4050                             diff = INT_MAX / 2;
4051                         ent->checked = diff * 2;
4052                         if (rep != NULL) {
4053                             if (xmlStrchr(rep, '<'))
4054                                 ent->checked |= 1;
4055                             xmlFree(rep);
4056                             rep = NULL;
4057                         } else {
4058                             ent->content[0] = 0;
4059                         }
4060                     }
4061
4062                     /*
4063                      * Just output the reference
4064                      */
4065                     buf[len++] = '&';
4066                     while (len + i + 10 > buf_size) {
4067                         growBuffer(buf, i + 10);
4068                     }
4069                     for (;i > 0;i--)
4070                         buf[len++] = *cur++;
4071                     buf[len++] = ';';
4072                 }
4073             }
4074         } else {
4075             if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4076                 if ((len != 0) || (!normalize)) {
4077                     if ((!normalize) || (!in_space)) {
4078                         COPY_BUF(l,buf,len,0x20);
4079                         while (len + 10 > buf_size) {
4080                             growBuffer(buf, 10);
4081                         }
4082                     }
4083                     in_space = 1;
4084                 }
4085             } else {
4086                 in_space = 0;
4087                 COPY_BUF(l,buf,len,c);
4088                 if (len + 10 > buf_size) {
4089                     growBuffer(buf, 10);
4090                 }
4091             }
4092             NEXTL(l);
4093         }
4094         GROW;
4095         c = CUR_CHAR(l);
4096     }
4097     if (ctxt->instate == XML_PARSER_EOF)
4098         goto error;
4099
4100     if ((in_space) && (normalize)) {
4101         while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4102     }
4103     buf[len] = 0;
4104     if (RAW == '<') {
4105         xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4106     } else if (RAW != limit) {
4107         if ((c != 0) && (!IS_CHAR(c))) {
4108             xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4109                            "invalid character in attribute value\n");
4110         } else {
4111             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4112                            "AttValue: ' expected\n");
4113         }
4114     } else
4115         NEXT;
4116
4117     /*
4118      * There we potentially risk an overflow, don't allow attribute value of
4119      * length more than INT_MAX it is a very reasonable assumption !
4120      */
4121     if (len >= INT_MAX) {
4122         xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4123                        "AttValue length too long\n");
4124         goto mem_error;
4125     }
4126
4127     if (attlen != NULL) *attlen = (int) len;
4128     return(buf);
4129
4130 mem_error:
4131     xmlErrMemory(ctxt, NULL);
4132 error:
4133     if (buf != NULL)
4134         xmlFree(buf);
4135     if (rep != NULL)
4136         xmlFree(rep);
4137     return(NULL);
4138 }
4139
4140 /**
4141  * xmlParseAttValue:
4142  * @ctxt:  an XML parser context
4143  *
4144  * parse a value for an attribute
4145  * Note: the parser won't do substitution of entities here, this
4146  * will be handled later in xmlStringGetNodeList
4147  *
4148  * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4149  *                   "'" ([^<&'] | Reference)* "'"
4150  *
4151  * 3.3.3 Attribute-Value Normalization:
4152  * Before the value of an attribute is passed to the application or
4153  * checked for validity, the XML processor must normalize it as follows:
4154  * - a character reference is processed by appending the referenced
4155  *   character to the attribute value
4156  * - an entity reference is processed by recursively processing the
4157  *   replacement text of the entity
4158  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4159  *   appending #x20 to the normalized value, except that only a single
4160  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4161  *   parsed entity or the literal entity value of an internal parsed entity
4162  * - other characters are processed by appending them to the normalized value
4163  * If the declared value is not CDATA, then the XML processor must further
4164  * process the normalized attribute value by discarding any leading and
4165  * trailing space (#x20) characters, and by replacing sequences of space
4166  * (#x20) characters by a single space (#x20) character.
4167  * All attributes for which no declaration has been read should be treated
4168  * by a non-validating parser as if declared CDATA.
4169  *
4170  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4171  */
4172
4173
4174 xmlChar *
4175 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4176     if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4177     return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4178 }
4179
4180 /**
4181  * xmlParseSystemLiteral:
4182  * @ctxt:  an XML parser context
4183  *
4184  * parse an XML Literal
4185  *
4186  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4187  *
4188  * Returns the SystemLiteral parsed or NULL
4189  */
4190
4191 xmlChar *
4192 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4193     xmlChar *buf = NULL;
4194     int len = 0;
4195     int size = XML_PARSER_BUFFER_SIZE;
4196     int cur, l;
4197     xmlChar stop;
4198     int state = ctxt->instate;
4199     int count = 0;
4200
4201     SHRINK;
4202     if (RAW == '"') {
4203         NEXT;
4204         stop = '"';
4205     } else if (RAW == '\'') {
4206         NEXT;
4207         stop = '\'';
4208     } else {
4209         xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4210         return(NULL);
4211     }
4212
4213     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4214     if (buf == NULL) {
4215         xmlErrMemory(ctxt, NULL);
4216         return(NULL);
4217     }
4218     ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4219     cur = CUR_CHAR(l);
4220     while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4221         if (len + 5 >= size) {
4222             xmlChar *tmp;
4223
4224             if ((size > XML_MAX_NAME_LENGTH) &&
4225                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4226                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4227                 xmlFree(buf);
4228                 ctxt->instate = (xmlParserInputState) state;
4229                 return(NULL);
4230             }
4231             size *= 2;
4232             tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4233             if (tmp == NULL) {
4234                 xmlFree(buf);
4235                 xmlErrMemory(ctxt, NULL);
4236                 ctxt->instate = (xmlParserInputState) state;
4237                 return(NULL);
4238             }
4239             buf = tmp;
4240         }
4241         count++;
4242         if (count > 50) {
4243             SHRINK;
4244             GROW;
4245             count = 0;
4246             if (ctxt->instate == XML_PARSER_EOF) {
4247                 xmlFree(buf);
4248                 return(NULL);
4249             }
4250         }
4251         COPY_BUF(l,buf,len,cur);
4252         NEXTL(l);
4253         cur = CUR_CHAR(l);
4254         if (cur == 0) {
4255             GROW;
4256             SHRINK;
4257             cur = CUR_CHAR(l);
4258         }
4259     }
4260     buf[len] = 0;
4261     ctxt->instate = (xmlParserInputState) state;
4262     if (!IS_CHAR(cur)) {
4263         xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4264     } else {
4265         NEXT;
4266     }
4267     return(buf);
4268 }
4269
4270 /**
4271  * xmlParsePubidLiteral:
4272  * @ctxt:  an XML parser context
4273  *
4274  * parse an XML public literal
4275  *
4276  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4277  *
4278  * Returns the PubidLiteral parsed or NULL.
4279  */
4280
4281 xmlChar *
4282 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4283     xmlChar *buf = NULL;
4284     int len = 0;
4285     int size = XML_PARSER_BUFFER_SIZE;
4286     xmlChar cur;
4287     xmlChar stop;
4288     int count = 0;
4289     xmlParserInputState oldstate = ctxt->instate;
4290
4291     SHRINK;
4292     if (RAW == '"') {
4293         NEXT;
4294         stop = '"';
4295     } else if (RAW == '\'') {
4296         NEXT;
4297         stop = '\'';
4298     } else {
4299         xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4300         return(NULL);
4301     }
4302     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4303     if (buf == NULL) {
4304         xmlErrMemory(ctxt, NULL);
4305         return(NULL);
4306     }
4307     ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4308     cur = CUR;
4309     while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4310         if (len + 1 >= size) {
4311             xmlChar *tmp;
4312
4313             if ((size > XML_MAX_NAME_LENGTH) &&
4314                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4315                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4316                 xmlFree(buf);
4317                 return(NULL);
4318             }
4319             size *= 2;
4320             tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4321             if (tmp == NULL) {
4322                 xmlErrMemory(ctxt, NULL);
4323                 xmlFree(buf);
4324                 return(NULL);
4325             }
4326             buf = tmp;
4327         }
4328         buf[len++] = cur;
4329         count++;
4330         if (count > 50) {
4331             SHRINK;
4332             GROW;
4333             count = 0;
4334             if (ctxt->instate == XML_PARSER_EOF) {
4335                 xmlFree(buf);
4336                 return(NULL);
4337             }
4338         }
4339         NEXT;
4340         cur = CUR;
4341         if (cur == 0) {
4342             GROW;
4343             SHRINK;
4344             cur = CUR;
4345         }
4346     }
4347     buf[len] = 0;
4348     if (cur != stop) {
4349         xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4350     } else {
4351         NEXT;
4352     }
4353     ctxt->instate = oldstate;
4354     return(buf);
4355 }
4356
4357 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4358
/*
 * Lookup table used for the test in the inner loop of the char data
 * testing. It is indexed by byte value: entry i holds i itself for
 * TAB (0x09) and for the ASCII range 0x20-0x7F except '&' (0x26),
 * '<' (0x3C) and ']' (0x5D); every other entry, including CR, LF,
 * the remaining control codes and all non-ASCII bytes, holds 0.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00-0x0F: only TAB is kept, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10-0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20-0x2F: '&' (0x26) is excluded */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30-0x3F: '<' (0x3C) is excluded */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40-0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50-0x5F: ']' (0x5D) is excluded */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60-0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70-0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80-0xFF: non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4396
4397 /**
4398  * xmlParseCharData:
4399  * @ctxt:  an XML parser context
4400  * @cdata:  int indicating whether we are within a CDATA section
4401  *
4402  * Parse a CharData section and report it through the SAX callbacks.
4403  * If we are within a CDATA section ']]>' marks an end of section.
4404  *
4405  * The right angle bracket (>) may be represented using the string "&gt;",
4406  * and must, for compatibility, be escaped using "&gt;" or a character
4407  * reference when it appears in the string "]]>" in content, when that
4408  * string is not marking the end of a CDATA section.
4409  *
4410  * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4411  */
4412
4413 void
4414 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4415     const xmlChar *in;
4416     int nbchar = 0;
4417     int line = ctxt->input->line; /* written back before the slow path */
4418     int col = ctxt->input->col;
4419     int ccol; /* column counter kept in a local during the inner scan */
4420
4421     SHRINK;
4422     GROW;
4423     /*
4424      * Accelerated common case where the input doesn't need to be
4425      * modified before passing it to the handler.
4426      */
4427     if (!cdata) {
4428         in = ctxt->input->cur;
4429         do {
4430 get_more_space: /* step over leading spaces and line feeds */
4431             while (*in == 0x20) { in++; ctxt->input->col++; }
4432             if (*in == 0xA) {
4433                 do {
4434                     ctxt->input->line++; ctxt->input->col = 1;
4435                     in++;
4436                 } while (*in == 0xA);
4437                 goto get_more_space;
4438             }
4439             if (*in == '<') { /* only spaces and line feeds before the markup */
4440                 nbchar = in - ctxt->input->cur;
4441                 if (nbchar > 0) {
4442                     const xmlChar *tmp = ctxt->input->cur;
4443                     ctxt->input->cur = in;
4444
4445                     if ((ctxt->sax != NULL) &&
4446                         (ctxt->sax->ignorableWhitespace !=
4447                          ctxt->sax->characters)) {
4448                         if (areBlanks(ctxt, tmp, nbchar, 1)) {
4449                             if (ctxt->sax->ignorableWhitespace != NULL)
4450                                 ctxt->sax->ignorableWhitespace(ctxt->userData,
4451                                                        tmp, nbchar);
4452                         } else {
4453                             if (ctxt->sax->characters != NULL)
4454                                 ctxt->sax->characters(ctxt->userData,
4455                                                       tmp, nbchar);
4456                             if (*ctxt->space == -1)
4457                                 *ctxt->space = -2;
4458                         }
4459                     } else if ((ctxt->sax != NULL) &&
4460                                (ctxt->sax->characters != NULL)) {
4461                         ctxt->sax->characters(ctxt->userData,
4462                                               tmp, nbchar);
4463                     }
4464                 }
4465                 return;
4466             }
4467
4468 get_more: /* step over every byte with a non-zero test_char_data entry */
4469             ccol = ctxt->input->col;
4470             while (test_char_data[*in]) {
4471                 in++;
4472                 ccol++;
4473             }
4474             ctxt->input->col = ccol;
4475             if (*in == 0xA) {
4476                 do {
4477                     ctxt->input->line++; ctxt->input->col = 1;
4478                     in++;
4479                 } while (*in == 0xA);
4480                 goto get_more;
4481             }
4482             if (*in == ']') { /* plain text unless it starts "]]>" */
4483                 if ((in[1] == ']') && (in[2] == '>')) {
4484                     xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4485                     ctxt->input->cur = in + 1; /* resume after the first ']' */
4486                     return;
4487                 }
4488                 in++;
4489                 ctxt->input->col++;
4490                 goto get_more;
4491             }
4492             nbchar = in - ctxt->input->cur;
4493             if (nbchar > 0) {
4494                 if ((ctxt->sax != NULL) &&
4495                     (ctxt->sax->ignorableWhitespace !=
4496                      ctxt->sax->characters) &&
4497                     (IS_BLANK_CH(*ctxt->input->cur))) {
4498                     const xmlChar *tmp = ctxt->input->cur;
4499                     ctxt->input->cur = in;
4500
4501                     if (areBlanks(ctxt, tmp, nbchar, 0)) {
4502                         if (ctxt->sax->ignorableWhitespace != NULL)
4503                             ctxt->sax->ignorableWhitespace(ctxt->userData,
4504                                                            tmp, nbchar);
4505                     } else {
4506                         if (ctxt->sax->characters != NULL)
4507                             ctxt->sax->characters(ctxt->userData,
4508                                                   tmp, nbchar);
4509                         if (*ctxt->space == -1)
4510                             *ctxt->space = -2;
4511                     }
4512                     line = ctxt->input->line; /* remember the position after this chunk */
4513                     col = ctxt->input->col;
4514                 } else if (ctxt->sax != NULL) {
4515                     if (ctxt->sax->characters != NULL)
4516                         ctxt->sax->characters(ctxt->userData,
4517                                               ctxt->input->cur, nbchar);
4518                     line = ctxt->input->line; /* remember the position after this chunk */
4519                     col = ctxt->input->col;
4520                 }
4521                 /* something really bad happened in the SAX callback */
4522                 if (ctxt->instate != XML_PARSER_CONTENT)
4523                     return;
4524             }
4525             ctxt->input->cur = in;
4526             if (*in == 0xD) { /* CR LF: drop the CR, the next chunk starts at the LF */
4527                 in++;
4528                 if (*in == 0xA) {
4529                     ctxt->input->cur = in;
4530                     in++;
4531                     ctxt->input->line++; ctxt->input->col = 1;
4532                     continue; /* while */
4533                 }
4534                 in--; /* lone CR: fails the loop test below, so the slow path gets it */
4535             }
4536             if (*in == '<') {
4537                 return;
4538             }
4539             if (*in == '&') {
4540                 return;
4541             }
4542             SHRINK;
4543             GROW;
4544             if (ctxt->instate == XML_PARSER_EOF)
4545                 return;
4546             in = ctxt->input->cur;
4547         } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4548         nbchar = 0;
4549     }
4550     ctxt->input->line = line; /* position saved at entry or after the last SAX call */
4551     ctxt->input->col = col;
4552     xmlParseCharDataComplex(ctxt, cdata);
4553 }
4554
4555 /**
4556  * xmlParseCharDataComplex:
4557  * @ctxt:  an XML parser context
4558  * @cdata:  int indicating whether we are within a CDATA section
4559  *
4560  * parse a CharData section.this is the fallback function
4561  * of xmlParseCharData() when the parsing requires handling
4562  * of non-ASCII characters.
4563  */
4564 static void
4565 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4566     xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4567     int nbchar = 0;
4568     int cur, l;
4569     int count = 0;
4570
4571     SHRINK;
4572     GROW;
4573     cur = CUR_CHAR(l);
4574     while ((cur != '<') && /* checked */
4575            (cur != '&') &&
4576            (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4577         if ((cur == ']') && (NXT(1) == ']') &&
4578             (NXT(2) == '>')) {
4579             if (cdata) break;
4580             else {
4581                 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4582             }
4583         }
4584         COPY_BUF(l,buf,nbchar,cur);
4585         /* move current position before possible calling of ctxt->sax->characters */
4586         NEXTL(l);
4587         cur = CUR_CHAR(l);
4588         if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4589             buf[nbchar] = 0;
4590
4591             /*
4592              * OK the segment is to be consumed as chars.
4593              */
4594             if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4595                 if (areBlanks(ctxt, buf, nbchar, 0)) {
4596                     if (ctxt->sax->ignorableWhitespace != NULL)
4597                         ctxt->sax->ignorableWhitespace(ctxt->userData,
4598                                                        buf, nbchar);
4599                 } else {
4600                     if (ctxt->sax->characters != NULL)
4601                         ctxt->sax->characters(ctxt->userData, buf, nbchar);
4602                     if ((ctxt->sax->characters !=
4603                          ctxt->sax->ignorableWhitespace) &&
4604                         (*ctxt->space == -1))
4605                         *ctxt->space = -2;
4606                 }
4607             }
4608             nbchar = 0;
4609             /* something really bad happened in the SAX callback */
4610             if (ctxt->instate != XML_PARSER_CONTENT)
4611                 return;
4612         }
4613         count++;
4614         if (count > 50) {
4615             SHRINK;
4616             GROW;
4617             count = 0;
4618             if (ctxt->instate == XML_PARSER_EOF)
4619                 return;
4620         }
4621     }
4622     if (nbchar != 0) {
4623         buf[nbchar] = 0;
4624         /*
4625          * OK the segment is to be consumed as chars.
4626          */
4627         if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4628             if (areBlanks(ctxt, buf, nbchar, 0)) {
4629                 if (ctxt->sax->ignorableWhitespace != NULL)
4630                     ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4631             } else {
4632                 if (ctxt->sax->characters != NULL)
4633                     ctxt->sax->characters(ctxt->userData, buf, nbchar);
4634                 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4635                     (*ctxt->space == -1))
4636                     *ctxt->space = -2;
4637             }
4638         }
4639     }
4640     if ((cur != 0) && (!IS_CHAR(cur))) {
4641         /* Generate the error and skip the offending character */
4642         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4643                           "PCDATA invalid Char value %d\n",
4644                           cur);
4645         NEXTL(l);
4646     }
4647 }
4648
4649 /**
4650  * xmlParseExternalID:
4651  * @ctxt:  an XML parser context
4652  * @publicID:  a xmlChar** receiving PubidLiteral
4653  * @strict: indicate whether we should restrict parsing to only
4654  *          production [75], see NOTE below
4655  *
4656  * Parse an External ID or a Public ID
4657  *
4658  * NOTE: Productions [75] and [83] interact badly since [75] can generate
4659  *       'PUBLIC' S PubidLiteral S SystemLiteral
4660  *
4661  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4662  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4663  *
4664  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4665  *
4666  * Returns the function returns SystemLiteral and in the second
4667  *                case publicID receives PubidLiteral, is strict is off
4668  *                it is possible to return NULL and have publicID set.
4669  */
4670
4671 xmlChar *
4672 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4673     xmlChar *URI = NULL;
4674
4675     SHRINK;
4676
4677     *publicID = NULL;
4678     if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4679         SKIP(6);
4680         if (SKIP_BLANKS == 0) {
4681             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4682                            "Space required after 'SYSTEM'\n");
4683         }
4684         URI = xmlParseSystemLiteral(ctxt);
4685         if (URI == NULL) {
4686             xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4687         }
4688     } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4689         SKIP(6);
4690         if (SKIP_BLANKS == 0) {
4691             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4692                     "Space required after 'PUBLIC'\n");
4693         }
4694         *publicID = xmlParsePubidLiteral(ctxt);
4695         if (*publicID == NULL) {
4696             xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4697         }
4698         if (strict) {
4699             /*
4700              * We don't handle [83] so "S SystemLiteral" is required.
4701              */
4702             if (SKIP_BLANKS == 0) {
4703                 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4704                         "Space required after the Public Identifier\n");
4705             }
4706         } else {
4707             /*
4708              * We handle [83] so we return immediately, if
4709              * "S SystemLiteral" is not detected. We skip blanks if no
4710              * system literal was found, but this is harmless since we must
4711              * be at the end of a NotationDecl.
4712              */
4713             if (SKIP_BLANKS == 0) return(NULL);
4714             if ((CUR != '\'') && (CUR != '"')) return(NULL);
4715         }
4716         URI = xmlParseSystemLiteral(ctxt);
4717         if (URI == NULL) {
4718             xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4719         }
4720     }
4721     return(URI);
4722 }
4723
4724 /**
4725  * xmlParseCommentComplex:
4726  * @ctxt:  an XML parser context
4727  * @buf:  the already parsed part of the buffer
4728  * @len:  number of bytes in the buffer
4729  * @size:  allocated size of the buffer
4730  *
4731  * Skip an XML (SGML) comment <!-- .... -->
4732  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4733  *  must not occur within comments. "
4734  * This is the slow routine in case the accelerator for ascii didn't work
4735  *
4736  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4737  */
4738 static void
4739 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4740                        size_t len, size_t size) {
4741     int q, ql;
4742     int r, rl;
4743     int cur, l;
4744     size_t count = 0;
4745     int inputid;
4746
4747     inputid = ctxt->input->id;
4748
4749     if (buf == NULL) {
4750         len = 0;
4751         size = XML_PARSER_BUFFER_SIZE;
4752         buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4753         if (buf == NULL) {
4754             xmlErrMemory(ctxt, NULL);
4755             return;
4756         }
4757     }
4758     GROW;       /* Assure there's enough input data */
4759     q = CUR_CHAR(ql);
4760     if (q == 0)
4761         goto not_terminated;
4762     if (!IS_CHAR(q)) {
4763         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4764                           "xmlParseComment: invalid xmlChar value %d\n",
4765                           q);
4766         xmlFree (buf);
4767         return;
4768     }
4769     NEXTL(ql);
4770     r = CUR_CHAR(rl);
4771     if (r == 0)
4772         goto not_terminated;
4773     if (!IS_CHAR(r)) {
4774         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4775                           "xmlParseComment: invalid xmlChar value %d\n",
4776                           q);
4777         xmlFree (buf);
4778         return;
4779     }
4780     NEXTL(rl);
4781     cur = CUR_CHAR(l);
4782     if (cur == 0)
4783         goto not_terminated;
4784     while (IS_CHAR(cur) && /* checked */
4785            ((cur != '>') ||
4786             (r != '-') || (q != '-'))) {
4787         if ((r == '-') && (q == '-')) {
4788             xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4789         }
4790         if ((len > XML_MAX_TEXT_LENGTH) &&
4791             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4792             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4793                          "Comment too big found", NULL);
4794             xmlFree (buf);
4795             return;
4796         }
4797         if (len + 5 >= size) {
4798             xmlChar *new_buf;
4799             size_t new_size;
4800
4801             new_size = size * 2;
4802             new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4803             if (new_buf == NULL) {
4804                 xmlFree (buf);
4805                 xmlErrMemory(ctxt, NULL);
4806                 return;
4807             }
4808             buf = new_buf;
4809             size = new_size;
4810         }
4811         COPY_BUF(ql,buf,len,q);
4812         q = r;
4813         ql = rl;
4814         r = cur;
4815         rl = l;
4816
4817         count++;
4818         if (count > 50) {
4819             SHRINK;
4820             GROW;
4821             count = 0;
4822             if (ctxt->instate == XML_PARSER_EOF) {
4823                 xmlFree(buf);
4824                 return;
4825             }
4826         }
4827         NEXTL(l);
4828         cur = CUR_CHAR(l);
4829         if (cur == 0) {
4830             SHRINK;
4831             GROW;
4832             cur = CUR_CHAR(l);
4833         }
4834     }
4835     buf[len] = 0;
4836     if (cur == 0) {
4837         xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4838                              "Comment not terminated \n<!--%.50s\n", buf);
4839     } else if (!IS_CHAR(cur)) {
4840         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4841                           "xmlParseComment: invalid xmlChar value %d\n",
4842                           cur);
4843     } else {
4844         if (inputid != ctxt->input->id) {
4845             xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4846                            "Comment doesn't start and stop in the same"
4847                            " entity\n");
4848         }
4849         NEXT;
4850         if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4851             (!ctxt->disableSAX))
4852             ctxt->sax->comment(ctxt->userData, buf);
4853     }
4854     xmlFree(buf);
4855     return;
4856 not_terminated:
4857     xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4858                          "Comment not terminated\n", NULL);
4859     xmlFree(buf);
4860     return;
4861 }
4862
4863 /**
4864  * xmlParseComment:
4865  * @ctxt:  an XML parser context
4866  *
4867  * Parse an XML (SGML) comment <!-- .... --> and report it to the SAX comment handler
4868  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4869  *  must not occur within comments. "
4870  *
4871  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4872  */
4873 void
4874 xmlParseComment(xmlParserCtxtPtr ctxt) {
4875     xmlChar *buf = NULL;
4876     size_t size = XML_PARSER_BUFFER_SIZE;
4877     size_t len = 0;
4878     xmlParserInputState state; /* parser state saved on entry */
4879     const xmlChar *in;
4880     size_t nbchar = 0;
4881     int ccol; /* column counter kept in a local during the inner scan */
4882     int inputid;
4883
4884     /*
4885      * Check that there is a comment right here.
4886      */
4887     if ((RAW != '<') || (NXT(1) != '!') ||
4888         (NXT(2) != '-') || (NXT(3) != '-')) return;
4889     state = ctxt->instate;
4890     ctxt->instate = XML_PARSER_COMMENT;
4891     inputid = ctxt->input->id;
4892     SKIP(4); /* step over "<!--" */
4893     SHRINK;
4894     GROW;
4895
4896     /*
4897      * Accelerated common case where the input doesn't need to be
4898      * modified before passing it to the handler.
4899      */
4900     in = ctxt->input->cur;
4901     do {
4902         if (*in == 0xA) {
4903             do {
4904                 ctxt->input->line++; ctxt->input->col = 1;
4905                 in++;
4906             } while (*in == 0xA);
4907         }
4908 get_more:
4909         ccol = ctxt->input->col; /* next: scan TAB and ASCII 0x20..0x7F other than '-' */
4910         while (((*in > '-') && (*in <= 0x7F)) ||
4911                ((*in >= 0x20) && (*in < '-')) ||
4912                (*in == 0x09)) {
4913                     in++;
4914                     ccol++;
4915         }
4916         ctxt->input->col = ccol;
4917         if (*in == 0xA) {
4918             do {
4919                 ctxt->input->line++; ctxt->input->col = 1;
4920                 in++;
4921             } while (*in == 0xA);
4922             goto get_more;
4923         }
4924         nbchar = in - ctxt->input->cur;
4925         /*
4926          * save current set of data
4927          */
4928         if (nbchar > 0) {
4929             if ((ctxt->sax != NULL) &&
4930                 (ctxt->sax->comment != NULL)) {
4931                 if (buf == NULL) {
4932                     if ((*in == '-') && (in[1] == '-')) /* "--" is next: presumably the end, so size for an exact fit */
4933                         size = nbchar + 1;
4934                     else
4935                         size = XML_PARSER_BUFFER_SIZE + nbchar;
4936                     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4937                     if (buf == NULL) {
4938                         xmlErrMemory(ctxt, NULL);
4939                         ctxt->instate = state;
4940                         return;
4941                     }
4942                     len = 0;
4943                 } else if (len + nbchar + 1 >= size) {
4944                     xmlChar *new_buf;
4945                     size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4946                     new_buf = (xmlChar *) xmlRealloc(buf,
4947                                                      size * sizeof(xmlChar));
4948                     if (new_buf == NULL) {
4949                         xmlFree (buf);
4950                         xmlErrMemory(ctxt, NULL);
4951                         ctxt->instate = state;
4952                         return;
4953                     }
4954                     buf = new_buf;
4955                 }
4956                 memcpy(&buf[len], ctxt->input->cur, nbchar);
4957                 len += nbchar;
4958                 buf[len] = 0;
4959             }
4960         }
4961         if ((len > XML_MAX_TEXT_LENGTH) &&
4962             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4963             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4964                          "Comment too big found", NULL);
4965             xmlFree (buf);
4966             return; /* NOTE(review): unlike the allocation failures above, instate is not reset to state here - confirm */
4967         }
4968         ctxt->input->cur = in;
4969         if (*in == 0xA) {
4970             in++;
4971             ctxt->input->line++; ctxt->input->col = 1;
4972         }
4973         if (*in == 0xD) { /* CR LF: drop the CR, the next chunk starts at the LF */
4974             in++;
4975             if (*in == 0xA) {
4976                 ctxt->input->cur = in;
4977                 in++;
4978                 ctxt->input->line++; ctxt->input->col = 1;
4979                 goto get_more;
4980             }
4981             in--;
4982         }
4983         SHRINK;
4984         GROW;
4985         if (ctxt->instate == XML_PARSER_EOF) {
4986             xmlFree(buf);
4987             return;
4988         }
4989         in = ctxt->input->cur;
4990         if (*in == '-') {
4991             if (in[1] == '-') {
4992                 if (in[2] == '>') {
4993                     if (ctxt->input->id != inputid) {
4994                         xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4995                                        "comment doesn't start and stop in the"
4996                                        " same entity\n");
4997                     }
4998                     SKIP(3); /* consume "-->" */
4999                     if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5000                         (!ctxt->disableSAX)) {
5001                         if (buf != NULL)
5002                             ctxt->sax->comment(ctxt->userData, buf);
5003                         else
5004                             ctxt->sax->comment(ctxt->userData, BAD_CAST ""); /* nothing was buffered: report an empty comment */
5005                     }
5006                     if (buf != NULL)
5007                         xmlFree(buf);
5008                     if (ctxt->instate != XML_PARSER_EOF) /* do not overwrite an EOF state */
5009                         ctxt->instate = state;
5010                     return;
5011                 }
5012                 if (buf != NULL) {
5013                     xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5014                                       "Double hyphen within comment: "
5015                                       "<!--%.50s\n",
5016                                       buf);
5017                 } else
5018                     xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5019                                       "Double hyphen within comment\n", NULL);
5020                 if (ctxt->instate == XML_PARSER_EOF) {
5021                     xmlFree(buf);
5022                     return;
5023                 }
5024                 in++; /* step over the first '-' of the stray "--" */
5025                 ctxt->input->col++;
5026             }
5027             in++; /* step over the single (or second) '-' */
5028             ctxt->input->col++;
5029             goto get_more;
5030         }
5031     } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5032     xmlParseCommentComplex(ctxt, buf, len, size); /* slow path for any other byte; it releases buf */
5033     ctxt->instate = state;
5034     return;
5035 }
5036
5037
5038 /**
5039  * xmlParsePITarget:
5040  * @ctxt:  an XML parser context
5041  *
5042  * parse the name of a PI
5043  *
5044  * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5045  *
5046  * Returns the PITarget name or NULL
5047  */
5048
5049 const xmlChar *
5050 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5051     const xmlChar *name;
5052
5053     name = xmlParseName(ctxt);
5054     if ((name != NULL) &&
5055         ((name[0] == 'x') || (name[0] == 'X')) &&
5056         ((name[1] == 'm') || (name[1] == 'M')) &&
5057         ((name[2] == 'l') || (name[2] == 'L'))) {
5058         int i;
5059         if ((name[0] == 'x') && (name[1] == 'm') &&
5060             (name[2] == 'l') && (name[3] == 0)) {
5061             xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5062                  "XML declaration allowed only at the start of the document\n");
5063             return(name);
5064         } else if (name[3] == 0) {
5065             xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5066             return(name);
5067         }
5068         for (i = 0;;i++) {
5069             if (xmlW3CPIs[i] == NULL) break;
5070             if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5071                 return(name);
5072         }
5073         xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5074                       "xmlParsePITarget: invalid name prefix 'xml'\n",
5075                       NULL, NULL);
5076     }
5077     if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5078         xmlNsErr(ctxt, XML_NS_ERR_COLON,
5079                  "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5080     }
5081     return(name);
5082 }
5083
5084 #ifdef LIBXML_CATALOG_ENABLED
5085 /**
5086  * xmlParseCatalogPI:
5087  * @ctxt:  an XML parser context
5088  * @catalog:  the PI value string
5089  *
5090  * parse an XML Catalog Processing Instruction.
5091  *
5092  * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5093  *
5094  * Occurs only if allowed by the user and if happening in the Misc
5095  * part of the document before any doctype information
5096  * This will add the given catalog to the parsing context in order
5097  * to be used if there is a resolution need further down in the document
5098  */
5099
5100 static void
5101 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5102     xmlChar *URL = NULL;
5103     const xmlChar *tmp, *base;
5104     xmlChar marker;
5105
5106     tmp = catalog;
5107     while (IS_BLANK_CH(*tmp)) tmp++;
5108     if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5109         goto error;
5110     tmp += 7;
5111     while (IS_BLANK_CH(*tmp)) tmp++;
5112     if (*tmp != '=') {
5113         return;
5114     }
5115     tmp++;
5116     while (IS_BLANK_CH(*tmp)) tmp++;
5117     marker = *tmp;
5118     if ((marker != '\'') && (marker != '"'))
5119         goto error;
5120     tmp++;
5121     base = tmp;
5122     while ((*tmp != 0) && (*tmp != marker)) tmp++;
5123     if (*tmp == 0)
5124         goto error;
5125     URL = xmlStrndup(base, tmp - base);
5126     tmp++;
5127     while (IS_BLANK_CH(*tmp)) tmp++;
5128     if (*tmp != 0)
5129         goto error;
5130
5131     if (URL != NULL) {
5132         ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5133         xmlFree(URL);
5134     }
5135     return;
5136
5137 error:
5138     xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5139                   "Catalog PI syntax error: %s\n",
5140                   catalog, NULL);
5141     if (URL != NULL)
5142         xmlFree(URL);
5143 }
5144 #endif
5145
5146 /**
5147  * xmlParsePI:
5148  * @ctxt:  an XML parser context
5149  *
5150  * parse an XML Processing Instruction.
5151  *
5152  * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5153  *
5154  * The processing is transferred to SAX once parsed.
5155  */
5156
5157 void
5158 xmlParsePI(xmlParserCtxtPtr ctxt) {
5159     xmlChar *buf = NULL;
5160     size_t len = 0;
5161     size_t size = XML_PARSER_BUFFER_SIZE;
5162     int cur, l;
5163     const xmlChar *target;
5164     xmlParserInputState state;
5165     int count = 0;
5166
5167     if ((RAW == '<') && (NXT(1) == '?')) {
5168         int inputid = ctxt->input->id;
5169         state = ctxt->instate;
5170         ctxt->instate = XML_PARSER_PI;
5171         /*
5172          * this is a Processing Instruction.
5173          */
5174         SKIP(2);
5175         SHRINK;
5176
5177         /*
5178          * Parse the target name and check for special support like
5179          * namespace.
5180          */
5181         target = xmlParsePITarget(ctxt);
5182         if (target != NULL) {
5183             if ((RAW == '?') && (NXT(1) == '>')) {
5184                 if (inputid != ctxt->input->id) {
5185                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5186                                    "PI declaration doesn't start and stop in"
5187                                    " the same entity\n");
5188                 }
5189                 SKIP(2);
5190
5191                 /*
5192                  * SAX: PI detected.
5193                  */
5194                 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5195                     (ctxt->sax->processingInstruction != NULL))
5196                     ctxt->sax->processingInstruction(ctxt->userData,
5197                                                      target, NULL);
5198                 if (ctxt->instate != XML_PARSER_EOF)
5199                     ctxt->instate = state;
5200                 return;
5201             }
5202             buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5203             if (buf == NULL) {
5204                 xmlErrMemory(ctxt, NULL);
5205                 ctxt->instate = state;
5206                 return;
5207             }
5208             if (SKIP_BLANKS == 0) {
5209                 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5210                           "ParsePI: PI %s space expected\n", target);
5211             }
5212             cur = CUR_CHAR(l);
5213             while (IS_CHAR(cur) && /* checked */
5214                    ((cur != '?') || (NXT(1) != '>'))) {
5215                 if (len + 5 >= size) {
5216                     xmlChar *tmp;
5217                     size_t new_size = size * 2;
5218                     tmp = (xmlChar *) xmlRealloc(buf, new_size);
5219                     if (tmp == NULL) {
5220                         xmlErrMemory(ctxt, NULL);
5221                         xmlFree(buf);
5222                         ctxt->instate = state;
5223                         return;
5224                     }
5225                     buf = tmp;
5226                     size = new_size;
5227                 }
5228                 count++;
5229                 if (count > 50) {
5230                     SHRINK;
5231                     GROW;
5232                     if (ctxt->instate == XML_PARSER_EOF) {
5233                         xmlFree(buf);
5234                         return;
5235                     }
5236                     count = 0;
5237                     if ((len > XML_MAX_TEXT_LENGTH) &&
5238                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5239                         xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5240                                           "PI %s too big found", target);
5241                         xmlFree(buf);
5242                         ctxt->instate = state;
5243                         return;
5244                     }
5245                 }
5246                 COPY_BUF(l,buf,len,cur);
5247                 NEXTL(l);
5248                 cur = CUR_CHAR(l);
5249                 if (cur == 0) {
5250                     SHRINK;
5251                     GROW;
5252                     cur = CUR_CHAR(l);
5253                 }
5254             }
5255             if ((len > XML_MAX_TEXT_LENGTH) &&
5256                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5257                 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5258                                   "PI %s too big found", target);
5259                 xmlFree(buf);
5260                 ctxt->instate = state;
5261                 return;
5262             }
5263             buf[len] = 0;
5264             if (cur != '?') {
5265                 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5266                       "ParsePI: PI %s never end ...\n", target);
5267             } else {
5268                 if (inputid != ctxt->input->id) {
5269                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5270                                    "PI declaration doesn't start and stop in"
5271                                    " the same entity\n");
5272                 }
5273                 SKIP(2);
5274
5275 #ifdef LIBXML_CATALOG_ENABLED
5276                 if (((state == XML_PARSER_MISC) ||
5277                      (state == XML_PARSER_START)) &&
5278                     (xmlStrEqual(target, XML_CATALOG_PI))) {
5279                     xmlCatalogAllow allow = xmlCatalogGetDefaults();
5280                     if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5281                         (allow == XML_CATA_ALLOW_ALL))
5282                         xmlParseCatalogPI(ctxt, buf);
5283                 }
5284 #endif
5285
5286
5287                 /*
5288                  * SAX: PI detected.
5289                  */
5290                 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5291                     (ctxt->sax->processingInstruction != NULL))
5292                     ctxt->sax->processingInstruction(ctxt->userData,
5293                                                      target, buf);
5294             }
5295             xmlFree(buf);
5296         } else {
5297             xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5298         }
5299         if (ctxt->instate != XML_PARSER_EOF)
5300             ctxt->instate = state;
5301     }
5302 }
5303
5304 /**
5305  * xmlParseNotationDecl:
5306  * @ctxt:  an XML parser context
5307  *
5308  * parse a notation declaration
5309  *
5310  * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5311  *
5312  * Hence there is actually 3 choices:
5313  *     'PUBLIC' S PubidLiteral
5314  *     'PUBLIC' S PubidLiteral S SystemLiteral
5315  * and 'SYSTEM' S SystemLiteral
5316  *
5317  * See the NOTE on xmlParseExternalID().
5318  */
5319
5320 void
5321 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5322     const xmlChar *name;
5323     xmlChar *Pubid;
5324     xmlChar *Systemid;
5325
5326     if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5327         int inputid = ctxt->input->id;
5328         SHRINK;
5329         SKIP(10);
5330         if (SKIP_BLANKS == 0) {
5331             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5332                            "Space required after '<!NOTATION'\n");
5333             return;
5334         }
5335
5336         name = xmlParseName(ctxt);
5337         if (name == NULL) {
5338             xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5339             return;
5340         }
5341         if (xmlStrchr(name, ':') != NULL) {
5342             xmlNsErr(ctxt, XML_NS_ERR_COLON,
5343                      "colons are forbidden from notation names '%s'\n",
5344                      name, NULL, NULL);
5345         }
5346         if (SKIP_BLANKS == 0) {
5347             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5348                      "Space required after the NOTATION name'\n");
5349             return;
5350         }
5351
5352         /*
5353          * Parse the IDs.
5354          */
5355         Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5356         SKIP_BLANKS;
5357
5358         if (RAW == '>') {
5359             if (inputid != ctxt->input->id) {
5360                 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5361                                "Notation declaration doesn't start and stop"
5362                                " in the same entity\n");
5363             }
5364             NEXT;
5365             if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5366                 (ctxt->sax->notationDecl != NULL))
5367                 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5368         } else {
5369             xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5370         }
5371         if (Systemid != NULL) xmlFree(Systemid);
5372         if (Pubid != NULL) xmlFree(Pubid);
5373     }
5374 }
5375
5376 /**
5377  * xmlParseEntityDecl:
5378  * @ctxt:  an XML parser context
5379  *
5380  * parse <!ENTITY declarations
5381  *
5382  * [70] EntityDecl ::= GEDecl | PEDecl
5383  *
5384  * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5385  *
5386  * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5387  *
5388  * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5389  *
5390  * [74] PEDef ::= EntityValue | ExternalID
5391  *
5392  * [76] NDataDecl ::= S 'NDATA' S Name
5393  *
5394  * [ VC: Notation Declared ]
5395  * The Name must match the declared name of a notation.
5396  */
5397
5398 void
5399 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5400     const xmlChar *name = NULL;
5401     xmlChar *value = NULL;
5402     xmlChar *URI = NULL, *literal = NULL;
5403     const xmlChar *ndata = NULL;
5404     int isParameter = 0;
5405     xmlChar *orig = NULL;
5406
5407     /* GROW; done in the caller */
5408     if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5409         int inputid = ctxt->input->id;
5410         SHRINK;
5411         SKIP(8);
5412         if (SKIP_BLANKS == 0) {
5413             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5414                            "Space required after '<!ENTITY'\n");
5415         }
5416
5417         if (RAW == '%') {
5418             NEXT;
5419             if (SKIP_BLANKS == 0) {
5420                 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5421                                "Space required after '%%'\n");
5422             }
5423             isParameter = 1;
5424         }
5425
5426         name = xmlParseName(ctxt);
5427         if (name == NULL) {
5428             xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5429                            "xmlParseEntityDecl: no name\n");
5430             return;
5431         }
5432         if (xmlStrchr(name, ':') != NULL) {
5433             xmlNsErr(ctxt, XML_NS_ERR_COLON,
5434                      "colons are forbidden from entities names '%s'\n",
5435                      name, NULL, NULL);
5436         }
5437         if (SKIP_BLANKS == 0) {
5438             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5439                            "Space required after the entity name\n");
5440         }
5441
5442         ctxt->instate = XML_PARSER_ENTITY_DECL;
5443         /*
5444          * handle the various case of definitions...
5445          */
5446         if (isParameter) {
5447             if ((RAW == '"') || (RAW == '\'')) {
5448                 value = xmlParseEntityValue(ctxt, &orig);
5449                 if (value) {
5450                     if ((ctxt->sax != NULL) &&
5451                         (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5452                         ctxt->sax->entityDecl(ctxt->userData, name,
5453                                     XML_INTERNAL_PARAMETER_ENTITY,
5454                                     NULL, NULL, value);
5455                 }
5456             } else {
5457                 URI = xmlParseExternalID(ctxt, &literal, 1);
5458                 if ((URI == NULL) && (literal == NULL)) {
5459                     xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5460                 }
5461                 if (URI) {
5462                     xmlURIPtr uri;
5463
5464                     uri = xmlParseURI((const char *) URI);
5465                     if (uri == NULL) {
5466                         xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5467                                      "Invalid URI: %s\n", URI);
5468                         /*
5469                          * This really ought to be a well formedness error
5470                          * but the XML Core WG decided otherwise c.f. issue
5471                          * E26 of the XML erratas.
5472                          */
5473                     } else {
5474                         if (uri->fragment != NULL) {
5475                             /*
5476                              * Okay this is foolish to block those but not
5477                              * invalid URIs.
5478                              */
5479                             xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5480                         } else {
5481                             if ((ctxt->sax != NULL) &&
5482                                 (!ctxt->disableSAX) &&
5483                                 (ctxt->sax->entityDecl != NULL))
5484                                 ctxt->sax->entityDecl(ctxt->userData, name,
5485                                             XML_EXTERNAL_PARAMETER_ENTITY,
5486                                             literal, URI, NULL);
5487                         }
5488                         xmlFreeURI(uri);
5489                     }
5490                 }
5491             }
5492         } else {
5493             if ((RAW == '"') || (RAW == '\'')) {
5494                 value = xmlParseEntityValue(ctxt, &orig);
5495                 if ((ctxt->sax != NULL) &&
5496                     (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5497                     ctxt->sax->entityDecl(ctxt->userData, name,
5498                                 XML_INTERNAL_GENERAL_ENTITY,
5499                                 NULL, NULL, value);
5500                 /*
5501                  * For expat compatibility in SAX mode.
5502                  */
5503                 if ((ctxt->myDoc == NULL) ||
5504                     (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5505                     if (ctxt->myDoc == NULL) {
5506                         ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5507                         if (ctxt->myDoc == NULL) {
5508                             xmlErrMemory(ctxt, "New Doc failed");
5509                             return;
5510                         }
5511                         ctxt->myDoc->properties = XML_DOC_INTERNAL;
5512                     }
5513                     if (ctxt->myDoc->intSubset == NULL)
5514                         ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5515                                             BAD_CAST "fake", NULL, NULL);
5516
5517                     xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5518                                       NULL, NULL, value);
5519                 }
5520             } else {
5521                 URI = xmlParseExternalID(ctxt, &literal, 1);
5522                 if ((URI == NULL) && (literal == NULL)) {
5523                     xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5524                 }
5525                 if (URI) {
5526                     xmlURIPtr uri;
5527
5528                     uri = xmlParseURI((const char *)URI);
5529                     if (uri == NULL) {
5530                         xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5531                                      "Invalid URI: %s\n", URI);
5532                         /*
5533                          * This really ought to be a well formedness error
5534                          * but the XML Core WG decided otherwise c.f. issue
5535                          * E26 of the XML erratas.
5536                          */
5537                     } else {
5538                         if (uri->fragment != NULL) {
5539                             /*
5540                              * Okay this is foolish to block those but not
5541                              * invalid URIs.
5542                              */
5543                             xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5544                         }
5545                         xmlFreeURI(uri);
5546                     }
5547                 }
5548                 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5549                     xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5550                                    "Space required before 'NDATA'\n");
5551                 }
5552                 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5553                     SKIP(5);
5554                     if (SKIP_BLANKS == 0) {
5555                         xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5556                                        "Space required after 'NDATA'\n");
5557                     }
5558                     ndata = xmlParseName(ctxt);
5559                     if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5560                         (ctxt->sax->unparsedEntityDecl != NULL))
5561                         ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5562                                     literal, URI, ndata);
5563                 } else {
5564                     if ((ctxt->sax != NULL) &&
5565                         (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5566                         ctxt->sax->entityDecl(ctxt->userData, name,
5567                                     XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5568                                     literal, URI, NULL);
5569                     /*
5570                      * For expat compatibility in SAX mode.
5571                      * assuming the entity replacement was asked for
5572                      */
5573                     if ((ctxt->replaceEntities != 0) &&
5574                         ((ctxt->myDoc == NULL) ||
5575                         (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5576                         if (ctxt->myDoc == NULL) {
5577                             ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5578                             if (ctxt->myDoc == NULL) {
5579                                 xmlErrMemory(ctxt, "New Doc failed");
5580                                 return;
5581                             }
5582                             ctxt->myDoc->properties = XML_DOC_INTERNAL;
5583                         }
5584
5585                         if (ctxt->myDoc->intSubset == NULL)
5586                             ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5587                                                 BAD_CAST "fake", NULL, NULL);
5588                         xmlSAX2EntityDecl(ctxt, name,
5589                                           XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5590                                           literal, URI, NULL);
5591                     }
5592                 }
5593             }
5594         }
5595         if (ctxt->instate == XML_PARSER_EOF)
5596             goto done;
5597         SKIP_BLANKS;
5598         if (RAW != '>') {
5599             xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5600                     "xmlParseEntityDecl: entity %s not terminated\n", name);
5601             xmlHaltParser(ctxt);
5602         } else {
5603             if (inputid != ctxt->input->id) {
5604                 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5605                                "Entity declaration doesn't start and stop in"
5606                                " the same entity\n");
5607             }
5608             NEXT;
5609         }
5610         if (orig != NULL) {
5611             /*
5612              * Ugly mechanism to save the raw entity value.
5613              */
5614             xmlEntityPtr cur = NULL;
5615
5616             if (isParameter) {
5617                 if ((ctxt->sax != NULL) &&
5618                     (ctxt->sax->getParameterEntity != NULL))
5619                     cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5620             } else {
5621                 if ((ctxt->sax != NULL) &&
5622                     (ctxt->sax->getEntity != NULL))
5623                     cur = ctxt->sax->getEntity(ctxt->userData, name);
5624                 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5625                     cur = xmlSAX2GetEntity(ctxt, name);
5626                 }
5627             }
5628             if ((cur != NULL) && (cur->orig == NULL)) {
5629                 cur->orig = orig;
5630                 orig = NULL;
5631             }
5632         }
5633
5634 done:
5635         if (value != NULL) xmlFree(value);
5636         if (URI != NULL) xmlFree(URI);
5637         if (literal != NULL) xmlFree(literal);
5638         if (orig != NULL) xmlFree(orig);
5639     }
5640 }
5641
5642 /**
5643  * xmlParseDefaultDecl:
5644  * @ctxt:  an XML parser context
5645  * @value:  Receive a possible fixed default value for the attribute
5646  *
5647  * Parse an attribute default declaration
5648  *
5649  * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5650  *
5651  * [ VC: Required Attribute ]
5652  * if the default declaration is the keyword #REQUIRED, then the
5653  * attribute must be specified for all elements of the type in the
5654  * attribute-list declaration.
5655  *
5656  * [ VC: Attribute Default Legal ]
5657  * The declared default value must meet the lexical constraints of
5658  * the declared attribute type c.f. xmlValidateAttributeDecl()
5659  *
5660  * [ VC: Fixed Attribute Default ]
5661  * if an attribute has a default value declared with the #FIXED
5662  * keyword, instances of that attribute must match the default value.
5663  *
5664  * [ WFC: No < in Attribute Values ]
5665  * handled in xmlParseAttValue()
5666  *
5667  * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5668  *          or XML_ATTRIBUTE_FIXED.
5669  */
5670
5671 int
5672 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5673     int val;
5674     xmlChar *ret;
5675
5676     *value = NULL;
5677     if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5678         SKIP(9);
5679         return(XML_ATTRIBUTE_REQUIRED);
5680     }
5681     if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5682         SKIP(8);
5683         return(XML_ATTRIBUTE_IMPLIED);
5684     }
5685     val = XML_ATTRIBUTE_NONE;
5686     if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5687         SKIP(6);
5688         val = XML_ATTRIBUTE_FIXED;
5689         if (SKIP_BLANKS == 0) {
5690             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5691                            "Space required after '#FIXED'\n");
5692         }
5693     }
5694     ret = xmlParseAttValue(ctxt);
5695     ctxt->instate = XML_PARSER_DTD;
5696     if (ret == NULL) {
5697         xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5698                        "Attribute default value declaration error\n");
5699     } else
5700         *value = ret;
5701     return(val);
5702 }
5703
5704 /**
5705  * xmlParseNotationType:
5706  * @ctxt:  an XML parser context
5707  *
5708  * parse an Notation attribute type.
5709  *
5710  * Note: the leading 'NOTATION' S part has already being parsed...
5711  *
5712  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5713  *
5714  * [ VC: Notation Attributes ]
5715  * Values of this type must match one of the notation names included
5716  * in the declaration; all notation names in the declaration must be declared.
5717  *
5718  * Returns: the notation attribute tree built while parsing
5719  */
5720
5721 xmlEnumerationPtr
5722 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5723     const xmlChar *name;
5724     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5725
5726     if (RAW != '(') {
5727         xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5728         return(NULL);
5729     }
5730     SHRINK;
5731     do {
5732         NEXT;
5733         SKIP_BLANKS;
5734         name = xmlParseName(ctxt);
5735         if (name == NULL) {
5736             xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5737                            "Name expected in NOTATION declaration\n");
5738             xmlFreeEnumeration(ret);
5739             return(NULL);
5740         }
5741         tmp = ret;
5742         while (tmp != NULL) {
5743             if (xmlStrEqual(name, tmp->name)) {
5744                 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5745           "standalone: attribute notation value token %s duplicated\n",
5746                                  name, NULL);
5747                 if (!xmlDictOwns(ctxt->dict, name))
5748                     xmlFree((xmlChar *) name);
5749                 break;
5750             }
5751             tmp = tmp->next;
5752         }
5753         if (tmp == NULL) {
5754             cur = xmlCreateEnumeration(name);
5755             if (cur == NULL) {
5756                 xmlFreeEnumeration(ret);
5757                 return(NULL);
5758             }
5759             if (last == NULL) ret = last = cur;
5760             else {
5761                 last->next = cur;
5762                 last = cur;
5763             }
5764         }
5765         SKIP_BLANKS;
5766     } while (RAW == '|');
5767     if (RAW != ')') {
5768         xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5769         xmlFreeEnumeration(ret);
5770         return(NULL);
5771     }
5772     NEXT;
5773     return(ret);
5774 }
5775
5776 /**
5777  * xmlParseEnumerationType:
5778  * @ctxt:  an XML parser context
5779  *
5780  * parse an Enumeration attribute type.
5781  *
5782  * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5783  *
5784  * [ VC: Enumeration ]
5785  * Values of this type must match one of the Nmtoken tokens in
5786  * the declaration
5787  *
5788  * Returns: the enumeration attribute tree built while parsing
5789  */
5790
5791 xmlEnumerationPtr
5792 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5793     xmlChar *name;
5794     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5795
5796     if (RAW != '(') {
5797         xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5798         return(NULL);
5799     }
5800     SHRINK;
5801     do {
5802         NEXT;
5803         SKIP_BLANKS;
5804         name = xmlParseNmtoken(ctxt);
5805         if (name == NULL) {
5806             xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5807             return(ret);
5808         }
5809         tmp = ret;
5810         while (tmp != NULL) {
5811             if (xmlStrEqual(name, tmp->name)) {
5812                 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5813           "standalone: attribute enumeration value token %s duplicated\n",
5814                                  name, NULL);
5815                 if (!xmlDictOwns(ctxt->dict, name))
5816                     xmlFree(name);
5817                 break;
5818             }
5819             tmp = tmp->next;
5820         }
5821         if (tmp == NULL) {
5822             cur = xmlCreateEnumeration(name);
5823             if (!xmlDictOwns(ctxt->dict, name))
5824                 xmlFree(name);
5825             if (cur == NULL) {
5826                 xmlFreeEnumeration(ret);
5827                 return(NULL);
5828             }
5829             if (last == NULL) ret = last = cur;
5830             else {
5831                 last->next = cur;
5832                 last = cur;
5833             }
5834         }
5835         SKIP_BLANKS;
5836     } while (RAW == '|');
5837     if (RAW != ')') {
5838         xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5839         return(ret);
5840     }
5841     NEXT;
5842     return(ret);
5843 }
5844
5845 /**
5846  * xmlParseEnumeratedType:
5847  * @ctxt:  an XML parser context
5848  * @tree:  the enumeration tree built while parsing
5849  *
5850  * parse an Enumerated attribute type.
5851  *
5852  * [57] EnumeratedType ::= NotationType | Enumeration
5853  *
5854  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5855  *
5856  *
5857  * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5858  */
5859
5860 int
5861 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5862     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5863         SKIP(8);
5864         if (SKIP_BLANKS == 0) {
5865             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5866                            "Space required after 'NOTATION'\n");
5867             return(0);
5868         }
5869         *tree = xmlParseNotationType(ctxt);
5870         if (*tree == NULL) return(0);
5871         return(XML_ATTRIBUTE_NOTATION);
5872     }
5873     *tree = xmlParseEnumerationType(ctxt);
5874     if (*tree == NULL) return(0);
5875     return(XML_ATTRIBUTE_ENUMERATION);
5876 }
5877
5878 /**
5879  * xmlParseAttributeType:
5880  * @ctxt:  an XML parser context
5881  * @tree:  the enumeration tree built while parsing
5882  *
5883  * parse the Attribute list def for an element
5884  *
5885  * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5886  *
5887  * [55] StringType ::= 'CDATA'
5888  *
5889  * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5890  *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5891  *
5892  * Validity constraints for attribute values syntax are checked in
5893  * xmlValidateAttributeValue()
5894  *
5895  * [ VC: ID ]
5896  * Values of type ID must match the Name production. A name must not
5897  * appear more than once in an XML document as a value of this type;
5898  * i.e., ID values must uniquely identify the elements which bear them.
5899  *
5900  * [ VC: One ID per Element Type ]
5901  * No element type may have more than one ID attribute specified.
5902  *
5903  * [ VC: ID Attribute Default ]
5904  * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5905  *
5906  * [ VC: IDREF ]
5907  * Values of type IDREF must match the Name production, and values
5908  * of type IDREFS must match Names; each IDREF Name must match the value
5909  * of an ID attribute on some element in the XML document; i.e. IDREF
5910  * values must match the value of some ID attribute.
5911  *
5912  * [ VC: Entity Name ]
5913  * Values of type ENTITY must match the Name production, values
5914  * of type ENTITIES must match Names; each Entity Name must match the
5915  * name of an unparsed entity declared in the DTD.
5916  *
5917  * [ VC: Name Token ]
5918  * Values of type NMTOKEN must match the Nmtoken production; values
5919  * of type NMTOKENS must match Nmtokens.
5920  *
5921  * Returns the attribute type
5922  */
5923 int
5924 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5925     SHRINK;
5926     if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5927         SKIP(5);
5928         return(XML_ATTRIBUTE_CDATA);
5929      } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5930         SKIP(6);
5931         return(XML_ATTRIBUTE_IDREFS);
5932      } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5933         SKIP(5);
5934         return(XML_ATTRIBUTE_IDREF);
5935      } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5936         SKIP(2);
5937         return(XML_ATTRIBUTE_ID);
5938      } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5939         SKIP(6);
5940         return(XML_ATTRIBUTE_ENTITY);
5941      } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5942         SKIP(8);
5943         return(XML_ATTRIBUTE_ENTITIES);
5944      } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5945         SKIP(8);
5946         return(XML_ATTRIBUTE_NMTOKENS);
5947      } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5948         SKIP(7);
5949         return(XML_ATTRIBUTE_NMTOKEN);
5950      }
5951      return(xmlParseEnumeratedType(ctxt, tree));
5952 }
5953
5954 /**
5955  * xmlParseAttributeListDecl:
5956  * @ctxt:  an XML parser context
5957  *
5958  * : parse the Attribute list def for an element
5959  *
5960  * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5961  *
5962  * [53] AttDef ::= S Name S AttType S DefaultDecl
5963  *
5964  */
5965 void
5966 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5967     const xmlChar *elemName;
5968     const xmlChar *attrName;
5969     xmlEnumerationPtr tree;
5970
5971     if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5972         int inputid = ctxt->input->id;
5973
5974         SKIP(9);
5975         if (SKIP_BLANKS == 0) {
5976             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5977                                  "Space required after '<!ATTLIST'\n");
5978         }
5979         elemName = xmlParseName(ctxt);
5980         if (elemName == NULL) {
5981             xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5982                            "ATTLIST: no name for Element\n");
5983             return;
5984         }
5985         SKIP_BLANKS;
5986         GROW;
5987         while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5988             int type;
5989             int def;
5990             xmlChar *defaultValue = NULL;
5991
5992             GROW;
5993             tree = NULL;
5994             attrName = xmlParseName(ctxt);
5995             if (attrName == NULL) {
5996                 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5997                                "ATTLIST: no name for Attribute\n");
5998                 break;
5999             }
6000             GROW;
6001             if (SKIP_BLANKS == 0) {
6002                 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6003                         "Space required after the attribute name\n");
6004                 break;
6005             }
6006
6007             type = xmlParseAttributeType(ctxt, &tree);
6008             if (type <= 0) {
6009                 break;
6010             }
6011
6012             GROW;
6013             if (SKIP_BLANKS == 0) {
6014                 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6015                                "Space required after the attribute type\n");
6016                 if (tree != NULL)
6017                     xmlFreeEnumeration(tree);
6018                 break;
6019             }
6020
6021             def = xmlParseDefaultDecl(ctxt, &defaultValue);
6022             if (def <= 0) {
6023                 if (defaultValue != NULL)
6024                     xmlFree(defaultValue);
6025                 if (tree != NULL)
6026                     xmlFreeEnumeration(tree);
6027                 break;
6028             }
6029             if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6030                 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6031
6032             GROW;
6033             if (RAW != '>') {
6034                 if (SKIP_BLANKS == 0) {
6035                     xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6036                         "Space required after the attribute default value\n");
6037                     if (defaultValue != NULL)
6038                         xmlFree(defaultValue);
6039                     if (tree != NULL)
6040                         xmlFreeEnumeration(tree);
6041                     break;
6042                 }
6043             }
6044             if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6045                 (ctxt->sax->attributeDecl != NULL))
6046                 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6047                                 type, def, defaultValue, tree);
6048             else if (tree != NULL)
6049                 xmlFreeEnumeration(tree);
6050
6051             if ((ctxt->sax2) && (defaultValue != NULL) &&
6052                 (def != XML_ATTRIBUTE_IMPLIED) &&
6053                 (def != XML_ATTRIBUTE_REQUIRED)) {
6054                 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6055             }
6056             if (ctxt->sax2) {
6057                 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6058             }
6059             if (defaultValue != NULL)
6060                 xmlFree(defaultValue);
6061             GROW;
6062         }
6063         if (RAW == '>') {
6064             if (inputid != ctxt->input->id) {
6065                 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6066                                "Attribute list declaration doesn't start and"
6067                                " stop in the same entity\n");
6068             }
6069             NEXT;
6070         }
6071     }
6072 }
6073
6074 /**
6075  * xmlParseElementMixedContentDecl:
6076  * @ctxt:  an XML parser context
6077  * @inputchk:  the input id recorded by the caller; a different ctxt->input->id at the closing ')' is reported as XML_ERR_ENTITY_BOUNDARY
6078  *
6079  * Parse the declaration of a Mixed element content, starting at '#PCDATA'.
6080  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6081  *
6082  * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6083  *                '(' S? '#PCDATA' S? ')'
6084  *
6085  * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6086  *
6087  * [ VC: No Duplicate Types ]
6088  * The same name must not appear more than once in a single
6089  * mixed-content declaration.
6090  *
6091  * Returns the xmlElementContentPtr tree describing the element choices, or NULL on error (no '#PCDATA', missing Name, bad terminator, or a PCDATA/OR node could not be allocated)
6092  */
6093 xmlElementContentPtr
6094 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6095     xmlElementContentPtr ret = NULL, cur = NULL, n;  /* ret: tree root, cur: node the next child is attached to, n: new OR node */
6096     const xmlChar *elem = NULL;  /* last Name parsed, not yet attached to the tree */
6097
6098     GROW;
6099     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6100         SKIP(7);
6101         SKIP_BLANKS;
6102         SHRINK;
6103         if (RAW == ')') {  /* '(#PCDATA)' form, optionally followed by '*' */
6104             if (ctxt->input->id != inputchk) {
6105                 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6106                                "Element content declaration doesn't start and"
6107                                " stop in the same entity\n");
6108             }
6109             NEXT;
6110             ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6111             if (ret == NULL)
6112                 return(NULL);
6113             if (RAW == '*') {
6114                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6115                 NEXT;
6116             }
6117             return(ret);
6118         }
6119         if ((RAW == '(') || (RAW == '|')) {  /* create the #PCDATA leaf before reading the alternatives */
6120             ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6121             if (ret == NULL) return(NULL);
6122         }
6123         while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {  /* one "'|' Name" per iteration */
6124             NEXT;
6125             if (elem == NULL) {  /* first '|': the PCDATA leaf becomes c1 of a new OR root */
6126                 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6127                 if (ret == NULL) {
6128                     xmlFreeDocElementContent(ctxt->myDoc, cur);
6129                     return(NULL);
6130                 }
6131                 ret->c1 = cur;
6132                 if (cur != NULL)
6133                     cur->parent = ret;
6134                 cur = ret;
6135             } else {  /* later '|': new OR node takes the previous Name as c1 and hangs on cur->c2 */
6136                 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6137                 if (n == NULL) {
6138                     xmlFreeDocElementContent(ctxt->myDoc, ret);
6139                     return(NULL);
6140                 }
6141                 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6142                 if (n->c1 != NULL)
6143                     n->c1->parent = n;
6144                 cur->c2 = n;
6145                 if (n != NULL)  /* always true here: n == NULL already returned above */
6146                     n->parent = cur;
6147                 cur = n;
6148             }
6149             SKIP_BLANKS;
6150             elem = xmlParseName(ctxt);  /* attached on the next iteration or after the loop */
6151             if (elem == NULL) {
6152                 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6153                         "xmlParseElementMixedContentDecl : Name expected\n");
6154                 xmlFreeDocElementContent(ctxt->myDoc, ret);
6155                 return(NULL);
6156             }
6157             SKIP_BLANKS;
6158             GROW;
6159         }
6160         if ((RAW == ')') && (NXT(1) == '*')) {  /* only ')*' is accepted as terminator at this point */
6161             if (elem != NULL) {  /* attach the last parsed Name as c2 of the last OR node */
6162                 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6163                                                XML_ELEMENT_CONTENT_ELEMENT);
6164                 if (cur->c2 != NULL)
6165                     cur->c2->parent = cur;
6166             }
6167             if (ret != NULL)
6168                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6169             if (ctxt->input->id != inputchk) {
6170                 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6171                                "Element content declaration doesn't start and"
6172                                " stop in the same entity\n");
6173             }
6174             SKIP(2);
6175         } else {  /* any other terminator: drop what was built and report */
6176             xmlFreeDocElementContent(ctxt->myDoc, ret);
6177             xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6178             return(NULL);
6179         }
6180
6181     } else {
6182         xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);  /* ret is still NULL here */
6183     }
6184     return(ret);
6185 }
6186
6187 /**
6188  * xmlParseElementChildrenContentDeclPriv:
6189  * @ctxt:  an XML parser context
6190  * @inputchk:  the input id recorded by the caller; a different ctxt->input->id at the closing ')' is reported as XML_ERR_ENTITY_BOUNDARY
6191  * @depth: the level of recursion; refused above 128, or above 2048 when XML_PARSE_HUGE is set
6192  *
6193  * Parse the declaration of a children (element-only) content model, recursing on nested groups.
6194  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6195  *
6196  *
6197  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6198  *
6199  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6200  *
6201  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6202  *
6203  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6204  *
6205  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6206  * TODO Parameter-entity replacement text must be properly nested
6207  *      with parenthesized groups. That is to say, if either of the
6208  *      opening or closing parentheses in a choice, seq, or Mixed
6209  *      construct is contained in the replacement text for a parameter
6210  *      entity, both must be contained in the same replacement text. For
6211  *      interoperability, if a parameter-entity reference appears in a
6212  *      choice, seq, or Mixed construct, its replacement text should not
6213  *      be empty, and neither the first nor last non-blank character of
6214  *      the replacement text should be a connector (| or ,).
6215  *
6216  * Returns the tree of xmlElementContentPtr describing the element
6217  *          hierarchy, or NULL on failure (partial trees are freed before returning).
6218  */
6219 static xmlElementContentPtr
6220 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6221                                        int depth) {
6222     xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;  /* ret: root, cur: latest operator node, last: parsed cp not yet attached, op: new operator */
6223     const xmlChar *elem;
6224     xmlChar type = 0;  /* separator used by this group: 0 until the first ',' or '|' is seen */
6225
6226     if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6227         (depth >  2048)) {
6228         xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6229 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6230                           depth);
6231         return(NULL);
6232     }
6233     SKIP_BLANKS;
6234     GROW;
6235     if (RAW == '(') {
6236         int inputid = ctxt->input->id;  /* boundary reference for the nested group */
6237
6238         /* Recurse on first child */
6239         NEXT;
6240         SKIP_BLANKS;
6241         cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6242                                                            depth + 1);
6243         if (cur == NULL)
6244             return(NULL);
6245         SKIP_BLANKS;
6246         GROW;
6247     } else {  /* first cp is a Name with an optional occurrence suffix */
6248         elem = xmlParseName(ctxt);
6249         if (elem == NULL) {
6250             xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6251             return(NULL);
6252         }
6253         cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6254         if (cur == NULL) {
6255             xmlErrMemory(ctxt, NULL);
6256             return(NULL);
6257         }
6258         GROW;
6259         if (RAW == '?') {
6260             cur->ocur = XML_ELEMENT_CONTENT_OPT;
6261             NEXT;
6262         } else if (RAW == '*') {
6263             cur->ocur = XML_ELEMENT_CONTENT_MULT;
6264             NEXT;
6265         } else if (RAW == '+') {
6266             cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6267             NEXT;
6268         } else {
6269             cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6270         }
6271         GROW;
6272     }
6273     SKIP_BLANKS;
6274     SHRINK;
6275     while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6276         /*
6277          * Each loop we parse one separator and one element.
6278          */
6279         if (RAW == ',') {
6280             if (type == 0) type = CUR;
6281
6282             /*
6283              * Detect "Name | Name , Name" error
6284              */
6285             else if (type != CUR) {
6286                 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6287                     "xmlParseElementChildrenContentDecl : '%c' expected\n",
6288                                   type);
6289                 if ((last != NULL) && (last != ret))
6290                     xmlFreeDocElementContent(ctxt->myDoc, last);
6291                 if (ret != NULL)
6292                     xmlFreeDocElementContent(ctxt->myDoc, ret);
6293                 return(NULL);
6294             }
6295             NEXT;
6296
6297             op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6298             if (op == NULL) {
6299                 if ((last != NULL) && (last != ret))
6300                     xmlFreeDocElementContent(ctxt->myDoc, last);
6301                 xmlFreeDocElementContent(ctxt->myDoc, ret);
6302                 return(NULL);
6303             }
6304             if (last == NULL) {  /* first separator: the tree so far becomes c1 of the new root */
6305                 op->c1 = ret;
6306                 if (ret != NULL)
6307                     ret->parent = op;
6308                 ret = cur = op;
6309             } else {  /* later separator: op hangs on cur->c2 and takes the pending cp as c1 */
6310                 cur->c2 = op;
6311                 if (op != NULL)  /* always true here: op == NULL already returned above */
6312                     op->parent = cur;
6313                 op->c1 = last;
6314                 if (last != NULL)
6315                     last->parent = op;
6316                 cur =op;
6317                 last = NULL;
6318             }
6319         } else if (RAW == '|') {  /* same handling as ',' but builds OR nodes */
6320             if (type == 0) type = CUR;
6321
6322             /*
6323              * Detect "Name , Name | Name" error
6324              */
6325             else if (type != CUR) {
6326                 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6327                     "xmlParseElementChildrenContentDecl : '%c' expected\n",
6328                                   type);
6329                 if ((last != NULL) && (last != ret))
6330                     xmlFreeDocElementContent(ctxt->myDoc, last);
6331                 if (ret != NULL)
6332                     xmlFreeDocElementContent(ctxt->myDoc, ret);
6333                 return(NULL);
6334             }
6335             NEXT;
6336
6337             op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6338             if (op == NULL) {
6339                 if ((last != NULL) && (last != ret))
6340                     xmlFreeDocElementContent(ctxt->myDoc, last);
6341                 if (ret != NULL)
6342                     xmlFreeDocElementContent(ctxt->myDoc, ret);
6343                 return(NULL);
6344             }
6345             if (last == NULL) {
6346                 op->c1 = ret;
6347                 if (ret != NULL)
6348                     ret->parent = op;
6349                 ret = cur = op;
6350             } else {
6351                 cur->c2 = op;
6352                 if (op != NULL)
6353                     op->parent = cur;
6354                 op->c1 = last;
6355                 if (last != NULL)
6356                     last->parent = op;
6357                 cur =op;
6358                 last = NULL;
6359             }
6360         } else {  /* neither ')' nor a separator: the group is malformed */
6361             xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6362             if ((last != NULL) && (last != ret))
6363                 xmlFreeDocElementContent(ctxt->myDoc, last);
6364             if (ret != NULL)
6365                 xmlFreeDocElementContent(ctxt->myDoc, ret);
6366             return(NULL);
6367         }
6368         GROW;
6369         SKIP_BLANKS;
6370         GROW;
6371         if (RAW == '(') {
6372             int inputid = ctxt->input->id;
6373             /* Recurse on second child */
6374             NEXT;
6375             SKIP_BLANKS;
6376             last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6377                                                           depth + 1);
6378             if (last == NULL) {
6379                 if (ret != NULL)
6380                     xmlFreeDocElementContent(ctxt->myDoc, ret);
6381                 return(NULL);
6382             }
6383             SKIP_BLANKS;
6384         } else {  /* the cp after the separator is a Name with an optional occurrence suffix */
6385             elem = xmlParseName(ctxt);
6386             if (elem == NULL) {
6387                 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6388                 if (ret != NULL)
6389                     xmlFreeDocElementContent(ctxt->myDoc, ret);
6390                 return(NULL);
6391             }
6392             last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6393             if (last == NULL) {
6394                 if (ret != NULL)
6395                     xmlFreeDocElementContent(ctxt->myDoc, ret);
6396                 return(NULL);
6397             }
6398             if (RAW == '?') {
6399                 last->ocur = XML_ELEMENT_CONTENT_OPT;
6400                 NEXT;
6401             } else if (RAW == '*') {
6402                 last->ocur = XML_ELEMENT_CONTENT_MULT;
6403                 NEXT;
6404             } else if (RAW == '+') {
6405                 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6406                 NEXT;
6407             } else {
6408                 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6409             }
6410         }
6411         SKIP_BLANKS;
6412         GROW;
6413     }
6414     if ((cur != NULL) && (last != NULL)) {  /* attach the final cp as c2 of the latest operator */
6415         cur->c2 = last;
6416         if (last != NULL)
6417             last->parent = cur;
6418     }
6419     if (ctxt->input->id != inputchk) {
6420         xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6421                        "Element content declaration doesn't start and stop in"
6422                        " the same entity\n");
6423     }
6424     NEXT;  /* step over the closing ')' (the loop above may also have stopped on parser EOF) */
6425     if (RAW == '?') {  /* group suffix '?': PLUS or MULT becomes MULT, anything else OPT */
6426         if (ret != NULL) {
6427             if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6428                 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6429                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6430             else
6431                 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6432         }
6433         NEXT;
6434     } else if (RAW == '*') {
6435         if (ret != NULL) {
6436             ret->ocur = XML_ELEMENT_CONTENT_MULT;
6437             cur = ret;  /* walk the OR chain from the root */
6438             /*
6439              * Some normalization:
6440              * (a | b* | c?)* == (a | b | c)*
6441              */
6442             while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6443                 if ((cur->c1 != NULL) &&
6444                     ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6445                      (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6446                     cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6447                 if ((cur->c2 != NULL) &&
6448                     ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6449                      (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6450                     cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6451                 cur = cur->c2;
6452             }
6453         }
6454         NEXT;
6455     } else if (RAW == '+') {
6456         if (ret != NULL) {
6457             int found = 0;  /* set when a child's OPT/MULT occurrence was reset to ONCE */
6458
6459             if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6460                 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6461                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6462             else
6463                 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6464             /*
6465              * Some normalization (NOTE(review): unlike the '*' case, cur is not reset to ret before this walk - confirm intended):
6466              * (a | b*)+ == (a | b)*
6467              * (a | b?)+ == (a | b)*
6468              */
6469             while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6470                 if ((cur->c1 != NULL) &&
6471                     ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6472                      (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6473                     cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6474                     found = 1;
6475                 }
6476                 if ((cur->c2 != NULL) &&
6477                     ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6478                      (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6479                     cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6480                     found = 1;
6481                 }
6482                 cur = cur->c2;
6483             }
6484             if (found)
6485                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6486         }
6487         NEXT;
6488     }
6489     return(ret);
6490 }
6491
6492 /**
6493  * xmlParseElementChildrenContentDecl:
6494  * @ctxt:  an XML parser context
6495  * @inputchk:  the input used for the current entity, needed for boundary checks
6496  *
6497  * Parse the declaration of a children (element-only) content model; public wrapper that calls xmlParseElementChildrenContentDeclPriv with depth 1.
6498  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6499  *
6500  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6501  *
6502  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6503  *
6504  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6505  *
6506  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6507  *
6508  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6509  * TODO Parameter-entity replacement text must be properly nested
6510  *      with parenthesized groups. That is to say, if either of the
6511  *      opening or closing parentheses in a choice, seq, or Mixed
6512  *      construct is contained in the replacement text for a parameter
6513  *      entity, both must be contained in the same replacement text. For
6514  *      interoperability, if a parameter-entity reference appears in a
6515  *      choice, seq, or Mixed construct, its replacement text should not
6516  *      be empty, and neither the first nor last non-blank character of
6517  *      the replacement text should be a connector (| or ,).
6518  *
6519  * Returns the tree of xmlElementContentPtr describing the element
6520  *          hierarchy, exactly as returned by the private implementation.
6521  */
6522 xmlElementContentPtr
6523 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6524     /* thin wrapper kept for API/ABI compatibility; the recursion depth starts at 1 */
6525     return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6526 }
6527
6528 /**
6529  * xmlParseElementContentDecl:
6530  * @ctxt:  an XML parser context
6531  * @name:  the name of the element being defined (only used in the error message).
6532  * @result:  reset to NULL on entry, then set to the parsed content tree (stays NULL if the sub-parser returned NULL)
6533  *
6534  * Parse the declaration of an element content, either Mixed or Children;
6535  * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6536  *
6537  * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6538  *
6539  * Returns XML_ELEMENT_TYPE_MIXED or XML_ELEMENT_TYPE_ELEMENT (even when *result is NULL), or -1 if '(' is missing or the parser is in the EOF state
6540  */
6541
6542 int
6543 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6544                            xmlElementContentPtr *result) {
6545
6546     xmlElementContentPtr tree = NULL;
6547     int inputid = ctxt->input->id;  /* input holding the opening '(', passed down for the boundary check */
6548     int res;
6549
6550     *result = NULL;
6551
6552     if (RAW != '(') {
6553         xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6554                 "xmlParseElementContentDecl : %s '(' expected\n", name);
6555         return(-1);
6556     }
6557     NEXT;
6558     GROW;
6559     if (ctxt->instate == XML_PARSER_EOF)
6560         return(-1);
6561     SKIP_BLANKS;
6562     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {  /* '#PCDATA' right after '(' selects the Mixed parser */
6563         tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6564         res = XML_ELEMENT_TYPE_MIXED;
6565     } else {
6566         tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6567         res = XML_ELEMENT_TYPE_ELEMENT;
6568     }
6569     SKIP_BLANKS;
6570     *result = tree;  /* may be NULL; res is returned regardless */
6571     return(res);
6572 }
6573
6574 /**
6575  * xmlParseElementDecl:
6576  * @ctxt:  an XML parser context
6577  *
6578  * Parse an element declaration and, when SAX is enabled, report it through the elementDecl callback.
6579  *
6580  * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6581  *
6582  * [ VC: Unique Element Type Declaration ]
6583  * No element type may be declared more than once
6584  *
6585  * Returns the type of the element, or -1 in case of error or if the input is not at '<!ELEMENT'; note that a missing '>' is reported but the parsed type is still returned
6586  */
6587 int
6588 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6589     const xmlChar *name;
6590     int ret = -1;
6591     xmlElementContentPtr content  = NULL;  /* only set for the '(' (Mixed/children) case */
6592
6593     /* GROW; done in the caller */
6594     if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6595         int inputid = ctxt->input->id;  /* input holding '<!ELEMENT', checked again at '>' */
6596
6597         SKIP(9);
6598         if (SKIP_BLANKS == 0) {
6599             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6600                            "Space required after 'ELEMENT'\n");
6601             return(-1);
6602         }
6603         name = xmlParseName(ctxt);
6604         if (name == NULL) {
6605             xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6606                            "xmlParseElementDecl: no name for Element\n");
6607             return(-1);
6608         }
6609         if (SKIP_BLANKS == 0) {  /* reported, but parsing continues (no return here) */
6610             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6611                            "Space required after the element name\n");
6612         }
6613         if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6614             SKIP(5);
6615             /*
6616              * Element must always be empty.
6617              */
6618             ret = XML_ELEMENT_TYPE_EMPTY;
6619         } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6620                    (NXT(2) == 'Y')) {
6621             SKIP(3);
6622             /*
6623              * Element is a generic container.
6624              */
6625             ret = XML_ELEMENT_TYPE_ANY;
6626         } else if (RAW == '(') {
6627             ret = xmlParseElementContentDecl(ctxt, name, &content);
6628         } else {
6629             /*
6630              * [ WFC: PEs in Internal Subset ] error handling.
6631              */
6632             if ((RAW == '%') && (ctxt->external == 0) &&
6633                 (ctxt->inputNr == 1)) {
6634                 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6635           "PEReference: forbidden within markup decl in internal subset\n");
6636             } else {
6637                 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6638                       "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6639             }
6640             return(-1);
6641         }
6642
6643         SKIP_BLANKS;
6644
6645         if (RAW != '>') {  /* error path: free the tree, no SAX callback; ret is left unchanged */
6646             xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6647             if (content != NULL) {
6648                 xmlFreeDocElementContent(ctxt->myDoc, content);
6649             }
6650         } else {
6651             if (inputid != ctxt->input->id) {
6652                 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6653                                "Element declaration doesn't start and stop in"
6654                                " the same entity\n");
6655             }
6656
6657             NEXT;
6658             if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6659                 (ctxt->sax->elementDecl != NULL)) {
6660                 if (content != NULL)
6661                     content->parent = NULL;  /* marker tested after the callback */
6662                 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6663                                        content);
6664                 if ((content != NULL) && (content->parent == NULL)) {
6665                     /*
6666                      * Ownership trick: parent was reset to NULL before the
6667                      * callback. Presumably xmlAddElementDecl plugs the tree in
6668                      * directly (no copy) and marks it via parent; if parent is
6669                      * still NULL nobody adopted the tree, so free it here.
6670                      */
6671                     xmlFreeDocElementContent(ctxt->myDoc, content);
6672                 }
6673             } else if (content != NULL) {  /* no callback to hand the tree to */
6674                 xmlFreeDocElementContent(ctxt->myDoc, content);
6675             }
6676         }
6677     }
6678     return(ret);
6679 }
6680
6681 /**
6682  * xmlParseConditionalSections
6683  * @ctxt:  an XML parser context
6684  * Parse a conditional section and everything nested in it; INCLUDE nesting is tracked with a stack of input ids, and the function returns once depth is back to 0, on error, or when the parser is in the EOF state.
6685  * [61] conditionalSect ::= includeSect | ignoreSect
6686  * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6687  * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6688  * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6689  * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6690  */
6691
6692 static void
6693 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6694     int *inputIds = NULL;  /* stack of input ids, one entry per open INCLUDE section */
6695     size_t inputIdsSize = 0;  /* allocated capacity of inputIds, in entries */
6696     size_t depth = 0;  /* number of INCLUDE sections currently open */
6697
6698     while (ctxt->instate != XML_PARSER_EOF) {
6699         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6700             int id = ctxt->input->id;  /* input holding the opening '<![' */
6701
6702             SKIP(3);
6703             SKIP_BLANKS;
6704
6705             if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6706                 SKIP(7);
6707                 SKIP_BLANKS;
6708                 if (RAW != '[') {
6709                     xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6710                     xmlHaltParser(ctxt);
6711                     goto error;
6712                 }
6713                 if (ctxt->input->id != id) {
6714                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6715                                    "All markup of the conditional section is"
6716                                    " not in the same entity\n");
6717                 }
6718                 NEXT;
6719
6720                 if (inputIdsSize <= depth) {  /* grow the stack: 4 entries at first, then doubling */
6721                     int *tmp;
6722
6723                     inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6724                     tmp = (int *) xmlRealloc(inputIds,
6725                             inputIdsSize * sizeof(int));
6726                     if (tmp == NULL) {
6727                         xmlErrMemory(ctxt, NULL);
6728                         goto error;
6729                     }
6730                     inputIds = tmp;
6731                 }
6732                 inputIds[depth] = id;  /* remembered for the boundary check at the matching ']]>' */
6733                 depth++;
6734             } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6735                 int state;  /* saved ctxt->disableSAX */
6736                 xmlParserInputState instate;  /* saved ctxt->instate */
6737                 size_t ignoreDepth = 0;  /* nesting of '<![' found inside the ignored text */
6738
6739                 SKIP(6);
6740                 SKIP_BLANKS;
6741                 if (RAW != '[') {
6742                     xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6743                     xmlHaltParser(ctxt);
6744                     goto error;
6745                 }
6746                 if (ctxt->input->id != id) {
6747                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6748                                    "All markup of the conditional section is"
6749                                    " not in the same entity\n");
6750                 }
6751                 NEXT;
6752
6753                 /*
6754                  * Parse up to the end of the conditional section but disable
6755                  * SAX event generating DTD building in the meantime
6756                  */
6757                 state = ctxt->disableSAX;
6758                 instate = ctxt->instate;
6759                 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6760                 ctxt->instate = XML_PARSER_IGNORE;
6761
6762                 while (RAW != 0) {  /* skip characters, only tracking nested '<![' ... ']]>' pairs */
6763                     if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6764                         SKIP(3);
6765                         ignoreDepth++;
6766                         /* Check for integer overflow */
6767                         if (ignoreDepth == 0) {
6768                             xmlErrMemory(ctxt, NULL);
6769                             goto error;  /* NOTE(review): this exit skips the disableSAX/instate restore below - confirm acceptable */
6770                         }
6771                     } else if ((RAW == ']') && (NXT(1) == ']') &&
6772                                (NXT(2) == '>')) {
6773                         if (ignoreDepth == 0)  /* this ']]>' ends the IGNORE section itself; consumed after the loop */
6774                             break;
6775                         SKIP(3);
6776                         ignoreDepth--;
6777                     } else {
6778                         NEXT;
6779                     }
6780                 }
6781
6782                 ctxt->disableSAX = state;
6783                 ctxt->instate = instate;
6784
6785                 if (RAW == 0) {  /* input ended before the closing ']]>' */
6786                     xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6787                     goto error;
6788                 }
6789                 if (ctxt->input->id != id) {
6790                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6791                                    "All markup of the conditional section is"
6792                                    " not in the same entity\n");
6793                 }
6794                 SKIP(3);  /* consume the closing ']]>' */
6795             } else {
6796                 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6797                 xmlHaltParser(ctxt);
6798                 goto error;
6799             }
6800         } else if ((depth > 0) &&
6801                    (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6802             depth--;  /* pop the innermost open INCLUDE section */
6803             if (ctxt->input->id != inputIds[depth]) {
6804                 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6805                                "All markup of the conditional section is not"
6806                                " in the same entity\n");
6807             }
6808             SKIP(3);
6809         } else {  /* not a section delimiter: parse one markup declaration */
6810             int id = ctxt->input->id;
6811             unsigned long cons = CUR_CONSUMED;
6812
6813             xmlParseMarkupDecl(ctxt);
6814
6815             if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {  /* same input and nothing consumed: report and halt */
6816                 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6817                 xmlHaltParser(ctxt);
6818                 goto error;
6819             }
6820         }
6821
6822         if (depth == 0)  /* no INCLUDE section left open: done */
6823             break;
6824
6825         SKIP_BLANKS;
6826         GROW;
6827     }
6828
6829 error:  /* also reached on normal completion */
6830     xmlFree(inputIds);
6831 }
6832
6833 /**
6834  * xmlParseMarkupDecl:
6835  * @ctxt:  an XML parser context
6836  *
6837  * parse Markup declarations
6838  *
6839  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6840  *                     NotationDecl | PI | Comment
6841  *
6842  * [ VC: Proper Declaration/PE Nesting ]
6843  * Parameter-entity replacement text must be properly nested with
6844  * markup declarations. That is to say, if either the first character
6845  * or the last character of a markup declaration (markupdecl above) is
6846  * contained in the replacement text for a parameter-entity reference,
6847  * both must be contained in the same replacement text.
6848  *
6849  * [ WFC: PEs in Internal Subset ]
6850  * In the internal DTD subset, parameter-entity references can occur
6851  * only where markup declarations can occur, not within markup declarations.
6852  * (This does not apply to references that occur in external parameter
6853  * entities or to the external subset.)
6854  */
6855 void
6856 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6857     GROW;
6858     if (CUR == '<') {
6859         if (NXT(1) == '!') {
6860             switch (NXT(2)) {
6861                 case 'E':
6862                     if (NXT(3) == 'L')
6863                         xmlParseElementDecl(ctxt);
6864                     else if (NXT(3) == 'N')
6865                         xmlParseEntityDecl(ctxt);
6866                     break;
6867                 case 'A':
6868                     xmlParseAttributeListDecl(ctxt);
6869                     break;
6870                 case 'N':
6871                     xmlParseNotationDecl(ctxt);
6872                     break;
6873                 case '-':
6874                     xmlParseComment(ctxt);
6875                     break;
6876                 default:
6877                     /* there is an error but it will be detected later */
6878                     break;
6879             }
6880         } else if (NXT(1) == '?') {
6881             xmlParsePI(ctxt);
6882         }
6883     }
6884
6885     /*
6886      * detect requirement to exit there and act accordingly
6887      * and avoid having instate overridden later on
6888      */
6889     if (ctxt->instate == XML_PARSER_EOF)
6890         return;
6891
6892     ctxt->instate = XML_PARSER_DTD;
6893 }
6894
6895 /**
6896  * xmlParseTextDecl:
6897  * @ctxt:  an XML parser context
6898  *
6899  * parse an XML declaration header for external entities
6900  *
6901  * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6902  */
6903
6904 void
6905 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6906     xmlChar *version;
6907     const xmlChar *encoding;
6908     int oldstate;
6909
6910     /*
6911      * We know that '<?xml' is here.
6912      */
6913     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6914         SKIP(5);
6915     } else {
6916         xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6917         return;
6918     }
6919
6920     /* Avoid expansion of parameter entities when skipping blanks. */
6921     oldstate = ctxt->instate;
6922     ctxt->instate = XML_PARSER_START;
6923
6924     if (SKIP_BLANKS == 0) {
6925         xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6926                        "Space needed after '<?xml'\n");
6927     }
6928
6929     /*
6930      * We may have the VersionInfo here.
6931      */
6932     version = xmlParseVersionInfo(ctxt);
6933     if (version == NULL)
6934         version = xmlCharStrdup(XML_DEFAULT_VERSION);
6935     else {
6936         if (SKIP_BLANKS == 0) {
6937             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6938                            "Space needed here\n");
6939         }
6940     }
6941     ctxt->input->version = version;
6942
6943     /*
6944      * We must have the encoding declaration
6945      */
6946     encoding = xmlParseEncodingDecl(ctxt);
6947     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6948         /*
6949          * The XML REC instructs us to stop parsing right here
6950          */
6951         ctxt->instate = oldstate;
6952         return;
6953     }
6954     if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6955         xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6956                        "Missing encoding in text declaration\n");
6957     }
6958
6959     SKIP_BLANKS;
6960     if ((RAW == '?') && (NXT(1) == '>')) {
6961         SKIP(2);
6962     } else if (RAW == '>') {
6963         /* Deprecated old WD ... */
6964         xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6965         NEXT;
6966     } else {
6967         xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6968         MOVETO_ENDTAG(CUR_PTR);
6969         NEXT;
6970     }
6971
6972     ctxt->instate = oldstate;
6973 }
6974
6975 /**
6976  * xmlParseExternalSubset:
6977  * @ctxt:  an XML parser context
6978  * @ExternalID: the external identifier
6979  * @SystemID: the system identifier (or URL)
6980  *
6981  * parse Markup declarations from an external subset
6982  *
6983  * [30] extSubset ::= textDecl? extSubsetDecl
6984  *
6985  * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6986  */
6987 void
6988 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6989                        const xmlChar *SystemID) {
6990     xmlDetectSAX2(ctxt);
6991     GROW;
6992
6993     if ((ctxt->encoding == NULL) &&
6994         (ctxt->input->end - ctxt->input->cur >= 4)) {
6995         xmlChar start[4];
6996         xmlCharEncoding enc;
6997
6998         start[0] = RAW;
6999         start[1] = NXT(1);
7000         start[2] = NXT(2);
7001         start[3] = NXT(3);
7002         enc = xmlDetectCharEncoding(start, 4);
7003         if (enc != XML_CHAR_ENCODING_NONE)
7004             xmlSwitchEncoding(ctxt, enc);
7005     }
7006
7007     if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7008         xmlParseTextDecl(ctxt);
7009         if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7010             /*
7011              * The XML REC instructs us to stop parsing right here
7012              */
7013             xmlHaltParser(ctxt);
7014             return;
7015         }
7016     }
7017     if (ctxt->myDoc == NULL) {
7018         ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7019         if (ctxt->myDoc == NULL) {
7020             xmlErrMemory(ctxt, "New Doc failed");
7021             return;
7022         }
7023         ctxt->myDoc->properties = XML_DOC_INTERNAL;
7024     }
7025     if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7026         xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7027
7028     ctxt->instate = XML_PARSER_DTD;
7029     ctxt->external = 1;
7030     SKIP_BLANKS;
7031     while (((RAW == '<') && (NXT(1) == '?')) ||
7032            ((RAW == '<') && (NXT(1) == '!')) ||
7033            (RAW == '%')) {
7034         int id = ctxt->input->id;
7035         unsigned long cons = CUR_CONSUMED;
7036
7037         GROW;
7038         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7039             xmlParseConditionalSections(ctxt);
7040         } else
7041             xmlParseMarkupDecl(ctxt);
7042         SKIP_BLANKS;
7043
7044         if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
7045             xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7046             break;
7047         }
7048     }
7049
7050     if (RAW != 0) {
7051         xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7052     }
7053
7054 }
7055
7056 /**
7057  * xmlParseReference:
7058  * @ctxt:  an XML parser context
7059  *
7060  * parse and handle entity references in content, depending on the SAX
7061  * interface, this may end-up in a call to character() if this is a
7062  * CharRef, a predefined entity, if there is no reference() callback.
7063  * or if the parser was asked to switch to that mode.
7064  *
7065  * [67] Reference ::= EntityRef | CharRef
7066  */
7067 void
7068 xmlParseReference(xmlParserCtxtPtr ctxt) {
7069     xmlEntityPtr ent;
7070     xmlChar *val;
7071     int was_checked;
7072     xmlNodePtr list = NULL;
7073     xmlParserErrors ret = XML_ERR_OK;
7074
7075
7076     if (RAW != '&')
7077         return;
7078
7079     /*
7080      * Simple case of a CharRef
7081      */
7082     if (NXT(1) == '#') {
7083         int i = 0;
7084         xmlChar out[16];
7085         int hex = NXT(2);
7086         int value = xmlParseCharRef(ctxt);
7087
7088         if (value == 0)
7089             return;
7090         if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7091             /*
7092              * So we are using non-UTF-8 buffers
7093              * Check that the char fit on 8bits, if not
7094              * generate a CharRef.
7095              */
7096             if (value <= 0xFF) {
7097                 out[0] = value;
7098                 out[1] = 0;
7099                 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7100                     (!ctxt->disableSAX))
7101                     ctxt->sax->characters(ctxt->userData, out, 1);
7102             } else {
7103                 if ((hex == 'x') || (hex == 'X'))
7104                     snprintf((char *)out, sizeof(out), "#x%X", value);
7105                 else
7106                     snprintf((char *)out, sizeof(out), "#%d", value);
7107                 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7108                     (!ctxt->disableSAX))
7109                     ctxt->sax->reference(ctxt->userData, out);
7110             }
7111         } else {
7112             /*
7113              * Just encode the value in UTF-8
7114              */
7115             COPY_BUF(0 ,out, i, value);
7116             out[i] = 0;
7117             if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7118                 (!ctxt->disableSAX))
7119                 ctxt->sax->characters(ctxt->userData, out, i);
7120         }
7121         return;
7122     }
7123
7124     /*
7125      * We are seeing an entity reference
7126      */
7127     ent = xmlParseEntityRef(ctxt);
7128     if (ent == NULL) return;
7129     if (!ctxt->wellFormed)
7130         return;
7131     was_checked = ent->checked;
7132
7133     /* special case of predefined entities */
7134     if ((ent->name == NULL) ||
7135         (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7136         val = ent->content;
7137         if (val == NULL) return;
7138         /*
7139          * inline the entity.
7140          */
7141         if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7142             (!ctxt->disableSAX))
7143             ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7144         return;
7145     }
7146
7147     /*
7148      * The first reference to the entity trigger a parsing phase
7149      * where the ent->children is filled with the result from
7150      * the parsing.
7151      * Note: external parsed entities will not be loaded, it is not
7152      * required for a non-validating parser, unless the parsing option
7153      * of validating, or substituting entities were given. Doing so is
7154      * far more secure as the parser will only process data coming from
7155      * the document entity by default.
7156      */
7157     if (((ent->checked == 0) ||
7158          ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7159         ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7160          (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7161         unsigned long oldnbent = ctxt->nbentities, diff;
7162
7163         /*
7164          * This is a bit hackish but this seems the best
7165          * way to make sure both SAX and DOM entity support
7166          * behaves okay.
7167          */
7168         void *user_data;
7169         if (ctxt->userData == ctxt)
7170             user_data = NULL;
7171         else
7172             user_data = ctxt->userData;
7173
7174         /*
7175          * Check that this entity is well formed
7176          * 4.3.2: An internal general parsed entity is well-formed
7177          * if its replacement text matches the production labeled
7178          * content.
7179          */
7180         if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7181             ctxt->depth++;
7182             ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7183                                                       user_data, &list);
7184             ctxt->depth--;
7185
7186         } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7187             ctxt->depth++;
7188             ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7189                                            user_data, ctxt->depth, ent->URI,
7190                                            ent->ExternalID, &list);
7191             ctxt->depth--;
7192         } else {
7193             ret = XML_ERR_ENTITY_PE_INTERNAL;
7194             xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7195                          "invalid entity type found\n", NULL);
7196         }
7197
7198         /*
7199          * Store the number of entities needing parsing for this entity
7200          * content and do checkings
7201          */
7202         diff = ctxt->nbentities - oldnbent + 1;
7203         if (diff > INT_MAX / 2)
7204             diff = INT_MAX / 2;
7205         ent->checked = diff * 2;
7206         if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7207             ent->checked |= 1;
7208         if (ret == XML_ERR_ENTITY_LOOP) {
7209             xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7210             xmlHaltParser(ctxt);
7211             xmlFreeNodeList(list);
7212             return;
7213         }
7214         if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7215             xmlFreeNodeList(list);
7216             return;
7217         }
7218
7219         if ((ret == XML_ERR_OK) && (list != NULL)) {
7220             if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7221              (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7222                 (ent->children == NULL)) {
7223                 ent->children = list;
7224                 /*
7225                  * Prune it directly in the generated document
7226                  * except for single text nodes.
7227                  */
7228                 if ((ctxt->replaceEntities == 0) ||
7229                     (ctxt->parseMode == XML_PARSE_READER) ||
7230                     ((list->type == XML_TEXT_NODE) &&
7231                      (list->next == NULL))) {
7232                     ent->owner = 1;
7233                     while (list != NULL) {
7234                         list->parent = (xmlNodePtr) ent;
7235                         if (list->doc != ent->doc)
7236                             xmlSetTreeDoc(list, ent->doc);
7237                         if (list->next == NULL)
7238                             ent->last = list;
7239                         list = list->next;
7240                     }
7241                     list = NULL;
7242                 } else {
7243                     ent->owner = 0;
7244                     while (list != NULL) {
7245                         list->parent = (xmlNodePtr) ctxt->node;
7246                         list->doc = ctxt->myDoc;
7247                         if (list->next == NULL)
7248                             ent->last = list;
7249                         list = list->next;
7250                     }
7251                     list = ent->children;
7252 #ifdef LIBXML_LEGACY_ENABLED
7253                     if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7254                         xmlAddEntityReference(ent, list, NULL);
7255 #endif /* LIBXML_LEGACY_ENABLED */
7256                 }
7257             } else {
7258                 xmlFreeNodeList(list);
7259                 list = NULL;
7260             }
7261         } else if ((ret != XML_ERR_OK) &&
7262                    (ret != XML_WAR_UNDECLARED_ENTITY)) {
7263             xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7264                      "Entity '%s' failed to parse\n", ent->name);
7265             if (ent->content != NULL)
7266                 ent->content[0] = 0;
7267             xmlParserEntityCheck(ctxt, 0, ent, 0);
7268         } else if (list != NULL) {
7269             xmlFreeNodeList(list);
7270             list = NULL;
7271         }
7272         if (ent->checked == 0)
7273             ent->checked = 2;
7274
7275         /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7276         was_checked = 0;
7277     } else if (ent->checked != 1) {
7278         ctxt->nbentities += ent->checked / 2;
7279     }
7280
7281     /*
7282      * Now that the entity content has been gathered
7283      * provide it to the application, this can take different forms based
7284      * on the parsing modes.
7285      */
7286     if (ent->children == NULL) {
7287         /*
7288          * Probably running in SAX mode and the callbacks don't
7289          * build the entity content. So unless we already went
7290          * though parsing for first checking go though the entity
7291          * content to generate callbacks associated to the entity
7292          */
7293         if (was_checked != 0) {
7294             void *user_data;
7295             /*
7296              * This is a bit hackish but this seems the best
7297              * way to make sure both SAX and DOM entity support
7298              * behaves okay.
7299              */
7300             if (ctxt->userData == ctxt)
7301                 user_data = NULL;
7302             else
7303                 user_data = ctxt->userData;
7304
7305             if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7306                 ctxt->depth++;
7307                 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7308                                    ent->content, user_data, NULL);
7309                 ctxt->depth--;
7310             } else if (ent->etype ==
7311                        XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7312                 ctxt->depth++;
7313                 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7314                            ctxt->sax, user_data, ctxt->depth,
7315                            ent->URI, ent->ExternalID, NULL);
7316                 ctxt->depth--;
7317             } else {
7318                 ret = XML_ERR_ENTITY_PE_INTERNAL;
7319                 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7320                              "invalid entity type found\n", NULL);
7321             }
7322             if (ret == XML_ERR_ENTITY_LOOP) {
7323                 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7324                 return;
7325             }
7326         }
7327         if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7328             (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7329             /*
7330              * Entity reference callback comes second, it's somewhat
7331              * superfluous but a compatibility to historical behaviour
7332              */
7333             ctxt->sax->reference(ctxt->userData, ent->name);
7334         }
7335         return;
7336     }
7337
7338     /*
7339      * If we didn't get any children for the entity being built
7340      */
7341     if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7342         (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7343         /*
7344          * Create a node.
7345          */
7346         ctxt->sax->reference(ctxt->userData, ent->name);
7347         return;
7348     }
7349
7350     if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7351         /*
7352          * There is a problem on the handling of _private for entities
7353          * (bug 155816): Should we copy the content of the field from
7354          * the entity (possibly overwriting some value set by the user
7355          * when a copy is created), should we leave it alone, or should
7356          * we try to take care of different situations?  The problem
7357          * is exacerbated by the usage of this field by the xmlReader.
7358          * To fix this bug, we look at _private on the created node
7359          * and, if it's NULL, we copy in whatever was in the entity.
7360          * If it's not NULL we leave it alone.  This is somewhat of a
7361          * hack - maybe we should have further tests to determine
7362          * what to do.
7363          */
7364         if ((ctxt->node != NULL) && (ent->children != NULL)) {
7365             /*
7366              * Seems we are generating the DOM content, do
7367              * a simple tree copy for all references except the first
7368              * In the first occurrence list contains the replacement.
7369              */
7370             if (((list == NULL) && (ent->owner == 0)) ||
7371                 (ctxt->parseMode == XML_PARSE_READER)) {
7372                 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7373
7374                 /*
7375                  * We are copying here, make sure there is no abuse
7376                  */
7377                 ctxt->sizeentcopy += ent->length + 5;
7378                 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7379                     return;
7380
7381                 /*
7382                  * when operating on a reader, the entities definitions
7383                  * are always owning the entities subtree.
7384                 if (ctxt->parseMode == XML_PARSE_READER)
7385                     ent->owner = 1;
7386                  */
7387
7388                 cur = ent->children;
7389                 while (cur != NULL) {
7390                     nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7391                     if (nw != NULL) {
7392                         if (nw->_private == NULL)
7393                             nw->_private = cur->_private;
7394                         if (firstChild == NULL){
7395                             firstChild = nw;
7396                         }
7397                         nw = xmlAddChild(ctxt->node, nw);
7398                     }
7399                     if (cur == ent->last) {
7400                         /*
7401                          * needed to detect some strange empty
7402                          * node cases in the reader tests
7403                          */
7404                         if ((ctxt->parseMode == XML_PARSE_READER) &&
7405                             (nw != NULL) &&
7406                             (nw->type == XML_ELEMENT_NODE) &&
7407                             (nw->children == NULL))
7408                             nw->extra = 1;
7409
7410                         break;
7411                     }
7412                     cur = cur->next;
7413                 }
7414 #ifdef LIBXML_LEGACY_ENABLED
7415                 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7416                   xmlAddEntityReference(ent, firstChild, nw);
7417 #endif /* LIBXML_LEGACY_ENABLED */
7418             } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7419                 xmlNodePtr nw = NULL, cur, next, last,
7420                            firstChild = NULL;
7421
7422                 /*
7423                  * We are copying here, make sure there is no abuse
7424                  */
7425                 ctxt->sizeentcopy += ent->length + 5;
7426                 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7427                     return;
7428
7429                 /*
7430                  * Copy the entity child list and make it the new
7431                  * entity child list. The goal is to make sure any
7432                  * ID or REF referenced will be the one from the
7433                  * document content and not the entity copy.
7434                  */
7435                 cur = ent->children;
7436                 ent->children = NULL;
7437                 last = ent->last;
7438                 ent->last = NULL;
7439                 while (cur != NULL) {
7440                     next = cur->next;
7441                     cur->next = NULL;
7442                     cur->parent = NULL;
7443                     nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7444                     if (nw != NULL) {
7445                         if (nw->_private == NULL)
7446                             nw->_private = cur->_private;
7447                         if (firstChild == NULL){
7448                             firstChild = cur;
7449                         }
7450                         xmlAddChild((xmlNodePtr) ent, nw);
7451                         xmlAddChild(ctxt->node, cur);
7452                     }
7453                     if (cur == last)
7454                         break;
7455                     cur = next;
7456                 }
7457                 if (ent->owner == 0)
7458                     ent->owner = 1;
7459 #ifdef LIBXML_LEGACY_ENABLED
7460                 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7461                   xmlAddEntityReference(ent, firstChild, nw);
7462 #endif /* LIBXML_LEGACY_ENABLED */
7463             } else {
7464                 const xmlChar *nbktext;
7465
7466                 /*
7467                  * the name change is to avoid coalescing of the
7468                  * node with a possible previous text one which
7469                  * would make ent->children a dangling pointer
7470                  */
7471                 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7472                                         -1);
7473                 if (ent->children->type == XML_TEXT_NODE)
7474                     ent->children->name = nbktext;
7475                 if ((ent->last != ent->children) &&
7476                     (ent->last->type == XML_TEXT_NODE))
7477                     ent->last->name = nbktext;
7478                 xmlAddChildList(ctxt->node, ent->children);
7479             }
7480
7481             /*
7482              * This is to avoid a nasty side effect, see
7483              * characters() in SAX.c
7484              */
7485             ctxt->nodemem = 0;
7486             ctxt->nodelen = 0;
7487             return;
7488         }
7489     }
7490 }
7491
7492 /**
7493  * xmlParseEntityRef:
7494  * @ctxt:  an XML parser context
7495  *
7496  * parse ENTITY references declarations
7497  *
7498  * [68] EntityRef ::= '&' Name ';'
7499  *
7500  * [ WFC: Entity Declared ]
7501  * In a document without any DTD, a document with only an internal DTD
7502  * subset which contains no parameter entity references, or a document
7503  * with "standalone='yes'", the Name given in the entity reference
7504  * must match that in an entity declaration, except that well-formed
7505  * documents need not declare any of the following entities: amp, lt,
7506  * gt, apos, quot.  The declaration of a parameter entity must precede
7507  * any reference to it.  Similarly, the declaration of a general entity
7508  * must precede any reference to it which appears in a default value in an
7509  * attribute-list declaration. Note that if entities are declared in the
7510  * external subset or in external parameter entities, a non-validating
7511  * processor is not obligated to read and process their declarations;
7512  * for such documents, the rule that an entity must be declared is a
7513  * well-formedness constraint only if standalone='yes'.
7514  *
7515  * [ WFC: Parsed Entity ]
7516  * An entity reference must not contain the name of an unparsed entity
7517  *
7518  * Returns the xmlEntityPtr if found, or NULL otherwise.
7519  */
7520 xmlEntityPtr
7521 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7522     const xmlChar *name;
7523     xmlEntityPtr ent = NULL;
7524
7525     GROW;
7526     if (ctxt->instate == XML_PARSER_EOF)
7527         return(NULL);
7528
7529     if (RAW != '&')
7530         return(NULL);
7531     NEXT;
7532     name = xmlParseName(ctxt);
7533     if (name == NULL) {
7534         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7535                        "xmlParseEntityRef: no name\n");
7536         return(NULL);
7537     }
7538     if (RAW != ';') {
7539         xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7540         return(NULL);
7541     }
7542     NEXT;
7543
7544     /*
7545      * Predefined entities override any extra definition
7546      */
7547     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7548         ent = xmlGetPredefinedEntity(name);
7549         if (ent != NULL)
7550             return(ent);
7551     }
7552
7553     /*
7554      * Increase the number of entity references parsed
7555      */
7556     ctxt->nbentities++;
7557
7558     /*
7559      * Ask first SAX for entity resolution, otherwise try the
7560      * entities which may have stored in the parser context.
7561      */
7562     if (ctxt->sax != NULL) {
7563         if (ctxt->sax->getEntity != NULL)
7564             ent = ctxt->sax->getEntity(ctxt->userData, name);
7565         if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7566             (ctxt->options & XML_PARSE_OLDSAX))
7567             ent = xmlGetPredefinedEntity(name);
7568         if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7569             (ctxt->userData==ctxt)) {
7570             ent = xmlSAX2GetEntity(ctxt, name);
7571         }
7572     }
7573     if (ctxt->instate == XML_PARSER_EOF)
7574         return(NULL);
7575     /*
7576      * [ WFC: Entity Declared ]
7577      * In a document without any DTD, a document with only an
7578      * internal DTD subset which contains no parameter entity
7579      * references, or a document with "standalone='yes'", the
7580      * Name given in the entity reference must match that in an
7581      * entity declaration, except that well-formed documents
7582      * need not declare any of the following entities: amp, lt,
7583      * gt, apos, quot.
7584      * The declaration of a parameter entity must precede any
7585      * reference to it.
7586      * Similarly, the declaration of a general entity must
7587      * precede any reference to it which appears in a default
7588      * value in an attribute-list declaration. Note that if
7589      * entities are declared in the external subset or in
7590      * external parameter entities, a non-validating processor
7591      * is not obligated to read and process their declarations;
7592      * for such documents, the rule that an entity must be
7593      * declared is a well-formedness constraint only if
7594      * standalone='yes'.
7595      */
7596     if (ent == NULL) {
7597         if ((ctxt->standalone == 1) ||
7598             ((ctxt->hasExternalSubset == 0) &&
7599              (ctxt->hasPErefs == 0))) {
7600             xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7601                      "Entity '%s' not defined\n", name);
7602         } else {
7603             xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7604                      "Entity '%s' not defined\n", name);
7605             if ((ctxt->inSubset == 0) &&
7606                 (ctxt->sax != NULL) &&
7607                 (ctxt->sax->reference != NULL)) {
7608                 ctxt->sax->reference(ctxt->userData, name);
7609             }
7610         }
7611         xmlParserEntityCheck(ctxt, 0, ent, 0);
7612         ctxt->valid = 0;
7613     }
7614
7615     /*
7616      * [ WFC: Parsed Entity ]
7617      * An entity reference must not contain the name of an
7618      * unparsed entity
7619      */
7620     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7621         xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7622                  "Entity reference to unparsed entity %s\n", name);
7623     }
7624
7625     /*
7626      * [ WFC: No External Entity References ]
7627      * Attribute values cannot contain direct or indirect
7628      * entity references to external entities.
7629      */
7630     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7631              (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7632         xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7633              "Attribute references external entity '%s'\n", name);
7634     }
7635     /*
7636      * [ WFC: No < in Attribute Values ]
7637      * The replacement text of any entity referred to directly or
7638      * indirectly in an attribute value (other than "&lt;") must
7639      * not contain a <.
7640      */
7641     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7642              (ent != NULL) &&
7643              (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7644         if (((ent->checked & 1) || (ent->checked == 0)) &&
7645              (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7646             xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7647         "'<' in entity '%s' is not allowed in attributes values\n", name);
7648         }
7649     }
7650
7651     /*
7652      * Internal check, no parameter entities here ...
7653      */
7654     else {
7655         switch (ent->etype) {
7656             case XML_INTERNAL_PARAMETER_ENTITY:
7657             case XML_EXTERNAL_PARAMETER_ENTITY:
7658             xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7659              "Attempt to reference the parameter entity '%s'\n",
7660                               name);
7661             break;
7662             default:
7663             break;
7664         }
7665     }
7666
7667     /*
7668      * [ WFC: No Recursion ]
7669      * A parsed entity must not contain a recursive reference
7670      * to itself, either directly or indirectly.
7671      * Done somewhere else
7672      */
7673     return(ent);
7674 }
7675
7676 /**
7677  * xmlParseStringEntityRef:
7678  * @ctxt:  an XML parser context
7679  * @str:  a pointer to an index in the string
7680  *
7681  * parse ENTITY references declarations, but this version parses it from
7682  * a string value.
7683  *
7684  * [68] EntityRef ::= '&' Name ';'
7685  *
7686  * [ WFC: Entity Declared ]
7687  * In a document without any DTD, a document with only an internal DTD
7688  * subset which contains no parameter entity references, or a document
7689  * with "standalone='yes'", the Name given in the entity reference
7690  * must match that in an entity declaration, except that well-formed
7691  * documents need not declare any of the following entities: amp, lt,
7692  * gt, apos, quot.  The declaration of a parameter entity must precede
7693  * any reference to it.  Similarly, the declaration of a general entity
7694  * must precede any reference to it which appears in a default value in an
7695  * attribute-list declaration. Note that if entities are declared in the
7696  * external subset or in external parameter entities, a non-validating
7697  * processor is not obligated to read and process their declarations;
7698  * for such documents, the rule that an entity must be declared is a
7699  * well-formedness constraint only if standalone='yes'.
7700  *
7701  * [ WFC: Parsed Entity ]
7702  * An entity reference must not contain the name of an unparsed entity
7703  *
7704  * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7705  * is updated to the current location in the string.
7706  */
7707 static xmlEntityPtr
7708 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7709     xmlChar *name;
7710     const xmlChar *ptr;
7711     xmlChar cur;
7712     xmlEntityPtr ent = NULL;
7713
7714     if ((str == NULL) || (*str == NULL))
7715         return(NULL);
7716     ptr = *str;
7717     cur = *ptr;
7718     if (cur != '&')
7719         return(NULL);
7720
7721     ptr++;
7722     name = xmlParseStringName(ctxt, &ptr);
7723     if (name == NULL) {
7724         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7725                        "xmlParseStringEntityRef: no name\n");
7726         *str = ptr;
7727         return(NULL);
7728     }
7729     if (*ptr != ';') {
7730         xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7731         xmlFree(name);
7732         *str = ptr;
7733         return(NULL);
7734     }
7735     ptr++;
7736
7737
7738     /*
7739      * Predefined entities override any extra definition
7740      */
7741     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7742         ent = xmlGetPredefinedEntity(name);
7743         if (ent != NULL) {
7744             xmlFree(name);
7745             *str = ptr;
7746             return(ent);
7747         }
7748     }
7749
7750     /*
7751      * Increase the number of entity references parsed
7752      */
7753     ctxt->nbentities++;
7754
7755     /*
7756      * Ask first SAX for entity resolution, otherwise try the
7757      * entities which may have stored in the parser context.
7758      */
7759     if (ctxt->sax != NULL) {
7760         if (ctxt->sax->getEntity != NULL)
7761             ent = ctxt->sax->getEntity(ctxt->userData, name);
7762         if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7763             ent = xmlGetPredefinedEntity(name);
7764         if ((ent == NULL) && (ctxt->userData==ctxt)) {
7765             ent = xmlSAX2GetEntity(ctxt, name);
7766         }
7767     }
7768     if (ctxt->instate == XML_PARSER_EOF) {
7769         xmlFree(name);
7770         return(NULL);
7771     }
7772
7773     /*
7774      * [ WFC: Entity Declared ]
7775      * In a document without any DTD, a document with only an
7776      * internal DTD subset which contains no parameter entity
7777      * references, or a document with "standalone='yes'", the
7778      * Name given in the entity reference must match that in an
7779      * entity declaration, except that well-formed documents
7780      * need not declare any of the following entities: amp, lt,
7781      * gt, apos, quot.
7782      * The declaration of a parameter entity must precede any
7783      * reference to it.
7784      * Similarly, the declaration of a general entity must
7785      * precede any reference to it which appears in a default
7786      * value in an attribute-list declaration. Note that if
7787      * entities are declared in the external subset or in
7788      * external parameter entities, a non-validating processor
7789      * is not obligated to read and process their declarations;
7790      * for such documents, the rule that an entity must be
7791      * declared is a well-formedness constraint only if
7792      * standalone='yes'.
7793      */
7794     if (ent == NULL) {
7795         if ((ctxt->standalone == 1) ||
7796             ((ctxt->hasExternalSubset == 0) &&
7797              (ctxt->hasPErefs == 0))) {
7798             xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7799                      "Entity '%s' not defined\n", name);
7800         } else {
7801             xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7802                           "Entity '%s' not defined\n",
7803                           name);
7804         }
7805         xmlParserEntityCheck(ctxt, 0, ent, 0);
7806         /* TODO ? check regressions ctxt->valid = 0; */
7807     }
7808
7809     /*
7810      * [ WFC: Parsed Entity ]
7811      * An entity reference must not contain the name of an
7812      * unparsed entity
7813      */
7814     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7815         xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7816                  "Entity reference to unparsed entity %s\n", name);
7817     }
7818
7819     /*
7820      * [ WFC: No External Entity References ]
7821      * Attribute values cannot contain direct or indirect
7822      * entity references to external entities.
7823      */
7824     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7825              (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7826         xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7827          "Attribute references external entity '%s'\n", name);
7828     }
7829     /*
7830      * [ WFC: No < in Attribute Values ]
7831      * The replacement text of any entity referred to directly or
7832      * indirectly in an attribute value (other than "&lt;") must
7833      * not contain a <.
7834      */
7835     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7836              (ent != NULL) && (ent->content != NULL) &&
7837              (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7838              (xmlStrchr(ent->content, '<'))) {
7839         xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7840      "'<' in entity '%s' is not allowed in attributes values\n",
7841                           name);
7842     }
7843
7844     /*
7845      * Internal check, no parameter entities here ...
7846      */
7847     else {
7848         switch (ent->etype) {
7849             case XML_INTERNAL_PARAMETER_ENTITY:
7850             case XML_EXTERNAL_PARAMETER_ENTITY:
7851                 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7852              "Attempt to reference the parameter entity '%s'\n",
7853                                   name);
7854             break;
7855             default:
7856             break;
7857         }
7858     }
7859
7860     /*
7861      * [ WFC: No Recursion ]
7862      * A parsed entity must not contain a recursive reference
7863      * to itself, either directly or indirectly.
7864      * Done somewhere else
7865      */
7866
7867     xmlFree(name);
7868     *str = ptr;
7869     return(ent);
7870 }
7871
7872 /**
7873  * xmlParsePEReference:
7874  * @ctxt:  an XML parser context
7875  *
7876  * parse PEReference declarations
7877  * The entity content is handled directly by pushing it's content as
7878  * a new input stream.
7879  *
7880  * [69] PEReference ::= '%' Name ';'
7881  *
7882  * [ WFC: No Recursion ]
7883  * A parsed entity must not contain a recursive
7884  * reference to itself, either directly or indirectly.
7885  *
7886  * [ WFC: Entity Declared ]
7887  * In a document without any DTD, a document with only an internal DTD
7888  * subset which contains no parameter entity references, or a document
7889  * with "standalone='yes'", ...  ... The declaration of a parameter
7890  * entity must precede any reference to it...
7891  *
7892  * [ VC: Entity Declared ]
7893  * In a document with an external subset or external parameter entities
7894  * with "standalone='no'", ...  ... The declaration of a parameter entity
7895  * must precede any reference to it...
7896  *
7897  * [ WFC: In DTD ]
7898  * Parameter-entity references may only appear in the DTD.
7899  * NOTE: misleading but this is handled.
7900  */
7901 void
7902 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7903 {
7904     const xmlChar *name;
7905     xmlEntityPtr entity = NULL;
7906     xmlParserInputPtr input;
7907
7908     if (RAW != '%')
7909         return;
7910     NEXT;
7911     name = xmlParseName(ctxt);
7912     if (name == NULL) {
7913         xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7914         return;
7915     }
7916     if (xmlParserDebugEntities)
7917         xmlGenericError(xmlGenericErrorContext,
7918                 "PEReference: %s\n", name);
7919     if (RAW != ';') {
7920         xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7921         return;
7922     }
7923
7924     NEXT;
7925
7926     /*
7927      * Increase the number of entity references parsed
7928      */
7929     ctxt->nbentities++;
7930
7931     /*
7932      * Request the entity from SAX
7933      */
7934     if ((ctxt->sax != NULL) &&
7935         (ctxt->sax->getParameterEntity != NULL))
7936         entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7937     if (ctxt->instate == XML_PARSER_EOF)
7938         return;
7939     if (entity == NULL) {
7940         /*
7941          * [ WFC: Entity Declared ]
7942          * In a document without any DTD, a document with only an
7943          * internal DTD subset which contains no parameter entity
7944          * references, or a document with "standalone='yes'", ...
7945          * ... The declaration of a parameter entity must precede
7946          * any reference to it...
7947          */
7948         if ((ctxt->standalone == 1) ||
7949             ((ctxt->hasExternalSubset == 0) &&
7950              (ctxt->hasPErefs == 0))) {
7951             xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7952                               "PEReference: %%%s; not found\n",
7953                               name);
7954         } else {
7955             /*
7956              * [ VC: Entity Declared ]
7957              * In a document with an external subset or external
7958              * parameter entities with "standalone='no'", ...
7959              * ... The declaration of a parameter entity must
7960              * precede any reference to it...
7961              */
7962             if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7963                 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7964                                  "PEReference: %%%s; not found\n",
7965                                  name, NULL);
7966             } else
7967                 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7968                               "PEReference: %%%s; not found\n",
7969                               name, NULL);
7970             ctxt->valid = 0;
7971         }
7972         xmlParserEntityCheck(ctxt, 0, NULL, 0);
7973     } else {
7974         /*
7975          * Internal checking in case the entity quest barfed
7976          */
7977         if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7978             (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7979             xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7980                   "Internal: %%%s; is not a parameter entity\n",
7981                           name, NULL);
7982         } else {
7983             xmlChar start[4];
7984             xmlCharEncoding enc;
7985
7986             if (xmlParserEntityCheck(ctxt, 0, entity, 0))
7987                 return;
7988
7989             if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7990                 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7991                 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7992                 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7993                 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7994                 (ctxt->replaceEntities == 0) &&
7995                 (ctxt->validate == 0))
7996                 return;
7997
7998             input = xmlNewEntityInputStream(ctxt, entity);
7999             if (xmlPushInput(ctxt, input) < 0) {
8000                 xmlFreeInputStream(input);
8001                 return;
8002             }
8003
8004             if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8005                 /*
8006                  * Get the 4 first bytes and decode the charset
8007                  * if enc != XML_CHAR_ENCODING_NONE
8008                  * plug some encoding conversion routines.
8009                  * Note that, since we may have some non-UTF8
8010                  * encoding (like UTF16, bug 135229), the 'length'
8011                  * is not known, but we can calculate based upon
8012                  * the amount of data in the buffer.
8013                  */
8014                 GROW
8015                 if (ctxt->instate == XML_PARSER_EOF)
8016                     return;
8017                 if ((ctxt->input->end - ctxt->input->cur)>=4) {
8018                     start[0] = RAW;
8019                     start[1] = NXT(1);
8020                     start[2] = NXT(2);
8021                     start[3] = NXT(3);
8022                     enc = xmlDetectCharEncoding(start, 4);
8023                     if (enc != XML_CHAR_ENCODING_NONE) {
8024                         xmlSwitchEncoding(ctxt, enc);
8025                     }
8026                 }
8027
8028                 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8029                     (IS_BLANK_CH(NXT(5)))) {
8030                     xmlParseTextDecl(ctxt);
8031                 }
8032             }
8033         }
8034     }
8035     ctxt->hasPErefs = 1;
8036 }
8037
8038 /**
8039  * xmlLoadEntityContent:
8040  * @ctxt:  an XML parser context
8041  * @entity: an unloaded system entity
8042  *
8043  * Load the original content of the given system entity from the
8044  * ExternalID/SystemID given. This is to be used for Included in Literal
8045  * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8046  *
8047  * Returns 0 in case of success and -1 in case of failure
8048  */
8049 static int
8050 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8051     xmlParserInputPtr input;
8052     xmlBufferPtr buf;
8053     int l, c;
8054     int count = 0;
8055
8056     if ((ctxt == NULL) || (entity == NULL) ||
8057         ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8058          (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8059         (entity->content != NULL)) {
8060         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8061                     "xmlLoadEntityContent parameter error");
8062         return(-1);
8063     }
8064
8065     if (xmlParserDebugEntities)
8066         xmlGenericError(xmlGenericErrorContext,
8067                 "Reading %s entity content input\n", entity->name);
8068
8069     buf = xmlBufferCreate();
8070     if (buf == NULL) {
8071         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8072                     "xmlLoadEntityContent parameter error");
8073         return(-1);
8074     }
8075     xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8076
8077     input = xmlNewEntityInputStream(ctxt, entity);
8078     if (input == NULL) {
8079         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8080                     "xmlLoadEntityContent input error");
8081         xmlBufferFree(buf);
8082         return(-1);
8083     }
8084
8085     /*
8086      * Push the entity as the current input, read char by char
8087      * saving to the buffer until the end of the entity or an error
8088      */
8089     if (xmlPushInput(ctxt, input) < 0) {
8090         xmlBufferFree(buf);
8091         xmlFreeInputStream(input);
8092         return(-1);
8093     }
8094
8095     GROW;
8096     c = CUR_CHAR(l);
8097     while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8098            (IS_CHAR(c))) {
8099         xmlBufferAdd(buf, ctxt->input->cur, l);
8100         if (count++ > XML_PARSER_CHUNK_SIZE) {
8101             count = 0;
8102             GROW;
8103             if (ctxt->instate == XML_PARSER_EOF) {
8104                 xmlBufferFree(buf);
8105                 return(-1);
8106             }
8107         }
8108         NEXTL(l);
8109         c = CUR_CHAR(l);
8110         if (c == 0) {
8111             count = 0;
8112             GROW;
8113             if (ctxt->instate == XML_PARSER_EOF) {
8114                 xmlBufferFree(buf);
8115                 return(-1);
8116             }
8117             c = CUR_CHAR(l);
8118         }
8119     }
8120
8121     if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8122         xmlPopInput(ctxt);
8123     } else if (!IS_CHAR(c)) {
8124         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8125                           "xmlLoadEntityContent: invalid char value %d\n",
8126                           c);
8127         xmlBufferFree(buf);
8128         return(-1);
8129     }
8130     entity->content = buf->content;
8131     buf->content = NULL;
8132     xmlBufferFree(buf);
8133
8134     return(0);
8135 }
8136
8137 /**
8138  * xmlParseStringPEReference:
8139  * @ctxt:  an XML parser context
8140  * @str:  a pointer to an index in the string
8141  *
8142  * parse PEReference declarations
8143  *
8144  * [69] PEReference ::= '%' Name ';'
8145  *
8146  * [ WFC: No Recursion ]
8147  * A parsed entity must not contain a recursive
8148  * reference to itself, either directly or indirectly.
8149  *
8150  * [ WFC: Entity Declared ]
8151  * In a document without any DTD, a document with only an internal DTD
8152  * subset which contains no parameter entity references, or a document
8153  * with "standalone='yes'", ...  ... The declaration of a parameter
8154  * entity must precede any reference to it...
8155  *
8156  * [ VC: Entity Declared ]
8157  * In a document with an external subset or external parameter entities
8158  * with "standalone='no'", ...  ... The declaration of a parameter entity
8159  * must precede any reference to it...
8160  *
8161  * [ WFC: In DTD ]
8162  * Parameter-entity references may only appear in the DTD.
8163  * NOTE: misleading but this is handled.
8164  *
8165  * Returns the string of the entity content.
8166  *         str is updated to the current value of the index
8167  */
8168 static xmlEntityPtr
8169 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8170     const xmlChar *ptr;
8171     xmlChar cur;
8172     xmlChar *name;
8173     xmlEntityPtr entity = NULL;
8174
8175     if ((str == NULL) || (*str == NULL)) return(NULL);
8176     ptr = *str;
8177     cur = *ptr;
8178     if (cur != '%')
8179         return(NULL);
8180     ptr++;
8181     name = xmlParseStringName(ctxt, &ptr);
8182     if (name == NULL) {
8183         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8184                        "xmlParseStringPEReference: no name\n");
8185         *str = ptr;
8186         return(NULL);
8187     }
8188     cur = *ptr;
8189     if (cur != ';') {
8190         xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8191         xmlFree(name);
8192         *str = ptr;
8193         return(NULL);
8194     }
8195     ptr++;
8196
8197     /*
8198      * Increase the number of entity references parsed
8199      */
8200     ctxt->nbentities++;
8201
8202     /*
8203      * Request the entity from SAX
8204      */
8205     if ((ctxt->sax != NULL) &&
8206         (ctxt->sax->getParameterEntity != NULL))
8207         entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8208     if (ctxt->instate == XML_PARSER_EOF) {
8209         xmlFree(name);
8210         *str = ptr;
8211         return(NULL);
8212     }
8213     if (entity == NULL) {
8214         /*
8215          * [ WFC: Entity Declared ]
8216          * In a document without any DTD, a document with only an
8217          * internal DTD subset which contains no parameter entity
8218          * references, or a document with "standalone='yes'", ...
8219          * ... The declaration of a parameter entity must precede
8220          * any reference to it...
8221          */
8222         if ((ctxt->standalone == 1) ||
8223             ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8224             xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8225                  "PEReference: %%%s; not found\n", name);
8226         } else {
8227             /*
8228              * [ VC: Entity Declared ]
8229              * In a document with an external subset or external
8230              * parameter entities with "standalone='no'", ...
8231              * ... The declaration of a parameter entity must
8232              * precede any reference to it...
8233              */
8234             xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8235                           "PEReference: %%%s; not found\n",
8236                           name, NULL);
8237             ctxt->valid = 0;
8238         }
8239         xmlParserEntityCheck(ctxt, 0, NULL, 0);
8240     } else {
8241         /*
8242          * Internal checking in case the entity quest barfed
8243          */
8244         if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8245             (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8246             xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8247                           "%%%s; is not a parameter entity\n",
8248                           name, NULL);
8249         }
8250     }
8251     ctxt->hasPErefs = 1;
8252     xmlFree(name);
8253     *str = ptr;
8254     return(entity);
8255 }
8256
8257 /**
8258  * xmlParseDocTypeDecl:
8259  * @ctxt:  an XML parser context
8260  *
8261  * parse a DOCTYPE declaration
8262  *
8263  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8264  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8265  *
8266  * [ VC: Root Element Type ]
8267  * The Name in the document type declaration must match the element
8268  * type of the root element.
8269  */
8270
8271 void
8272 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8273     const xmlChar *name = NULL;
8274     xmlChar *ExternalID = NULL;
8275     xmlChar *URI = NULL;
8276
8277     /*
8278      * We know that '<!DOCTYPE' has been detected.
8279      */
8280     SKIP(9);
8281
8282     SKIP_BLANKS;
8283
8284     /*
8285      * Parse the DOCTYPE name.
8286      */
8287     name = xmlParseName(ctxt);
8288     if (name == NULL) {
8289         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8290                        "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8291     }
8292     ctxt->intSubName = name;
8293
8294     SKIP_BLANKS;
8295
8296     /*
8297      * Check for SystemID and ExternalID
8298      */
8299     URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8300
8301     if ((URI != NULL) || (ExternalID != NULL)) {
8302         ctxt->hasExternalSubset = 1;
8303     }
8304     ctxt->extSubURI = URI;
8305     ctxt->extSubSystem = ExternalID;
8306
8307     SKIP_BLANKS;
8308
8309     /*
8310      * Create and update the internal subset.
8311      */
8312     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8313         (!ctxt->disableSAX))
8314         ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8315     if (ctxt->instate == XML_PARSER_EOF)
8316         return;
8317
8318     /*
8319      * Is there any internal subset declarations ?
8320      * they are handled separately in xmlParseInternalSubset()
8321      */
8322     if (RAW == '[')
8323         return;
8324
8325     /*
8326      * We should be at the end of the DOCTYPE declaration.
8327      */
8328     if (RAW != '>') {
8329         xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8330     }
8331     NEXT;
8332 }
8333
8334 /**
8335  * xmlParseInternalSubset:
8336  * @ctxt:  an XML parser context
8337  *
8338  * parse the internal subset declaration
8339  *
8340  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8341  */
8342
8343 static void
8344 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8345     /*
8346      * Is there any DTD definition ?
8347      */
8348     if (RAW == '[') {
8349         int baseInputNr = ctxt->inputNr;
8350         ctxt->instate = XML_PARSER_DTD;
8351         NEXT;
8352         /*
8353          * Parse the succession of Markup declarations and
8354          * PEReferences.
8355          * Subsequence (markupdecl | PEReference | S)*
8356          */
8357         while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8358                (ctxt->instate != XML_PARSER_EOF)) {
8359             int id = ctxt->input->id;
8360             unsigned long cons = CUR_CONSUMED;
8361
8362             SKIP_BLANKS;
8363             xmlParseMarkupDecl(ctxt);
8364             xmlParsePEReference(ctxt);
8365
8366             /*
8367              * Conditional sections are allowed from external entities included
8368              * by PE References in the internal subset.
8369              */
8370             if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8371                 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8372                 xmlParseConditionalSections(ctxt);
8373             }
8374
8375             if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
8376                 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8377              "xmlParseInternalSubset: error detected in Markup declaration\n");
8378                 if (ctxt->inputNr > baseInputNr)
8379                     xmlPopInput(ctxt);
8380                 else
8381                     break;
8382             }
8383         }
8384         if (RAW == ']') {
8385             NEXT;
8386             SKIP_BLANKS;
8387         }
8388     }
8389
8390     /*
8391      * We should be at the end of the DOCTYPE declaration.
8392      */
8393     if (RAW != '>') {
8394         xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8395         return;
8396     }
8397     NEXT;
8398 }
8399
8400 #ifdef LIBXML_SAX1_ENABLED
8401 /**
8402  * xmlParseAttribute:
8403  * @ctxt:  an XML parser context
8404  * @value:  a xmlChar ** used to store the value of the attribute
8405  *
8406  * parse an attribute
8407  *
8408  * [41] Attribute ::= Name Eq AttValue
8409  *
8410  * [ WFC: No External Entity References ]
8411  * Attribute values cannot contain direct or indirect entity references
8412  * to external entities.
8413  *
8414  * [ WFC: No < in Attribute Values ]
8415  * The replacement text of any entity referred to directly or indirectly in
8416  * an attribute value (other than "&lt;") must not contain a <.
8417  *
8418  * [ VC: Attribute Value Type ]
8419  * The attribute must have been declared; the value must be of the type
8420  * declared for it.
8421  *
8422  * [25] Eq ::= S? '=' S?
8423  *
8424  * With namespace:
8425  *
8426  * [NS 11] Attribute ::= QName Eq AttValue
8427  *
8428  * Also the case QName == xmlns:??? is handled independently as a namespace
8429  * definition.
8430  *
8431  * Returns the attribute name, and the value in *value.
8432  */
8433
8434 const xmlChar *
8435 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8436     const xmlChar *name;
8437     xmlChar *val;
8438
8439     *value = NULL;
8440     GROW;
8441     name = xmlParseName(ctxt);
8442     if (name == NULL) {
8443         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8444                        "error parsing attribute name\n");
8445         return(NULL);
8446     }
8447
8448     /*
8449      * read the value
8450      */
8451     SKIP_BLANKS;
8452     if (RAW == '=') {
8453         NEXT;
8454         SKIP_BLANKS;
8455         val = xmlParseAttValue(ctxt);
8456         ctxt->instate = XML_PARSER_CONTENT;
8457     } else {
8458         xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8459                "Specification mandates value for attribute %s\n", name);
8460         return(NULL);
8461     }
8462
8463     /*
8464      * Check that xml:lang conforms to the specification
8465      * No more registered as an error, just generate a warning now
8466      * since this was deprecated in XML second edition
8467      */
8468     if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8469         if (!xmlCheckLanguageID(val)) {
8470             xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8471                           "Malformed value for xml:lang : %s\n",
8472                           val, NULL);
8473         }
8474     }
8475
8476     /*
8477      * Check that xml:space conforms to the specification
8478      */
8479     if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8480         if (xmlStrEqual(val, BAD_CAST "default"))
8481             *(ctxt->space) = 0;
8482         else if (xmlStrEqual(val, BAD_CAST "preserve"))
8483             *(ctxt->space) = 1;
8484         else {
8485                 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8486 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8487                                  val, NULL);
8488         }
8489     }
8490
8491     *value = val;
8492     return(name);
8493 }
8494
8495 /**
8496  * xmlParseStartTag:
8497  * @ctxt:  an XML parser context
8498  *
8499  * parse a start of tag either for rule element or
8500  * EmptyElement. In both case we don't parse the tag closing chars.
8501  *
8502  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8503  *
8504  * [ WFC: Unique Att Spec ]
8505  * No attribute name may appear more than once in the same start-tag or
8506  * empty-element tag.
8507  *
8508  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8509  *
8510  * [ WFC: Unique Att Spec ]
8511  * No attribute name may appear more than once in the same start-tag or
8512  * empty-element tag.
8513  *
8514  * With namespace:
8515  *
8516  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8517  *
8518  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8519  *
8520  * Returns the element name parsed
8521  */
8522
8523 const xmlChar *
8524 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8525     const xmlChar *name;
8526     const xmlChar *attname;
8527     xmlChar *attvalue;
8528     const xmlChar **atts = ctxt->atts;
8529     int nbatts = 0;
8530     int maxatts = ctxt->maxatts;
8531     int i;
8532
8533     if (RAW != '<') return(NULL);
8534     NEXT1;
8535
8536     name = xmlParseName(ctxt);
8537     if (name == NULL) {
8538         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8539              "xmlParseStartTag: invalid element name\n");
8540         return(NULL);
8541     }
8542
8543     /*
8544      * Now parse the attributes, it ends up with the ending
8545      *
8546      * (S Attribute)* S?
8547      */
8548     SKIP_BLANKS;
8549     GROW;
8550
8551     while (((RAW != '>') &&
8552            ((RAW != '/') || (NXT(1) != '>')) &&
8553            (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8554         int id = ctxt->input->id;
8555         unsigned long cons = CUR_CONSUMED;
8556
8557         attname = xmlParseAttribute(ctxt, &attvalue);
8558         if ((attname != NULL) && (attvalue != NULL)) {
8559             /*
8560              * [ WFC: Unique Att Spec ]
8561              * No attribute name may appear more than once in the same
8562              * start-tag or empty-element tag.
8563              */
8564             for (i = 0; i < nbatts;i += 2) {
8565                 if (xmlStrEqual(atts[i], attname)) {
8566                     xmlErrAttributeDup(ctxt, NULL, attname);
8567                     xmlFree(attvalue);
8568                     goto failed;
8569                 }
8570             }
8571             /*
8572              * Add the pair to atts
8573              */
8574             if (atts == NULL) {
8575                 maxatts = 22; /* allow for 10 attrs by default */
8576                 atts = (const xmlChar **)
8577                        xmlMalloc(maxatts * sizeof(xmlChar *));
8578                 if (atts == NULL) {
8579                     xmlErrMemory(ctxt, NULL);
8580                     if (attvalue != NULL)
8581                         xmlFree(attvalue);
8582                     goto failed;
8583                 }
8584                 ctxt->atts = atts;
8585                 ctxt->maxatts = maxatts;
8586             } else if (nbatts + 4 > maxatts) {
8587                 const xmlChar **n;
8588
8589                 maxatts *= 2;
8590                 n = (const xmlChar **) xmlRealloc((void *) atts,
8591                                              maxatts * sizeof(const xmlChar *));
8592                 if (n == NULL) {
8593                     xmlErrMemory(ctxt, NULL);
8594                     if (attvalue != NULL)
8595                         xmlFree(attvalue);
8596                     goto failed;
8597                 }
8598                 atts = n;
8599                 ctxt->atts = atts;
8600                 ctxt->maxatts = maxatts;
8601             }
8602             atts[nbatts++] = attname;
8603             atts[nbatts++] = attvalue;
8604             atts[nbatts] = NULL;
8605             atts[nbatts + 1] = NULL;
8606         } else {
8607             if (attvalue != NULL)
8608                 xmlFree(attvalue);
8609         }
8610
8611 failed:
8612
8613         GROW
8614         if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8615             break;
8616         if (SKIP_BLANKS == 0) {
8617             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8618                            "attributes construct error\n");
8619         }
8620         if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
8621             (attname == NULL) && (attvalue == NULL)) {
8622             xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8623                            "xmlParseStartTag: problem parsing attributes\n");
8624             break;
8625         }
8626         SHRINK;
8627         GROW;
8628     }
8629
8630     /*
8631      * SAX: Start of Element !
8632      */
8633     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8634         (!ctxt->disableSAX)) {
8635         if (nbatts > 0)
8636             ctxt->sax->startElement(ctxt->userData, name, atts);
8637         else
8638             ctxt->sax->startElement(ctxt->userData, name, NULL);
8639     }
8640
8641     if (atts != NULL) {
8642         /* Free only the content strings */
8643         for (i = 1;i < nbatts;i+=2)
8644             if (atts[i] != NULL)
8645                xmlFree((xmlChar *) atts[i]);
8646     }
8647     return(name);
8648 }
8649
8650 /**
8651  * xmlParseEndTag1:
8652  * @ctxt:  an XML parser context
8653  * @line:  line of the start tag
8654  * @nsNr:  number of namespaces on the start tag
8655  *
8656  * parse an end of tag
8657  *
8658  * [42] ETag ::= '</' Name S? '>'
8659  *
8660  * With namespace
8661  *
8662  * [NS 9] ETag ::= '</' QName S? '>'
8663  */
8664
8665 static void
8666 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8667     const xmlChar *name;
8668
8669     GROW;
8670     if ((RAW != '<') || (NXT(1) != '/')) {
8671         xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8672                        "xmlParseEndTag: '</' not found\n");
8673         return;
8674     }
8675     SKIP(2);
8676
8677     name = xmlParseNameAndCompare(ctxt,ctxt->name);
8678
8679     /*
8680      * We should definitely be at the ending "S? '>'" part
8681      */
8682     GROW;
8683     SKIP_BLANKS;
8684     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8685         xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8686     } else
8687         NEXT1;
8688
8689     /*
8690      * [ WFC: Element Type Match ]
8691      * The Name in an element's end-tag must match the element type in the
8692      * start-tag.
8693      *
8694      */
8695     if (name != (xmlChar*)1) {
8696         if (name == NULL) name = BAD_CAST "unparsable";
8697         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8698                      "Opening and ending tag mismatch: %s line %d and %s\n",
8699                                 ctxt->name, line, name);
8700     }
8701
8702     /*
8703      * SAX: End of Tag
8704      */
8705     if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8706         (!ctxt->disableSAX))
8707         ctxt->sax->endElement(ctxt->userData, ctxt->name);
8708
8709     namePop(ctxt);
8710     spacePop(ctxt);
8711     return;
8712 }
8713
8714 /**
8715  * xmlParseEndTag:
8716  * @ctxt:  an XML parser context
8717  *
8718  * parse an end of tag
8719  *
8720  * [42] ETag ::= '</' Name S? '>'
8721  *
8722  * With namespace
8723  *
8724  * [NS 9] ETag ::= '</' QName S? '>'
8725  */
8726
8727 void
8728 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8729     xmlParseEndTag1(ctxt, 0);
8730 }
8731 #endif /* LIBXML_SAX1_ENABLED */
8732
8733 /************************************************************************
8734  *                                                                      *
8735  *                    SAX 2 specific operations                         *
8736  *                                                                      *
8737  ************************************************************************/
8738
8739 /*
8740  * xmlGetNamespace:
8741  * @ctxt:  an XML parser context
8742  * @prefix:  the prefix to lookup
8743  *
8744  * Lookup the namespace name for the @prefix (which ca be NULL)
8745  * The prefix must come from the @ctxt->dict dictionary
8746  *
8747  * Returns the namespace name or NULL if not bound
8748  */
8749 static const xmlChar *
8750 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8751     int i;
8752
8753     if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8754     for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8755         if (ctxt->nsTab[i] == prefix) {
8756             if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8757                 return(NULL);
8758             return(ctxt->nsTab[i + 1]);
8759         }
8760     return(NULL);
8761 }
8762
8763 /**
8764  * xmlParseQName:
8765  * @ctxt:  an XML parser context
8766  * @prefix:  pointer to store the prefix part
8767  *
8768  * parse an XML Namespace QName
8769  *
8770  * [6]  QName  ::= (Prefix ':')? LocalPart
8771  * [7]  Prefix  ::= NCName
8772  * [8]  LocalPart  ::= NCName
8773  *
8774  * Returns the Name parsed or NULL
8775  */
8776
8777 static const xmlChar *
8778 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8779     const xmlChar *l, *p;
8780
8781     GROW;
8782
8783     l = xmlParseNCName(ctxt);
8784     if (l == NULL) {
8785         if (CUR == ':') {
8786             l = xmlParseName(ctxt);
8787             if (l != NULL) {
8788                 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8789                          "Failed to parse QName '%s'\n", l, NULL, NULL);
8790                 *prefix = NULL;
8791                 return(l);
8792             }
8793         }
8794         return(NULL);
8795     }
8796     if (CUR == ':') {
8797         NEXT;
8798         p = l;
8799         l = xmlParseNCName(ctxt);
8800         if (l == NULL) {
8801             xmlChar *tmp;
8802
8803             if (ctxt->instate == XML_PARSER_EOF)
8804                 return(NULL);
8805             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8806                      "Failed to parse QName '%s:'\n", p, NULL, NULL);
8807             l = xmlParseNmtoken(ctxt);
8808             if (l == NULL) {
8809                 if (ctxt->instate == XML_PARSER_EOF)
8810                     return(NULL);
8811                 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8812             } else {
8813                 tmp = xmlBuildQName(l, p, NULL, 0);
8814                 xmlFree((char *)l);
8815             }
8816             p = xmlDictLookup(ctxt->dict, tmp, -1);
8817             if (tmp != NULL) xmlFree(tmp);
8818             *prefix = NULL;
8819             return(p);
8820         }
8821         if (CUR == ':') {
8822             xmlChar *tmp;
8823
8824             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8825                      "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8826             NEXT;
8827             tmp = (xmlChar *) xmlParseName(ctxt);
8828             if (tmp != NULL) {
8829                 tmp = xmlBuildQName(tmp, l, NULL, 0);
8830                 l = xmlDictLookup(ctxt->dict, tmp, -1);
8831                 if (tmp != NULL) xmlFree(tmp);
8832                 *prefix = p;
8833                 return(l);
8834             }
8835             if (ctxt->instate == XML_PARSER_EOF)
8836                 return(NULL);
8837             tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8838             l = xmlDictLookup(ctxt->dict, tmp, -1);
8839             if (tmp != NULL) xmlFree(tmp);
8840             *prefix = p;
8841             return(l);
8842         }
8843         *prefix = p;
8844     } else
8845         *prefix = NULL;
8846     return(l);
8847 }
8848
8849 /**
8850  * xmlParseQNameAndCompare:
8851  * @ctxt:  an XML parser context
8852  * @name:  the localname
8853  * @prefix:  the prefix, if any.
8854  *
8855  * parse an XML name and compares for match
8856  * (specialized for endtag parsing)
8857  *
8858  * Returns NULL for an illegal name, (xmlChar*) 1 for success
8859  * and the name for mismatch
8860  */
8861
8862 static const xmlChar *
8863 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8864                         xmlChar const *prefix) {
8865     const xmlChar *cmp;
8866     const xmlChar *in;
8867     const xmlChar *ret;
8868     const xmlChar *prefix2;
8869
8870     if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8871
8872     GROW;
8873     in = ctxt->input->cur;
8874
8875     cmp = prefix;
8876     while (*in != 0 && *in == *cmp) {
8877         ++in;
8878         ++cmp;
8879     }
8880     if ((*cmp == 0) && (*in == ':')) {
8881         in++;
8882         cmp = name;
8883         while (*in != 0 && *in == *cmp) {
8884             ++in;
8885             ++cmp;
8886         }
8887         if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8888             /* success */
8889             ctxt->input->col += in - ctxt->input->cur;
8890             ctxt->input->cur = in;
8891             return((const xmlChar*) 1);
8892         }
8893     }
8894     /*
8895      * all strings coms from the dictionary, equality can be done directly
8896      */
8897     ret = xmlParseQName (ctxt, &prefix2);
8898     if ((ret == name) && (prefix == prefix2))
8899         return((const xmlChar*) 1);
8900     return ret;
8901 }
8902
8903 /**
8904  * xmlParseAttValueInternal:
8905  * @ctxt:  an XML parser context
8906  * @len:  attribute len result
8907  * @alloc:  whether the attribute was reallocated as a new string
8908  * @normalize:  if 1 then further non-CDATA normalization must be done
8909  *
8910  * parse a value for an attribute.
8911  * NOTE: if no normalization is needed, the routine will return pointers
8912  *       directly from the data buffer.
8913  *
8914  * 3.3.3 Attribute-Value Normalization:
8915  * Before the value of an attribute is passed to the application or
8916  * checked for validity, the XML processor must normalize it as follows:
8917  * - a character reference is processed by appending the referenced
8918  *   character to the attribute value
8919  * - an entity reference is processed by recursively processing the
8920  *   replacement text of the entity
8921  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8922  *   appending #x20 to the normalized value, except that only a single
8923  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8924  *   parsed entity or the literal entity value of an internal parsed entity
8925  * - other characters are processed by appending them to the normalized value
8926  * If the declared value is not CDATA, then the XML processor must further
8927  * process the normalized attribute value by discarding any leading and
8928  * trailing space (#x20) characters, and by replacing sequences of space
8929  * (#x20) characters by a single space (#x20) character.
8930  * All attributes for which no declaration has been read should be treated
8931  * by a non-validating parser as if declared CDATA.
8932  *
8933  * Returns the AttValue parsed or NULL. The value has to be freed by the
8934  *     caller if it was copied, this can be detected by val[*len] == 0.
8935  */
8936
8937 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8938     const xmlChar *oldbase = ctxt->input->base;\
8939     GROW;\
8940     if (ctxt->instate == XML_PARSER_EOF)\
8941         return(NULL);\
8942     if (oldbase != ctxt->input->base) {\
8943         ptrdiff_t delta = ctxt->input->base - oldbase;\
8944         start = start + delta;\
8945         in = in + delta;\
8946     }\
8947     end = ctxt->input->end;
8948
8949 static xmlChar *
8950 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8951                          int normalize)
8952 {
8953     xmlChar limit = 0;
8954     const xmlChar *in = NULL, *start, *end, *last;
8955     xmlChar *ret = NULL;
8956     int line, col;
8957
8958     GROW;
8959     in = (xmlChar *) CUR_PTR;
8960     line = ctxt->input->line;
8961     col = ctxt->input->col;
8962     if (*in != '"' && *in != '\'') {
8963         xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8964         return (NULL);
8965     }
8966     ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8967
8968     /*
8969      * try to handle in this routine the most common case where no
8970      * allocation of a new string is required and where content is
8971      * pure ASCII.
8972      */
8973     limit = *in++;
8974     col++;
8975     end = ctxt->input->end;
8976     start = in;
8977     if (in >= end) {
8978         GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8979     }
8980     if (normalize) {
8981         /*
8982          * Skip any leading spaces
8983          */
8984         while ((in < end) && (*in != limit) &&
8985                ((*in == 0x20) || (*in == 0x9) ||
8986                 (*in == 0xA) || (*in == 0xD))) {
8987             if (*in == 0xA) {
8988                 line++; col = 1;
8989             } else {
8990                 col++;
8991             }
8992             in++;
8993             start = in;
8994             if (in >= end) {
8995                 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8996                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8997                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8998                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8999                                    "AttValue length too long\n");
9000                     return(NULL);
9001                 }
9002             }
9003         }
9004         while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9005                (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9006             col++;
9007             if ((*in++ == 0x20) && (*in == 0x20)) break;
9008             if (in >= end) {
9009                 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9010                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9011                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9012                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9013                                    "AttValue length too long\n");
9014                     return(NULL);
9015                 }
9016             }
9017         }
9018         last = in;
9019         /*
9020          * skip the trailing blanks
9021          */
9022         while ((last[-1] == 0x20) && (last > start)) last--;
9023         while ((in < end) && (*in != limit) &&
9024                ((*in == 0x20) || (*in == 0x9) ||
9025                 (*in == 0xA) || (*in == 0xD))) {
9026             if (*in == 0xA) {
9027                 line++, col = 1;
9028             } else {
9029                 col++;
9030             }
9031             in++;
9032             if (in >= end) {
9033                 const xmlChar *oldbase = ctxt->input->base;
9034                 GROW;
9035                 if (ctxt->instate == XML_PARSER_EOF)
9036                     return(NULL);
9037                 if (oldbase != ctxt->input->base) {
9038                     ptrdiff_t delta = ctxt->input->base - oldbase;
9039                     start = start + delta;
9040                     in = in + delta;
9041                     last = last + delta;
9042                 }
9043                 end = ctxt->input->end;
9044                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9045                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9046                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9047                                    "AttValue length too long\n");
9048                     return(NULL);
9049                 }
9050             }
9051         }
9052         if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9053             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9054             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9055                            "AttValue length too long\n");
9056             return(NULL);
9057         }
9058         if (*in != limit) goto need_complex;
9059     } else {
9060         while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9061                (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9062             in++;
9063             col++;
9064             if (in >= end) {
9065                 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9066                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9067                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9068                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9069                                    "AttValue length too long\n");
9070                     return(NULL);
9071                 }
9072             }
9073         }
9074         last = in;
9075         if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9076             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9077             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9078                            "AttValue length too long\n");
9079             return(NULL);
9080         }
9081         if (*in != limit) goto need_complex;
9082     }
9083     in++;
9084     col++;
9085     if (len != NULL) {
9086         *len = last - start;
9087         ret = (xmlChar *) start;
9088     } else {
9089         if (alloc) *alloc = 1;
9090         ret = xmlStrndup(start, last - start);
9091     }
9092     CUR_PTR = in;
9093     ctxt->input->line = line;
9094     ctxt->input->col = col;
9095     if (alloc) *alloc = 0;
9096     return ret;
9097 need_complex:
9098     if (alloc) *alloc = 1;
9099     return xmlParseAttValueComplex(ctxt, len, normalize);
9100 }
9101
9102 /**
9103  * xmlParseAttribute2:
9104  * @ctxt:  an XML parser context
9105  * @pref:  the element prefix
9106  * @elem:  the element name
9107  * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9108  * @value:  a xmlChar ** used to store the value of the attribute
9109  * @len:  an int * to save the length of the attribute
9110  * @alloc:  an int * to indicate if the attribute was allocated
9111  *
9112  * parse an attribute in the new SAX2 framework.
9113  *
9114  * Returns the attribute name, and the value in *value, .
9115  */
9116
9117 static const xmlChar *
9118 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9119                    const xmlChar * pref, const xmlChar * elem,
9120                    const xmlChar ** prefix, xmlChar ** value,
9121                    int *len, int *alloc)
9122 {
9123     const xmlChar *name;
9124     xmlChar *val, *internal_val = NULL;
9125     int normalize = 0;
9126
9127     *value = NULL;
9128     GROW;
9129     name = xmlParseQName(ctxt, prefix);
9130     if (name == NULL) {
9131         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9132                        "error parsing attribute name\n");
9133         return (NULL);
9134     }
9135
9136     /*
9137      * get the type if needed
9138      */
9139     if (ctxt->attsSpecial != NULL) {
9140         int type;
9141
9142         type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9143                                                  pref, elem, *prefix, name);
9144         if (type != 0)
9145             normalize = 1;
9146     }
9147
9148     /*
9149      * read the value
9150      */
9151     SKIP_BLANKS;
9152     if (RAW == '=') {
9153         NEXT;
9154         SKIP_BLANKS;
9155         val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9156         if (normalize) {
9157             /*
9158              * Sometimes a second normalisation pass for spaces is needed
9159              * but that only happens if charrefs or entities references
9160              * have been used in the attribute value, i.e. the attribute
9161              * value have been extracted in an allocated string already.
9162              */
9163             if (*alloc) {
9164                 const xmlChar *val2;
9165
9166                 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9167                 if ((val2 != NULL) && (val2 != val)) {
9168                     xmlFree(val);
9169                     val = (xmlChar *) val2;
9170                 }
9171             }
9172         }
9173         ctxt->instate = XML_PARSER_CONTENT;
9174     } else {
9175         xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9176                           "Specification mandates value for attribute %s\n",
9177                           name);
9178         return (NULL);
9179     }
9180
9181     if (*prefix == ctxt->str_xml) {
9182         /*
9183          * Check that xml:lang conforms to the specification
9184          * No more registered as an error, just generate a warning now
9185          * since this was deprecated in XML second edition
9186          */
9187         if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9188             internal_val = xmlStrndup(val, *len);
9189             if (!xmlCheckLanguageID(internal_val)) {
9190                 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9191                               "Malformed value for xml:lang : %s\n",
9192                               internal_val, NULL);
9193             }
9194         }
9195
9196         /*
9197          * Check that xml:space conforms to the specification
9198          */
9199         if (xmlStrEqual(name, BAD_CAST "space")) {
9200             internal_val = xmlStrndup(val, *len);
9201             if (xmlStrEqual(internal_val, BAD_CAST "default"))
9202                 *(ctxt->space) = 0;
9203             else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9204                 *(ctxt->space) = 1;
9205             else {
9206                 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9207                               "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9208                               internal_val, NULL);
9209             }
9210         }
9211         if (internal_val) {
9212             xmlFree(internal_val);
9213         }
9214     }
9215
9216     *value = val;
9217     return (name);
9218 }
9219 /**
9220  * xmlParseStartTag2:
9221  * @ctxt:  an XML parser context
9222  *
9223  * parse a start of tag either for rule element or
9224  * EmptyElement. In both case we don't parse the tag closing chars.
9225  * This routine is called when running SAX2 parsing
9226  *
9227  * [40] STag ::= '<' Name (S Attribute)* S? '>'
9228  *
9229  * [ WFC: Unique Att Spec ]
9230  * No attribute name may appear more than once in the same start-tag or
9231  * empty-element tag.
9232  *
9233  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9234  *
9235  * [ WFC: Unique Att Spec ]
9236  * No attribute name may appear more than once in the same start-tag or
9237  * empty-element tag.
9238  *
9239  * With namespace:
9240  *
9241  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9242  *
9243  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9244  *
9245  * Returns the element name parsed
9246  */
9247
9248 static const xmlChar *
9249 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9250                   const xmlChar **URI, int *tlen) {
9251     const xmlChar *localname;
9252     const xmlChar *prefix;
9253     const xmlChar *attname;
9254     const xmlChar *aprefix;
9255     const xmlChar *nsname;
9256     xmlChar *attvalue;
9257     const xmlChar **atts = ctxt->atts;
9258     int maxatts = ctxt->maxatts;
9259     int nratts, nbatts, nbdef, inputid;
9260     int i, j, nbNs, attval;
9261     unsigned long cur;
9262     int nsNr = ctxt->nsNr;
9263
9264     if (RAW != '<') return(NULL);
9265     NEXT1;
9266
9267     /*
9268      * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9269      *       point since the attribute values may be stored as pointers to
9270      *       the buffer and calling SHRINK would destroy them !
9271      *       The Shrinking is only possible once the full set of attribute
9272      *       callbacks have been done.
9273      */
9274     SHRINK;
9275     cur = ctxt->input->cur - ctxt->input->base;
9276     inputid = ctxt->input->id;
9277     nbatts = 0;
9278     nratts = 0;
9279     nbdef = 0;
9280     nbNs = 0;
9281     attval = 0;
9282     /* Forget any namespaces added during an earlier parse of this element. */
9283     ctxt->nsNr = nsNr;
9284
9285     localname = xmlParseQName(ctxt, &prefix);
9286     if (localname == NULL) {
9287         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9288                        "StartTag: invalid element name\n");
9289         return(NULL);
9290     }
9291     *tlen = ctxt->input->cur - ctxt->input->base - cur;
9292
9293     /*
9294      * Now parse the attributes, it ends up with the ending
9295      *
9296      * (S Attribute)* S?
9297      */
9298     SKIP_BLANKS;
9299     GROW;
9300
9301     while (((RAW != '>') &&
9302            ((RAW != '/') || (NXT(1) != '>')) &&
9303            (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9304         int id = ctxt->input->id;
9305         unsigned long cons = CUR_CONSUMED;
9306         int len = -1, alloc = 0;
9307
9308         attname = xmlParseAttribute2(ctxt, prefix, localname,
9309                                      &aprefix, &attvalue, &len, &alloc);
9310         if ((attname == NULL) || (attvalue == NULL))
9311             goto next_attr;
9312         if (len < 0) len = xmlStrlen(attvalue);
9313
9314         if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9315             const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9316             xmlURIPtr uri;
9317
9318             if (URL == NULL) {
9319                 xmlErrMemory(ctxt, "dictionary allocation failure");
9320                 if ((attvalue != NULL) && (alloc != 0))
9321                     xmlFree(attvalue);
9322                 localname = NULL;
9323                 goto done;
9324             }
9325             if (*URL != 0) {
9326                 uri = xmlParseURI((const char *) URL);
9327                 if (uri == NULL) {
9328                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9329                              "xmlns: '%s' is not a valid URI\n",
9330                                        URL, NULL, NULL);
9331                 } else {
9332                     if (uri->scheme == NULL) {
9333                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9334                                   "xmlns: URI %s is not absolute\n",
9335                                   URL, NULL, NULL);
9336                     }
9337                     xmlFreeURI(uri);
9338                 }
9339                 if (URL == ctxt->str_xml_ns) {
9340                     if (attname != ctxt->str_xml) {
9341                         xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9342                      "xml namespace URI cannot be the default namespace\n",
9343                                  NULL, NULL, NULL);
9344                     }
9345                     goto next_attr;
9346                 }
9347                 if ((len == 29) &&
9348                     (xmlStrEqual(URL,
9349                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9350                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9351                          "reuse of the xmlns namespace name is forbidden\n",
9352                              NULL, NULL, NULL);
9353                     goto next_attr;
9354                 }
9355             }
9356             /*
9357              * check that it's not a defined namespace
9358              */
9359             for (j = 1;j <= nbNs;j++)
9360                 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9361                     break;
9362             if (j <= nbNs)
9363                 xmlErrAttributeDup(ctxt, NULL, attname);
9364             else
9365                 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9366
9367         } else if (aprefix == ctxt->str_xmlns) {
9368             const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9369             xmlURIPtr uri;
9370
9371             if (attname == ctxt->str_xml) {
9372                 if (URL != ctxt->str_xml_ns) {
9373                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9374                              "xml namespace prefix mapped to wrong URI\n",
9375                              NULL, NULL, NULL);
9376                 }
9377                 /*
9378                  * Do not keep a namespace definition node
9379                  */
9380                 goto next_attr;
9381             }
9382             if (URL == ctxt->str_xml_ns) {
9383                 if (attname != ctxt->str_xml) {
9384                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9385                              "xml namespace URI mapped to wrong prefix\n",
9386                              NULL, NULL, NULL);
9387                 }
9388                 goto next_attr;
9389             }
9390             if (attname == ctxt->str_xmlns) {
9391                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9392                          "redefinition of the xmlns prefix is forbidden\n",
9393                          NULL, NULL, NULL);
9394                 goto next_attr;
9395             }
9396             if ((len == 29) &&
9397                 (xmlStrEqual(URL,
9398                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9399                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9400                          "reuse of the xmlns namespace name is forbidden\n",
9401                          NULL, NULL, NULL);
9402                 goto next_attr;
9403             }
9404             if ((URL == NULL) || (URL[0] == 0)) {
9405                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9406                          "xmlns:%s: Empty XML namespace is not allowed\n",
9407                               attname, NULL, NULL);
9408                 goto next_attr;
9409             } else {
9410                 uri = xmlParseURI((const char *) URL);
9411                 if (uri == NULL) {
9412                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9413                          "xmlns:%s: '%s' is not a valid URI\n",
9414                                        attname, URL, NULL);
9415                 } else {
9416                     if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9417                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9418                                   "xmlns:%s: URI %s is not absolute\n",
9419                                   attname, URL, NULL);
9420                     }
9421                     xmlFreeURI(uri);
9422                 }
9423             }
9424
9425             /*
9426              * check that it's not a defined namespace
9427              */
9428             for (j = 1;j <= nbNs;j++)
9429                 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9430                     break;
9431             if (j <= nbNs)
9432                 xmlErrAttributeDup(ctxt, aprefix, attname);
9433             else
9434                 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9435
9436         } else {
9437             /*
9438              * Add the pair to atts
9439              */
9440             if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9441                 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9442                     goto next_attr;
9443                 }
9444                 maxatts = ctxt->maxatts;
9445                 atts = ctxt->atts;
9446             }
9447             ctxt->attallocs[nratts++] = alloc;
9448             atts[nbatts++] = attname;
9449             atts[nbatts++] = aprefix;
9450             /*
9451              * The namespace URI field is used temporarily to point at the
9452              * base of the current input buffer for non-alloced attributes.
9453              * When the input buffer is reallocated, all the pointers become
9454              * invalid, but they can be reconstructed later.
9455              */
9456             if (alloc)
9457                 atts[nbatts++] = NULL;
9458             else
9459                 atts[nbatts++] = ctxt->input->base;
9460             atts[nbatts++] = attvalue;
9461             attvalue += len;
9462             atts[nbatts++] = attvalue;
9463             /*
9464              * tag if some deallocation is needed
9465              */
9466             if (alloc != 0) attval = 1;
9467             attvalue = NULL; /* moved into atts */
9468         }
9469
9470 next_attr:
9471         if ((attvalue != NULL) && (alloc != 0)) {
9472             xmlFree(attvalue);
9473             attvalue = NULL;
9474         }
9475
9476         GROW
9477         if (ctxt->instate == XML_PARSER_EOF)
9478             break;
9479         if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9480             break;
9481         if (SKIP_BLANKS == 0) {
9482             xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9483                            "attributes construct error\n");
9484             break;
9485         }
9486         if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
9487             (attname == NULL) && (attvalue == NULL)) {
9488             xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9489                  "xmlParseStartTag: problem parsing attributes\n");
9490             break;
9491         }
9492         GROW;
9493     }
9494
9495     if (ctxt->input->id != inputid) {
9496         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9497                     "Unexpected change of input\n");
9498         localname = NULL;
9499         goto done;
9500     }
9501
9502     /* Reconstruct attribute value pointers. */
9503     for (i = 0, j = 0; j < nratts; i += 5, j++) {
9504         if (atts[i+2] != NULL) {
9505             /*
9506              * Arithmetic on dangling pointers is technically undefined
9507              * behavior, but well...
9508              */
9509             ptrdiff_t offset = ctxt->input->base - atts[i+2];
9510             atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9511             atts[i+3] += offset;  /* value */
9512             atts[i+4] += offset;  /* valuend */
9513         }
9514     }
9515
9516     /*
9517      * The attributes defaulting
9518      */
9519     if (ctxt->attsDefault != NULL) {
9520         xmlDefAttrsPtr defaults;
9521
9522         defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9523         if (defaults != NULL) {
9524             for (i = 0;i < defaults->nbAttrs;i++) {
9525                 attname = defaults->values[5 * i];
9526                 aprefix = defaults->values[5 * i + 1];
9527
9528                 /*
9529                  * special work for namespaces defaulted defs
9530                  */
9531                 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9532                     /*
9533                      * check that it's not a defined namespace
9534                      */
9535                     for (j = 1;j <= nbNs;j++)
9536                         if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9537                             break;
9538                     if (j <= nbNs) continue;
9539
9540                     nsname = xmlGetNamespace(ctxt, NULL);
9541                     if (nsname != defaults->values[5 * i + 2]) {
9542                         if (nsPush(ctxt, NULL,
9543                                    defaults->values[5 * i + 2]) > 0)
9544                             nbNs++;
9545                     }
9546                 } else if (aprefix == ctxt->str_xmlns) {
9547                     /*
9548                      * check that it's not a defined namespace
9549                      */
9550                     for (j = 1;j <= nbNs;j++)
9551                         if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9552                             break;
9553                     if (j <= nbNs) continue;
9554
9555                     nsname = xmlGetNamespace(ctxt, attname);
9556                     if (nsname != defaults->values[2]) {
9557                         if (nsPush(ctxt, attname,
9558                                    defaults->values[5 * i + 2]) > 0)
9559                             nbNs++;
9560                     }
9561                 } else {
9562                     /*
9563                      * check that it's not a defined attribute
9564                      */
9565                     for (j = 0;j < nbatts;j+=5) {
9566                         if ((attname == atts[j]) && (aprefix == atts[j+1]))
9567                             break;
9568                     }
9569                     if (j < nbatts) continue;
9570
9571                     if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9572                         if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9573                             localname = NULL;
9574                             goto done;
9575                         }
9576                         maxatts = ctxt->maxatts;
9577                         atts = ctxt->atts;
9578                     }
9579                     atts[nbatts++] = attname;
9580                     atts[nbatts++] = aprefix;
9581                     if (aprefix == NULL)
9582                         atts[nbatts++] = NULL;
9583                     else
9584                         atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9585                     atts[nbatts++] = defaults->values[5 * i + 2];
9586                     atts[nbatts++] = defaults->values[5 * i + 3];
9587                     if ((ctxt->standalone == 1) &&
9588                         (defaults->values[5 * i + 4] != NULL)) {
9589                         xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9590           "standalone: attribute %s on %s defaulted from external subset\n",
9591                                          attname, localname);
9592                     }
9593                     nbdef++;
9594                 }
9595             }
9596         }
9597     }
9598
9599     /*
9600      * The attributes checkings
9601      */
9602     for (i = 0; i < nbatts;i += 5) {
9603         /*
9604         * The default namespace does not apply to attribute names.
9605         */
9606         if (atts[i + 1] != NULL) {
9607             nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9608             if (nsname == NULL) {
9609                 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9610                     "Namespace prefix %s for %s on %s is not defined\n",
9611                     atts[i + 1], atts[i], localname);
9612             }
9613             atts[i + 2] = nsname;
9614         } else
9615             nsname = NULL;
9616         /*
9617          * [ WFC: Unique Att Spec ]
9618          * No attribute name may appear more than once in the same
9619          * start-tag or empty-element tag.
9620          * As extended by the Namespace in XML REC.
9621          */
9622         for (j = 0; j < i;j += 5) {
9623             if (atts[i] == atts[j]) {
9624                 if (atts[i+1] == atts[j+1]) {
9625                     xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9626                     break;
9627                 }
9628                 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9629                     xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9630                              "Namespaced Attribute %s in '%s' redefined\n",
9631                              atts[i], nsname, NULL);
9632                     break;
9633                 }
9634             }
9635         }
9636     }
9637
9638     nsname = xmlGetNamespace(ctxt, prefix);
9639     if ((prefix != NULL) && (nsname == NULL)) {
9640         xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9641                  "Namespace prefix %s on %s is not defined\n",
9642                  prefix, localname, NULL);
9643     }
9644     *pref = prefix;
9645     *URI = nsname;
9646
9647     /*
9648      * SAX: Start of Element !
9649      */
9650     if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9651         (!ctxt->disableSAX)) {
9652         if (nbNs > 0)
9653             ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9654                           nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9655                           nbatts / 5, nbdef, atts);
9656         else
9657             ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9658                           nsname, 0, NULL, nbatts / 5, nbdef, atts);
9659     }
9660
9661 done:
9662     /*
9663      * Free up attribute allocated strings if needed
9664      */
9665     if (attval != 0) {
9666         for (i = 3,j = 0; j < nratts;i += 5,j++)
9667             if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9668                 xmlFree((xmlChar *) atts[i]);
9669     }
9670
9671     return(localname);
9672 }
9673
9674 /**
9675  * xmlParseEndTag2:
9676  * @ctxt:  an XML parser context
9677  * @line:  line of the start tag
9678  * @nsNr:  number of namespaces on the start tag
9679  *
9680  * parse an end of tag
9681  *
9682  * [42] ETag ::= '</' Name S? '>'
9683  *
9684  * With namespace
9685  *
9686  * [NS 9] ETag ::= '</' QName S? '>'
9687  */
9688
9689 static void
9690 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9691     const xmlChar *name;
9692
9693     GROW;
9694     if ((RAW != '<') || (NXT(1) != '/')) {
9695         xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9696         return;
9697     }
9698     SKIP(2);
9699
9700     if (tag->prefix == NULL)
9701         name = xmlParseNameAndCompare(ctxt, ctxt->name);
9702     else
9703         name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9704
9705     /*
9706      * We should definitely be at the ending "S? '>'" part
9707      */
9708     GROW;
9709     if (ctxt->instate == XML_PARSER_EOF)
9710         return;
9711     SKIP_BLANKS;
9712     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9713         xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9714     } else
9715         NEXT1;
9716
9717     /*
9718      * [ WFC: Element Type Match ]
9719      * The Name in an element's end-tag must match the element type in the
9720      * start-tag.
9721      *
9722      */
9723     if (name != (xmlChar*)1) {
9724         if (name == NULL) name = BAD_CAST "unparsable";
9725         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9726                      "Opening and ending tag mismatch: %s line %d and %s\n",
9727                                 ctxt->name, tag->line, name);
9728     }
9729
9730     /*
9731      * SAX: End of Tag
9732      */
9733     if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9734         (!ctxt->disableSAX))
9735         ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9736                                 tag->URI);
9737
9738     spacePop(ctxt);
9739     if (tag->nsNr != 0)
9740         nsPop(ctxt, tag->nsNr);
9741 }
9742
9743 /**
9744  * xmlParseCDSect:
9745  * @ctxt:  an XML parser context
9746  *
9747  * Parse escaped pure raw content.
9748  *
9749  * [18] CDSect ::= CDStart CData CDEnd
9750  *
9751  * [19] CDStart ::= '<![CDATA['
9752  *
9753  * [20] Data ::= (Char* - (Char* ']]>' Char*))
9754  *
9755  * [21] CDEnd ::= ']]>'
9756  */
9757 void
9758 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9759     xmlChar *buf = NULL;
9760     int len = 0;
9761     int size = XML_PARSER_BUFFER_SIZE;
9762     int r, rl;
9763     int s, sl;
9764     int cur, l;
9765     int count = 0;
9766
9767     /* Check 2.6.0 was NXT(0) not RAW */
9768     if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9769         SKIP(9);
9770     } else
9771         return;
9772
9773     ctxt->instate = XML_PARSER_CDATA_SECTION;
9774     r = CUR_CHAR(rl);
9775     if (!IS_CHAR(r)) {
9776         xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9777         ctxt->instate = XML_PARSER_CONTENT;
9778         return;
9779     }
9780     NEXTL(rl);
9781     s = CUR_CHAR(sl);
9782     if (!IS_CHAR(s)) {
9783         xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9784         ctxt->instate = XML_PARSER_CONTENT;
9785         return;
9786     }
9787     NEXTL(sl);
9788     cur = CUR_CHAR(l);
9789     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9790     if (buf == NULL) {
9791         xmlErrMemory(ctxt, NULL);
9792         return;
9793     }
9794     while (IS_CHAR(cur) &&
9795            ((r != ']') || (s != ']') || (cur != '>'))) {
9796         if (len + 5 >= size) {
9797             xmlChar *tmp;
9798
9799             if ((size > XML_MAX_TEXT_LENGTH) &&
9800                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9801                 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9802                              "CData section too big found", NULL);
9803                 xmlFree (buf);
9804                 return;
9805             }
9806             tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9807             if (tmp == NULL) {
9808                 xmlFree(buf);
9809                 xmlErrMemory(ctxt, NULL);
9810                 return;
9811             }
9812             buf = tmp;
9813             size *= 2;
9814         }
9815         COPY_BUF(rl,buf,len,r);
9816         r = s;
9817         rl = sl;
9818         s = cur;
9819         sl = l;
9820         count++;
9821         if (count > 50) {
9822             SHRINK;
9823             GROW;
9824             if (ctxt->instate == XML_PARSER_EOF) {
9825                 xmlFree(buf);
9826                 return;
9827             }
9828             count = 0;
9829         }
9830         NEXTL(l);
9831         cur = CUR_CHAR(l);
9832     }
9833     buf[len] = 0;
9834     ctxt->instate = XML_PARSER_CONTENT;
9835     if (cur != '>') {
9836         xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9837                              "CData section not finished\n%.50s\n", buf);
9838         xmlFree(buf);
9839         return;
9840     }
9841     NEXTL(l);
9842
9843     /*
9844      * OK the buffer is to be consumed as cdata.
9845      */
9846     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9847         if (ctxt->sax->cdataBlock != NULL)
9848             ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9849         else if (ctxt->sax->characters != NULL)
9850             ctxt->sax->characters(ctxt->userData, buf, len);
9851     }
9852     xmlFree(buf);
9853 }
9854
9855 /**
9856  * xmlParseContentInternal:
9857  * @ctxt:  an XML parser context
9858  *
9859  * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9860  * unexpected EOF to the caller.
9861  */
9862
9863 static void
9864 xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9865     int nameNr = ctxt->nameNr;
9866
9867     GROW;
9868     while ((RAW != 0) &&
9869            (ctxt->instate != XML_PARSER_EOF)) {
9870         int id = ctxt->input->id;
9871         unsigned long cons = CUR_CONSUMED;
9872         const xmlChar *cur = ctxt->input->cur;
9873
9874         /*
9875          * First case : a Processing Instruction.
9876          */
9877         if ((*cur == '<') && (cur[1] == '?')) {
9878             xmlParsePI(ctxt);
9879         }
9880
9881         /*
9882          * Second case : a CDSection
9883          */
9884         /* 2.6.0 test was *cur not RAW */
9885         else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9886             xmlParseCDSect(ctxt);
9887         }
9888
9889         /*
9890          * Third case :  a comment
9891          */
9892         else if ((*cur == '<') && (NXT(1) == '!') &&
9893                  (NXT(2) == '-') && (NXT(3) == '-')) {
9894             xmlParseComment(ctxt);
9895             ctxt->instate = XML_PARSER_CONTENT;
9896         }
9897
9898         /*
9899          * Fourth case :  a sub-element.
9900          */
9901         else if (*cur == '<') {
9902             if (NXT(1) == '/') {
9903                 if (ctxt->nameNr <= nameNr)
9904                     break;
9905                 xmlParseElementEnd(ctxt);
9906             } else {
9907                 xmlParseElementStart(ctxt);
9908             }
9909         }
9910
9911         /*
9912          * Fifth case : a reference. If if has not been resolved,
9913          *    parsing returns it's Name, create the node
9914          */
9915
9916         else if (*cur == '&') {
9917             xmlParseReference(ctxt);
9918         }
9919
9920         /*
9921          * Last case, text. Note that References are handled directly.
9922          */
9923         else {
9924             xmlParseCharData(ctxt, 0);
9925         }
9926
9927         GROW;
9928         SHRINK;
9929
9930         if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
9931             xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9932                         "detected an error in element content\n");
9933             xmlHaltParser(ctxt);
9934             break;
9935         }
9936     }
9937 }
9938
9939 /**
9940  * xmlParseContent:
9941  * @ctxt:  an XML parser context
9942  *
9943  * Parse a content sequence. Stops at EOF or '</'.
9944  *
9945  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9946  */
9947
9948 void
9949 xmlParseContent(xmlParserCtxtPtr ctxt) {
9950     int nameNr = ctxt->nameNr;
9951
9952     xmlParseContentInternal(ctxt);
9953
9954     if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9955         const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9956         int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9957         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9958                 "Premature end of data in tag %s line %d\n",
9959                 name, line, NULL);
9960     }
9961 }
9962
9963 /**
9964  * xmlParseElement:
9965  * @ctxt:  an XML parser context
9966  *
9967  * parse an XML element
9968  *
9969  * [39] element ::= EmptyElemTag | STag content ETag
9970  *
9971  * [ WFC: Element Type Match ]
9972  * The Name in an element's end-tag must match the element type in the
9973  * start-tag.
9974  *
9975  */
9976
9977 void
9978 xmlParseElement(xmlParserCtxtPtr ctxt) {
9979     if (xmlParseElementStart(ctxt) != 0)
9980         return;
9981
9982     xmlParseContentInternal(ctxt);
9983     if (ctxt->instate == XML_PARSER_EOF)
9984         return;
9985
9986     if (CUR == 0) {
9987         const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9988         int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9989         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9990                 "Premature end of data in tag %s line %d\n",
9991                 name, line, NULL);
9992         return;
9993     }
9994
9995     xmlParseElementEnd(ctxt);
9996 }
9997
9998 /**
9999  * xmlParseElementStart:
10000  * @ctxt:  an XML parser context
10001  *
10002  * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10003  * opening tag was parsed, 1 if an empty element was parsed.
10004  */
10005 static int
10006 xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10007     const xmlChar *name;
10008     const xmlChar *prefix = NULL;
10009     const xmlChar *URI = NULL;
10010     xmlParserNodeInfo node_info;
10011     int line, tlen = 0;
10012     xmlNodePtr ret;
10013     int nsNr = ctxt->nsNr;
10014
10015     if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10016         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10017         xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10018                  "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10019                           xmlParserMaxDepth);
10020         xmlHaltParser(ctxt);
10021         return(-1);
10022     }
10023
10024     /* Capture start position */
10025     if (ctxt->record_info) {
10026         node_info.begin_pos = ctxt->input->consumed +
10027                           (CUR_PTR - ctxt->input->base);
10028         node_info.begin_line = ctxt->input->line;
10029     }
10030
10031     if (ctxt->spaceNr == 0)
10032         spacePush(ctxt, -1);
10033     else if (*ctxt->space == -2)
10034         spacePush(ctxt, -1);
10035     else
10036         spacePush(ctxt, *ctxt->space);
10037
10038     line = ctxt->input->line;
10039 #ifdef LIBXML_SAX1_ENABLED
10040     if (ctxt->sax2)
10041 #endif /* LIBXML_SAX1_ENABLED */
10042         name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10043 #ifdef LIBXML_SAX1_ENABLED
10044     else
10045         name = xmlParseStartTag(ctxt);
10046 #endif /* LIBXML_SAX1_ENABLED */
10047     if (ctxt->instate == XML_PARSER_EOF)
10048         return(-1);
10049     if (name == NULL) {
10050         spacePop(ctxt);
10051         return(-1);
10052     }
10053     nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10054     ret = ctxt->node;
10055
10056 #ifdef LIBXML_VALID_ENABLED
10057     /*
10058      * [ VC: Root Element Type ]
10059      * The Name in the document type declaration must match the element
10060      * type of the root element.
10061      */
10062     if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10063         ctxt->node && (ctxt->node == ctxt->myDoc->children))
10064         ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10065 #endif /* LIBXML_VALID_ENABLED */
10066
10067     /*
10068      * Check for an Empty Element.
10069      */
10070     if ((RAW == '/') && (NXT(1) == '>')) {
10071         SKIP(2);
10072         if (ctxt->sax2) {
10073             if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10074                 (!ctxt->disableSAX))
10075                 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10076 #ifdef LIBXML_SAX1_ENABLED
10077         } else {
10078             if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10079                 (!ctxt->disableSAX))
10080                 ctxt->sax->endElement(ctxt->userData, name);
10081 #endif /* LIBXML_SAX1_ENABLED */
10082         }
10083         namePop(ctxt);
10084         spacePop(ctxt);
10085         if (nsNr != ctxt->nsNr)
10086             nsPop(ctxt, ctxt->nsNr - nsNr);
10087         if ( ret != NULL && ctxt->record_info ) {
10088            node_info.end_pos = ctxt->input->consumed +
10089                               (CUR_PTR - ctxt->input->base);
10090            node_info.end_line = ctxt->input->line;
10091            node_info.node = ret;
10092            xmlParserAddNodeInfo(ctxt, &node_info);
10093         }
10094         return(1);
10095     }
10096     if (RAW == '>') {
10097         NEXT1;
10098     } else {
10099         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10100                      "Couldn't find end of Start Tag %s line %d\n",
10101                                 name, line, NULL);
10102
10103         /*
10104          * end of parsing of this node.
10105          */
10106         nodePop(ctxt);
10107         namePop(ctxt);
10108         spacePop(ctxt);
10109         if (nsNr != ctxt->nsNr)
10110             nsPop(ctxt, ctxt->nsNr - nsNr);
10111
10112         /*
10113          * Capture end position and add node
10114          */
10115         if ( ret != NULL && ctxt->record_info ) {
10116            node_info.end_pos = ctxt->input->consumed +
10117                               (CUR_PTR - ctxt->input->base);
10118            node_info.end_line = ctxt->input->line;
10119            node_info.node = ret;
10120            xmlParserAddNodeInfo(ctxt, &node_info);
10121         }
10122         return(-1);
10123     }
10124
10125     return(0);
10126 }
10127
10128 /**
10129  * xmlParseElementEnd:
10130  * @ctxt:  an XML parser context
10131  *
10132  * Parse the end of an XML element.
10133  */
10134 static void
10135 xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10136     xmlParserNodeInfo node_info;
10137     xmlNodePtr ret = ctxt->node;
10138
10139     if (ctxt->nameNr <= 0)
10140         return;
10141
10142     /*
10143      * parse the end of tag: '</' should be here.
10144      */
10145     if (ctxt->sax2) {
10146         xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10147         namePop(ctxt);
10148     }
10149 #ifdef LIBXML_SAX1_ENABLED
10150     else
10151         xmlParseEndTag1(ctxt, 0);
10152 #endif /* LIBXML_SAX1_ENABLED */
10153
10154     /*
10155      * Capture end position and add node
10156      */
10157     if ( ret != NULL && ctxt->record_info ) {
10158        node_info.end_pos = ctxt->input->consumed +
10159                           (CUR_PTR - ctxt->input->base);
10160        node_info.end_line = ctxt->input->line;
10161        node_info.node = ret;
10162        xmlParserAddNodeInfo(ctxt, &node_info);
10163     }
10164 }
10165
10166 /**
10167  * xmlParseVersionNum:
10168  * @ctxt:  an XML parser context
10169  *
10170  * parse the XML version value.
10171  *
10172  * [26] VersionNum ::= '1.' [0-9]+
10173  *
10174  * In practice allow [0-9].[0-9]+ at that level
10175  *
10176  * Returns the string giving the XML version number, or NULL
10177  */
10178 xmlChar *
10179 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10180     xmlChar *buf = NULL;
10181     int len = 0;
10182     int size = 10;
10183     xmlChar cur;
10184
10185     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10186     if (buf == NULL) {
10187         xmlErrMemory(ctxt, NULL);
10188         return(NULL);
10189     }
10190     cur = CUR;
10191     if (!((cur >= '0') && (cur <= '9'))) {
10192         xmlFree(buf);
10193         return(NULL);
10194     }
10195     buf[len++] = cur;
10196     NEXT;
10197     cur=CUR;
10198     if (cur != '.') {
10199         xmlFree(buf);
10200         return(NULL);
10201     }
10202     buf[len++] = cur;
10203     NEXT;
10204     cur=CUR;
10205     while ((cur >= '0') && (cur <= '9')) {
10206         if (len + 1 >= size) {
10207             xmlChar *tmp;
10208
10209             size *= 2;
10210             tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10211             if (tmp == NULL) {
10212                 xmlFree(buf);
10213                 xmlErrMemory(ctxt, NULL);
10214                 return(NULL);
10215             }
10216             buf = tmp;
10217         }
10218         buf[len++] = cur;
10219         NEXT;
10220         cur=CUR;
10221     }
10222     buf[len] = 0;
10223     return(buf);
10224 }
10225
10226 /**
10227  * xmlParseVersionInfo:
10228  * @ctxt:  an XML parser context
10229  *
10230  * parse the XML version.
10231  *
10232  * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10233  *
10234  * [25] Eq ::= S? '=' S?
10235  *
10236  * Returns the version string, e.g. "1.0"
10237  */
10238
10239 xmlChar *
10240 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10241     xmlChar *version = NULL;
10242
10243     if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10244         SKIP(7);
10245         SKIP_BLANKS;
10246         if (RAW != '=') {
10247             xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10248             return(NULL);
10249         }
10250         NEXT;
10251         SKIP_BLANKS;
10252         if (RAW == '"') {
10253             NEXT;
10254             version = xmlParseVersionNum(ctxt);
10255             if (RAW != '"') {
10256                 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10257             } else
10258                 NEXT;
10259         } else if (RAW == '\''){
10260             NEXT;
10261             version = xmlParseVersionNum(ctxt);
10262             if (RAW != '\'') {
10263                 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10264             } else
10265                 NEXT;
10266         } else {
10267             xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10268         }
10269     }
10270     return(version);
10271 }
10272
10273 /**
10274  * xmlParseEncName:
10275  * @ctxt:  an XML parser context
10276  *
10277  * parse the XML encoding name
10278  *
10279  * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10280  *
10281  * Returns the encoding name value or NULL
10282  */
10283 xmlChar *
10284 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10285     xmlChar *buf = NULL;
10286     int len = 0;
10287     int size = 10;
10288     xmlChar cur;
10289
10290     cur = CUR;
10291     if (((cur >= 'a') && (cur <= 'z')) ||
10292         ((cur >= 'A') && (cur <= 'Z'))) {
10293         buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10294         if (buf == NULL) {
10295             xmlErrMemory(ctxt, NULL);
10296             return(NULL);
10297         }
10298
10299         buf[len++] = cur;
10300         NEXT;
10301         cur = CUR;
10302         while (((cur >= 'a') && (cur <= 'z')) ||
10303                ((cur >= 'A') && (cur <= 'Z')) ||
10304                ((cur >= '0') && (cur <= '9')) ||
10305                (cur == '.') || (cur == '_') ||
10306                (cur == '-')) {
10307             if (len + 1 >= size) {
10308                 xmlChar *tmp;
10309
10310                 size *= 2;
10311                 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10312                 if (tmp == NULL) {
10313                     xmlErrMemory(ctxt, NULL);
10314                     xmlFree(buf);
10315                     return(NULL);
10316                 }
10317                 buf = tmp;
10318             }
10319             buf[len++] = cur;
10320             NEXT;
10321             cur = CUR;
10322             if (cur == 0) {
10323                 SHRINK;
10324                 GROW;
10325                 cur = CUR;
10326             }
10327         }
10328         buf[len] = 0;
10329     } else {
10330         xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10331     }
10332     return(buf);
10333 }
10334
10335 /**
10336  * xmlParseEncodingDecl:
10337  * @ctxt:  an XML parser context
10338  *
10339  * parse the XML encoding declaration
10340  *
10341  * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10342  *
10343  * this setups the conversion filters.
10344  *
10345  * Returns the encoding value or NULL
10346  */
10347
10348 const xmlChar *
10349 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10350     xmlChar *encoding = NULL;
10351
10352     SKIP_BLANKS;
10353     if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10354         SKIP(8);
10355         SKIP_BLANKS;
10356         if (RAW != '=') {
10357             xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10358             return(NULL);
10359         }
10360         NEXT;
10361         SKIP_BLANKS;
10362         if (RAW == '"') {
10363             NEXT;
10364             encoding = xmlParseEncName(ctxt);
10365             if (RAW != '"') {
10366                 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10367                 xmlFree((xmlChar *) encoding);
10368                 return(NULL);
10369             } else
10370                 NEXT;
10371         } else if (RAW == '\''){
10372             NEXT;
10373             encoding = xmlParseEncName(ctxt);
10374             if (RAW != '\'') {
10375                 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10376                 xmlFree((xmlChar *) encoding);
10377                 return(NULL);
10378             } else
10379                 NEXT;
10380         } else {
10381             xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10382         }
10383
10384         /*
10385          * Non standard parsing, allowing the user to ignore encoding
10386          */
10387         if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10388             xmlFree((xmlChar *) encoding);
10389             return(NULL);
10390         }
10391
10392         /*
10393          * UTF-16 encoding switch has already taken place at this stage,
10394          * more over the little-endian/big-endian selection is already done
10395          */
10396         if ((encoding != NULL) &&
10397             ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10398              (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10399             /*
10400              * If no encoding was passed to the parser, that we are
10401              * using UTF-16 and no decoder is present i.e. the
10402              * document is apparently UTF-8 compatible, then raise an
10403              * encoding mismatch fatal error
10404              */
10405             if ((ctxt->encoding == NULL) &&
10406                 (ctxt->input->buf != NULL) &&
10407                 (ctxt->input->buf->encoder == NULL)) {
10408                 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10409                   "Document labelled UTF-16 but has UTF-8 content\n");
10410             }
10411             if (ctxt->encoding != NULL)
10412                 xmlFree((xmlChar *) ctxt->encoding);
10413             ctxt->encoding = encoding;
10414         }
10415         /*
10416          * UTF-8 encoding is handled natively
10417          */
10418         else if ((encoding != NULL) &&
10419             ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10420              (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10421             if (ctxt->encoding != NULL)
10422                 xmlFree((xmlChar *) ctxt->encoding);
10423             ctxt->encoding = encoding;
10424         }
10425         else if (encoding != NULL) {
10426             xmlCharEncodingHandlerPtr handler;
10427
10428             if (ctxt->input->encoding != NULL)
10429                 xmlFree((xmlChar *) ctxt->input->encoding);
10430             ctxt->input->encoding = encoding;
10431
10432             handler = xmlFindCharEncodingHandler((const char *) encoding);
10433             if (handler != NULL) {
10434                 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10435                     /* failed to convert */
10436                     ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10437                     return(NULL);
10438                 }
10439             } else {
10440                 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10441                         "Unsupported encoding %s\n", encoding);
10442                 return(NULL);
10443             }
10444         }
10445     }
10446     return(encoding);
10447 }
10448
10449 /**
10450  * xmlParseSDDecl:
10451  * @ctxt:  an XML parser context
10452  *
10453  * parse the XML standalone declaration
10454  *
10455  * [32] SDDecl ::= S 'standalone' Eq
10456  *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10457  *
10458  * [ VC: Standalone Document Declaration ]
10459  * TODO The standalone document declaration must have the value "no"
10460  * if any external markup declarations contain declarations of:
10461  *  - attributes with default values, if elements to which these
10462  *    attributes apply appear in the document without specifications
10463  *    of values for these attributes, or
10464  *  - entities (other than amp, lt, gt, apos, quot), if references
10465  *    to those entities appear in the document, or
10466  *  - attributes with values subject to normalization, where the
10467  *    attribute appears in the document with a value which will change
10468  *    as a result of normalization, or
10469  *  - element types with element content, if white space occurs directly
10470  *    within any instance of those types.
10471  *
10472  * Returns:
10473  *   1 if standalone="yes"
10474  *   0 if standalone="no"
10475  *  -2 if standalone attribute is missing or invalid
10476  *        (A standalone value of -2 means that the XML declaration was found,
10477  *         but no value was specified for the standalone attribute).
10478  */
10479
10480 int
10481 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10482     int standalone = -2;
10483
10484     SKIP_BLANKS;
10485     if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10486         SKIP(10);
10487         SKIP_BLANKS;
10488         if (RAW != '=') {
10489             xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10490             return(standalone);
10491         }
10492         NEXT;
10493         SKIP_BLANKS;
10494         if (RAW == '\''){
10495             NEXT;
10496             if ((RAW == 'n') && (NXT(1) == 'o')) {
10497                 standalone = 0;
10498                 SKIP(2);
10499             } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10500                        (NXT(2) == 's')) {
10501                 standalone = 1;
10502                 SKIP(3);
10503             } else {
10504                 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10505             }
10506             if (RAW != '\'') {
10507                 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10508             } else
10509                 NEXT;
10510         } else if (RAW == '"'){
10511             NEXT;
10512             if ((RAW == 'n') && (NXT(1) == 'o')) {
10513                 standalone = 0;
10514                 SKIP(2);
10515             } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10516                        (NXT(2) == 's')) {
10517                 standalone = 1;
10518                 SKIP(3);
10519             } else {
10520                 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10521             }
10522             if (RAW != '"') {
10523                 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10524             } else
10525                 NEXT;
10526         } else {
10527             xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10528         }
10529     }
10530     return(standalone);
10531 }
10532
10533 /**
10534  * xmlParseXMLDecl:
10535  * @ctxt:  an XML parser context
10536  *
10537  * parse an XML declaration header
10538  *
10539  * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10540  */
10541
10542 void
10543 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10544     xmlChar *version;
10545
10546     /*
10547      * This value for standalone indicates that the document has an
10548      * XML declaration but it does not have a standalone attribute.
10549      * It will be overwritten later if a standalone attribute is found.
10550      */
10551     ctxt->input->standalone = -2;
10552
10553     /*
10554      * We know that '<?xml' is here.
10555      */
10556     SKIP(5);
10557
10558     if (!IS_BLANK_CH(RAW)) {
10559         xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10560                        "Blank needed after '<?xml'\n");
10561     }
10562     SKIP_BLANKS;
10563
10564     /*
10565      * We must have the VersionInfo here.
10566      */
10567     version = xmlParseVersionInfo(ctxt);
10568     if (version == NULL) {
10569         xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10570     } else {
10571         if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10572             /*
10573              * Changed here for XML-1.0 5th edition
10574              */
10575             if (ctxt->options & XML_PARSE_OLD10) {
10576                 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10577                                   "Unsupported version '%s'\n",
10578                                   version);
10579             } else {
10580                 if ((version[0] == '1') && ((version[1] == '.'))) {
10581                     xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10582                                   "Unsupported version '%s'\n",
10583                                   version, NULL);
10584                 } else {
10585                     xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10586                                       "Unsupported version '%s'\n",
10587                                       version);
10588                 }
10589             }
10590         }
10591         if (ctxt->version != NULL)
10592             xmlFree((void *) ctxt->version);
10593         ctxt->version = version;
10594     }
10595
10596     /*
10597      * We may have the encoding declaration
10598      */
10599     if (!IS_BLANK_CH(RAW)) {
10600         if ((RAW == '?') && (NXT(1) == '>')) {
10601             SKIP(2);
10602             return;
10603         }
10604         xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10605     }
10606     xmlParseEncodingDecl(ctxt);
10607     if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10608          (ctxt->instate == XML_PARSER_EOF)) {
10609         /*
10610          * The XML REC instructs us to stop parsing right here
10611          */
10612         return;
10613     }
10614
10615     /*
10616      * We may have the standalone status.
10617      */
10618     if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10619         if ((RAW == '?') && (NXT(1) == '>')) {
10620             SKIP(2);
10621             return;
10622         }
10623         xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10624     }
10625
10626     /*
10627      * We can grow the input buffer freely at that point
10628      */
10629     GROW;
10630
10631     SKIP_BLANKS;
10632     ctxt->input->standalone = xmlParseSDDecl(ctxt);
10633
10634     SKIP_BLANKS;
10635     if ((RAW == '?') && (NXT(1) == '>')) {
10636         SKIP(2);
10637     } else if (RAW == '>') {
10638         /* Deprecated old WD ... */
10639         xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10640         NEXT;
10641     } else {
10642         xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10643         MOVETO_ENDTAG(CUR_PTR);
10644         NEXT;
10645     }
10646 }
10647
10648 /**
10649  * xmlParseMisc:
10650  * @ctxt:  an XML parser context
10651  *
10652  * parse an XML Misc* optional field.
10653  *
10654  * [27] Misc ::= Comment | PI |  S
10655  */
10656
10657 void
10658 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10659     while (ctxt->instate != XML_PARSER_EOF) {
10660         SKIP_BLANKS;
10661         GROW;
10662         if ((RAW == '<') && (NXT(1) == '?')) {
10663             xmlParsePI(ctxt);
10664         } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10665             xmlParseComment(ctxt);
10666         } else {
10667             break;
10668         }
10669     }
10670 }
10671
10672 /**
10673  * xmlParseDocument:
10674  * @ctxt:  an XML parser context
10675  *
10676  * parse an XML document (and build a tree if using the standard SAX
10677  * interface).
10678  *
10679  * [1] document ::= prolog element Misc*
10680  *
10681  * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10682  *
10683  * Returns 0, -1 in case of error. the parser context is augmented
10684  *                as a result of the parsing.
10685  */
10686
10687 int
10688 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10689     xmlChar start[4];
10690     xmlCharEncoding enc;
10691
10692     xmlInitParser();
10693
10694     if ((ctxt == NULL) || (ctxt->input == NULL))
10695         return(-1);
10696
10697     GROW;
10698
10699     /*
10700      * SAX: detecting the level.
10701      */
10702     xmlDetectSAX2(ctxt);
10703
10704     /*
10705      * SAX: beginning of the document processing.
10706      */
10707     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10708         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10709     if (ctxt->instate == XML_PARSER_EOF)
10710         return(-1);
10711
10712     if ((ctxt->encoding == NULL) &&
10713         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10714         /*
10715          * Get the 4 first bytes and decode the charset
10716          * if enc != XML_CHAR_ENCODING_NONE
10717          * plug some encoding conversion routines.
10718          */
10719         start[0] = RAW;
10720         start[1] = NXT(1);
10721         start[2] = NXT(2);
10722         start[3] = NXT(3);
10723         enc = xmlDetectCharEncoding(&start[0], 4);
10724         if (enc != XML_CHAR_ENCODING_NONE) {
10725             xmlSwitchEncoding(ctxt, enc);
10726         }
10727     }
10728
10729
10730     if (CUR == 0) {
10731         xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10732         return(-1);
10733     }
10734
10735     /*
10736      * Check for the XMLDecl in the Prolog.
10737      * do not GROW here to avoid the detected encoder to decode more
10738      * than just the first line, unless the amount of data is really
10739      * too small to hold "<?xml version="1.0" encoding="foo"
10740      */
10741     if ((ctxt->input->end - ctxt->input->cur) < 35) {
10742        GROW;
10743     }
10744     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10745
10746         /*
10747          * Note that we will switch encoding on the fly.
10748          */
10749         xmlParseXMLDecl(ctxt);
10750         if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10751             (ctxt->instate == XML_PARSER_EOF)) {
10752             /*
10753              * The XML REC instructs us to stop parsing right here
10754              */
10755             return(-1);
10756         }
10757         ctxt->standalone = ctxt->input->standalone;
10758         SKIP_BLANKS;
10759     } else {
10760         ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10761     }
10762     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10763         ctxt->sax->startDocument(ctxt->userData);
10764     if (ctxt->instate == XML_PARSER_EOF)
10765         return(-1);
10766     if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10767         (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10768         ctxt->myDoc->compression = ctxt->input->buf->compressed;
10769     }
10770
10771     /*
10772      * The Misc part of the Prolog
10773      */
10774     xmlParseMisc(ctxt);
10775
10776     /*
10777      * Then possibly doc type declaration(s) and more Misc
10778      * (doctypedecl Misc*)?
10779      */
10780     GROW;
10781     if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10782
10783         ctxt->inSubset = 1;
10784         xmlParseDocTypeDecl(ctxt);
10785         if (RAW == '[') {
10786             ctxt->instate = XML_PARSER_DTD;
10787             xmlParseInternalSubset(ctxt);
10788             if (ctxt->instate == XML_PARSER_EOF)
10789                 return(-1);
10790         }
10791
10792         /*
10793          * Create and update the external subset.
10794          */
10795         ctxt->inSubset = 2;
10796         if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10797             (!ctxt->disableSAX))
10798             ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10799                                       ctxt->extSubSystem, ctxt->extSubURI);
10800         if (ctxt->instate == XML_PARSER_EOF)
10801             return(-1);
10802         ctxt->inSubset = 0;
10803
10804         xmlCleanSpecialAttr(ctxt);
10805
10806         ctxt->instate = XML_PARSER_PROLOG;
10807         xmlParseMisc(ctxt);
10808     }
10809
10810     /*
10811      * Time to start parsing the tree itself
10812      */
10813     GROW;
10814     if (RAW != '<') {
10815         xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10816                        "Start tag expected, '<' not found\n");
10817     } else {
10818         ctxt->instate = XML_PARSER_CONTENT;
10819         xmlParseElement(ctxt);
10820         ctxt->instate = XML_PARSER_EPILOG;
10821
10822
10823         /*
10824          * The Misc part at the end
10825          */
10826         xmlParseMisc(ctxt);
10827
10828         if (RAW != 0) {
10829             xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10830         }
10831         ctxt->instate = XML_PARSER_EOF;
10832     }
10833
10834     /*
10835      * SAX: end of the document processing.
10836      */
10837     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10838         ctxt->sax->endDocument(ctxt->userData);
10839
10840     /*
10841      * Remove locally kept entity definitions if the tree was not built
10842      */
10843     if ((ctxt->myDoc != NULL) &&
10844         (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10845         xmlFreeDoc(ctxt->myDoc);
10846         ctxt->myDoc = NULL;
10847     }
10848
10849     if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10850         ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10851         if (ctxt->valid)
10852             ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10853         if (ctxt->nsWellFormed)
10854             ctxt->myDoc->properties |= XML_DOC_NSVALID;
10855         if (ctxt->options & XML_PARSE_OLD10)
10856             ctxt->myDoc->properties |= XML_DOC_OLD10;
10857     }
10858     if (! ctxt->wellFormed) {
10859         ctxt->valid = 0;
10860         return(-1);
10861     }
10862     return(0);
10863 }
10864
10865 /**
10866  * xmlParseExtParsedEnt:
10867  * @ctxt:  an XML parser context
10868  *
10869  * parse a general parsed entity
10870  * An external general parsed entity is well-formed if it matches the
10871  * production labeled extParsedEnt.
10872  *
10873  * [78] extParsedEnt ::= TextDecl? content
10874  *
10875  * Returns 0, -1 in case of error. the parser context is augmented
10876  *                as a result of the parsing.
10877  */
10878
10879 int
10880 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10881     xmlChar start[4];
10882     xmlCharEncoding enc;
10883
10884     if ((ctxt == NULL) || (ctxt->input == NULL))
10885         return(-1);
10886
10887     xmlDetectSAX2(ctxt);
10888
10889     GROW;
10890
10891     /*
10892      * SAX: beginning of the document processing.
10893      */
10894     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10895         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10896
10897     /*
10898      * Get the 4 first bytes and decode the charset
10899      * if enc != XML_CHAR_ENCODING_NONE
10900      * plug some encoding conversion routines.
10901      */
10902     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10903         start[0] = RAW;
10904         start[1] = NXT(1);
10905         start[2] = NXT(2);
10906         start[3] = NXT(3);
10907         enc = xmlDetectCharEncoding(start, 4);
10908         if (enc != XML_CHAR_ENCODING_NONE) {
10909             xmlSwitchEncoding(ctxt, enc);
10910         }
10911     }
10912
10913
10914     if (CUR == 0) {
10915         xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10916     }
10917
10918     /*
10919      * Check for the XMLDecl in the Prolog.
10920      */
10921     GROW;
10922     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10923
10924         /*
10925          * Note that we will switch encoding on the fly.
10926          */
10927         xmlParseXMLDecl(ctxt);
10928         if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10929             /*
10930              * The XML REC instructs us to stop parsing right here
10931              */
10932             return(-1);
10933         }
10934         SKIP_BLANKS;
10935     } else {
10936         ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10937     }
10938     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10939         ctxt->sax->startDocument(ctxt->userData);
10940     if (ctxt->instate == XML_PARSER_EOF)
10941         return(-1);
10942
10943     /*
10944      * Doing validity checking on chunk doesn't make sense
10945      */
10946     ctxt->instate = XML_PARSER_CONTENT;
10947     ctxt->validate = 0;
10948     ctxt->loadsubset = 0;
10949     ctxt->depth = 0;
10950
10951     xmlParseContent(ctxt);
10952     if (ctxt->instate == XML_PARSER_EOF)
10953         return(-1);
10954
10955     if ((RAW == '<') && (NXT(1) == '/')) {
10956         xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10957     } else if (RAW != 0) {
10958         xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10959     }
10960
10961     /*
10962      * SAX: end of the document processing.
10963      */
10964     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10965         ctxt->sax->endDocument(ctxt->userData);
10966
10967     if (! ctxt->wellFormed) return(-1);
10968     return(0);
10969 }
10970
10971 #ifdef LIBXML_PUSH_ENABLED
10972 /************************************************************************
10973  *                                                                      *
10974  *              Progressive parsing interfaces                          *
10975  *                                                                      *
10976  ************************************************************************/
10977
10978 /**
10979  * xmlParseLookupSequence:
10980  * @ctxt:  an XML parser context
10981  * @first:  the first char to lookup
10982  * @next:  the next char to lookup or zero
10983  * @third:  the next char to lookup or zero
10984  *
10985  * Try to find if a sequence (first, next, third) or  just (first next) or
10986  * (first) is available in the input stream.
10987  * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10988  * to avoid rescanning sequences of bytes, it DOES change the state of the
10989  * parser, do not use liberally.
10990  *
10991  * Returns the index to the current parsing point if the full sequence
10992  *      is available, -1 otherwise.
10993  */
10994 static int
10995 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10996                        xmlChar next, xmlChar third) {
10997     int base, len;
10998     xmlParserInputPtr in;
10999     const xmlChar *buf;
11000
11001     in = ctxt->input;
11002     if (in == NULL) return(-1);
11003     base = in->cur - in->base;
11004     if (base < 0) return(-1);
11005     if (ctxt->checkIndex > base)
11006         base = ctxt->checkIndex;
11007     if (in->buf == NULL) {
11008         buf = in->base;
11009         len = in->length;
11010     } else {
11011         buf = xmlBufContent(in->buf->buffer);
11012         len = xmlBufUse(in->buf->buffer);
11013     }
11014     /* take into account the sequence length */
11015     if (third) len -= 2;
11016     else if (next) len --;
11017     for (;base < len;base++) {
11018         if (buf[base] == first) {
11019             if (third != 0) {
11020                 if ((buf[base + 1] != next) ||
11021                     (buf[base + 2] != third)) continue;
11022             } else if (next != 0) {
11023                 if (buf[base + 1] != next) continue;
11024             }
11025             ctxt->checkIndex = 0;
11026 #ifdef DEBUG_PUSH
11027             if (next == 0)
11028                 xmlGenericError(xmlGenericErrorContext,
11029                         "PP: lookup '%c' found at %d\n",
11030                         first, base);
11031             else if (third == 0)
11032                 xmlGenericError(xmlGenericErrorContext,
11033                         "PP: lookup '%c%c' found at %d\n",
11034                         first, next, base);
11035             else
11036                 xmlGenericError(xmlGenericErrorContext,
11037                         "PP: lookup '%c%c%c' found at %d\n",
11038                         first, next, third, base);
11039 #endif
11040             return(base - (in->cur - in->base));
11041         }
11042     }
11043     ctxt->checkIndex = base;
11044 #ifdef DEBUG_PUSH
11045     if (next == 0)
11046         xmlGenericError(xmlGenericErrorContext,
11047                 "PP: lookup '%c' failed\n", first);
11048     else if (third == 0)
11049         xmlGenericError(xmlGenericErrorContext,
11050                 "PP: lookup '%c%c' failed\n", first, next);
11051     else
11052         xmlGenericError(xmlGenericErrorContext,
11053                 "PP: lookup '%c%c%c' failed\n", first, next, third);
11054 #endif
11055     return(-1);
11056 }
11057
11058 /**
11059  * xmlParseGetLasts:
11060  * @ctxt:  an XML parser context
11061  * @lastlt:  pointer to store the last '<' from the input
11062  * @lastgt:  pointer to store the last '>' from the input
11063  *
11064  * Lookup the last < and > in the current chunk
11065  */
11066 static void
11067 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11068                  const xmlChar **lastgt) {
11069     const xmlChar *tmp;
11070
11071     if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11072         xmlGenericError(xmlGenericErrorContext,
11073                     "Internal error: xmlParseGetLasts\n");
11074         return;
11075     }
11076     if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11077         tmp = ctxt->input->end;
11078         tmp--;
11079         while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11080         if (tmp < ctxt->input->base) {
11081             *lastlt = NULL;
11082             *lastgt = NULL;
11083         } else {
11084             *lastlt = tmp;
11085             tmp++;
11086             while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11087                 if (*tmp == '\'') {
11088                     tmp++;
11089                     while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11090                     if (tmp < ctxt->input->end) tmp++;
11091                 } else if (*tmp == '"') {
11092                     tmp++;
11093                     while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11094                     if (tmp < ctxt->input->end) tmp++;
11095                 } else
11096                     tmp++;
11097             }
11098             if (tmp < ctxt->input->end)
11099                 *lastgt = tmp;
11100             else {
11101                 tmp = *lastlt;
11102                 tmp--;
11103                 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11104                 if (tmp >= ctxt->input->base)
11105                     *lastgt = tmp;
11106                 else
11107                     *lastgt = NULL;
11108             }
11109         }
11110     } else {
11111         *lastlt = NULL;
11112         *lastgt = NULL;
11113     }
11114 }
11115 /**
11116  * xmlCheckCdataPush:
11117  * @cur: pointer to the block of characters
11118  * @len: length of the block in bytes
11119  * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11120  *
11121  * Check that the block of characters is okay as SCdata content [20]
11122  *
11123  * Returns the number of bytes to pass if okay, a negative index where an
11124  *         UTF-8 error occurred otherwise
11125  */
11126 static int
11127 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11128     int ix;
11129     unsigned char c;
11130     int codepoint;
11131
11132     if ((utf == NULL) || (len <= 0))
11133         return(0);
11134
11135     for (ix = 0; ix < len;) {      /* string is 0-terminated */
11136         c = utf[ix];
11137         if ((c & 0x80) == 0x00) {       /* 1-byte code, starts with 10 */
11138             if (c >= 0x20)
11139                 ix++;
11140             else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11141                 ix++;
11142             else
11143                 return(-ix);
11144         } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11145             if (ix + 2 > len) return(complete ? -ix : ix);
11146             if ((utf[ix+1] & 0xc0 ) != 0x80)
11147                 return(-ix);
11148             codepoint = (utf[ix] & 0x1f) << 6;
11149             codepoint |= utf[ix+1] & 0x3f;
11150             if (!xmlIsCharQ(codepoint))
11151                 return(-ix);
11152             ix += 2;
11153         } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11154             if (ix + 3 > len) return(complete ? -ix : ix);
11155             if (((utf[ix+1] & 0xc0) != 0x80) ||
11156                 ((utf[ix+2] & 0xc0) != 0x80))
11157                     return(-ix);
11158             codepoint = (utf[ix] & 0xf) << 12;
11159             codepoint |= (utf[ix+1] & 0x3f) << 6;
11160             codepoint |= utf[ix+2] & 0x3f;
11161             if (!xmlIsCharQ(codepoint))
11162                 return(-ix);
11163             ix += 3;
11164         } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11165             if (ix + 4 > len) return(complete ? -ix : ix);
11166             if (((utf[ix+1] & 0xc0) != 0x80) ||
11167                 ((utf[ix+2] & 0xc0) != 0x80) ||
11168                 ((utf[ix+3] & 0xc0) != 0x80))
11169                     return(-ix);
11170             codepoint = (utf[ix] & 0x7) << 18;
11171             codepoint |= (utf[ix+1] & 0x3f) << 12;
11172             codepoint |= (utf[ix+2] & 0x3f) << 6;
11173             codepoint |= utf[ix+3] & 0x3f;
11174             if (!xmlIsCharQ(codepoint))
11175                 return(-ix);
11176             ix += 4;
11177         } else                          /* unknown encoding */
11178             return(-ix);
11179       }
11180       return(ix);
11181 }
11182
11183 /**
11184  * xmlParseTryOrFinish:
11185  * @ctxt:  an XML parser context
11186  * @terminate:  last chunk indicator
11187  *
11188  * Try to progress on parsing
11189  *
11190  * Returns zero if no parsing was possible
11191  */
11192 static int
11193 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11194     int ret = 0;
11195     int avail, tlen;
11196     xmlChar cur, next;
11197     const xmlChar *lastlt, *lastgt;
11198
11199     if (ctxt->input == NULL)
11200         return(0);
11201
11202 #ifdef DEBUG_PUSH
11203     switch (ctxt->instate) {
11204         case XML_PARSER_EOF:
11205             xmlGenericError(xmlGenericErrorContext,
11206                     "PP: try EOF\n"); break;
11207         case XML_PARSER_START:
11208             xmlGenericError(xmlGenericErrorContext,
11209                     "PP: try START\n"); break;
11210         case XML_PARSER_MISC:
11211             xmlGenericError(xmlGenericErrorContext,
11212                     "PP: try MISC\n");break;
11213         case XML_PARSER_COMMENT:
11214             xmlGenericError(xmlGenericErrorContext,
11215                     "PP: try COMMENT\n");break;
11216         case XML_PARSER_PROLOG:
11217             xmlGenericError(xmlGenericErrorContext,
11218                     "PP: try PROLOG\n");break;
11219         case XML_PARSER_START_TAG:
11220             xmlGenericError(xmlGenericErrorContext,
11221                     "PP: try START_TAG\n");break;
11222         case XML_PARSER_CONTENT:
11223             xmlGenericError(xmlGenericErrorContext,
11224                     "PP: try CONTENT\n");break;
11225         case XML_PARSER_CDATA_SECTION:
11226             xmlGenericError(xmlGenericErrorContext,
11227                     "PP: try CDATA_SECTION\n");break;
11228         case XML_PARSER_END_TAG:
11229             xmlGenericError(xmlGenericErrorContext,
11230                     "PP: try END_TAG\n");break;
11231         case XML_PARSER_ENTITY_DECL:
11232             xmlGenericError(xmlGenericErrorContext,
11233                     "PP: try ENTITY_DECL\n");break;
11234         case XML_PARSER_ENTITY_VALUE:
11235             xmlGenericError(xmlGenericErrorContext,
11236                     "PP: try ENTITY_VALUE\n");break;
11237         case XML_PARSER_ATTRIBUTE_VALUE:
11238             xmlGenericError(xmlGenericErrorContext,
11239                     "PP: try ATTRIBUTE_VALUE\n");break;
11240         case XML_PARSER_DTD:
11241             xmlGenericError(xmlGenericErrorContext,
11242                     "PP: try DTD\n");break;
11243         case XML_PARSER_EPILOG:
11244             xmlGenericError(xmlGenericErrorContext,
11245                     "PP: try EPILOG\n");break;
11246         case XML_PARSER_PI:
11247             xmlGenericError(xmlGenericErrorContext,
11248                     "PP: try PI\n");break;
11249         case XML_PARSER_IGNORE:
11250             xmlGenericError(xmlGenericErrorContext,
11251                     "PP: try IGNORE\n");break;
11252     }
11253 #endif
11254
11255     if ((ctxt->input != NULL) &&
11256         (ctxt->input->cur - ctxt->input->base > 4096)) {
11257         xmlSHRINK(ctxt);
11258         ctxt->checkIndex = 0;
11259     }
11260     xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11261
11262     while (ctxt->instate != XML_PARSER_EOF) {
11263         if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11264             return(0);
11265
11266         if (ctxt->input == NULL) break;
11267         if (ctxt->input->buf == NULL)
11268             avail = ctxt->input->length -
11269                     (ctxt->input->cur - ctxt->input->base);
11270         else {
11271             /*
11272              * If we are operating on converted input, try to flush
11273              * remaining chars to avoid them stalling in the non-converted
11274              * buffer. But do not do this in document start where
11275              * encoding="..." may not have been read and we work on a
11276              * guessed encoding.
11277              */
11278             if ((ctxt->instate != XML_PARSER_START) &&
11279                 (ctxt->input->buf->raw != NULL) &&
11280                 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11281                 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11282                                                  ctxt->input);
11283                 size_t current = ctxt->input->cur - ctxt->input->base;
11284
11285                 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11286                 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11287                                       base, current);
11288             }
11289             avail = xmlBufUse(ctxt->input->buf->buffer) -
11290                     (ctxt->input->cur - ctxt->input->base);
11291         }
11292         if (avail < 1)
11293             goto done;
11294         switch (ctxt->instate) {
11295             case XML_PARSER_EOF:
11296                 /*
11297                  * Document parsing is done !
11298                  */
11299                 goto done;
11300             case XML_PARSER_START:
11301                 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11302                     xmlChar start[4];
11303                     xmlCharEncoding enc;
11304
11305                     /*
11306                      * Very first chars read from the document flow.
11307                      */
11308                     if (avail < 4)
11309                         goto done;
11310
11311                     /*
11312                      * Get the 4 first bytes and decode the charset
11313                      * if enc != XML_CHAR_ENCODING_NONE
11314                      * plug some encoding conversion routines,
11315                      * else xmlSwitchEncoding will set to (default)
11316                      * UTF8.
11317                      */
11318                     start[0] = RAW;
11319                     start[1] = NXT(1);
11320                     start[2] = NXT(2);
11321                     start[3] = NXT(3);
11322                     enc = xmlDetectCharEncoding(start, 4);
11323                     xmlSwitchEncoding(ctxt, enc);
11324                     break;
11325                 }
11326
11327                 if (avail < 2)
11328                     goto done;
11329                 cur = ctxt->input->cur[0];
11330                 next = ctxt->input->cur[1];
11331                 if (cur == 0) {
11332                     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11333                         ctxt->sax->setDocumentLocator(ctxt->userData,
11334                                                       &xmlDefaultSAXLocator);
11335                     xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11336                     xmlHaltParser(ctxt);
11337 #ifdef DEBUG_PUSH
11338                     xmlGenericError(xmlGenericErrorContext,
11339                             "PP: entering EOF\n");
11340 #endif
11341                     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11342                         ctxt->sax->endDocument(ctxt->userData);
11343                     goto done;
11344                 }
11345                 if ((cur == '<') && (next == '?')) {
11346                     /* PI or XML decl */
11347                     if (avail < 5) return(ret);
11348                     if ((!terminate) &&
11349                         (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11350                         return(ret);
11351                     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11352                         ctxt->sax->setDocumentLocator(ctxt->userData,
11353                                                       &xmlDefaultSAXLocator);
11354                     if ((ctxt->input->cur[2] == 'x') &&
11355                         (ctxt->input->cur[3] == 'm') &&
11356                         (ctxt->input->cur[4] == 'l') &&
11357                         (IS_BLANK_CH(ctxt->input->cur[5]))) {
11358                         ret += 5;
11359 #ifdef DEBUG_PUSH
11360                         xmlGenericError(xmlGenericErrorContext,
11361                                 "PP: Parsing XML Decl\n");
11362 #endif
11363                         xmlParseXMLDecl(ctxt);
11364                         if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11365                             /*
11366                              * The XML REC instructs us to stop parsing right
11367                              * here
11368                              */
11369                             xmlHaltParser(ctxt);
11370                             return(0);
11371                         }
11372                         ctxt->standalone = ctxt->input->standalone;
11373                         if ((ctxt->encoding == NULL) &&
11374                             (ctxt->input->encoding != NULL))
11375                             ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11376                         if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11377                             (!ctxt->disableSAX))
11378                             ctxt->sax->startDocument(ctxt->userData);
11379                         ctxt->instate = XML_PARSER_MISC;
11380 #ifdef DEBUG_PUSH
11381                         xmlGenericError(xmlGenericErrorContext,
11382                                 "PP: entering MISC\n");
11383 #endif
11384                     } else {
11385                         ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11386                         if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11387                             (!ctxt->disableSAX))
11388                             ctxt->sax->startDocument(ctxt->userData);
11389                         ctxt->instate = XML_PARSER_MISC;
11390 #ifdef DEBUG_PUSH
11391                         xmlGenericError(xmlGenericErrorContext,
11392                                 "PP: entering MISC\n");
11393 #endif
11394                     }
11395                 } else {
11396                     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11397                         ctxt->sax->setDocumentLocator(ctxt->userData,
11398                                                       &xmlDefaultSAXLocator);
11399                     ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11400                     if (ctxt->version == NULL) {
11401                         xmlErrMemory(ctxt, NULL);
11402                         break;
11403                     }
11404                     if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11405                         (!ctxt->disableSAX))
11406                         ctxt->sax->startDocument(ctxt->userData);
11407                     ctxt->instate = XML_PARSER_MISC;
11408 #ifdef DEBUG_PUSH
11409                     xmlGenericError(xmlGenericErrorContext,
11410                             "PP: entering MISC\n");
11411 #endif
11412                 }
11413                 break;
11414             case XML_PARSER_START_TAG: {
11415                 const xmlChar *name;
11416                 const xmlChar *prefix = NULL;
11417                 const xmlChar *URI = NULL;
11418                 int line = ctxt->input->line;
11419                 int nsNr = ctxt->nsNr;
11420
11421                 if ((avail < 2) && (ctxt->inputNr == 1))
11422                     goto done;
11423                 cur = ctxt->input->cur[0];
11424                 if (cur != '<') {
11425                     xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11426                     xmlHaltParser(ctxt);
11427                     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11428                         ctxt->sax->endDocument(ctxt->userData);
11429                     goto done;
11430                 }
11431                 if (!terminate) {
11432                     if (ctxt->progressive) {
11433                         /* > can be found unescaped in attribute values */
11434                         if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11435                             goto done;
11436                     } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11437                         goto done;
11438                     }
11439                 }
11440                 if (ctxt->spaceNr == 0)
11441                     spacePush(ctxt, -1);
11442                 else if (*ctxt->space == -2)
11443                     spacePush(ctxt, -1);
11444                 else
11445                     spacePush(ctxt, *ctxt->space);
11446 #ifdef LIBXML_SAX1_ENABLED
11447                 if (ctxt->sax2)
11448 #endif /* LIBXML_SAX1_ENABLED */
11449                     name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11450 #ifdef LIBXML_SAX1_ENABLED
11451                 else
11452                     name = xmlParseStartTag(ctxt);
11453 #endif /* LIBXML_SAX1_ENABLED */
11454                 if (ctxt->instate == XML_PARSER_EOF)
11455                     goto done;
11456                 if (name == NULL) {
11457                     spacePop(ctxt);
11458                     xmlHaltParser(ctxt);
11459                     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11460                         ctxt->sax->endDocument(ctxt->userData);
11461                     goto done;
11462                 }
11463 #ifdef LIBXML_VALID_ENABLED
11464                 /*
11465                  * [ VC: Root Element Type ]
11466                  * The Name in the document type declaration must match
11467                  * the element type of the root element.
11468                  */
11469                 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11470                     ctxt->node && (ctxt->node == ctxt->myDoc->children))
11471                     ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11472 #endif /* LIBXML_VALID_ENABLED */
11473
11474                 /*
11475                  * Check for an Empty Element.
11476                  */
11477                 if ((RAW == '/') && (NXT(1) == '>')) {
11478                     SKIP(2);
11479
11480                     if (ctxt->sax2) {
11481                         if ((ctxt->sax != NULL) &&
11482                             (ctxt->sax->endElementNs != NULL) &&
11483                             (!ctxt->disableSAX))
11484                             ctxt->sax->endElementNs(ctxt->userData, name,
11485                                                     prefix, URI);
11486                         if (ctxt->nsNr - nsNr > 0)
11487                             nsPop(ctxt, ctxt->nsNr - nsNr);
11488 #ifdef LIBXML_SAX1_ENABLED
11489                     } else {
11490                         if ((ctxt->sax != NULL) &&
11491                             (ctxt->sax->endElement != NULL) &&
11492                             (!ctxt->disableSAX))
11493                             ctxt->sax->endElement(ctxt->userData, name);
11494 #endif /* LIBXML_SAX1_ENABLED */
11495                     }
11496                     if (ctxt->instate == XML_PARSER_EOF)
11497                         goto done;
11498                     spacePop(ctxt);
11499                     if (ctxt->nameNr == 0) {
11500                         ctxt->instate = XML_PARSER_EPILOG;
11501                     } else {
11502                         ctxt->instate = XML_PARSER_CONTENT;
11503                     }
11504                     ctxt->progressive = 1;
11505                     break;
11506                 }
11507                 if (RAW == '>') {
11508                     NEXT;
11509                 } else {
11510                     xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11511                                          "Couldn't find end of Start Tag %s\n",
11512                                          name);
11513                     nodePop(ctxt);
11514                     spacePop(ctxt);
11515                 }
11516                 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11517
11518                 ctxt->instate = XML_PARSER_CONTENT;
11519                 ctxt->progressive = 1;
11520                 break;
11521             }
11522             case XML_PARSER_CONTENT: {
11523                 int id;
11524                 unsigned long cons;
11525                 if ((avail < 2) && (ctxt->inputNr == 1))
11526                     goto done;
11527                 cur = ctxt->input->cur[0];
11528                 next = ctxt->input->cur[1];
11529
11530                 id = ctxt->input->id;
11531                 cons = CUR_CONSUMED;
11532                 if ((cur == '<') && (next == '/')) {
11533                     ctxt->instate = XML_PARSER_END_TAG;
11534                     break;
11535                 } else if ((cur == '<') && (next == '?')) {
11536                     if ((!terminate) &&
11537                         (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11538                         ctxt->progressive = XML_PARSER_PI;
11539                         goto done;
11540                     }
11541                     xmlParsePI(ctxt);
11542                     ctxt->instate = XML_PARSER_CONTENT;
11543                     ctxt->progressive = 1;
11544                 } else if ((cur == '<') && (next != '!')) {
11545                     ctxt->instate = XML_PARSER_START_TAG;
11546                     break;
11547                 } else if ((cur == '<') && (next == '!') &&
11548                            (ctxt->input->cur[2] == '-') &&
11549                            (ctxt->input->cur[3] == '-')) {
11550                     int term;
11551
11552                     if (avail < 4)
11553                         goto done;
11554                     ctxt->input->cur += 4;
11555                     term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11556                     ctxt->input->cur -= 4;
11557                     if ((!terminate) && (term < 0)) {
11558                         ctxt->progressive = XML_PARSER_COMMENT;
11559                         goto done;
11560                     }
11561                     xmlParseComment(ctxt);
11562                     ctxt->instate = XML_PARSER_CONTENT;
11563                     ctxt->progressive = 1;
11564                 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11565                     (ctxt->input->cur[2] == '[') &&
11566                     (ctxt->input->cur[3] == 'C') &&
11567                     (ctxt->input->cur[4] == 'D') &&
11568                     (ctxt->input->cur[5] == 'A') &&
11569                     (ctxt->input->cur[6] == 'T') &&
11570                     (ctxt->input->cur[7] == 'A') &&
11571                     (ctxt->input->cur[8] == '[')) {
11572                     SKIP(9);
11573                     ctxt->instate = XML_PARSER_CDATA_SECTION;
11574                     break;
11575                 } else if ((cur == '<') && (next == '!') &&
11576                            (avail < 9)) {
11577                     goto done;
11578                 } else if (cur == '&') {
11579                     if ((!terminate) &&
11580                         (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11581                         goto done;
11582                     xmlParseReference(ctxt);
11583                 } else {
11584                     /* TODO Avoid the extra copy, handle directly !!! */
11585                     /*
11586                      * Goal of the following test is:
11587                      *  - minimize calls to the SAX 'character' callback
11588                      *    when they are mergeable
11589                      *  - handle an problem for isBlank when we only parse
11590                      *    a sequence of blank chars and the next one is
11591                      *    not available to check against '<' presence.
11592                      *  - tries to homogenize the differences in SAX
11593                      *    callbacks between the push and pull versions
11594                      *    of the parser.
11595                      */
11596                     if ((ctxt->inputNr == 1) &&
11597                         (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11598                         if (!terminate) {
11599                             if (ctxt->progressive) {
11600                                 if ((lastlt == NULL) ||
11601                                     (ctxt->input->cur > lastlt))
11602                                     goto done;
11603                             } else if (xmlParseLookupSequence(ctxt,
11604                                                               '<', 0, 0) < 0) {
11605                                 goto done;
11606                             }
11607                         }
11608                     }
11609                     ctxt->checkIndex = 0;
11610                     xmlParseCharData(ctxt, 0);
11611                 }
11612                 if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
11613                     xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11614                                 "detected an error in element content\n");
11615                     xmlHaltParser(ctxt);
11616                     break;
11617                 }
11618                 break;
11619             }
11620             case XML_PARSER_END_TAG:
11621                 if (avail < 2)
11622                     goto done;
11623                 if (!terminate) {
11624                     if (ctxt->progressive) {
11625                         /* > can be found unescaped in attribute values */
11626                         if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11627                             goto done;
11628                     } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11629                         goto done;
11630                     }
11631                 }
11632                 if (ctxt->sax2) {
11633                     xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11634                     nameNsPop(ctxt);
11635                 }
11636 #ifdef LIBXML_SAX1_ENABLED
11637                   else
11638                     xmlParseEndTag1(ctxt, 0);
11639 #endif /* LIBXML_SAX1_ENABLED */
11640                 if (ctxt->instate == XML_PARSER_EOF) {
11641                     /* Nothing */
11642                 } else if (ctxt->nameNr == 0) {
11643                     ctxt->instate = XML_PARSER_EPILOG;
11644                 } else {
11645                     ctxt->instate = XML_PARSER_CONTENT;
11646                 }
11647                 break;
11648             case XML_PARSER_CDATA_SECTION: {
11649                 /*
11650                  * The Push mode need to have the SAX callback for
11651                  * cdataBlock merge back contiguous callbacks.
11652                  */
11653                 int base;
11654
11655                 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11656                 if (base < 0) {
11657                     if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11658                         int tmp;
11659
11660                         tmp = xmlCheckCdataPush(ctxt->input->cur,
11661                                                 XML_PARSER_BIG_BUFFER_SIZE, 0);
11662                         if (tmp < 0) {
11663                             tmp = -tmp;
11664                             ctxt->input->cur += tmp;
11665                             goto encoding_error;
11666                         }
11667                         if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11668                             if (ctxt->sax->cdataBlock != NULL)
11669                                 ctxt->sax->cdataBlock(ctxt->userData,
11670                                                       ctxt->input->cur, tmp);
11671                             else if (ctxt->sax->characters != NULL)
11672                                 ctxt->sax->characters(ctxt->userData,
11673                                                       ctxt->input->cur, tmp);
11674                         }
11675                         if (ctxt->instate == XML_PARSER_EOF)
11676                             goto done;
11677                         SKIPL(tmp);
11678                         ctxt->checkIndex = 0;
11679                     }
11680                     goto done;
11681                 } else {
11682                     int tmp;
11683
11684                     tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11685                     if ((tmp < 0) || (tmp != base)) {
11686                         tmp = -tmp;
11687                         ctxt->input->cur += tmp;
11688                         goto encoding_error;
11689                     }
11690                     if ((ctxt->sax != NULL) && (base == 0) &&
11691                         (ctxt->sax->cdataBlock != NULL) &&
11692                         (!ctxt->disableSAX)) {
11693                         /*
11694                          * Special case to provide identical behaviour
11695                          * between pull and push parsers on enpty CDATA
11696                          * sections
11697                          */
11698                          if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11699                              (!strncmp((const char *)&ctxt->input->cur[-9],
11700                                        "<![CDATA[", 9)))
11701                              ctxt->sax->cdataBlock(ctxt->userData,
11702                                                    BAD_CAST "", 0);
11703                     } else if ((ctxt->sax != NULL) && (base > 0) &&
11704                         (!ctxt->disableSAX)) {
11705                         if (ctxt->sax->cdataBlock != NULL)
11706                             ctxt->sax->cdataBlock(ctxt->userData,
11707                                                   ctxt->input->cur, base);
11708                         else if (ctxt->sax->characters != NULL)
11709                             ctxt->sax->characters(ctxt->userData,
11710                                                   ctxt->input->cur, base);
11711                     }
11712                     if (ctxt->instate == XML_PARSER_EOF)
11713                         goto done;
11714                     SKIPL(base + 3);
11715                     ctxt->checkIndex = 0;
11716                     ctxt->instate = XML_PARSER_CONTENT;
11717 #ifdef DEBUG_PUSH
11718                     xmlGenericError(xmlGenericErrorContext,
11719                             "PP: entering CONTENT\n");
11720 #endif
11721                 }
11722                 break;
11723             }
11724             case XML_PARSER_MISC:
11725                 SKIP_BLANKS;
11726                 if (ctxt->input->buf == NULL)
11727                     avail = ctxt->input->length -
11728                             (ctxt->input->cur - ctxt->input->base);
11729                 else
11730                     avail = xmlBufUse(ctxt->input->buf->buffer) -
11731                             (ctxt->input->cur - ctxt->input->base);
11732                 if (avail < 2)
11733                     goto done;
11734                 cur = ctxt->input->cur[0];
11735                 next = ctxt->input->cur[1];
11736                 if ((cur == '<') && (next == '?')) {
11737                     if ((!terminate) &&
11738                         (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11739                         ctxt->progressive = XML_PARSER_PI;
11740                         goto done;
11741                     }
11742 #ifdef DEBUG_PUSH
11743                     xmlGenericError(xmlGenericErrorContext,
11744                             "PP: Parsing PI\n");
11745 #endif
11746                     xmlParsePI(ctxt);
11747                     if (ctxt->instate == XML_PARSER_EOF)
11748                         goto done;
11749                     ctxt->instate = XML_PARSER_MISC;
11750                     ctxt->progressive = 1;
11751                     ctxt->checkIndex = 0;
11752                 } else if ((cur == '<') && (next == '!') &&
11753                     (ctxt->input->cur[2] == '-') &&
11754                     (ctxt->input->cur[3] == '-')) {
11755                     if ((!terminate) &&
11756                         (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11757                         ctxt->progressive = XML_PARSER_COMMENT;
11758                         goto done;
11759                     }
11760 #ifdef DEBUG_PUSH
11761                     xmlGenericError(xmlGenericErrorContext,
11762                             "PP: Parsing Comment\n");
11763 #endif
11764                     xmlParseComment(ctxt);
11765                     if (ctxt->instate == XML_PARSER_EOF)
11766                         goto done;
11767                     ctxt->instate = XML_PARSER_MISC;
11768                     ctxt->progressive = 1;
11769                     ctxt->checkIndex = 0;
11770                 } else if ((cur == '<') && (next == '!') &&
11771                     (ctxt->input->cur[2] == 'D') &&
11772                     (ctxt->input->cur[3] == 'O') &&
11773                     (ctxt->input->cur[4] == 'C') &&
11774                     (ctxt->input->cur[5] == 'T') &&
11775                     (ctxt->input->cur[6] == 'Y') &&
11776                     (ctxt->input->cur[7] == 'P') &&
11777                     (ctxt->input->cur[8] == 'E')) {
11778                     if ((!terminate) &&
11779                         (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11780                         ctxt->progressive = XML_PARSER_DTD;
11781                         goto done;
11782                     }
11783 #ifdef DEBUG_PUSH
11784                     xmlGenericError(xmlGenericErrorContext,
11785                             "PP: Parsing internal subset\n");
11786 #endif
11787                     ctxt->inSubset = 1;
11788                     ctxt->progressive = 0;
11789                     ctxt->checkIndex = 0;
11790                     xmlParseDocTypeDecl(ctxt);
11791                     if (ctxt->instate == XML_PARSER_EOF)
11792                         goto done;
11793                     if (RAW == '[') {
11794                         ctxt->instate = XML_PARSER_DTD;
11795 #ifdef DEBUG_PUSH
11796                         xmlGenericError(xmlGenericErrorContext,
11797                                 "PP: entering DTD\n");
11798 #endif
11799                     } else {
11800                         /*
11801                          * Create and update the external subset.
11802                          */
11803                         ctxt->inSubset = 2;
11804                         if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11805                             (ctxt->sax->externalSubset != NULL))
11806                             ctxt->sax->externalSubset(ctxt->userData,
11807                                     ctxt->intSubName, ctxt->extSubSystem,
11808                                     ctxt->extSubURI);
11809                         ctxt->inSubset = 0;
11810                         xmlCleanSpecialAttr(ctxt);
11811                         ctxt->instate = XML_PARSER_PROLOG;
11812 #ifdef DEBUG_PUSH
11813                         xmlGenericError(xmlGenericErrorContext,
11814                                 "PP: entering PROLOG\n");
11815 #endif
11816                     }
11817                 } else if ((cur == '<') && (next == '!') &&
11818                            (avail < 9)) {
11819                     goto done;
11820                 } else {
11821                     ctxt->instate = XML_PARSER_START_TAG;
11822                     ctxt->progressive = XML_PARSER_START_TAG;
11823                     xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11824 #ifdef DEBUG_PUSH
11825                     xmlGenericError(xmlGenericErrorContext,
11826                             "PP: entering START_TAG\n");
11827 #endif
11828                 }
11829                 break;
11830             case XML_PARSER_PROLOG:
11831                 SKIP_BLANKS;
11832                 if (ctxt->input->buf == NULL)
11833                     avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11834                 else
11835                     avail = xmlBufUse(ctxt->input->buf->buffer) -
11836                             (ctxt->input->cur - ctxt->input->base);
11837                 if (avail < 2)
11838                     goto done;
11839                 cur = ctxt->input->cur[0];
11840                 next = ctxt->input->cur[1];
11841                 if ((cur == '<') && (next == '?')) {
11842                     if ((!terminate) &&
11843                         (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11844                         ctxt->progressive = XML_PARSER_PI;
11845                         goto done;
11846                     }
11847 #ifdef DEBUG_PUSH
11848                     xmlGenericError(xmlGenericErrorContext,
11849                             "PP: Parsing PI\n");
11850 #endif
11851                     xmlParsePI(ctxt);
11852                     if (ctxt->instate == XML_PARSER_EOF)
11853                         goto done;
11854                     ctxt->instate = XML_PARSER_PROLOG;
11855                     ctxt->progressive = 1;
11856                 } else if ((cur == '<') && (next == '!') &&
11857                     (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11858                     if ((!terminate) &&
11859                         (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11860                         ctxt->progressive = XML_PARSER_COMMENT;
11861                         goto done;
11862                     }
11863 #ifdef DEBUG_PUSH
11864                     xmlGenericError(xmlGenericErrorContext,
11865                             "PP: Parsing Comment\n");
11866 #endif
11867                     xmlParseComment(ctxt);
11868                     if (ctxt->instate == XML_PARSER_EOF)
11869                         goto done;
11870                     ctxt->instate = XML_PARSER_PROLOG;
11871                     ctxt->progressive = 1;
11872                 } else if ((cur == '<') && (next == '!') &&
11873                            (avail < 4)) {
11874                     goto done;
11875                 } else {
11876                     ctxt->instate = XML_PARSER_START_TAG;
11877                     if (ctxt->progressive == 0)
11878                         ctxt->progressive = XML_PARSER_START_TAG;
11879                     xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11880 #ifdef DEBUG_PUSH
11881                     xmlGenericError(xmlGenericErrorContext,
11882                             "PP: entering START_TAG\n");
11883 #endif
11884                 }
11885                 break;
11886             case XML_PARSER_EPILOG:
11887                 SKIP_BLANKS;
11888                 if (ctxt->input->buf == NULL)
11889                     avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11890                 else
11891                     avail = xmlBufUse(ctxt->input->buf->buffer) -
11892                             (ctxt->input->cur - ctxt->input->base);
11893                 if (avail < 2)
11894                     goto done;
11895                 cur = ctxt->input->cur[0];
11896                 next = ctxt->input->cur[1];
11897                 if ((cur == '<') && (next == '?')) {
11898                     if ((!terminate) &&
11899                         (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11900                         ctxt->progressive = XML_PARSER_PI;
11901                         goto done;
11902                     }
11903 #ifdef DEBUG_PUSH
11904                     xmlGenericError(xmlGenericErrorContext,
11905                             "PP: Parsing PI\n");
11906 #endif
11907                     xmlParsePI(ctxt);
11908                     if (ctxt->instate == XML_PARSER_EOF)
11909                         goto done;
11910                     ctxt->instate = XML_PARSER_EPILOG;
11911                     ctxt->progressive = 1;
11912                 } else if ((cur == '<') && (next == '!') &&
11913                     (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11914                     if ((!terminate) &&
11915                         (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11916                         ctxt->progressive = XML_PARSER_COMMENT;
11917                         goto done;
11918                     }
11919 #ifdef DEBUG_PUSH
11920                     xmlGenericError(xmlGenericErrorContext,
11921                             "PP: Parsing Comment\n");
11922 #endif
11923                     xmlParseComment(ctxt);
11924                     if (ctxt->instate == XML_PARSER_EOF)
11925                         goto done;
11926                     ctxt->instate = XML_PARSER_EPILOG;
11927                     ctxt->progressive = 1;
11928                 } else if ((cur == '<') && (next == '!') &&
11929                            (avail < 4)) {
11930                     goto done;
11931                 } else {
11932                     xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11933                     xmlHaltParser(ctxt);
11934 #ifdef DEBUG_PUSH
11935                     xmlGenericError(xmlGenericErrorContext,
11936                             "PP: entering EOF\n");
11937 #endif
11938                     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11939                         ctxt->sax->endDocument(ctxt->userData);
11940                     goto done;
11941                 }
11942                 break;
11943             case XML_PARSER_DTD: {
11944                 /*
11945                  * Sorry but progressive parsing of the internal subset
11946                  * is not expected to be supported. We first check that
11947                  * the full content of the internal subset is available and
11948                  * the parsing is launched only at that point.
11949                  * Internal subset ends up with "']' S? '>'" in an unescaped
11950                  * section and not in a ']]>' sequence which are conditional
11951                  * sections (whoever argued to keep that crap in XML deserve
11952                  * a place in hell !).
11953                  */
11954                 int base, i;
11955                 xmlChar *buf;
11956                 xmlChar quote = 0;
11957                 size_t use;
11958
11959                 base = ctxt->input->cur - ctxt->input->base;
11960                 if (base < 0) return(0);
11961                 if (ctxt->checkIndex > base)
11962                     base = ctxt->checkIndex;
11963                 buf = xmlBufContent(ctxt->input->buf->buffer);
11964                 use = xmlBufUse(ctxt->input->buf->buffer);
11965                 for (;(unsigned int) base < use; base++) {
11966                     if (quote != 0) {
11967                         if (buf[base] == quote)
11968                             quote = 0;
11969                         continue;
11970                     }
11971                     if ((quote == 0) && (buf[base] == '<')) {
11972                         int found  = 0;
11973                         /* special handling of comments */
11974                         if (((unsigned int) base + 4 < use) &&
11975                             (buf[base + 1] == '!') &&
11976                             (buf[base + 2] == '-') &&
11977                             (buf[base + 3] == '-')) {
11978                             for (;(unsigned int) base + 3 < use; base++) {
11979                                 if ((buf[base] == '-') &&
11980                                     (buf[base + 1] == '-') &&
11981                                     (buf[base + 2] == '>')) {
11982                                     found = 1;
11983                                     base += 2;
11984                                     break;
11985                                 }
11986                             }
11987                             if (!found) {
11988 #if 0
11989                                 fprintf(stderr, "unfinished comment\n");
11990 #endif
11991                                 break; /* for */
11992                             }
11993                             continue;
11994                         }
11995                     }
11996                     if (buf[base] == '"') {
11997                         quote = '"';
11998                         continue;
11999                     }
12000                     if (buf[base] == '\'') {
12001                         quote = '\'';
12002                         continue;
12003                     }
12004                     if (buf[base] == ']') {
12005 #if 0
12006                         fprintf(stderr, "%c%c%c%c: ", buf[base],
12007                                 buf[base + 1], buf[base + 2], buf[base + 3]);
12008 #endif
12009                         if ((unsigned int) base +1 >= use)
12010                             break;
12011                         if (buf[base + 1] == ']') {
12012                             /* conditional crap, skip both ']' ! */
12013                             base++;
12014                             continue;
12015                         }
12016                         for (i = 1; (unsigned int) base + i < use; i++) {
12017                             if (buf[base + i] == '>') {
12018 #if 0
12019                                 fprintf(stderr, "found\n");
12020 #endif
12021                                 goto found_end_int_subset;
12022                             }
12023                             if (!IS_BLANK_CH(buf[base + i])) {
12024 #if 0
12025                                 fprintf(stderr, "not found\n");
12026 #endif
12027                                 goto not_end_of_int_subset;
12028                             }
12029                         }
12030 #if 0
12031                         fprintf(stderr, "end of stream\n");
12032 #endif
12033                         break;
12034
12035                     }
12036 not_end_of_int_subset:
12037                     continue; /* for */
12038                 }
12039                 /*
12040                  * We didn't found the end of the Internal subset
12041                  */
12042                 if (quote == 0)
12043                     ctxt->checkIndex = base;
12044                 else
12045                     ctxt->checkIndex = 0;
12046 #ifdef DEBUG_PUSH
12047                 if (next == 0)
12048                     xmlGenericError(xmlGenericErrorContext,
12049                             "PP: lookup of int subset end filed\n");
12050 #endif
12051                 goto done;
12052
12053 found_end_int_subset:
12054                 ctxt->checkIndex = 0;
12055                 xmlParseInternalSubset(ctxt);
12056                 if (ctxt->instate == XML_PARSER_EOF)
12057                     goto done;
12058                 ctxt->inSubset = 2;
12059                 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12060                     (ctxt->sax->externalSubset != NULL))
12061                     ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12062                             ctxt->extSubSystem, ctxt->extSubURI);
12063                 ctxt->inSubset = 0;
12064                 xmlCleanSpecialAttr(ctxt);
12065                 if (ctxt->instate == XML_PARSER_EOF)
12066                     goto done;
12067                 ctxt->instate = XML_PARSER_PROLOG;
12068                 ctxt->checkIndex = 0;
12069 #ifdef DEBUG_PUSH
12070                 xmlGenericError(xmlGenericErrorContext,
12071                         "PP: entering PROLOG\n");
12072 #endif
12073                 break;
12074             }
12075             case XML_PARSER_COMMENT:
12076                 xmlGenericError(xmlGenericErrorContext,
12077                         "PP: internal error, state == COMMENT\n");
12078                 ctxt->instate = XML_PARSER_CONTENT;
12079 #ifdef DEBUG_PUSH
12080                 xmlGenericError(xmlGenericErrorContext,
12081                         "PP: entering CONTENT\n");
12082 #endif
12083                 break;
12084             case XML_PARSER_IGNORE:
12085                 xmlGenericError(xmlGenericErrorContext,
12086                         "PP: internal error, state == IGNORE");
12087                 ctxt->instate = XML_PARSER_DTD;
12088 #ifdef DEBUG_PUSH
12089                 xmlGenericError(xmlGenericErrorContext,
12090                         "PP: entering DTD\n");
12091 #endif
12092                 break;
12093             case XML_PARSER_PI:
12094                 xmlGenericError(xmlGenericErrorContext,
12095                         "PP: internal error, state == PI\n");
12096                 ctxt->instate = XML_PARSER_CONTENT;
12097 #ifdef DEBUG_PUSH
12098                 xmlGenericError(xmlGenericErrorContext,
12099                         "PP: entering CONTENT\n");
12100 #endif
12101                 break;
12102             case XML_PARSER_ENTITY_DECL:
12103                 xmlGenericError(xmlGenericErrorContext,
12104                         "PP: internal error, state == ENTITY_DECL\n");
12105                 ctxt->instate = XML_PARSER_DTD;
12106 #ifdef DEBUG_PUSH
12107                 xmlGenericError(xmlGenericErrorContext,
12108                         "PP: entering DTD\n");
12109 #endif
12110                 break;
12111             case XML_PARSER_ENTITY_VALUE:
12112                 xmlGenericError(xmlGenericErrorContext,
12113                         "PP: internal error, state == ENTITY_VALUE\n");
12114                 ctxt->instate = XML_PARSER_CONTENT;
12115 #ifdef DEBUG_PUSH
12116                 xmlGenericError(xmlGenericErrorContext,
12117                         "PP: entering DTD\n");
12118 #endif
12119                 break;
12120             case XML_PARSER_ATTRIBUTE_VALUE:
12121                 xmlGenericError(xmlGenericErrorContext,
12122                         "PP: internal error, state == ATTRIBUTE_VALUE\n");
12123                 ctxt->instate = XML_PARSER_START_TAG;
12124 #ifdef DEBUG_PUSH
12125                 xmlGenericError(xmlGenericErrorContext,
12126                         "PP: entering START_TAG\n");
12127 #endif
12128                 break;
12129             case XML_PARSER_SYSTEM_LITERAL:
12130                 xmlGenericError(xmlGenericErrorContext,
12131                         "PP: internal error, state == SYSTEM_LITERAL\n");
12132                 ctxt->instate = XML_PARSER_START_TAG;
12133 #ifdef DEBUG_PUSH
12134                 xmlGenericError(xmlGenericErrorContext,
12135                         "PP: entering START_TAG\n");
12136 #endif
12137                 break;
12138             case XML_PARSER_PUBLIC_LITERAL:
12139                 xmlGenericError(xmlGenericErrorContext,
12140                         "PP: internal error, state == PUBLIC_LITERAL\n");
12141                 ctxt->instate = XML_PARSER_START_TAG;
12142 #ifdef DEBUG_PUSH
12143                 xmlGenericError(xmlGenericErrorContext,
12144                         "PP: entering START_TAG\n");
12145 #endif
12146                 break;
12147         }
12148     }
12149 done:
12150 #ifdef DEBUG_PUSH
12151     xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12152 #endif
12153     return(ret);
12154 encoding_error:
12155     {
12156         char buffer[150];
12157
12158         snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12159                         ctxt->input->cur[0], ctxt->input->cur[1],
12160                         ctxt->input->cur[2], ctxt->input->cur[3]);
12161         __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12162                      "Input is not proper UTF-8, indicate encoding !\n%s",
12163                      BAD_CAST buffer, NULL);
12164     }
12165     return(0);
12166 }
12167
12168 /**
12169  * xmlParseCheckTransition:
12170  * @ctxt:  an XML parser context
12171  * @chunk:  a char array
12172  * @size:  the size in byte of the chunk
12173  *
12174  * Check depending on the current parser state if the chunk given must be
12175  * processed immediately or one need more data to advance on parsing.
12176  *
12177  * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12178  */
12179 static int
12180 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12181     if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12182         return(-1);
12183     if (ctxt->instate == XML_PARSER_START_TAG) {
12184         if (memchr(chunk, '>', size) != NULL)
12185             return(1);
12186         return(0);
12187     }
12188     if (ctxt->progressive == XML_PARSER_COMMENT) {
12189         if (memchr(chunk, '>', size) != NULL)
12190             return(1);
12191         return(0);
12192     }
12193     if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12194         if (memchr(chunk, '>', size) != NULL)
12195             return(1);
12196         return(0);
12197     }
12198     if (ctxt->progressive == XML_PARSER_PI) {
12199         if (memchr(chunk, '>', size) != NULL)
12200             return(1);
12201         return(0);
12202     }
12203     if (ctxt->instate == XML_PARSER_END_TAG) {
12204         if (memchr(chunk, '>', size) != NULL)
12205             return(1);
12206         return(0);
12207     }
12208     if ((ctxt->progressive == XML_PARSER_DTD) ||
12209         (ctxt->instate == XML_PARSER_DTD)) {
12210         if (memchr(chunk, '>', size) != NULL)
12211             return(1);
12212         return(0);
12213     }
12214     return(1);
12215 }
12216
/**
 * xmlParseChunk:
 * @ctxt:  an XML parser context
 * @chunk:  a char array
 * @size:  the size in byte of the chunk
 * @terminate:  last chunk indicator
 *
 * Parse a Chunk of memory
 *
 * The chunk is appended to the input buffer of @ctxt and, unless
 * xmlParseCheckTransition() reports that more data should be awaited,
 * xmlParseTryOrFinish() is run on the accumulated input. When @terminate
 * is non-zero the parser state is set to XML_PARSER_EOF before returning,
 * and the SAX endDocument callback, if any, is invoked unless the parser
 * was already in that state.
 *
 * Returns zero if no error, the xmlParserErrors otherwise. Note that -1 is
 * returned when the parser is already in the XML_PARSER_EOF state on entry.
 */
int
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
              int terminate) {
    /* Despite its name, this flag records a trailing '\r' held back below. */
    int end_in_lf = 0;
    /* Bytes of the chunk left for the second pass through xmldecl_done. */
    int remain = 0;
    /* Input buffer usage before and after the push, respectively. */
    size_t old_avail = 0;
    size_t avail = 0;

    if (ctxt == NULL)
        return(XML_ERR_INTERNAL_ERROR);
    /* A previous error already disabled SAX: report it again. */
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
        return(ctxt->errNo);
    if (ctxt->instate == XML_PARSER_EOF)
        return(-1);
    if (ctxt->instate == XML_PARSER_START)
        xmlDetectSAX2(ctxt);
    /*
     * When this is not the last chunk and it ends with '\r', withhold that
     * byte from the push; it is appended on its own near the end of this
     * function. Presumably this avoids handling a CR before knowing what
     * follows it in the next chunk -- TODO confirm.
     */
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
        (chunk[size - 1] == '\r')) {
        end_in_lf = 1;
        size--;
    }

    /*
     * Re-entered through a goto once the first part of the chunk has been
     * handled, in order to push the part counted in 'remain'.
     */
xmldecl_done:

    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
        /*
         * Record the input offsets so that they can be re-applied with
         * xmlBufSetInputBaseCur() once the push is done.
         */
        size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
        size_t cur = ctxt->input->cur - ctxt->input->base;
        int res;

        old_avail = xmlBufUse(ctxt->input->buf->buffer);
        /*
         * Specific handling if we autodetected an encoding, we should not
         * push more than the first line ... which depend on the encoding
         * And only push the rest once the final encoding was detected
         */
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
            /* Byte budget for the first push; larger for wider encodings. */
            unsigned int len = 45;

            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
                               BAD_CAST "UTF-16")) ||
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
                               BAD_CAST "UTF16")))
                len = 90;
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
                                    BAD_CAST "UCS-4")) ||
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
                                    BAD_CAST "UCS4")))
                len = 180;

            /* Deduct what the input buffer reports as already consumed. */
            if (ctxt->input->buf->rawconsumed < len)
                len -= ctxt->input->buf->rawconsumed;

            /*
             * Change size for reading the initial declaration only
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
             * will blindly copy extra bytes from memory.
             */
            if ((unsigned int) size > len) {
                remain = size - len;
                size = len;
            } else {
                remain = 0;
            }
        }
        res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
        if (res < 0) {
            /*
             * NOTE(review): XML_PARSER_EOF is the value compared against
             * ctxt->instate elsewhere in this function, yet here it is
             * stored in errNo and returned -- confirm this is intended.
             */
            ctxt->errNo = XML_PARSER_EOF;
            xmlHaltParser(ctxt);
            return (XML_PARSER_EOF);
        }
#ifdef DEBUG_PUSH
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
#endif

    } else if (ctxt->instate != XML_PARSER_EOF) {
        /*
         * Nothing was pushed. If the input buffer has an encoder together
         * with both a converted and a raw buffer, call xmlCharEncInput()
         * on it, preserving the input offsets around the call.
         */
        if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
            xmlParserInputBufferPtr in = ctxt->input->buf;
            if ((in->encoder != NULL) && (in->buffer != NULL) &&
                    (in->raw != NULL)) {
                int nbchars;
                size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
                size_t current = ctxt->input->cur - ctxt->input->base;

                nbchars = xmlCharEncInput(in, terminate);
                xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
                if (nbchars < 0) {
                    /* TODO 2.6.0 */
                    xmlGenericError(xmlGenericErrorContext,
                                    "xmlParseChunk: encoder error\n");
                    xmlHaltParser(ctxt);
                    return(XML_ERR_INVALID_ENCODING);
                }
            }
        }
    }
    if (remain != 0) {
        /* Part of this chunk is still pending, so never terminate here. */
        xmlParseTryOrFinish(ctxt, 0);
    } else {
        if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
            avail = xmlBufUse(ctxt->input->buf->buffer);
        /*
         * Depending on the current state it may not be such
         * a good idea to try parsing if there is nothing in the chunk
         * which would be worth doing a parser state transition and we
         * need to wait for more data
         *
         * The transition check is handed the bytes located past the
         * pre-push buffer usage, i.e. avail - old_avail bytes starting
         * at offset old_avail from the input base.
         */
        if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
            (old_avail == 0) || (avail == 0) ||
            (xmlParseCheckTransition(ctxt,
                       (const char *)&ctxt->input->base[old_avail],
                                     avail - old_avail)))
            xmlParseTryOrFinish(ctxt, terminate);
    }
    if (ctxt->instate == XML_PARSER_EOF)
        return(ctxt->errNo);

    /*
     * Unless XML_PARSE_HUGE is set, stop with a fatal error when either the
     * unread or the already consumed span of the current input exceeds
     * XML_MAX_LOOKUP_LIMIT.
     */
    if ((ctxt->input != NULL) &&
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        xmlHaltParser(ctxt);
    }
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
        return(ctxt->errNo);

    /* Second pass: advance past what was pushed and push the remainder. */
    if (remain != 0) {
        chunk += size;
        size = remain;
        remain = 0;
        goto xmldecl_done;
    }
    /* Append the '\r' that was withheld at the top of the function. */
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
        (ctxt->input->buf != NULL)) {
        size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
                                         ctxt->input);
        size_t current = ctxt->input->cur - ctxt->input->base;

        xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");

        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
                              base, current);
    }
    if (terminate) {
        /*
         * Check for termination
         */
        /* Number of bytes of the current input that were not consumed. */
        int cur_avail = 0;

        if (ctxt->input != NULL) {
            if (ctxt->input->buf == NULL)
                cur_avail = ctxt->input->length -
                            (ctxt->input->cur - ctxt->input->base);
            else
                cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
                                      (ctxt->input->cur - ctxt->input->base);
        }

        /* Ending anywhere but in the epilog (or EOF) is an error ... */
        if ((ctxt->instate != XML_PARSER_EOF) &&
            (ctxt->instate != XML_PARSER_EPILOG)) {
            xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
        }
        /* ... and so is ending in the epilog with unconsumed input left. */
        if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
            xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
        }
        if (ctxt->instate != XML_PARSER_EOF) {
            if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                ctxt->sax->endDocument(ctxt->userData);
        }
        ctxt->instate = XML_PARSER_EOF;
    }
    if (ctxt->wellFormed == 0)
        return((xmlParserErrors) ctxt->errNo);
    else
        return(0);
}
12407
12408 /************************************************************************
12409  *                                                                      *
12410  *              I/O front end functions to the parser                   *
12411  *                                                                      *
12412  ************************************************************************/
12413
12414 /**
12415  * xmlCreatePushParserCtxt:
12416  * @sax:  a SAX handler
12417  * @user_data:  The user data returned on SAX callbacks
12418  * @chunk:  a pointer to an array of chars
12419  * @size:  number of chars in the array
12420  * @filename:  an optional file name or URI
12421  *
12422  * Create a parser context for using the XML parser in push mode.
12423  * If @buffer and @size are non-NULL, the data is used to detect
12424  * the encoding.  The remaining characters will be parsed so they
12425  * don't need to be fed in again through xmlParseChunk.
12426  * To allow content encoding detection, @size should be >= 4
12427  * The value of @filename is used for fetching external entities
12428  * and error/warning reports.
12429  *
12430  * Returns the new parser context or NULL
12431  */
12432
12433 xmlParserCtxtPtr
12434 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12435                         const char *chunk, int size, const char *filename) {
12436     xmlParserCtxtPtr ctxt;
12437     xmlParserInputPtr inputStream;
12438     xmlParserInputBufferPtr buf;
12439     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12440
12441     /*
12442      * plug some encoding conversion routines
12443      */
12444     if ((chunk != NULL) && (size >= 4))
12445         enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12446
12447     buf = xmlAllocParserInputBuffer(enc);
12448     if (buf == NULL) return(NULL);
12449
12450     ctxt = xmlNewParserCtxt();
12451     if (ctxt == NULL) {
12452         xmlErrMemory(NULL, "creating parser: out of memory\n");
12453         xmlFreeParserInputBuffer(buf);
12454         return(NULL);
12455     }
12456     ctxt->dictNames = 1;
12457     if (sax != NULL) {
12458 #ifdef LIBXML_SAX1_ENABLED
12459         if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12460 #endif /* LIBXML_SAX1_ENABLED */
12461             xmlFree(ctxt->sax);
12462         ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12463         if (ctxt->sax == NULL) {
12464             xmlErrMemory(ctxt, NULL);
12465             xmlFreeParserInputBuffer(buf);
12466             xmlFreeParserCtxt(ctxt);
12467             return(NULL);
12468         }
12469         memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12470         if (sax->initialized == XML_SAX2_MAGIC)
12471             memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12472         else
12473             memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12474         if (user_data != NULL)
12475             ctxt->userData = user_data;
12476     }
12477     if (filename == NULL) {
12478         ctxt->directory = NULL;
12479     } else {
12480         ctxt->directory = xmlParserGetDirectory(filename);
12481     }
12482
12483     inputStream = xmlNewInputStream(ctxt);
12484     if (inputStream == NULL) {
12485         xmlFreeParserCtxt(ctxt);
12486         xmlFreeParserInputBuffer(buf);
12487         return(NULL);
12488     }
12489
12490     if (filename == NULL)
12491         inputStream->filename = NULL;
12492     else {
12493         inputStream->filename = (char *)
12494             xmlCanonicPath((const xmlChar *) filename);
12495         if (inputStream->filename == NULL) {
12496             xmlFreeParserCtxt(ctxt);
12497             xmlFreeParserInputBuffer(buf);
12498             return(NULL);
12499         }
12500     }
12501     inputStream->buf = buf;
12502     xmlBufResetInput(inputStream->buf->buffer, inputStream);
12503     inputPush(ctxt, inputStream);
12504
12505     /*
12506      * If the caller didn't provide an initial 'chunk' for determining
12507      * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12508      * that it can be automatically determined later
12509      */
12510     if ((size == 0) || (chunk == NULL)) {
12511         ctxt->charset = XML_CHAR_ENCODING_NONE;
12512     } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12513         size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12514         size_t cur = ctxt->input->cur - ctxt->input->base;
12515
12516         xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12517
12518         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12519 #ifdef DEBUG_PUSH
12520         xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12521 #endif
12522     }
12523
12524     if (enc != XML_CHAR_ENCODING_NONE) {
12525         xmlSwitchEncoding(ctxt, enc);
12526     }
12527
12528     return(ctxt);
12529 }
12530 #endif /* LIBXML_PUSH_ENABLED */
12531
12532 /**
12533  * xmlHaltParser:
12534  * @ctxt:  an XML parser context
12535  *
12536  * Blocks further parser processing don't override error
12537  * for internal use
12538  */
12539 static void
12540 xmlHaltParser(xmlParserCtxtPtr ctxt) {
12541     if (ctxt == NULL)
12542         return;
12543     ctxt->instate = XML_PARSER_EOF;
12544     ctxt->disableSAX = 1;
12545     while (ctxt->inputNr > 1)
12546         xmlFreeInputStream(inputPop(ctxt));
12547     if (ctxt->input != NULL) {
12548         /*
12549          * in case there was a specific allocation deallocate before
12550          * overriding base
12551          */
12552         if (ctxt->input->free != NULL) {
12553             ctxt->input->free((xmlChar *) ctxt->input->base);
12554             ctxt->input->free = NULL;
12555         }
12556         if (ctxt->input->buf != NULL) {
12557             xmlFreeParserInputBuffer(ctxt->input->buf);
12558             ctxt->input->buf = NULL;
12559         }
12560         ctxt->input->cur = BAD_CAST"";
12561         ctxt->input->length = 0;
12562         ctxt->input->base = ctxt->input->cur;
12563         ctxt->input->end = ctxt->input->cur;
12564     }
12565 }
12566
12567 /**
12568  * xmlStopParser:
12569  * @ctxt:  an XML parser context
12570  *
12571  * Blocks further parser processing
12572  */
12573 void
12574 xmlStopParser(xmlParserCtxtPtr ctxt) {
12575     if (ctxt == NULL)
12576         return;
12577     xmlHaltParser(ctxt);
12578     ctxt->errNo = XML_ERR_USER_STOP;
12579 }
12580
12581 /**
12582  * xmlCreateIOParserCtxt:
12583  * @sax:  a SAX handler
12584  * @user_data:  The user data returned on SAX callbacks
12585  * @ioread:  an I/O read function
12586  * @ioclose:  an I/O close function
12587  * @ioctx:  an I/O handler
12588  * @enc:  the charset encoding if known
12589  *
12590  * Create a parser context for using the XML parser with an existing
12591  * I/O stream
12592  *
12593  * Returns the new parser context or NULL
12594  */
12595 xmlParserCtxtPtr
12596 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12597         xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12598         void *ioctx, xmlCharEncoding enc) {
12599     xmlParserCtxtPtr ctxt;
12600     xmlParserInputPtr inputStream;
12601     xmlParserInputBufferPtr buf;
12602
12603     if (ioread == NULL) return(NULL);
12604
12605     buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12606     if (buf == NULL) {
12607         if (ioclose != NULL)
12608             ioclose(ioctx);
12609         return (NULL);
12610     }
12611
12612     ctxt = xmlNewParserCtxt();
12613     if (ctxt == NULL) {
12614         xmlFreeParserInputBuffer(buf);
12615         return(NULL);
12616     }
12617     if (sax != NULL) {
12618 #ifdef LIBXML_SAX1_ENABLED
12619         if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12620 #endif /* LIBXML_SAX1_ENABLED */
12621             xmlFree(ctxt->sax);
12622         ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12623         if (ctxt->sax == NULL) {
12624             xmlFreeParserInputBuffer(buf);
12625             xmlErrMemory(ctxt, NULL);
12626             xmlFreeParserCtxt(ctxt);
12627             return(NULL);
12628         }
12629         memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12630         if (sax->initialized == XML_SAX2_MAGIC)
12631             memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12632         else
12633             memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12634         if (user_data != NULL)
12635             ctxt->userData = user_data;
12636     }
12637
12638     inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12639     if (inputStream == NULL) {
12640         xmlFreeParserCtxt(ctxt);
12641         return(NULL);
12642     }
12643     inputPush(ctxt, inputStream);
12644
12645     return(ctxt);
12646 }
12647
12648 #ifdef LIBXML_VALID_ENABLED
12649 /************************************************************************
12650  *                                                                      *
12651  *              Front ends when parsing a DTD                           *
12652  *                                                                      *
12653  ************************************************************************/
12654
12655 /**
12656  * xmlIOParseDTD:
12657  * @sax:  the SAX handler block or NULL
12658  * @input:  an Input Buffer
12659  * @enc:  the charset encoding if known
12660  *
12661  * Load and parse a DTD
12662  *
12663  * Returns the resulting xmlDtdPtr or NULL in case of error.
12664  * @input will be freed by the function in any case.
12665  */
12666
12667 xmlDtdPtr
12668 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12669               xmlCharEncoding enc) {
12670     xmlDtdPtr ret = NULL;
12671     xmlParserCtxtPtr ctxt;
12672     xmlParserInputPtr pinput = NULL;
12673     xmlChar start[4];
12674
12675     if (input == NULL)
12676         return(NULL);
12677
12678     ctxt = xmlNewParserCtxt();
12679     if (ctxt == NULL) {
12680         xmlFreeParserInputBuffer(input);
12681         return(NULL);
12682     }
12683
12684     /* We are loading a DTD */
12685     ctxt->options |= XML_PARSE_DTDLOAD;
12686
12687     /*
12688      * Set-up the SAX context
12689      */
12690     if (sax != NULL) {
12691         if (ctxt->sax != NULL)
12692             xmlFree(ctxt->sax);
12693         ctxt->sax = sax;
12694         ctxt->userData = ctxt;
12695     }
12696     xmlDetectSAX2(ctxt);
12697
12698     /*
12699      * generate a parser input from the I/O handler
12700      */
12701
12702     pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12703     if (pinput == NULL) {
12704         if (sax != NULL) ctxt->sax = NULL;
12705         xmlFreeParserInputBuffer(input);
12706         xmlFreeParserCtxt(ctxt);
12707         return(NULL);
12708     }
12709
12710     /*
12711      * plug some encoding conversion routines here.
12712      */
12713     if (xmlPushInput(ctxt, pinput) < 0) {
12714         if (sax != NULL) ctxt->sax = NULL;
12715         xmlFreeParserCtxt(ctxt);
12716         return(NULL);
12717     }
12718     if (enc != XML_CHAR_ENCODING_NONE) {
12719         xmlSwitchEncoding(ctxt, enc);
12720     }
12721
12722     pinput->filename = NULL;
12723     pinput->line = 1;
12724     pinput->col = 1;
12725     pinput->base = ctxt->input->cur;
12726     pinput->cur = ctxt->input->cur;
12727     pinput->free = NULL;
12728
12729     /*
12730      * let's parse that entity knowing it's an external subset.
12731      */
12732     ctxt->inSubset = 2;
12733     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12734     if (ctxt->myDoc == NULL) {
12735         xmlErrMemory(ctxt, "New Doc failed");
12736         return(NULL);
12737     }
12738     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12739     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12740                                        BAD_CAST "none", BAD_CAST "none");
12741
12742     if ((enc == XML_CHAR_ENCODING_NONE) &&
12743         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12744         /*
12745          * Get the 4 first bytes and decode the charset
12746          * if enc != XML_CHAR_ENCODING_NONE
12747          * plug some encoding conversion routines.
12748          */
12749         start[0] = RAW;
12750         start[1] = NXT(1);
12751         start[2] = NXT(2);
12752         start[3] = NXT(3);
12753         enc = xmlDetectCharEncoding(start, 4);
12754         if (enc != XML_CHAR_ENCODING_NONE) {
12755             xmlSwitchEncoding(ctxt, enc);
12756         }
12757     }
12758
12759     xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12760
12761     if (ctxt->myDoc != NULL) {
12762         if (ctxt->wellFormed) {
12763             ret = ctxt->myDoc->extSubset;
12764             ctxt->myDoc->extSubset = NULL;
12765             if (ret != NULL) {
12766                 xmlNodePtr tmp;
12767
12768                 ret->doc = NULL;
12769                 tmp = ret->children;
12770                 while (tmp != NULL) {
12771                     tmp->doc = NULL;
12772                     tmp = tmp->next;
12773                 }
12774             }
12775         } else {
12776             ret = NULL;
12777         }
12778         xmlFreeDoc(ctxt->myDoc);
12779         ctxt->myDoc = NULL;
12780     }
12781     if (sax != NULL) ctxt->sax = NULL;
12782     xmlFreeParserCtxt(ctxt);
12783
12784     return(ret);
12785 }
12786
12787 /**
12788  * xmlSAXParseDTD:
12789  * @sax:  the SAX handler block
12790  * @ExternalID:  a NAME* containing the External ID of the DTD
12791  * @SystemID:  a NAME* containing the URL to the DTD
12792  *
12793  * Load and parse an external subset.
12794  *
12795  * Returns the resulting xmlDtdPtr or NULL in case of error.
12796  */
12797
12798 xmlDtdPtr
12799 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12800                           const xmlChar *SystemID) {
12801     xmlDtdPtr ret = NULL;
12802     xmlParserCtxtPtr ctxt;
12803     xmlParserInputPtr input = NULL;
12804     xmlCharEncoding enc;
12805     xmlChar* systemIdCanonic;
12806
12807     if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12808
12809     ctxt = xmlNewParserCtxt();
12810     if (ctxt == NULL) {
12811         return(NULL);
12812     }
12813
12814     /* We are loading a DTD */
12815     ctxt->options |= XML_PARSE_DTDLOAD;
12816
12817     /*
12818      * Set-up the SAX context
12819      */
12820     if (sax != NULL) {
12821         if (ctxt->sax != NULL)
12822             xmlFree(ctxt->sax);
12823         ctxt->sax = sax;
12824         ctxt->userData = ctxt;
12825     }
12826
12827     /*
12828      * Canonicalise the system ID
12829      */
12830     systemIdCanonic = xmlCanonicPath(SystemID);
12831     if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12832         xmlFreeParserCtxt(ctxt);
12833         return(NULL);
12834     }
12835
12836     /*
12837      * Ask the Entity resolver to load the damn thing
12838      */
12839
12840     if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12841         input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12842                                          systemIdCanonic);
12843     if (input == NULL) {
12844         if (sax != NULL) ctxt->sax = NULL;
12845         xmlFreeParserCtxt(ctxt);
12846         if (systemIdCanonic != NULL)
12847             xmlFree(systemIdCanonic);
12848         return(NULL);
12849     }
12850
12851     /*
12852      * plug some encoding conversion routines here.
12853      */
12854     if (xmlPushInput(ctxt, input) < 0) {
12855         if (sax != NULL) ctxt->sax = NULL;
12856         xmlFreeParserCtxt(ctxt);
12857         if (systemIdCanonic != NULL)
12858             xmlFree(systemIdCanonic);
12859         return(NULL);
12860     }
12861     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12862         enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12863         xmlSwitchEncoding(ctxt, enc);
12864     }
12865
12866     if (input->filename == NULL)
12867         input->filename = (char *) systemIdCanonic;
12868     else
12869         xmlFree(systemIdCanonic);
12870     input->line = 1;
12871     input->col = 1;
12872     input->base = ctxt->input->cur;
12873     input->cur = ctxt->input->cur;
12874     input->free = NULL;
12875
12876     /*
12877      * let's parse that entity knowing it's an external subset.
12878      */
12879     ctxt->inSubset = 2;
12880     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12881     if (ctxt->myDoc == NULL) {
12882         xmlErrMemory(ctxt, "New Doc failed");
12883         if (sax != NULL) ctxt->sax = NULL;
12884         xmlFreeParserCtxt(ctxt);
12885         return(NULL);
12886     }
12887     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12888     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12889                                        ExternalID, SystemID);
12890     xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12891
12892     if (ctxt->myDoc != NULL) {
12893         if (ctxt->wellFormed) {
12894             ret = ctxt->myDoc->extSubset;
12895             ctxt->myDoc->extSubset = NULL;
12896             if (ret != NULL) {
12897                 xmlNodePtr tmp;
12898
12899                 ret->doc = NULL;
12900                 tmp = ret->children;
12901                 while (tmp != NULL) {
12902                     tmp->doc = NULL;
12903                     tmp = tmp->next;
12904                 }
12905             }
12906         } else {
12907             ret = NULL;
12908         }
12909         xmlFreeDoc(ctxt->myDoc);
12910         ctxt->myDoc = NULL;
12911     }
12912     if (sax != NULL) ctxt->sax = NULL;
12913     xmlFreeParserCtxt(ctxt);
12914
12915     return(ret);
12916 }
12917
12918
12919 /**
12920  * xmlParseDTD:
12921  * @ExternalID:  a NAME* containing the External ID of the DTD
12922  * @SystemID:  a NAME* containing the URL to the DTD
12923  *
12924  * Load and parse an external subset.
12925  *
12926  * Returns the resulting xmlDtdPtr or NULL in case of error.
12927  */
12928
12929 xmlDtdPtr
12930 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12931     return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12932 }
12933 #endif /* LIBXML_VALID_ENABLED */
12934
12935 /************************************************************************
12936  *                                                                      *
12937  *              Front ends when parsing an Entity                       *
12938  *                                                                      *
12939  ************************************************************************/
12940
12941 /**
12942  * xmlParseCtxtExternalEntity:
12943  * @ctx:  the existing parsing context
12944  * @URL:  the URL for the entity to load
12945  * @ID:  the System ID for the entity to load
12946  * @lst:  the return value for the set of parsed nodes
12947  *
12948  * Parse an external general entity within an existing parsing context
12949  * An external general parsed entity is well-formed if it matches the
12950  * production labeled extParsedEnt.
12951  *
12952  * [78] extParsedEnt ::= TextDecl? content
12953  *
12954  * Returns 0 if the entity is well formed, -1 in case of args problem and
12955  *    the parser error code otherwise
12956  */
12957
12958 int
12959 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12960                        const xmlChar *ID, xmlNodePtr *lst) {
12961     void *userData;
12962
12963     if (ctx == NULL) return(-1);
12964     /*
12965      * If the user provided their own SAX callbacks, then reuse the
12966      * userData callback field, otherwise the expected setup in a
12967      * DOM builder is to have userData == ctxt
12968      */
12969     if (ctx->userData == ctx)
12970         userData = NULL;
12971     else
12972         userData = ctx->userData;
12973     return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12974                                          userData, ctx->depth + 1,
12975                                          URL, ID, lst);
12976 }
12977
12978 /**
12979  * xmlParseExternalEntityPrivate:
12980  * @doc:  the document the chunk pertains to
12981  * @oldctxt:  the previous parser context if available
12982  * @sax:  the SAX handler block (possibly NULL)
12983  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12984  * @depth:  Used for loop detection, use 0
12985  * @URL:  the URL for the entity to load
12986  * @ID:  the System ID for the entity to load
12987  * @list:  the return value for the set of parsed nodes
12988  *
12989  * Private version of xmlParseExternalEntity()
12990  *
12991  * Returns 0 if the entity is well formed, -1 in case of args problem and
12992  *    the parser error code otherwise
12993  */
12994
12995 static xmlParserErrors
12996 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12997                       xmlSAXHandlerPtr sax,
12998                       void *user_data, int depth, const xmlChar *URL,
12999                       const xmlChar *ID, xmlNodePtr *list) {
13000     xmlParserCtxtPtr ctxt;
13001     xmlDocPtr newDoc;
13002     xmlNodePtr newRoot;
13003     xmlSAXHandlerPtr oldsax = NULL;
13004     xmlParserErrors ret = XML_ERR_OK;
13005     xmlChar start[4];
13006     xmlCharEncoding enc;
13007
13008     if (((depth > 40) &&
13009         ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13010         (depth > 1024)) {
13011         return(XML_ERR_ENTITY_LOOP);
13012     }
13013
13014     if (list != NULL)
13015         *list = NULL;
13016     if ((URL == NULL) && (ID == NULL))
13017         return(XML_ERR_INTERNAL_ERROR);
13018     if (doc == NULL)
13019         return(XML_ERR_INTERNAL_ERROR);
13020
13021
13022     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13023     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13024     ctxt->userData = ctxt;
13025     if (sax != NULL) {
13026         oldsax = ctxt->sax;
13027         ctxt->sax = sax;
13028         if (user_data != NULL)
13029             ctxt->userData = user_data;
13030     }
13031     xmlDetectSAX2(ctxt);
13032     newDoc = xmlNewDoc(BAD_CAST "1.0");
13033     if (newDoc == NULL) {
13034         xmlFreeParserCtxt(ctxt);
13035         return(XML_ERR_INTERNAL_ERROR);
13036     }
13037     newDoc->properties = XML_DOC_INTERNAL;
13038     if (doc) {
13039         newDoc->intSubset = doc->intSubset;
13040         newDoc->extSubset = doc->extSubset;
13041         if (doc->dict) {
13042             newDoc->dict = doc->dict;
13043             xmlDictReference(newDoc->dict);
13044         }
13045         if (doc->URL != NULL) {
13046             newDoc->URL = xmlStrdup(doc->URL);
13047         }
13048     }
13049     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13050     if (newRoot == NULL) {
13051         if (sax != NULL)
13052             ctxt->sax = oldsax;
13053         xmlFreeParserCtxt(ctxt);
13054         newDoc->intSubset = NULL;
13055         newDoc->extSubset = NULL;
13056         xmlFreeDoc(newDoc);
13057         return(XML_ERR_INTERNAL_ERROR);
13058     }
13059     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13060     nodePush(ctxt, newDoc->children);
13061     if (doc == NULL) {
13062         ctxt->myDoc = newDoc;
13063     } else {
13064         ctxt->myDoc = doc;
13065         newRoot->doc = doc;
13066     }
13067
13068     /*
13069      * Get the 4 first bytes and decode the charset
13070      * if enc != XML_CHAR_ENCODING_NONE
13071      * plug some encoding conversion routines.
13072      */
13073     GROW;
13074     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13075         start[0] = RAW;
13076         start[1] = NXT(1);
13077         start[2] = NXT(2);
13078         start[3] = NXT(3);
13079         enc = xmlDetectCharEncoding(start, 4);
13080         if (enc != XML_CHAR_ENCODING_NONE) {
13081             xmlSwitchEncoding(ctxt, enc);
13082         }
13083     }
13084
13085     /*
13086      * Parse a possible text declaration first
13087      */
13088     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13089         xmlParseTextDecl(ctxt);
13090         /*
13091          * An XML-1.0 document can't reference an entity not XML-1.0
13092          */
13093         if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13094             (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13095             xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13096                            "Version mismatch between document and entity\n");
13097         }
13098     }
13099
13100     ctxt->instate = XML_PARSER_CONTENT;
13101     ctxt->depth = depth;
13102     if (oldctxt != NULL) {
13103         ctxt->_private = oldctxt->_private;
13104         ctxt->loadsubset = oldctxt->loadsubset;
13105         ctxt->validate = oldctxt->validate;
13106         ctxt->valid = oldctxt->valid;
13107         ctxt->replaceEntities = oldctxt->replaceEntities;
13108         if (oldctxt->validate) {
13109             ctxt->vctxt.error = oldctxt->vctxt.error;
13110             ctxt->vctxt.warning = oldctxt->vctxt.warning;
13111             ctxt->vctxt.userData = oldctxt->vctxt.userData;
13112         }
13113         ctxt->external = oldctxt->external;
13114         if (ctxt->dict) xmlDictFree(ctxt->dict);
13115         ctxt->dict = oldctxt->dict;
13116         ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13117         ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13118         ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13119         ctxt->dictNames = oldctxt->dictNames;
13120         ctxt->attsDefault = oldctxt->attsDefault;
13121         ctxt->attsSpecial = oldctxt->attsSpecial;
13122         ctxt->linenumbers = oldctxt->linenumbers;
13123         ctxt->record_info = oldctxt->record_info;
13124         ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13125         ctxt->node_seq.length = oldctxt->node_seq.length;
13126         ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13127     } else {
13128         /*
13129          * Doing validity checking on chunk without context
13130          * doesn't make sense
13131          */
13132         ctxt->_private = NULL;
13133         ctxt->validate = 0;
13134         ctxt->external = 2;
13135         ctxt->loadsubset = 0;
13136     }
13137
13138     xmlParseContent(ctxt);
13139
13140     if ((RAW == '<') && (NXT(1) == '/')) {
13141         xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13142     } else if (RAW != 0) {
13143         xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13144     }
13145     if (ctxt->node != newDoc->children) {
13146         xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13147     }
13148
13149     if (!ctxt->wellFormed) {
13150         if (ctxt->errNo == 0)
13151             ret = XML_ERR_INTERNAL_ERROR;
13152         else
13153             ret = (xmlParserErrors)ctxt->errNo;
13154     } else {
13155         if (list != NULL) {
13156             xmlNodePtr cur;
13157
13158             /*
13159              * Return the newly created nodeset after unlinking it from
13160              * they pseudo parent.
13161              */
13162             cur = newDoc->children->children;
13163             *list = cur;
13164             while (cur != NULL) {
13165                 cur->parent = NULL;
13166                 cur = cur->next;
13167             }
13168             newDoc->children->children = NULL;
13169         }
13170         ret = XML_ERR_OK;
13171     }
13172
13173     /*
13174      * Record in the parent context the number of entities replacement
13175      * done when parsing that reference.
13176      */
13177     if (oldctxt != NULL)
13178         oldctxt->nbentities += ctxt->nbentities;
13179
13180     /*
13181      * Also record the size of the entity parsed
13182      */
13183     if (ctxt->input != NULL && oldctxt != NULL) {
13184         oldctxt->sizeentities += ctxt->input->consumed;
13185         oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13186     }
13187     /*
13188      * And record the last error if any
13189      */
13190     if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13191         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13192
13193     if (sax != NULL)
13194         ctxt->sax = oldsax;
13195     if (oldctxt != NULL) {
13196         ctxt->dict = NULL;
13197         ctxt->attsDefault = NULL;
13198         ctxt->attsSpecial = NULL;
13199         oldctxt->validate = ctxt->validate;
13200         oldctxt->valid = ctxt->valid;
13201         oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13202         oldctxt->node_seq.length = ctxt->node_seq.length;
13203         oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13204     }
13205     ctxt->node_seq.maximum = 0;
13206     ctxt->node_seq.length = 0;
13207     ctxt->node_seq.buffer = NULL;
13208     xmlFreeParserCtxt(ctxt);
13209     newDoc->intSubset = NULL;
13210     newDoc->extSubset = NULL;
13211     xmlFreeDoc(newDoc);
13212
13213     return(ret);
13214 }
13215
13216 #ifdef LIBXML_SAX1_ENABLED
13217 /**
13218  * xmlParseExternalEntity:
13219  * @doc:  the document the chunk pertains to
13220  * @sax:  the SAX handler block (possibly NULL)
13221  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13222  * @depth:  Used for loop detection, use 0
13223  * @URL:  the URL for the entity to load
13224  * @ID:  the System ID for the entity to load
13225  * @lst:  the return value for the set of parsed nodes
13226  *
13227  * Parse an external general entity
13228  * An external general parsed entity is well-formed if it matches the
13229  * production labeled extParsedEnt.
13230  *
13231  * [78] extParsedEnt ::= TextDecl? content
13232  *
13233  * Returns 0 if the entity is well formed, -1 in case of args problem and
13234  *    the parser error code otherwise
13235  */
13236
13237 int
13238 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13239           int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13240     return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13241                                        ID, lst));
13242 }
13243
13244 /**
13245  * xmlParseBalancedChunkMemory:
13246  * @doc:  the document the chunk pertains to (must not be NULL)
13247  * @sax:  the SAX handler block (possibly NULL)
13248  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13249  * @depth:  Used for loop detection, use 0
13250  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13251  * @lst:  the return value for the set of parsed nodes
13252  *
13253  * Parse a well-balanced chunk of an XML document
13254  * called by the parser
13255  * The allowed sequence for the Well Balanced Chunk is the one defined by
13256  * the content production in the XML grammar:
13257  *
13258  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13259  *
13260  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13261  *    the parser error code otherwise
13262  */
13263
13264 int
13265 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13266      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13267     return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13268                                                 depth, string, lst, 0 );
13269 }
13270 #endif /* LIBXML_SAX1_ENABLED */
13271
13272 /**
13273  * xmlParseBalancedChunkMemoryInternal:
13274  * @oldctxt:  the existing parsing context
13275  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13276  * @user_data:  the user data field for the parser context
13277  * @lst:  the return value for the set of parsed nodes
13278  *
13279  *
13280  * Parse a well-balanced chunk of an XML document
13281  * called by the parser
13282  * The allowed sequence for the Well Balanced Chunk is the one defined by
13283  * the content production in the XML grammar:
13284  *
13285  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13286  *
13287  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13288  * error code otherwise
13289  *
13290  * In case recover is set to 1, the nodelist will not be empty even if
13291  * the parsed chunk is not well balanced.
13292  */
13293 static xmlParserErrors
13294 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13295         const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13296     xmlParserCtxtPtr ctxt;
13297     xmlDocPtr newDoc = NULL;
13298     xmlNodePtr newRoot;
13299     xmlSAXHandlerPtr oldsax = NULL;
13300     xmlNodePtr content = NULL;
13301     xmlNodePtr last = NULL;
13302     int size;
13303     xmlParserErrors ret = XML_ERR_OK;
13304 #ifdef SAX2
13305     int i;
13306 #endif
13307
13308     if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13309         (oldctxt->depth >  1024)) {
13310         return(XML_ERR_ENTITY_LOOP);
13311     }
13312
13313
13314     if (lst != NULL)
13315         *lst = NULL;
13316     if (string == NULL)
13317         return(XML_ERR_INTERNAL_ERROR);
13318
13319     size = xmlStrlen(string);
13320
13321     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13322     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13323     if (user_data != NULL)
13324         ctxt->userData = user_data;
13325     else
13326         ctxt->userData = ctxt;
13327     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13328     ctxt->dict = oldctxt->dict;
13329     ctxt->input_id = oldctxt->input_id + 1;
13330     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13331     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13332     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13333
13334 #ifdef SAX2
13335     /* propagate namespaces down the entity */
13336     for (i = 0;i < oldctxt->nsNr;i += 2) {
13337         nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13338     }
13339 #endif
13340
13341     oldsax = ctxt->sax;
13342     ctxt->sax = oldctxt->sax;
13343     xmlDetectSAX2(ctxt);
13344     ctxt->replaceEntities = oldctxt->replaceEntities;
13345     ctxt->options = oldctxt->options;
13346
13347     ctxt->_private = oldctxt->_private;
13348     if (oldctxt->myDoc == NULL) {
13349         newDoc = xmlNewDoc(BAD_CAST "1.0");
13350         if (newDoc == NULL) {
13351             ctxt->sax = oldsax;
13352             ctxt->dict = NULL;
13353             xmlFreeParserCtxt(ctxt);
13354             return(XML_ERR_INTERNAL_ERROR);
13355         }
13356         newDoc->properties = XML_DOC_INTERNAL;
13357         newDoc->dict = ctxt->dict;
13358         xmlDictReference(newDoc->dict);
13359         ctxt->myDoc = newDoc;
13360     } else {
13361         ctxt->myDoc = oldctxt->myDoc;
13362         content = ctxt->myDoc->children;
13363         last = ctxt->myDoc->last;
13364     }
13365     newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13366     if (newRoot == NULL) {
13367         ctxt->sax = oldsax;
13368         ctxt->dict = NULL;
13369         xmlFreeParserCtxt(ctxt);
13370         if (newDoc != NULL) {
13371             xmlFreeDoc(newDoc);
13372         }
13373         return(XML_ERR_INTERNAL_ERROR);
13374     }
13375     ctxt->myDoc->children = NULL;
13376     ctxt->myDoc->last = NULL;
13377     xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13378     nodePush(ctxt, ctxt->myDoc->children);
13379     ctxt->instate = XML_PARSER_CONTENT;
13380     ctxt->depth = oldctxt->depth + 1;
13381
13382     ctxt->validate = 0;
13383     ctxt->loadsubset = oldctxt->loadsubset;
13384     if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13385         /*
13386          * ID/IDREF registration will be done in xmlValidateElement below
13387          */
13388         ctxt->loadsubset |= XML_SKIP_IDS;
13389     }
13390     ctxt->dictNames = oldctxt->dictNames;
13391     ctxt->attsDefault = oldctxt->attsDefault;
13392     ctxt->attsSpecial = oldctxt->attsSpecial;
13393
13394     xmlParseContent(ctxt);
13395     if ((RAW == '<') && (NXT(1) == '/')) {
13396         xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13397     } else if (RAW != 0) {
13398         xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13399     }
13400     if (ctxt->node != ctxt->myDoc->children) {
13401         xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13402     }
13403
13404     if (!ctxt->wellFormed) {
13405         if (ctxt->errNo == 0)
13406             ret = XML_ERR_INTERNAL_ERROR;
13407         else
13408             ret = (xmlParserErrors)ctxt->errNo;
13409     } else {
13410       ret = XML_ERR_OK;
13411     }
13412
13413     if ((lst != NULL) && (ret == XML_ERR_OK)) {
13414         xmlNodePtr cur;
13415
13416         /*
13417          * Return the newly created nodeset after unlinking it from
13418          * they pseudo parent.
13419          */
13420         cur = ctxt->myDoc->children->children;
13421         *lst = cur;
13422         while (cur != NULL) {
13423 #ifdef LIBXML_VALID_ENABLED
13424             if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13425                 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13426                 (cur->type == XML_ELEMENT_NODE)) {
13427                 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13428                         oldctxt->myDoc, cur);
13429             }
13430 #endif /* LIBXML_VALID_ENABLED */
13431             cur->parent = NULL;
13432             cur = cur->next;
13433         }
13434         ctxt->myDoc->children->children = NULL;
13435     }
13436     if (ctxt->myDoc != NULL) {
13437         xmlFreeNode(ctxt->myDoc->children);
13438         ctxt->myDoc->children = content;
13439         ctxt->myDoc->last = last;
13440     }
13441
13442     /*
13443      * Record in the parent context the number of entities replacement
13444      * done when parsing that reference.
13445      */
13446     if (oldctxt != NULL)
13447         oldctxt->nbentities += ctxt->nbentities;
13448
13449     /*
13450      * Also record the last error if any
13451      */
13452     if (ctxt->lastError.code != XML_ERR_OK)
13453         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13454
13455     ctxt->sax = oldsax;
13456     ctxt->dict = NULL;
13457     ctxt->attsDefault = NULL;
13458     ctxt->attsSpecial = NULL;
13459     xmlFreeParserCtxt(ctxt);
13460     if (newDoc != NULL) {
13461         xmlFreeDoc(newDoc);
13462     }
13463
13464     return(ret);
13465 }
13466
13467 /**
13468  * xmlParseInNodeContext:
13469  * @node:  the context node
13470  * @data:  the input string
13471  * @datalen:  the input string length in bytes
13472  * @options:  a combination of xmlParserOption
13473  * @lst:  the return value for the set of parsed nodes
13474  *
13475  * Parse a well-balanced chunk of an XML document
13476  * within the context (DTD, namespaces, etc ...) of the given node.
13477  *
13478  * The allowed sequence for the data is a Well Balanced Chunk defined by
13479  * the content production in the XML grammar:
13480  *
13481  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13482  *
13483  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13484  * error code otherwise
13485  */
13486 xmlParserErrors
13487 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13488                       int options, xmlNodePtr *lst) {
13489 #ifdef SAX2
13490     xmlParserCtxtPtr ctxt;
13491     xmlDocPtr doc = NULL;
13492     xmlNodePtr fake, cur;
13493     int nsnr = 0;
13494
13495     xmlParserErrors ret = XML_ERR_OK;
13496
13497     /*
13498      * check all input parameters, grab the document
13499      */
13500     if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13501         return(XML_ERR_INTERNAL_ERROR);
13502     switch (node->type) {
13503         case XML_ELEMENT_NODE:
13504         case XML_ATTRIBUTE_NODE:
13505         case XML_TEXT_NODE:
13506         case XML_CDATA_SECTION_NODE:
13507         case XML_ENTITY_REF_NODE:
13508         case XML_PI_NODE:
13509         case XML_COMMENT_NODE:
13510         case XML_DOCUMENT_NODE:
13511         case XML_HTML_DOCUMENT_NODE:
13512             break;
13513         default:
13514             return(XML_ERR_INTERNAL_ERROR);
13515
13516     }
13517     while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13518            (node->type != XML_DOCUMENT_NODE) &&
13519            (node->type != XML_HTML_DOCUMENT_NODE))
13520         node = node->parent;
13521     if (node == NULL)
13522         return(XML_ERR_INTERNAL_ERROR);
13523     if (node->type == XML_ELEMENT_NODE)
13524         doc = node->doc;
13525     else
13526         doc = (xmlDocPtr) node;
13527     if (doc == NULL)
13528         return(XML_ERR_INTERNAL_ERROR);
13529
13530     /*
13531      * allocate a context and set-up everything not related to the
13532      * node position in the tree
13533      */
13534     if (doc->type == XML_DOCUMENT_NODE)
13535         ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13536 #ifdef LIBXML_HTML_ENABLED
13537     else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13538         ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13539         /*
13540          * When parsing in context, it makes no sense to add implied
13541          * elements like html/body/etc...
13542          */
13543         options |= HTML_PARSE_NOIMPLIED;
13544     }
13545 #endif
13546     else
13547         return(XML_ERR_INTERNAL_ERROR);
13548
13549     if (ctxt == NULL)
13550         return(XML_ERR_NO_MEMORY);
13551
13552     /*
13553      * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13554      * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13555      * we must wait until the last moment to free the original one.
13556      */
13557     if (doc->dict != NULL) {
13558         if (ctxt->dict != NULL)
13559             xmlDictFree(ctxt->dict);
13560         ctxt->dict = doc->dict;
13561     } else
13562         options |= XML_PARSE_NODICT;
13563
13564     if (doc->encoding != NULL) {
13565         xmlCharEncodingHandlerPtr hdlr;
13566
13567         if (ctxt->encoding != NULL)
13568             xmlFree((xmlChar *) ctxt->encoding);
13569         ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13570
13571         hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13572         if (hdlr != NULL) {
13573             xmlSwitchToEncoding(ctxt, hdlr);
13574         } else {
13575             return(XML_ERR_UNSUPPORTED_ENCODING);
13576         }
13577     }
13578
13579     xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13580     xmlDetectSAX2(ctxt);
13581     ctxt->myDoc = doc;
13582     /* parsing in context, i.e. as within existing content */
13583     ctxt->input_id = 2;
13584     ctxt->instate = XML_PARSER_CONTENT;
13585
13586     fake = xmlNewDocComment(node->doc, NULL);
13587     if (fake == NULL) {
13588         xmlFreeParserCtxt(ctxt);
13589         return(XML_ERR_NO_MEMORY);
13590     }
13591     xmlAddChild(node, fake);
13592
13593     if (node->type == XML_ELEMENT_NODE) {
13594         nodePush(ctxt, node);
13595         /*
13596          * initialize the SAX2 namespaces stack
13597          */
13598         cur = node;
13599         while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13600             xmlNsPtr ns = cur->nsDef;
13601             const xmlChar *iprefix, *ihref;
13602
13603             while (ns != NULL) {
13604                 if (ctxt->dict) {
13605                     iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13606                     ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13607                 } else {
13608                     iprefix = ns->prefix;
13609                     ihref = ns->href;
13610                 }
13611
13612                 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13613                     nsPush(ctxt, iprefix, ihref);
13614                     nsnr++;
13615                 }
13616                 ns = ns->next;
13617             }
13618             cur = cur->parent;
13619         }
13620     }
13621
13622     if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13623         /*
13624          * ID/IDREF registration will be done in xmlValidateElement below
13625          */
13626         ctxt->loadsubset |= XML_SKIP_IDS;
13627     }
13628
13629 #ifdef LIBXML_HTML_ENABLED
13630     if (doc->type == XML_HTML_DOCUMENT_NODE)
13631         __htmlParseContent(ctxt);
13632     else
13633 #endif
13634         xmlParseContent(ctxt);
13635
13636     nsPop(ctxt, nsnr);
13637     if ((RAW == '<') && (NXT(1) == '/')) {
13638         xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13639     } else if (RAW != 0) {
13640         xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13641     }
13642     if ((ctxt->node != NULL) && (ctxt->node != node)) {
13643         xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13644         ctxt->wellFormed = 0;
13645     }
13646
13647     if (!ctxt->wellFormed) {
13648         if (ctxt->errNo == 0)
13649             ret = XML_ERR_INTERNAL_ERROR;
13650         else
13651             ret = (xmlParserErrors)ctxt->errNo;
13652     } else {
13653         ret = XML_ERR_OK;
13654     }
13655
13656     /*
13657      * Return the newly created nodeset after unlinking it from
13658      * the pseudo sibling.
13659      */
13660
13661     cur = fake->next;
13662     fake->next = NULL;
13663     node->last = fake;
13664
13665     if (cur != NULL) {
13666         cur->prev = NULL;
13667     }
13668
13669     *lst = cur;
13670
13671     while (cur != NULL) {
13672         cur->parent = NULL;
13673         cur = cur->next;
13674     }
13675
13676     xmlUnlinkNode(fake);
13677     xmlFreeNode(fake);
13678
13679
13680     if (ret != XML_ERR_OK) {
13681         xmlFreeNodeList(*lst);
13682         *lst = NULL;
13683     }
13684
13685     if (doc->dict != NULL)
13686         ctxt->dict = NULL;
13687     xmlFreeParserCtxt(ctxt);
13688
13689     return(ret);
13690 #else /* !SAX2 */
13691     return(XML_ERR_INTERNAL_ERROR);
13692 #endif
13693 }
13694
13695 #ifdef LIBXML_SAX1_ENABLED
13696 /**
13697  * xmlParseBalancedChunkMemoryRecover:
13698  * @doc:  the document the chunk pertains to (must not be NULL)
13699  * @sax:  the SAX handler block (possibly NULL)
13700  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13701  * @depth:  Used for loop detection, use 0
13702  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13703  * @lst:  the return value for the set of parsed nodes
13704  * @recover: return nodes even if the data is broken (use 0)
13705  *
13706  *
13707  * Parse a well-balanced chunk of an XML document
13708  * called by the parser
13709  * The allowed sequence for the Well Balanced Chunk is the one defined by
13710  * the content production in the XML grammar:
13711  *
13712  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13713  *
13714  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13715  *    the parser error code otherwise
13716  *
13717  * In case recover is set to 1, the nodelist will not be empty even if
13718  * the parsed chunk is not well balanced, assuming the parsing succeeded to
13719  * some extent.
13720  */
13721 int
13722 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13723      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13724      int recover) {
13725     xmlParserCtxtPtr ctxt;
13726     xmlDocPtr newDoc;
13727     xmlSAXHandlerPtr oldsax = NULL;
13728     xmlNodePtr content, newRoot;
13729     int size;
13730     int ret = 0;
13731
13732     if (depth > 40) {
13733         return(XML_ERR_ENTITY_LOOP);
13734     }
13735
13736
13737     if (lst != NULL)
13738         *lst = NULL;
13739     if (string == NULL)
13740         return(-1);
13741
13742     size = xmlStrlen(string);
13743
13744     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13745     if (ctxt == NULL) return(-1);
13746     ctxt->userData = ctxt;
13747     if (sax != NULL) {
13748         oldsax = ctxt->sax;
13749         ctxt->sax = sax;
13750         if (user_data != NULL)
13751             ctxt->userData = user_data;
13752     }
13753     newDoc = xmlNewDoc(BAD_CAST "1.0");
13754     if (newDoc == NULL) {
13755         xmlFreeParserCtxt(ctxt);
13756         return(-1);
13757     }
13758     newDoc->properties = XML_DOC_INTERNAL;
13759     if ((doc != NULL) && (doc->dict != NULL)) {
13760         xmlDictFree(ctxt->dict);
13761         ctxt->dict = doc->dict;
13762         xmlDictReference(ctxt->dict);
13763         ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13764         ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13765         ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13766         ctxt->dictNames = 1;
13767     } else {
13768         xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13769     }
13770     /* doc == NULL is only supported for historic reasons */
13771     if (doc != NULL) {
13772         newDoc->intSubset = doc->intSubset;
13773         newDoc->extSubset = doc->extSubset;
13774     }
13775     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13776     if (newRoot == NULL) {
13777         if (sax != NULL)
13778             ctxt->sax = oldsax;
13779         xmlFreeParserCtxt(ctxt);
13780         newDoc->intSubset = NULL;
13781         newDoc->extSubset = NULL;
13782         xmlFreeDoc(newDoc);
13783         return(-1);
13784     }
13785     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13786     nodePush(ctxt, newRoot);
13787     /* doc == NULL is only supported for historic reasons */
13788     if (doc == NULL) {
13789         ctxt->myDoc = newDoc;
13790     } else {
13791         ctxt->myDoc = newDoc;
13792         newDoc->children->doc = doc;
13793         /* Ensure that doc has XML spec namespace */
13794         xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13795         newDoc->oldNs = doc->oldNs;
13796     }
13797     ctxt->instate = XML_PARSER_CONTENT;
13798     ctxt->input_id = 2;
13799     ctxt->depth = depth;
13800
13801     /*
13802      * Doing validity checking on chunk doesn't make sense
13803      */
13804     ctxt->validate = 0;
13805     ctxt->loadsubset = 0;
13806     xmlDetectSAX2(ctxt);
13807
13808     if ( doc != NULL ){
13809         content = doc->children;
13810         doc->children = NULL;
13811         xmlParseContent(ctxt);
13812         doc->children = content;
13813     }
13814     else {
13815         xmlParseContent(ctxt);
13816     }
13817     if ((RAW == '<') && (NXT(1) == '/')) {
13818         xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13819     } else if (RAW != 0) {
13820         xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13821     }
13822     if (ctxt->node != newDoc->children) {
13823         xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13824     }
13825
13826     if (!ctxt->wellFormed) {
13827         if (ctxt->errNo == 0)
13828             ret = 1;
13829         else
13830             ret = ctxt->errNo;
13831     } else {
13832       ret = 0;
13833     }
13834
13835     if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13836         xmlNodePtr cur;
13837
13838         /*
13839          * Return the newly created nodeset after unlinking it from
13840          * they pseudo parent.
13841          */
13842         cur = newDoc->children->children;
13843         *lst = cur;
13844         while (cur != NULL) {
13845             xmlSetTreeDoc(cur, doc);
13846             cur->parent = NULL;
13847             cur = cur->next;
13848         }
13849         newDoc->children->children = NULL;
13850     }
13851
13852     if (sax != NULL)
13853         ctxt->sax = oldsax;
13854     xmlFreeParserCtxt(ctxt);
13855     newDoc->intSubset = NULL;
13856     newDoc->extSubset = NULL;
13857     /* This leaks the namespace list if doc == NULL */
13858     newDoc->oldNs = NULL;
13859     xmlFreeDoc(newDoc);
13860
13861     return(ret);
13862 }
13863
13864 /**
13865  * xmlSAXParseEntity:
13866  * @sax:  the SAX handler block
13867  * @filename:  the filename
13868  *
13869  * parse an XML external entity out of context and build a tree.
13870  * It use the given SAX function block to handle the parsing callback.
13871  * If sax is NULL, fallback to the default DOM tree building routines.
13872  *
13873  * [78] extParsedEnt ::= TextDecl? content
13874  *
13875  * This correspond to a "Well Balanced" chunk
13876  *
13877  * Returns the resulting document tree
13878  */
13879
13880 xmlDocPtr
13881 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13882     xmlDocPtr ret;
13883     xmlParserCtxtPtr ctxt;
13884
13885     ctxt = xmlCreateFileParserCtxt(filename);
13886     if (ctxt == NULL) {
13887         return(NULL);
13888     }
13889     if (sax != NULL) {
13890         if (ctxt->sax != NULL)
13891             xmlFree(ctxt->sax);
13892         ctxt->sax = sax;
13893         ctxt->userData = NULL;
13894     }
13895
13896     xmlParseExtParsedEnt(ctxt);
13897
13898     if (ctxt->wellFormed)
13899         ret = ctxt->myDoc;
13900     else {
13901         ret = NULL;
13902         xmlFreeDoc(ctxt->myDoc);
13903         ctxt->myDoc = NULL;
13904     }
13905     if (sax != NULL)
13906         ctxt->sax = NULL;
13907     xmlFreeParserCtxt(ctxt);
13908
13909     return(ret);
13910 }
13911
13912 /**
13913  * xmlParseEntity:
13914  * @filename:  the filename
13915  *
13916  * parse an XML external entity out of context and build a tree.
13917  *
13918  * [78] extParsedEnt ::= TextDecl? content
13919  *
13920  * This correspond to a "Well Balanced" chunk
13921  *
13922  * Returns the resulting document tree
13923  */
13924
13925 xmlDocPtr
13926 xmlParseEntity(const char *filename) {
13927     return(xmlSAXParseEntity(NULL, filename));
13928 }
13929 #endif /* LIBXML_SAX1_ENABLED */
13930
13931 /**
13932  * xmlCreateEntityParserCtxtInternal:
13933  * @URL:  the entity URL
13934  * @ID:  the entity PUBLIC ID
13935  * @base:  a possible base for the target URI
13936  * @pctx:  parser context used to set options on new context
13937  *
13938  * Create a parser context for an external entity
13939  * Automatic support for ZLIB/Compress compressed document is provided
13940  * by default if found at compile-time.
13941  *
13942  * Returns the new parser context or NULL
13943  */
13944 static xmlParserCtxtPtr
13945 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13946                           const xmlChar *base, xmlParserCtxtPtr pctx) {
13947     xmlParserCtxtPtr ctxt;
13948     xmlParserInputPtr inputStream;
13949     char *directory = NULL;
13950     xmlChar *uri;
13951
13952     ctxt = xmlNewParserCtxt();
13953     if (ctxt == NULL) {
13954         return(NULL);
13955     }
13956
13957     if (pctx != NULL) {
13958         ctxt->options = pctx->options;
13959         ctxt->_private = pctx->_private;
13960         /*
13961          * this is a subparser of pctx, so the input_id should be
13962          * incremented to distinguish from main entity
13963          */
13964         ctxt->input_id = pctx->input_id + 1;
13965     }
13966
13967     /* Don't read from stdin. */
13968     if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13969         URL = BAD_CAST "./-";
13970
13971     uri = xmlBuildURI(URL, base);
13972
13973     if (uri == NULL) {
13974         inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13975         if (inputStream == NULL) {
13976             xmlFreeParserCtxt(ctxt);
13977             return(NULL);
13978         }
13979
13980         inputPush(ctxt, inputStream);
13981
13982         if ((ctxt->directory == NULL) && (directory == NULL))
13983             directory = xmlParserGetDirectory((char *)URL);
13984         if ((ctxt->directory == NULL) && (directory != NULL))
13985             ctxt->directory = directory;
13986     } else {
13987         inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13988         if (inputStream == NULL) {
13989             xmlFree(uri);
13990             xmlFreeParserCtxt(ctxt);
13991             return(NULL);
13992         }
13993
13994         inputPush(ctxt, inputStream);
13995
13996         if ((ctxt->directory == NULL) && (directory == NULL))
13997             directory = xmlParserGetDirectory((char *)uri);
13998         if ((ctxt->directory == NULL) && (directory != NULL))
13999             ctxt->directory = directory;
14000         xmlFree(uri);
14001     }
14002     return(ctxt);
14003 }
14004
14005 /**
14006  * xmlCreateEntityParserCtxt:
14007  * @URL:  the entity URL
14008  * @ID:  the entity PUBLIC ID
14009  * @base:  a possible base for the target URI
14010  *
14011  * Create a parser context for an external entity
14012  * Automatic support for ZLIB/Compress compressed document is provided
14013  * by default if found at compile-time.
14014  *
14015  * Returns the new parser context or NULL
14016  */
14017 xmlParserCtxtPtr
14018 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14019                           const xmlChar *base) {
14020     return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14021
14022 }
14023
14024 /************************************************************************
14025  *                                                                      *
14026  *              Front ends when parsing from a file                     *
14027  *                                                                      *
14028  ************************************************************************/
14029
14030 /**
14031  * xmlCreateURLParserCtxt:
14032  * @filename:  the filename or URL
14033  * @options:  a combination of xmlParserOption
14034  *
14035  * Create a parser context for a file or URL content.
14036  * Automatic support for ZLIB/Compress compressed document is provided
14037  * by default if found at compile-time and for file accesses
14038  *
14039  * Returns the new parser context or NULL
14040  */
14041 xmlParserCtxtPtr
14042 xmlCreateURLParserCtxt(const char *filename, int options)
14043 {
14044     xmlParserCtxtPtr ctxt;
14045     xmlParserInputPtr inputStream;
14046     char *directory = NULL;
14047
14048     ctxt = xmlNewParserCtxt();
14049     if (ctxt == NULL) {
14050         xmlErrMemory(NULL, "cannot allocate parser context");
14051         return(NULL);
14052     }
14053
14054     if (options)
14055         xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14056     ctxt->linenumbers = 1;
14057
14058     inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14059     if (inputStream == NULL) {
14060         xmlFreeParserCtxt(ctxt);
14061         return(NULL);
14062     }
14063
14064     inputPush(ctxt, inputStream);
14065     if ((ctxt->directory == NULL) && (directory == NULL))
14066         directory = xmlParserGetDirectory(filename);
14067     if ((ctxt->directory == NULL) && (directory != NULL))
14068         ctxt->directory = directory;
14069
14070     return(ctxt);
14071 }
14072
14073 /**
14074  * xmlCreateFileParserCtxt:
14075  * @filename:  the filename
14076  *
14077  * Create a parser context for a file content.
14078  * Automatic support for ZLIB/Compress compressed document is provided
14079  * by default if found at compile-time.
14080  *
14081  * Returns the new parser context or NULL
14082  */
14083 xmlParserCtxtPtr
14084 xmlCreateFileParserCtxt(const char *filename)
14085 {
14086     return(xmlCreateURLParserCtxt(filename, 0));
14087 }
14088
14089 #ifdef LIBXML_SAX1_ENABLED
14090 /**
14091  * xmlSAXParseFileWithData:
14092  * @sax:  the SAX handler block
14093  * @filename:  the filename
14094  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14095  *             documents
14096  * @data:  the userdata
14097  *
14098  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14099  * compressed document is provided by default if found at compile-time.
14100  * It use the given SAX function block to handle the parsing callback.
14101  * If sax is NULL, fallback to the default DOM tree building routines.
14102  *
14103  * User data (void *) is stored within the parser context in the
14104  * context's _private member, so it is available nearly everywhere in libxml
14105  *
14106  * Returns the resulting document tree
14107  */
14108
14109 xmlDocPtr
14110 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14111                         int recovery, void *data) {
14112     xmlDocPtr ret;
14113     xmlParserCtxtPtr ctxt;
14114
14115     xmlInitParser();
14116
14117     ctxt = xmlCreateFileParserCtxt(filename);
14118     if (ctxt == NULL) {
14119         return(NULL);
14120     }
14121     if (sax != NULL) {
14122         if (ctxt->sax != NULL)
14123             xmlFree(ctxt->sax);
14124         ctxt->sax = sax;
14125     }
14126     xmlDetectSAX2(ctxt);
14127     if (data!=NULL) {
14128         ctxt->_private = data;
14129     }
14130
14131     if (ctxt->directory == NULL)
14132         ctxt->directory = xmlParserGetDirectory(filename);
14133
14134     ctxt->recovery = recovery;
14135
14136     xmlParseDocument(ctxt);
14137
14138     if ((ctxt->wellFormed) || recovery) {
14139         ret = ctxt->myDoc;
14140         if ((ret != NULL) && (ctxt->input->buf != NULL)) {
14141             if (ctxt->input->buf->compressed > 0)
14142                 ret->compression = 9;
14143             else
14144                 ret->compression = ctxt->input->buf->compressed;
14145         }
14146     }
14147     else {
14148        ret = NULL;
14149        xmlFreeDoc(ctxt->myDoc);
14150        ctxt->myDoc = NULL;
14151     }
14152     if (sax != NULL)
14153         ctxt->sax = NULL;
14154     xmlFreeParserCtxt(ctxt);
14155
14156     return(ret);
14157 }
14158
14159 /**
14160  * xmlSAXParseFile:
14161  * @sax:  the SAX handler block
14162  * @filename:  the filename
14163  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14164  *             documents
14165  *
14166  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14167  * compressed document is provided by default if found at compile-time.
14168  * It use the given SAX function block to handle the parsing callback.
14169  * If sax is NULL, fallback to the default DOM tree building routines.
14170  *
14171  * Returns the resulting document tree
14172  */
14173
14174 xmlDocPtr
14175 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14176                           int recovery) {
14177     return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14178 }
14179
14180 /**
14181  * xmlRecoverDoc:
14182  * @cur:  a pointer to an array of xmlChar
14183  *
14184  * parse an XML in-memory document and build a tree.
14185  * In the case the document is not Well Formed, a attempt to build a
14186  * tree is tried anyway
14187  *
14188  * Returns the resulting document tree or NULL in case of failure
14189  */
14190
14191 xmlDocPtr
14192 xmlRecoverDoc(const xmlChar *cur) {
14193     return(xmlSAXParseDoc(NULL, cur, 1));
14194 }
14195
14196 /**
14197  * xmlParseFile:
14198  * @filename:  the filename
14199  *
14200  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14201  * compressed document is provided by default if found at compile-time.
14202  *
14203  * Returns the resulting document tree if the file was wellformed,
14204  * NULL otherwise.
14205  */
14206
14207 xmlDocPtr
14208 xmlParseFile(const char *filename) {
14209     return(xmlSAXParseFile(NULL, filename, 0));
14210 }
14211
14212 /**
14213  * xmlRecoverFile:
14214  * @filename:  the filename
14215  *
14216  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14217  * compressed document is provided by default if found at compile-time.
14218  * In the case the document is not Well Formed, it attempts to build
14219  * a tree anyway
14220  *
14221  * Returns the resulting document tree or NULL in case of failure
14222  */
14223
14224 xmlDocPtr
14225 xmlRecoverFile(const char *filename) {
14226     return(xmlSAXParseFile(NULL, filename, 1));
14227 }
14228
14229
14230 /**
14231  * xmlSetupParserForBuffer:
14232  * @ctxt:  an XML parser context
14233  * @buffer:  a xmlChar * buffer
14234  * @filename:  a file name
14235  *
14236  * Setup the parser context to parse a new buffer; Clears any prior
14237  * contents from the parser context. The buffer parameter must not be
14238  * NULL, but the filename parameter can be
14239  */
14240 void
14241 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14242                              const char* filename)
14243 {
14244     xmlParserInputPtr input;
14245
14246     if ((ctxt == NULL) || (buffer == NULL))
14247         return;
14248
14249     input = xmlNewInputStream(ctxt);
14250     if (input == NULL) {
14251         xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14252         xmlClearParserCtxt(ctxt);
14253         return;
14254     }
14255
14256     xmlClearParserCtxt(ctxt);
14257     if (filename != NULL)
14258         input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14259     input->base = buffer;
14260     input->cur = buffer;
14261     input->end = &buffer[xmlStrlen(buffer)];
14262     inputPush(ctxt, input);
14263 }
14264
14265 /**
14266  * xmlSAXUserParseFile:
14267  * @sax:  a SAX handler
14268  * @user_data:  The user data returned on SAX callbacks
14269  * @filename:  a file name
14270  *
14271  * parse an XML file and call the given SAX handler routines.
14272  * Automatic support for ZLIB/Compress compressed document is provided
14273  *
14274  * Returns 0 in case of success or a error number otherwise
14275  */
14276 int
14277 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14278                     const char *filename) {
14279     int ret = 0;
14280     xmlParserCtxtPtr ctxt;
14281
14282     ctxt = xmlCreateFileParserCtxt(filename);
14283     if (ctxt == NULL) return -1;
14284     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14285         xmlFree(ctxt->sax);
14286     ctxt->sax = sax;
14287     xmlDetectSAX2(ctxt);
14288
14289     if (user_data != NULL)
14290         ctxt->userData = user_data;
14291
14292     xmlParseDocument(ctxt);
14293
14294     if (ctxt->wellFormed)
14295         ret = 0;
14296     else {
14297         if (ctxt->errNo != 0)
14298             ret = ctxt->errNo;
14299         else
14300             ret = -1;
14301     }
14302     if (sax != NULL)
14303         ctxt->sax = NULL;
14304     if (ctxt->myDoc != NULL) {
14305         xmlFreeDoc(ctxt->myDoc);
14306         ctxt->myDoc = NULL;
14307     }
14308     xmlFreeParserCtxt(ctxt);
14309
14310     return ret;
14311 }
14312 #endif /* LIBXML_SAX1_ENABLED */
14313
14314 /************************************************************************
14315  *                                                                      *
14316  *              Front ends when parsing from memory                     *
14317  *                                                                      *
14318  ************************************************************************/
14319
14320 /**
14321  * xmlCreateMemoryParserCtxt:
14322  * @buffer:  a pointer to a char array
14323  * @size:  the size of the array
14324  *
14325  * Create a parser context for an XML in-memory document.
14326  *
14327  * Returns the new parser context or NULL
14328  */
14329 xmlParserCtxtPtr
14330 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14331     xmlParserCtxtPtr ctxt;
14332     xmlParserInputPtr input;
14333     xmlParserInputBufferPtr buf;
14334
14335     if (buffer == NULL)
14336         return(NULL);
14337     if (size <= 0)
14338         return(NULL);
14339
14340     ctxt = xmlNewParserCtxt();
14341     if (ctxt == NULL)
14342         return(NULL);
14343
14344     /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14345     buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14346     if (buf == NULL) {
14347         xmlFreeParserCtxt(ctxt);
14348         return(NULL);
14349     }
14350
14351     input = xmlNewInputStream(ctxt);
14352     if (input == NULL) {
14353         xmlFreeParserInputBuffer(buf);
14354         xmlFreeParserCtxt(ctxt);
14355         return(NULL);
14356     }
14357
14358     input->filename = NULL;
14359     input->buf = buf;
14360     xmlBufResetInput(input->buf->buffer, input);
14361
14362     inputPush(ctxt, input);
14363     return(ctxt);
14364 }
14365
14366 #ifdef LIBXML_SAX1_ENABLED
14367 /**
14368  * xmlSAXParseMemoryWithData:
14369  * @sax:  the SAX handler block
14370  * @buffer:  an pointer to a char array
14371  * @size:  the size of the array
14372  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14373  *             documents
14374  * @data:  the userdata
14375  *
14376  * parse an XML in-memory block and use the given SAX function block
14377  * to handle the parsing callback. If sax is NULL, fallback to the default
14378  * DOM tree building routines.
14379  *
14380  * User data (void *) is stored within the parser context in the
14381  * context's _private member, so it is available nearly everywhere in libxml
14382  *
14383  * Returns the resulting document tree
14384  */
14385
14386 xmlDocPtr
14387 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14388                   int size, int recovery, void *data) {
14389     xmlDocPtr ret;
14390     xmlParserCtxtPtr ctxt;
14391
14392     xmlInitParser();
14393
14394     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14395     if (ctxt == NULL) return(NULL);
14396     if (sax != NULL) {
14397         if (ctxt->sax != NULL)
14398             xmlFree(ctxt->sax);
14399         ctxt->sax = sax;
14400     }
14401     xmlDetectSAX2(ctxt);
14402     if (data!=NULL) {
14403         ctxt->_private=data;
14404     }
14405
14406     ctxt->recovery = recovery;
14407
14408     xmlParseDocument(ctxt);
14409
14410     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14411     else {
14412        ret = NULL;
14413        xmlFreeDoc(ctxt->myDoc);
14414        ctxt->myDoc = NULL;
14415     }
14416     if (sax != NULL)
14417         ctxt->sax = NULL;
14418     xmlFreeParserCtxt(ctxt);
14419
14420     return(ret);
14421 }
14422
14423 /**
14424  * xmlSAXParseMemory:
14425  * @sax:  the SAX handler block
14426  * @buffer:  an pointer to a char array
14427  * @size:  the size of the array
14428  * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14429  *             documents
14430  *
14431  * parse an XML in-memory block and use the given SAX function block
14432  * to handle the parsing callback. If sax is NULL, fallback to the default
14433  * DOM tree building routines.
14434  *
14435  * Returns the resulting document tree
14436  */
14437 xmlDocPtr
14438 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14439                   int size, int recovery) {
14440     return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14441 }
14442
14443 /**
14444  * xmlParseMemory:
14445  * @buffer:  an pointer to a char array
14446  * @size:  the size of the array
14447  *
14448  * parse an XML in-memory block and build a tree.
14449  *
14450  * Returns the resulting document tree
14451  */
14452
14453 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14454    return(xmlSAXParseMemory(NULL, buffer, size, 0));
14455 }
14456
14457 /**
14458  * xmlRecoverMemory:
14459  * @buffer:  an pointer to a char array
14460  * @size:  the size of the array
14461  *
14462  * parse an XML in-memory block and build a tree.
14463  * In the case the document is not Well Formed, an attempt to
14464  * build a tree is tried anyway
14465  *
14466  * Returns the resulting document tree or NULL in case of error
14467  */
14468
14469 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14470    return(xmlSAXParseMemory(NULL, buffer, size, 1));
14471 }
14472
14473 /**
14474  * xmlSAXUserParseMemory:
14475  * @sax:  a SAX handler
14476  * @user_data:  The user data returned on SAX callbacks
14477  * @buffer:  an in-memory XML document input
14478  * @size:  the length of the XML document in bytes
14479  *
14480  * A better SAX parsing routine.
14481  * parse an XML in-memory buffer and call the given SAX handler routines.
14482  *
14483  * Returns 0 in case of success or a error number otherwise
14484  */
14485 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14486                           const char *buffer, int size) {
14487     int ret = 0;
14488     xmlParserCtxtPtr ctxt;
14489
14490     xmlInitParser();
14491
14492     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14493     if (ctxt == NULL) return -1;
14494     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14495         xmlFree(ctxt->sax);
14496     ctxt->sax = sax;
14497     xmlDetectSAX2(ctxt);
14498
14499     if (user_data != NULL)
14500         ctxt->userData = user_data;
14501
14502     xmlParseDocument(ctxt);
14503
14504     if (ctxt->wellFormed)
14505         ret = 0;
14506     else {
14507         if (ctxt->errNo != 0)
14508             ret = ctxt->errNo;
14509         else
14510             ret = -1;
14511     }
14512     if (sax != NULL)
14513         ctxt->sax = NULL;
14514     if (ctxt->myDoc != NULL) {
14515         xmlFreeDoc(ctxt->myDoc);
14516         ctxt->myDoc = NULL;
14517     }
14518     xmlFreeParserCtxt(ctxt);
14519
14520     return ret;
14521 }
14522 #endif /* LIBXML_SAX1_ENABLED */
14523
14524 /**
14525  * xmlCreateDocParserCtxt:
14526  * @cur:  a pointer to an array of xmlChar
14527  *
14528  * Creates a parser context for an XML in-memory document.
14529  *
14530  * Returns the new parser context or NULL
14531  */
14532 xmlParserCtxtPtr
14533 xmlCreateDocParserCtxt(const xmlChar *cur) {
14534     int len;
14535
14536     if (cur == NULL)
14537         return(NULL);
14538     len = xmlStrlen(cur);
14539     return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14540 }
14541
14542 #ifdef LIBXML_SAX1_ENABLED
14543 /**
14544  * xmlSAXParseDoc:
14545  * @sax:  the SAX handler block
14546  * @cur:  a pointer to an array of xmlChar
14547  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14548  *             documents
14549  *
14550  * parse an XML in-memory document and build a tree.
14551  * It use the given SAX function block to handle the parsing callback.
14552  * If sax is NULL, fallback to the default DOM tree building routines.
14553  *
14554  * Returns the resulting document tree
14555  */
14556
14557 xmlDocPtr
14558 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14559     xmlDocPtr ret;
14560     xmlParserCtxtPtr ctxt;
14561     xmlSAXHandlerPtr oldsax = NULL;
14562
14563     if (cur == NULL) return(NULL);
14564
14565
14566     ctxt = xmlCreateDocParserCtxt(cur);
14567     if (ctxt == NULL) return(NULL);
14568     if (sax != NULL) {
14569         oldsax = ctxt->sax;
14570         ctxt->sax = sax;
14571         ctxt->userData = NULL;
14572     }
14573     xmlDetectSAX2(ctxt);
14574
14575     xmlParseDocument(ctxt);
14576     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14577     else {
14578        ret = NULL;
14579        xmlFreeDoc(ctxt->myDoc);
14580        ctxt->myDoc = NULL;
14581     }
14582     if (sax != NULL)
14583         ctxt->sax = oldsax;
14584     xmlFreeParserCtxt(ctxt);
14585
14586     return(ret);
14587 }
14588
14589 /**
14590  * xmlParseDoc:
14591  * @cur:  a pointer to an array of xmlChar
14592  *
14593  * parse an XML in-memory document and build a tree.
14594  *
14595  * Returns the resulting document tree
14596  */
14597
14598 xmlDocPtr
14599 xmlParseDoc(const xmlChar *cur) {
14600     return(xmlSAXParseDoc(NULL, cur, 0));
14601 }
14602 #endif /* LIBXML_SAX1_ENABLED */
14603
14604 #ifdef LIBXML_LEGACY_ENABLED
14605 /************************************************************************
14606  *                                                                      *
14607  *      Specific function to keep track of entities references          *
14608  *      and used by the XSLT debugger                                   *
14609  *                                                                      *
14610  ************************************************************************/
14611
14612 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14613
14614 /**
14615  * xmlAddEntityReference:
14616  * @ent : A valid entity
14617  * @firstNode : A valid first node for children of entity
14618  * @lastNode : A valid last node of children entity
14619  *
14620  * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14621  */
14622 static void
14623 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14624                       xmlNodePtr lastNode)
14625 {
14626     if (xmlEntityRefFunc != NULL) {
14627         (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14628     }
14629 }
14630
14631
14632 /**
14633  * xmlSetEntityReferenceFunc:
14634  * @func: A valid function
14635  *
14636  * Set the function to call call back when a xml reference has been made
14637  */
14638 void
14639 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14640 {
14641     xmlEntityRefFunc = func;
14642 }
14643 #endif /* LIBXML_LEGACY_ENABLED */
14644
14645 /************************************************************************
14646  *                                                                      *
14647  *                              Miscellaneous                           *
14648  *                                                                      *
14649  ************************************************************************/
14650
14651 #ifdef LIBXML_XPATH_ENABLED
14652 #include <libxml/xpath.h>
14653 #endif
14654
14655 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14656 static int xmlParserInitialized = 0;
14657
14658 /**
14659  * xmlInitParser:
14660  *
14661  * Initialization function for the XML parser.
14662  * This is not reentrant. Call once before processing in case of
14663  * use in multithreaded programs.
14664  */
14665
14666 void
14667 xmlInitParser(void) {
14668     if (xmlParserInitialized != 0)
14669         return;
14670
14671 #if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14672     if (xmlFree == free)
14673         atexit(xmlCleanupParser);
14674 #endif
14675
14676 #ifdef LIBXML_THREAD_ENABLED
14677     __xmlGlobalInitMutexLock();
14678     if (xmlParserInitialized == 0) {
14679 #endif
14680         xmlInitThreads();
14681         xmlInitGlobals();
14682         if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14683             (xmlGenericError == NULL))
14684             initGenericErrorDefaultFunc(NULL);
14685         xmlInitMemory();
14686         xmlInitializeDict();
14687         xmlInitCharEncodingHandlers();
14688         xmlDefaultSAXHandlerInit();
14689         xmlRegisterDefaultInputCallbacks();
14690 #ifdef LIBXML_OUTPUT_ENABLED
14691         xmlRegisterDefaultOutputCallbacks();
14692 #endif /* LIBXML_OUTPUT_ENABLED */
14693 #ifdef LIBXML_HTML_ENABLED
14694         htmlInitAutoClose();
14695         htmlDefaultSAXHandlerInit();
14696 #endif
14697 #ifdef LIBXML_XPATH_ENABLED
14698         xmlXPathInit();
14699 #endif
14700         xmlParserInitialized = 1;
14701 #ifdef LIBXML_THREAD_ENABLED
14702     }
14703     __xmlGlobalInitMutexUnlock();
14704 #endif
14705 }
14706
14707 /**
14708  * xmlCleanupParser:
14709  *
14710  * This function name is somewhat misleading. It does not clean up
14711  * parser state, it cleans up memory allocated by the library itself.
14712  * It is a cleanup function for the XML library. It tries to reclaim all
14713  * related global memory allocated for the library processing.
14714  * It doesn't deallocate any document related memory. One should
14715  * call xmlCleanupParser() only when the process has finished using
14716  * the library and all XML/HTML documents built with it.
14717  * See also xmlInitParser() which has the opposite function of preparing
14718  * the library for operations.
14719  *
14720  * WARNING: if your application is multithreaded or has plugin support
14721  *          calling this may crash the application if another thread or
14722  *          a plugin is still using libxml2. It's sometimes very hard to
14723  *          guess if libxml2 is in use in the application, some libraries
14724  *          or plugins may use it without notice. In case of doubt abstain
14725  *          from calling this function or do it just before calling exit()
14726  *          to avoid leak reports from valgrind !
14727  */
14728
14729 void
14730 xmlCleanupParser(void) {
14731     if (!xmlParserInitialized)
14732         return;
14733
14734     xmlCleanupCharEncodingHandlers();
14735 #ifdef LIBXML_CATALOG_ENABLED
14736     xmlCatalogCleanup();
14737 #endif
14738     xmlDictCleanup();
14739     xmlCleanupInputCallbacks();
14740 #ifdef LIBXML_OUTPUT_ENABLED
14741     xmlCleanupOutputCallbacks();
14742 #endif
14743 #ifdef LIBXML_SCHEMAS_ENABLED
14744     xmlSchemaCleanupTypes();
14745     xmlRelaxNGCleanupTypes();
14746 #endif
14747     xmlCleanupGlobals();
14748     xmlCleanupThreads(); /* must be last if called not from the main thread */
14749     xmlCleanupMemory();
14750     xmlParserInitialized = 0;
14751 }
14752
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Destructor hook that releases the library's global memory, but only
 * when the default deallocator is still installed.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14766
14767 /************************************************************************
14768  *                                                                      *
14769  *      New set (2.6.0) of simpler and more flexible APIs               *
14770  *                                                                      *
14771  ************************************************************************/
14772
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and when "dict" is NULL the
 * string is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as a single statement (safe in an unbraced if/else) and must be
 * followed by a semicolon at the call site. Note that @str is evaluated
 * more than once.
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0)
14784
14785 /**
14786  * xmlCtxtReset:
14787  * @ctxt: an XML parser context
14788  *
14789  * Reset a parser context
14790  */
14791 void
14792 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14793 {
14794     xmlParserInputPtr input;
14795     xmlDictPtr dict;
14796
14797     if (ctxt == NULL)
14798         return;
14799
14800     dict = ctxt->dict;
14801
14802     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14803         xmlFreeInputStream(input);
14804     }
14805     ctxt->inputNr = 0;
14806     ctxt->input = NULL;
14807
14808     ctxt->spaceNr = 0;
14809     if (ctxt->spaceTab != NULL) {
14810         ctxt->spaceTab[0] = -1;
14811         ctxt->space = &ctxt->spaceTab[0];
14812     } else {
14813         ctxt->space = NULL;
14814     }
14815
14816
14817     ctxt->nodeNr = 0;
14818     ctxt->node = NULL;
14819
14820     ctxt->nameNr = 0;
14821     ctxt->name = NULL;
14822
14823     ctxt->nsNr = 0;
14824
14825     DICT_FREE(ctxt->version);
14826     ctxt->version = NULL;
14827     DICT_FREE(ctxt->encoding);
14828     ctxt->encoding = NULL;
14829     DICT_FREE(ctxt->directory);
14830     ctxt->directory = NULL;
14831     DICT_FREE(ctxt->extSubURI);
14832     ctxt->extSubURI = NULL;
14833     DICT_FREE(ctxt->extSubSystem);
14834     ctxt->extSubSystem = NULL;
14835     if (ctxt->myDoc != NULL)
14836         xmlFreeDoc(ctxt->myDoc);
14837     ctxt->myDoc = NULL;
14838
14839     ctxt->standalone = -1;
14840     ctxt->hasExternalSubset = 0;
14841     ctxt->hasPErefs = 0;
14842     ctxt->html = 0;
14843     ctxt->external = 0;
14844     ctxt->instate = XML_PARSER_START;
14845     ctxt->token = 0;
14846
14847     ctxt->wellFormed = 1;
14848     ctxt->nsWellFormed = 1;
14849     ctxt->disableSAX = 0;
14850     ctxt->valid = 1;
14851 #if 0
14852     ctxt->vctxt.userData = ctxt;
14853     ctxt->vctxt.error = xmlParserValidityError;
14854     ctxt->vctxt.warning = xmlParserValidityWarning;
14855 #endif
14856     ctxt->record_info = 0;
14857     ctxt->checkIndex = 0;
14858     ctxt->inSubset = 0;
14859     ctxt->errNo = XML_ERR_OK;
14860     ctxt->depth = 0;
14861     ctxt->charset = XML_CHAR_ENCODING_UTF8;
14862     ctxt->catalogs = NULL;
14863     ctxt->nbentities = 0;
14864     ctxt->sizeentities = 0;
14865     ctxt->sizeentcopy = 0;
14866     xmlInitNodeInfoSeq(&ctxt->node_seq);
14867
14868     if (ctxt->attsDefault != NULL) {
14869         xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14870         ctxt->attsDefault = NULL;
14871     }
14872     if (ctxt->attsSpecial != NULL) {
14873         xmlHashFree(ctxt->attsSpecial, NULL);
14874         ctxt->attsSpecial = NULL;
14875     }
14876
14877 #ifdef LIBXML_CATALOG_ENABLED
14878     if (ctxt->catalogs != NULL)
14879         xmlCatalogFreeLocal(ctxt->catalogs);
14880 #endif
14881     if (ctxt->lastError.code != XML_ERR_OK)
14882         xmlResetError(&ctxt->lastError);
14883 }
14884
14885 /**
14886  * xmlCtxtResetPush:
14887  * @ctxt: an XML parser context
14888  * @chunk:  a pointer to an array of chars
14889  * @size:  number of chars in the array
14890  * @filename:  an optional file name or URI
14891  * @encoding:  the document encoding, or NULL
14892  *
14893  * Reset a push parser context
14894  *
14895  * Returns 0 in case of success and 1 in case of error
14896  */
14897 int
14898 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14899                  int size, const char *filename, const char *encoding)
14900 {
14901     xmlParserInputPtr inputStream;
14902     xmlParserInputBufferPtr buf;
14903     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14904
14905     if (ctxt == NULL)
14906         return(1);
14907
14908     if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14909         enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14910
14911     buf = xmlAllocParserInputBuffer(enc);
14912     if (buf == NULL)
14913         return(1);
14914
14915     if (ctxt == NULL) {
14916         xmlFreeParserInputBuffer(buf);
14917         return(1);
14918     }
14919
14920     xmlCtxtReset(ctxt);
14921
14922     if (filename == NULL) {
14923         ctxt->directory = NULL;
14924     } else {
14925         ctxt->directory = xmlParserGetDirectory(filename);
14926     }
14927
14928     inputStream = xmlNewInputStream(ctxt);
14929     if (inputStream == NULL) {
14930         xmlFreeParserInputBuffer(buf);
14931         return(1);
14932     }
14933
14934     if (filename == NULL)
14935         inputStream->filename = NULL;
14936     else
14937         inputStream->filename = (char *)
14938             xmlCanonicPath((const xmlChar *) filename);
14939     inputStream->buf = buf;
14940     xmlBufResetInput(buf->buffer, inputStream);
14941
14942     inputPush(ctxt, inputStream);
14943
14944     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14945         (ctxt->input->buf != NULL)) {
14946         size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14947         size_t cur = ctxt->input->cur - ctxt->input->base;
14948
14949         xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14950
14951         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14952 #ifdef DEBUG_PUSH
14953         xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14954 #endif
14955     }
14956
14957     if (encoding != NULL) {
14958         xmlCharEncodingHandlerPtr hdlr;
14959
14960         if (ctxt->encoding != NULL)
14961             xmlFree((xmlChar *) ctxt->encoding);
14962         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14963
14964         hdlr = xmlFindCharEncodingHandler(encoding);
14965         if (hdlr != NULL) {
14966             xmlSwitchToEncoding(ctxt, hdlr);
14967         } else {
14968             xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14969                               "Unsupported encoding %s\n", BAD_CAST encoding);
14970         }
14971     } else if (enc != XML_CHAR_ENCODING_NONE) {
14972         xmlSwitchEncoding(ctxt, enc);
14973     }
14974
14975     return(0);
14976 }
14977
14978
14979 /**
14980  * xmlCtxtUseOptionsInternal:
14981  * @ctxt: an XML parser context
14982  * @options:  a combination of xmlParserOption
14983  * @encoding:  the user provided encoding to use
14984  *
14985  * Applies the options to the parser context
14986  *
14987  * Returns 0 in case of success, the set of unknown or unimplemented options
14988  *         in case of error.
14989  */
14990 static int
14991 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14992 {
14993     if (ctxt == NULL)
14994         return(-1);
14995     if (encoding != NULL) {
14996         if (ctxt->encoding != NULL)
14997             xmlFree((xmlChar *) ctxt->encoding);
14998         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14999     }
15000     if (options & XML_PARSE_RECOVER) {
15001         ctxt->recovery = 1;
15002         options -= XML_PARSE_RECOVER;
15003         ctxt->options |= XML_PARSE_RECOVER;
15004     } else
15005         ctxt->recovery = 0;
15006     if (options & XML_PARSE_DTDLOAD) {
15007         ctxt->loadsubset = XML_DETECT_IDS;
15008         options -= XML_PARSE_DTDLOAD;
15009         ctxt->options |= XML_PARSE_DTDLOAD;
15010     } else
15011         ctxt->loadsubset = 0;
15012     if (options & XML_PARSE_DTDATTR) {
15013         ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15014         options -= XML_PARSE_DTDATTR;
15015         ctxt->options |= XML_PARSE_DTDATTR;
15016     }
15017     if (options & XML_PARSE_NOENT) {
15018         ctxt->replaceEntities = 1;
15019         /* ctxt->loadsubset |= XML_DETECT_IDS; */
15020         options -= XML_PARSE_NOENT;
15021         ctxt->options |= XML_PARSE_NOENT;
15022     } else
15023         ctxt->replaceEntities = 0;
15024     if (options & XML_PARSE_PEDANTIC) {
15025         ctxt->pedantic = 1;
15026         options -= XML_PARSE_PEDANTIC;
15027         ctxt->options |= XML_PARSE_PEDANTIC;
15028     } else
15029         ctxt->pedantic = 0;
15030     if (options & XML_PARSE_NOBLANKS) {
15031         ctxt->keepBlanks = 0;
15032         ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15033         options -= XML_PARSE_NOBLANKS;
15034         ctxt->options |= XML_PARSE_NOBLANKS;
15035     } else
15036         ctxt->keepBlanks = 1;
15037     if (options & XML_PARSE_DTDVALID) {
15038         ctxt->validate = 1;
15039         if (options & XML_PARSE_NOWARNING)
15040             ctxt->vctxt.warning = NULL;
15041         if (options & XML_PARSE_NOERROR)
15042             ctxt->vctxt.error = NULL;
15043         options -= XML_PARSE_DTDVALID;
15044         ctxt->options |= XML_PARSE_DTDVALID;
15045     } else
15046         ctxt->validate = 0;
15047     if (options & XML_PARSE_NOWARNING) {
15048         ctxt->sax->warning = NULL;
15049         options -= XML_PARSE_NOWARNING;
15050     }
15051     if (options & XML_PARSE_NOERROR) {
15052         ctxt->sax->error = NULL;
15053         ctxt->sax->fatalError = NULL;
15054         options -= XML_PARSE_NOERROR;
15055     }
15056 #ifdef LIBXML_SAX1_ENABLED
15057     if (options & XML_PARSE_SAX1) {
15058         ctxt->sax->startElement = xmlSAX2StartElement;
15059         ctxt->sax->endElement = xmlSAX2EndElement;
15060         ctxt->sax->startElementNs = NULL;
15061         ctxt->sax->endElementNs = NULL;
15062         ctxt->sax->initialized = 1;
15063         options -= XML_PARSE_SAX1;
15064         ctxt->options |= XML_PARSE_SAX1;
15065     }
15066 #endif /* LIBXML_SAX1_ENABLED */
15067     if (options & XML_PARSE_NODICT) {
15068         ctxt->dictNames = 0;
15069         options -= XML_PARSE_NODICT;
15070         ctxt->options |= XML_PARSE_NODICT;
15071     } else {
15072         ctxt->dictNames = 1;
15073     }
15074     if (options & XML_PARSE_NOCDATA) {
15075         ctxt->sax->cdataBlock = NULL;
15076         options -= XML_PARSE_NOCDATA;
15077         ctxt->options |= XML_PARSE_NOCDATA;
15078     }
15079     if (options & XML_PARSE_NSCLEAN) {
15080         ctxt->options |= XML_PARSE_NSCLEAN;
15081         options -= XML_PARSE_NSCLEAN;
15082     }
15083     if (options & XML_PARSE_NONET) {
15084         ctxt->options |= XML_PARSE_NONET;
15085         options -= XML_PARSE_NONET;
15086     }
15087     if (options & XML_PARSE_COMPACT) {
15088         ctxt->options |= XML_PARSE_COMPACT;
15089         options -= XML_PARSE_COMPACT;
15090     }
15091     if (options & XML_PARSE_OLD10) {
15092         ctxt->options |= XML_PARSE_OLD10;
15093         options -= XML_PARSE_OLD10;
15094     }
15095     if (options & XML_PARSE_NOBASEFIX) {
15096         ctxt->options |= XML_PARSE_NOBASEFIX;
15097         options -= XML_PARSE_NOBASEFIX;
15098     }
15099     if (options & XML_PARSE_HUGE) {
15100         ctxt->options |= XML_PARSE_HUGE;
15101         options -= XML_PARSE_HUGE;
15102         if (ctxt->dict != NULL)
15103             xmlDictSetLimit(ctxt->dict, 0);
15104     }
15105     if (options & XML_PARSE_OLDSAX) {
15106         ctxt->options |= XML_PARSE_OLDSAX;
15107         options -= XML_PARSE_OLDSAX;
15108     }
15109     if (options & XML_PARSE_IGNORE_ENC) {
15110         ctxt->options |= XML_PARSE_IGNORE_ENC;
15111         options -= XML_PARSE_IGNORE_ENC;
15112     }
15113     if (options & XML_PARSE_BIG_LINES) {
15114         ctxt->options |= XML_PARSE_BIG_LINES;
15115         options -= XML_PARSE_BIG_LINES;
15116     }
15117     ctxt->linenumbers = 1;
15118     return (options);
15119 }
15120
15121 /**
15122  * xmlCtxtUseOptions:
15123  * @ctxt: an XML parser context
15124  * @options:  a combination of xmlParserOption
15125  *
15126  * Applies the options to the parser context
15127  *
15128  * Returns 0 in case of success, the set of unknown or unimplemented options
15129  *         in case of error.
15130  */
15131 int
15132 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15133 {
15134    return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15135 }
15136
15137 /**
15138  * xmlDoRead:
15139  * @ctxt:  an XML parser context
15140  * @URL:  the base URL to use for the document
15141  * @encoding:  the document encoding, or NULL
15142  * @options:  a combination of xmlParserOption
15143  * @reuse:  keep the context for reuse
15144  *
15145  * Common front-end for the xmlRead functions
15146  *
15147  * Returns the resulting document tree or NULL
15148  */
15149 static xmlDocPtr
15150 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15151           int options, int reuse)
15152 {
15153     xmlDocPtr ret;
15154
15155     xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15156     if (encoding != NULL) {
15157         xmlCharEncodingHandlerPtr hdlr;
15158
15159         hdlr = xmlFindCharEncodingHandler(encoding);
15160         if (hdlr != NULL)
15161             xmlSwitchToEncoding(ctxt, hdlr);
15162     }
15163     if ((URL != NULL) && (ctxt->input != NULL) &&
15164         (ctxt->input->filename == NULL))
15165         ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15166     xmlParseDocument(ctxt);
15167     if ((ctxt->wellFormed) || ctxt->recovery)
15168         ret = ctxt->myDoc;
15169     else {
15170         ret = NULL;
15171         if (ctxt->myDoc != NULL) {
15172             xmlFreeDoc(ctxt->myDoc);
15173         }
15174     }
15175     ctxt->myDoc = NULL;
15176     if (!reuse) {
15177         xmlFreeParserCtxt(ctxt);
15178     }
15179
15180     return (ret);
15181 }
15182
15183 /**
15184  * xmlReadDoc:
15185  * @cur:  a pointer to a zero terminated string
15186  * @URL:  the base URL to use for the document
15187  * @encoding:  the document encoding, or NULL
15188  * @options:  a combination of xmlParserOption
15189  *
15190  * parse an XML in-memory document and build a tree.
15191  *
15192  * Returns the resulting document tree
15193  */
15194 xmlDocPtr
15195 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15196 {
15197     xmlParserCtxtPtr ctxt;
15198
15199     if (cur == NULL)
15200         return (NULL);
15201     xmlInitParser();
15202
15203     ctxt = xmlCreateDocParserCtxt(cur);
15204     if (ctxt == NULL)
15205         return (NULL);
15206     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15207 }
15208
15209 /**
15210  * xmlReadFile:
15211  * @filename:  a file or URL
15212  * @encoding:  the document encoding, or NULL
15213  * @options:  a combination of xmlParserOption
15214  *
15215  * parse an XML file from the filesystem or the network.
15216  *
15217  * Returns the resulting document tree
15218  */
15219 xmlDocPtr
15220 xmlReadFile(const char *filename, const char *encoding, int options)
15221 {
15222     xmlParserCtxtPtr ctxt;
15223
15224     xmlInitParser();
15225     ctxt = xmlCreateURLParserCtxt(filename, options);
15226     if (ctxt == NULL)
15227         return (NULL);
15228     return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15229 }
15230
15231 /**
15232  * xmlReadMemory:
15233  * @buffer:  a pointer to a char array
15234  * @size:  the size of the array
15235  * @URL:  the base URL to use for the document
15236  * @encoding:  the document encoding, or NULL
15237  * @options:  a combination of xmlParserOption
15238  *
15239  * parse an XML in-memory document and build a tree.
15240  *
15241  * Returns the resulting document tree
15242  */
15243 xmlDocPtr
15244 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15245 {
15246     xmlParserCtxtPtr ctxt;
15247
15248     xmlInitParser();
15249     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15250     if (ctxt == NULL)
15251         return (NULL);
15252     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15253 }
15254
15255 /**
15256  * xmlReadFd:
15257  * @fd:  an open file descriptor
15258  * @URL:  the base URL to use for the document
15259  * @encoding:  the document encoding, or NULL
15260  * @options:  a combination of xmlParserOption
15261  *
15262  * parse an XML from a file descriptor and build a tree.
15263  * NOTE that the file descriptor will not be closed when the
15264  *      reader is closed or reset.
15265  *
15266  * Returns the resulting document tree
15267  */
15268 xmlDocPtr
15269 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15270 {
15271     xmlParserCtxtPtr ctxt;
15272     xmlParserInputBufferPtr input;
15273     xmlParserInputPtr stream;
15274
15275     if (fd < 0)
15276         return (NULL);
15277     xmlInitParser();
15278
15279     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15280     if (input == NULL)
15281         return (NULL);
15282     input->closecallback = NULL;
15283     ctxt = xmlNewParserCtxt();
15284     if (ctxt == NULL) {
15285         xmlFreeParserInputBuffer(input);
15286         return (NULL);
15287     }
15288     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15289     if (stream == NULL) {
15290         xmlFreeParserInputBuffer(input);
15291         xmlFreeParserCtxt(ctxt);
15292         return (NULL);
15293     }
15294     inputPush(ctxt, stream);
15295     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15296 }
15297
15298 /**
15299  * xmlReadIO:
15300  * @ioread:  an I/O read function
15301  * @ioclose:  an I/O close function
15302  * @ioctx:  an I/O handler
15303  * @URL:  the base URL to use for the document
15304  * @encoding:  the document encoding, or NULL
15305  * @options:  a combination of xmlParserOption
15306  *
15307  * parse an XML document from I/O functions and source and build a tree.
15308  *
15309  * Returns the resulting document tree
15310  */
15311 xmlDocPtr
15312 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15313           void *ioctx, const char *URL, const char *encoding, int options)
15314 {
15315     xmlParserCtxtPtr ctxt;
15316     xmlParserInputBufferPtr input;
15317     xmlParserInputPtr stream;
15318
15319     if (ioread == NULL)
15320         return (NULL);
15321     xmlInitParser();
15322
15323     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15324                                          XML_CHAR_ENCODING_NONE);
15325     if (input == NULL) {
15326         if (ioclose != NULL)
15327             ioclose(ioctx);
15328         return (NULL);
15329     }
15330     ctxt = xmlNewParserCtxt();
15331     if (ctxt == NULL) {
15332         xmlFreeParserInputBuffer(input);
15333         return (NULL);
15334     }
15335     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15336     if (stream == NULL) {
15337         xmlFreeParserInputBuffer(input);
15338         xmlFreeParserCtxt(ctxt);
15339         return (NULL);
15340     }
15341     inputPush(ctxt, stream);
15342     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15343 }
15344
15345 /**
15346  * xmlCtxtReadDoc:
15347  * @ctxt:  an XML parser context
15348  * @cur:  a pointer to a zero terminated string
15349  * @URL:  the base URL to use for the document
15350  * @encoding:  the document encoding, or NULL
15351  * @options:  a combination of xmlParserOption
15352  *
15353  * parse an XML in-memory document and build a tree.
15354  * This reuses the existing @ctxt parser context
15355  *
15356  * Returns the resulting document tree
15357  */
15358 xmlDocPtr
15359 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15360                const char *URL, const char *encoding, int options)
15361 {
15362     xmlParserInputPtr stream;
15363
15364     if (cur == NULL)
15365         return (NULL);
15366     if (ctxt == NULL)
15367         return (NULL);
15368     xmlInitParser();
15369
15370     xmlCtxtReset(ctxt);
15371
15372     stream = xmlNewStringInputStream(ctxt, cur);
15373     if (stream == NULL) {
15374         return (NULL);
15375     }
15376     inputPush(ctxt, stream);
15377     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15378 }
15379
15380 /**
15381  * xmlCtxtReadFile:
15382  * @ctxt:  an XML parser context
15383  * @filename:  a file or URL
15384  * @encoding:  the document encoding, or NULL
15385  * @options:  a combination of xmlParserOption
15386  *
15387  * parse an XML file from the filesystem or the network.
15388  * This reuses the existing @ctxt parser context
15389  *
15390  * Returns the resulting document tree
15391  */
15392 xmlDocPtr
15393 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15394                 const char *encoding, int options)
15395 {
15396     xmlParserInputPtr stream;
15397
15398     if (filename == NULL)
15399         return (NULL);
15400     if (ctxt == NULL)
15401         return (NULL);
15402     xmlInitParser();
15403
15404     xmlCtxtReset(ctxt);
15405
15406     stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15407     if (stream == NULL) {
15408         return (NULL);
15409     }
15410     inputPush(ctxt, stream);
15411     return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15412 }
15413
15414 /**
15415  * xmlCtxtReadMemory:
15416  * @ctxt:  an XML parser context
15417  * @buffer:  a pointer to a char array
15418  * @size:  the size of the array
15419  * @URL:  the base URL to use for the document
15420  * @encoding:  the document encoding, or NULL
15421  * @options:  a combination of xmlParserOption
15422  *
15423  * parse an XML in-memory document and build a tree.
15424  * This reuses the existing @ctxt parser context
15425  *
15426  * Returns the resulting document tree
15427  */
15428 xmlDocPtr
15429 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15430                   const char *URL, const char *encoding, int options)
15431 {
15432     xmlParserInputBufferPtr input;
15433     xmlParserInputPtr stream;
15434
15435     if (ctxt == NULL)
15436         return (NULL);
15437     if (buffer == NULL)
15438         return (NULL);
15439     xmlInitParser();
15440
15441     xmlCtxtReset(ctxt);
15442
15443     input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15444     if (input == NULL) {
15445         return(NULL);
15446     }
15447
15448     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15449     if (stream == NULL) {
15450         xmlFreeParserInputBuffer(input);
15451         return(NULL);
15452     }
15453
15454     inputPush(ctxt, stream);
15455     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15456 }
15457
15458 /**
15459  * xmlCtxtReadFd:
15460  * @ctxt:  an XML parser context
15461  * @fd:  an open file descriptor
15462  * @URL:  the base URL to use for the document
15463  * @encoding:  the document encoding, or NULL
15464  * @options:  a combination of xmlParserOption
15465  *
15466  * parse an XML from a file descriptor and build a tree.
15467  * This reuses the existing @ctxt parser context
15468  * NOTE that the file descriptor will not be closed when the
15469  *      reader is closed or reset.
15470  *
15471  * Returns the resulting document tree
15472  */
15473 xmlDocPtr
15474 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15475               const char *URL, const char *encoding, int options)
15476 {
15477     xmlParserInputBufferPtr input;
15478     xmlParserInputPtr stream;
15479
15480     if (fd < 0)
15481         return (NULL);
15482     if (ctxt == NULL)
15483         return (NULL);
15484     xmlInitParser();
15485
15486     xmlCtxtReset(ctxt);
15487
15488
15489     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15490     if (input == NULL)
15491         return (NULL);
15492     input->closecallback = NULL;
15493     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15494     if (stream == NULL) {
15495         xmlFreeParserInputBuffer(input);
15496         return (NULL);
15497     }
15498     inputPush(ctxt, stream);
15499     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15500 }
15501
15502 /**
15503  * xmlCtxtReadIO:
15504  * @ctxt:  an XML parser context
15505  * @ioread:  an I/O read function
15506  * @ioclose:  an I/O close function
15507  * @ioctx:  an I/O handler
15508  * @URL:  the base URL to use for the document
15509  * @encoding:  the document encoding, or NULL
15510  * @options:  a combination of xmlParserOption
15511  *
15512  * parse an XML document from I/O functions and source and build a tree.
15513  * This reuses the existing @ctxt parser context
15514  *
15515  * Returns the resulting document tree
15516  */
15517 xmlDocPtr
15518 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15519               xmlInputCloseCallback ioclose, void *ioctx,
15520               const char *URL,
15521               const char *encoding, int options)
15522 {
15523     xmlParserInputBufferPtr input;
15524     xmlParserInputPtr stream;
15525
15526     if (ioread == NULL)
15527         return (NULL);
15528     if (ctxt == NULL)
15529         return (NULL);
15530     xmlInitParser();
15531
15532     xmlCtxtReset(ctxt);
15533
15534     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15535                                          XML_CHAR_ENCODING_NONE);
15536     if (input == NULL) {
15537         if (ioclose != NULL)
15538             ioclose(ioctx);
15539         return (NULL);
15540     }
15541     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15542     if (stream == NULL) {
15543         xmlFreeParserInputBuffer(input);
15544         return (NULL);
15545     }
15546     inputPush(ctxt, stream);
15547     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15548 }
15549